3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
29 #define pl_yylval (PL_parser->yylval)
31 /* YYINITDEPTH -- initial size of the parser's stacks. */
32 #define YYINITDEPTH 200
34 /* XXX temporary backwards compatibility */
35 #define PL_lex_brackets (PL_parser->lex_brackets)
36 #define PL_lex_brackstack (PL_parser->lex_brackstack)
37 #define PL_lex_casemods (PL_parser->lex_casemods)
38 #define PL_lex_casestack (PL_parser->lex_casestack)
39 #define PL_lex_defer (PL_parser->lex_defer)
40 #define PL_lex_dojoin (PL_parser->lex_dojoin)
41 #define PL_lex_expect (PL_parser->lex_expect)
42 #define PL_lex_formbrack (PL_parser->lex_formbrack)
43 #define PL_lex_inpat (PL_parser->lex_inpat)
44 #define PL_lex_inwhat (PL_parser->lex_inwhat)
45 #define PL_lex_op (PL_parser->lex_op)
46 #define PL_lex_repl (PL_parser->lex_repl)
47 #define PL_lex_starts (PL_parser->lex_starts)
48 #define PL_lex_stuff (PL_parser->lex_stuff)
49 #define PL_multi_start (PL_parser->multi_start)
50 #define PL_multi_open (PL_parser->multi_open)
51 #define PL_multi_close (PL_parser->multi_close)
52 #define PL_pending_ident (PL_parser->pending_ident)
53 #define PL_preambled (PL_parser->preambled)
54 #define PL_sublex_info (PL_parser->sublex_info)
55 #define PL_linestr (PL_parser->linestr)
56 #define PL_expect (PL_parser->expect)
57 #define PL_copline (PL_parser->copline)
58 #define PL_bufptr (PL_parser->bufptr)
59 #define PL_oldbufptr (PL_parser->oldbufptr)
60 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61 #define PL_linestart (PL_parser->linestart)
62 #define PL_bufend (PL_parser->bufend)
63 #define PL_last_uni (PL_parser->last_uni)
64 #define PL_last_lop (PL_parser->last_lop)
65 #define PL_last_lop_op (PL_parser->last_lop_op)
66 #define PL_lex_state (PL_parser->lex_state)
67 #define PL_rsfp (PL_parser->rsfp)
68 #define PL_rsfp_filters (PL_parser->rsfp_filters)
69 #define PL_in_my (PL_parser->in_my)
70 #define PL_in_my_stash (PL_parser->in_my_stash)
71 #define PL_tokenbuf (PL_parser->tokenbuf)
72 #define PL_multi_end (PL_parser->multi_end)
73 #define PL_error_count (PL_parser->error_count)
76 # define PL_endwhite (PL_parser->endwhite)
77 # define PL_faketokens (PL_parser->faketokens)
78 # define PL_lasttoke (PL_parser->lasttoke)
79 # define PL_nextwhite (PL_parser->nextwhite)
80 # define PL_realtokenstart (PL_parser->realtokenstart)
81 # define PL_skipwhite (PL_parser->skipwhite)
82 # define PL_thisclose (PL_parser->thisclose)
83 # define PL_thismad (PL_parser->thismad)
84 # define PL_thisopen (PL_parser->thisopen)
85 # define PL_thisstuff (PL_parser->thisstuff)
86 # define PL_thistoken (PL_parser->thistoken)
87 # define PL_thiswhite (PL_parser->thiswhite)
88 # define PL_thiswhite (PL_parser->thiswhite)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_curforce (PL_parser->curforce)
92 # define PL_nexttoke (PL_parser->nexttoke)
93 # define PL_nexttype (PL_parser->nexttype)
94 # define PL_nextval (PL_parser->nextval)
98 S_pending_ident(pTHX);
100 static const char ident_too_long[] = "Identifier too long";
101 static const char commaless_variable_list[] = "comma-less variable list";
103 #ifndef PERL_NO_UTF16_FILTER
104 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
109 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
110 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
112 # define CURMAD(slot,sv)
113 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
116 #define XFAKEBRACK 128
117 #define XENUMMASK 127
119 #ifdef USE_UTF8_SCRIPTS
120 # define UTF (!IN_BYTES)
122 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
125 /* In variables named $^X, these are the legal values for X.
126 * 1999-02-27 mjd-perl-patch@plover.com */
127 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
129 /* On MacOS, respect nonbreaking spaces */
130 #ifdef MACOS_TRADITIONAL
131 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
133 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136 /* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
138 * can get by with a single comparison (if the compiler is smart enough).
141 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
144 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149 /* at end of code, eg "$x" followed by: */
150 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155 #define LEX_INTERPCONST 2 /* NOT USED */
156 #define LEX_FORMLINE 1 /* expecting a format line */
157 #define LEX_KNOWNEXT 0 /* next token known; just return it */
161 static const char* const lex_state_names[] = {
180 #include "keywords.h"
182 /* CLINE is a macro that ensures PL_copline has a sane value */
187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 # define SKIPSPACE0(s) skipspace0(s)
191 # define SKIPSPACE1(s) skipspace1(s)
192 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193 # define PEEKSPACE(s) skipspace2(s,0)
195 # define SKIPSPACE0(s) skipspace(s)
196 # define SKIPSPACE1(s) skipspace(s)
197 # define SKIPSPACE2(s,tsv) skipspace(s)
198 # define PEEKSPACE(s) skipspace(s)
202 * Convenience functions to return different tokens and prime the
203 * lexer for the next token. They all take an argument.
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
215 * FUN1 : not used, except for not, which isn't a UNIOP
216 * BOop : bitwise or or xor
218 * SHop : shift operator
219 * PWop : power operator
220 * PMop : pattern-matching operator
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
224 * Rop : relational operator <= != gt
226 * Also see LOP and lop() below.
229 #ifdef DEBUGGING /* Serve -DT. */
230 # define REPORT(retval) tokereport((I32)retval)
232 # define REPORT(retval) (retval)
235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
256 /* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
261 #define UNI2(f,x) { \
262 pl_yylval.ival = f; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
268 return REPORT( (int)FUNC1 ); \
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
272 #define UNI(f) UNI2(f,XTERM)
273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
275 #define UNIBRACK(f) { \
276 pl_yylval.ival = f; \
278 PL_last_uni = PL_oldbufptr; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 /* grandfather return to old style */
286 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
290 /* how to interpret the pl_yylval associated with the token */
294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
300 static struct debug_tokens {
302 enum token_type type;
304 } const debug_tokens[] =
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
371 { 0, TOKENTYPE_NONE, NULL }
374 /* dump the returned token in rv, plus any optional arg in pl_yylval */
377 S_tokereport(pTHX_ I32 rv)
381 const char *name = NULL;
382 enum token_type type = TOKENTYPE_NONE;
383 const struct debug_tokens *p;
384 SV* const report = newSVpvs("<== ");
386 for (p = debug_tokens; p->token; p++) {
387 if (p->token == (int)rv) {
394 Perl_sv_catpv(aTHX_ report, name);
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
398 sv_catpvs(report, "EOF");
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)pl_yylval.ival);
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[pl_yylval.ival]);
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", pl_yylval.pval);
415 case TOKENTYPE_OPVAL:
416 if (pl_yylval.opval) {
417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
418 PL_op_name[pl_yylval.opval->op_type]);
419 if (pl_yylval.opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(pl_yylval.opval)));
426 sv_catpvs(report, "(opval=null)");
429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
435 /* print the buffer with suitable escapes */
438 S_printbuf(pTHX_ const char* fmt, const char* s)
440 SV* const tmp = newSVpvs("");
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
455 S_ao(pTHX_ int toketype)
458 if (*PL_bufptr == '=') {
460 if (toketype == ANDAND)
461 pl_yylval.ival = OP_ANDASSIGN;
462 else if (toketype == OROR)
463 pl_yylval.ival = OP_ORASSIGN;
464 else if (toketype == DORDOR)
465 pl_yylval.ival = OP_DORASSIGN;
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
485 S_no_op(pTHX_ const char *what, char *s)
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
496 if (ckWARN_d(WARN_SYNTAX)) {
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
520 * Complain about missing quote/regexp/heredoc terminator.
521 * If it's called with NULL then it cauterizes the line buffer.
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
528 S_missingterm(pTHX_ char *s)
534 char * const nl = strrchr(s,'\n');
540 iscntrl(PL_multi_close)
542 PL_multi_close < 32 || PL_multi_close == 127
546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
551 *tmpbuf = (char)PL_multi_close;
555 q = strchr(s,'"') ? '\'' : '"';
556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
559 #define FEATURE_IS_ENABLED(name) \
560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
562 /* The longest string we pass in. */
563 #define MAX_FEATURE_LEN (sizeof("switch")-1)
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
570 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
573 HV * const hinthv = GvHV(PL_hintgv);
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
586 Perl_deprecate(pTHX_ const char *s)
588 if (ckWARN(WARN_DEPRECATED))
589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
593 Perl_deprecate_old(pTHX_ const char *s)
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
605 "Use of %s is deprecated", s);
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
613 #ifdef PERL_CR_FILTER
617 register const char *s = SvPVX_const(sv);
618 register const char * const e = s + SvCUR(sv);
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
684 /* on scope exit, free this parser and restore any outer one */
686 parser->saved_curcop = PL_curcop;
688 /* initialise lexer state */
691 parser->curforce = -1;
693 parser->nexttoke = 0;
695 parser->copline = NOLINE;
696 parser->lex_state = LEX_NORMAL;
697 parser->expect = XSTATE;
699 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
700 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
702 Newx(parser->lex_brackstack, 120, char);
703 Newx(parser->lex_casestack, 12, char);
704 *parser->lex_casestack = '\0';
707 s = SvPV_const(line, len);
713 parser->linestr = newSVpvs("\n;");
714 } else if (SvREADONLY(line) || s[len-1] != ';') {
715 parser->linestr = newSVsv(line);
717 sv_catpvs(parser->linestr, "\n;");
720 SvREFCNT_inc_simple_void_NN(line);
721 parser->linestr = line;
723 parser->oldoldbufptr =
726 parser->linestart = SvPVX(parser->linestr);
727 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
728 parser->last_lop = parser->last_uni = NULL;
732 /* delete a parser object */
735 Perl_parser_free(pTHX_ const yy_parser *parser)
737 PL_curcop = parser->saved_curcop;
738 SvREFCNT_dec(parser->linestr);
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && parser->old_parser
743 && parser->rsfp != parser->old_parser->rsfp)
744 PerlIO_close(parser->rsfp);
745 SvREFCNT_dec(parser->rsfp_filters);
747 Safefree(parser->stack);
748 Safefree(parser->lex_brackstack);
749 Safefree(parser->lex_casestack);
750 PL_parser = parser->old_parser;
757 * Finalizer for lexing operations. Must be called when the parser is
758 * done with the lexer.
765 PL_doextract = FALSE;
770 * This subroutine has nothing to do with tilting, whether at windmills
771 * or pinball tables. Its name is short for "increment line". It
772 * increments the current line number in CopLINE(PL_curcop) and checks
773 * to see whether the line starts with a comment of the form
774 * # line 500 "foo.pm"
775 * If so, it sets the current line number and file to the values in the comment.
779 S_incline(pTHX_ const char *s)
786 CopLINE_inc(PL_curcop);
789 while (SPACE_OR_TAB(*s))
791 if (strnEQ(s, "line", 4))
795 if (SPACE_OR_TAB(*s))
799 while (SPACE_OR_TAB(*s))
807 while (SPACE_OR_TAB(*s))
809 if (*s == '"' && (t = strchr(s+1, '"'))) {
819 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
821 if (*e != '\n' && *e != '\0')
822 return; /* false alarm */
825 const STRLEN len = t - s;
827 SV *const temp_sv = CopFILESV(PL_curcop);
833 tmplen = SvCUR(temp_sv);
839 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
840 /* must copy *{"::_<(eval N)[oldfilename:L]"}
841 * to *{"::_<newfilename"} */
842 /* However, the long form of evals is only turned on by the
843 debugger - usually they're "(eval %lu)" */
847 STRLEN tmplen2 = len;
848 if (tmplen + 2 <= sizeof smallbuf)
851 Newx(tmpbuf, tmplen + 2, char);
854 memcpy(tmpbuf + 2, cf, tmplen);
856 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
861 if (tmplen2 + 2 <= sizeof smallbuf)
864 Newx(tmpbuf2, tmplen2 + 2, char);
866 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
867 /* Either they malloc'd it, or we malloc'd it,
868 so no prefix is present in ours. */
873 memcpy(tmpbuf2 + 2, s, tmplen2);
876 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
878 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
879 /* adjust ${"::_<newfilename"} to store the new file name */
880 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
881 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
882 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
885 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
887 if (tmpbuf != smallbuf) Safefree(tmpbuf);
890 CopFILE_free(PL_curcop);
891 CopFILE_setn(PL_curcop, s, len);
893 CopLINE_set(PL_curcop, atoi(n)-1);
897 /* skip space before PL_thistoken */
900 S_skipspace0(pTHX_ register char *s)
907 PL_thiswhite = newSVpvs("");
908 sv_catsv(PL_thiswhite, PL_skipwhite);
909 sv_free(PL_skipwhite);
912 PL_realtokenstart = s - SvPVX(PL_linestr);
916 /* skip space after PL_thistoken */
919 S_skipspace1(pTHX_ register char *s)
921 const char *start = s;
922 I32 startoff = start - SvPVX(PL_linestr);
927 start = SvPVX(PL_linestr) + startoff;
928 if (!PL_thistoken && PL_realtokenstart >= 0) {
929 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
930 PL_thistoken = newSVpvn(tstart, start - tstart);
932 PL_realtokenstart = -1;
935 PL_nextwhite = newSVpvs("");
936 sv_catsv(PL_nextwhite, PL_skipwhite);
937 sv_free(PL_skipwhite);
944 S_skipspace2(pTHX_ register char *s, SV **svp)
947 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
948 const I32 startoff = s - SvPVX(PL_linestr);
951 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
952 if (!PL_madskills || !svp)
954 start = SvPVX(PL_linestr) + startoff;
955 if (!PL_thistoken && PL_realtokenstart >= 0) {
956 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 PL_realtokenstart = -1;
963 sv_setsv(*svp, PL_skipwhite);
964 sv_free(PL_skipwhite);
973 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
975 AV *av = CopFILEAVx(PL_curcop);
977 SV * const sv = newSV_type(SVt_PVMG);
979 sv_setsv(sv, orig_sv);
981 sv_setpvn(sv, buf, len);
984 av_store(av, (I32)CopLINE(PL_curcop), sv);
990 * Called to gobble the appropriate amount and type of whitespace.
991 * Skips comments as well.
995 S_skipspace(pTHX_ register char *s)
1000 int startoff = s - SvPVX(PL_linestr);
1003 sv_free(PL_skipwhite);
1008 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1009 while (s < PL_bufend && SPACE_OR_TAB(*s))
1019 SSize_t oldprevlen, oldoldprevlen;
1020 SSize_t oldloplen = 0, oldunilen = 0;
1021 while (s < PL_bufend && isSPACE(*s)) {
1022 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1027 if (s < PL_bufend && *s == '#') {
1028 while (s < PL_bufend && *s != '\n')
1030 if (s < PL_bufend) {
1032 if (PL_in_eval && !PL_rsfp) {
1039 /* only continue to recharge the buffer if we're at the end
1040 * of the buffer, we're not reading from a source filter, and
1041 * we're in normal lexing mode
1043 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1044 PL_lex_state == LEX_FORMLINE)
1051 /* try to recharge the buffer */
1053 curoff = s - SvPVX(PL_linestr);
1056 if ((s = filter_gets(PL_linestr, PL_rsfp,
1057 (prevlen = SvCUR(PL_linestr)))) == NULL)
1060 if (PL_madskills && curoff != startoff) {
1062 PL_skipwhite = newSVpvs("");
1063 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1067 /* mustn't throw out old stuff yet if madpropping */
1068 SvCUR(PL_linestr) = curoff;
1069 s = SvPVX(PL_linestr) + curoff;
1071 if (curoff && s[-1] == '\n')
1075 /* end of file. Add on the -p or -n magic */
1076 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1079 sv_catpvs(PL_linestr,
1080 ";}continue{print or die qq(-p destination: $!\\n);}");
1082 sv_setpvs(PL_linestr,
1083 ";}continue{print or die qq(-p destination: $!\\n);}");
1085 PL_minus_n = PL_minus_p = 0;
1087 else if (PL_minus_n) {
1089 sv_catpvn(PL_linestr, ";}", 2);
1091 sv_setpvn(PL_linestr, ";}", 2);
1097 sv_catpvn(PL_linestr,";", 1);
1099 sv_setpvn(PL_linestr,";", 1);
1102 /* reset variables for next time we lex */
1103 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1109 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1110 PL_last_lop = PL_last_uni = NULL;
1112 /* Close the filehandle. Could be from
1113 * STDIN, or a regular file. If we were reading code from
1114 * STDIN (because the commandline held no -e or filename)
1115 * then we don't close it, we reset it so the code can
1116 * read from STDIN too.
1119 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1120 PerlIO_clearerr(PL_rsfp);
1122 (void)PerlIO_close(PL_rsfp);
1127 /* not at end of file, so we only read another line */
1128 /* make corresponding updates to old pointers, for yyerror() */
1129 oldprevlen = PL_oldbufptr - PL_bufend;
1130 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1132 oldunilen = PL_last_uni - PL_bufend;
1134 oldloplen = PL_last_lop - PL_bufend;
1135 PL_linestart = PL_bufptr = s + prevlen;
1136 PL_bufend = s + SvCUR(PL_linestr);
1138 PL_oldbufptr = s + oldprevlen;
1139 PL_oldoldbufptr = s + oldoldprevlen;
1141 PL_last_uni = s + oldunilen;
1143 PL_last_lop = s + oldloplen;
1146 /* debugger active and we're not compiling the debugger code,
1147 * so store the line into the debugger's array of lines
1149 if (PERLDB_LINE && PL_curstash != PL_debstash)
1150 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1157 PL_skipwhite = newSVpvs("");
1158 curoff = s - SvPVX(PL_linestr);
1159 if (curoff - startoff)
1160 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1169 * Check the unary operators to ensure there's no ambiguity in how they're
1170 * used. An ambiguous piece of code would be:
1172 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1173 * the +5 is its argument.
1183 if (PL_oldoldbufptr != PL_last_uni)
1185 while (isSPACE(*PL_last_uni))
1188 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1190 if ((t = strchr(s, '(')) && t < PL_bufptr)
1193 if (ckWARN_d(WARN_AMBIGUOUS)){
1194 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1195 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1196 (int)(s - PL_last_uni), PL_last_uni);
1201 * LOP : macro to build a list operator. Its behaviour has been replaced
1202 * with a subroutine, S_lop() for which LOP is just another name.
1205 #define LOP(f,x) return lop(f,x,s)
1209 * Build a list operator (or something that might be one). The rules:
1210 * - if we have a next token, then it's a list operator [why?]
1211 * - if the next thing is an opening paren, then it's a function
1212 * - else it's a list operator
1216 S_lop(pTHX_ I32 f, int x, char *s)
1223 PL_last_lop = PL_oldbufptr;
1224 PL_last_lop_op = (OPCODE)f;
1227 return REPORT(LSTOP);
1230 return REPORT(LSTOP);
1233 return REPORT(FUNC);
1236 return REPORT(FUNC);
1238 return REPORT(LSTOP);
1244 * Sets up for an eventual force_next(). start_force(0) basically does
1245 * an unshift, while start_force(-1) does a push. yylex removes items
1250 S_start_force(pTHX_ int where)
1254 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1255 where = PL_lasttoke;
1256 assert(PL_curforce < 0 || PL_curforce == where);
1257 if (PL_curforce != where) {
1258 for (i = PL_lasttoke; i > where; --i) {
1259 PL_nexttoke[i] = PL_nexttoke[i-1];
1263 if (PL_curforce < 0) /* in case of duplicate start_force() */
1264 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1265 PL_curforce = where;
1268 curmad('^', newSVpvs(""));
1269 CURMAD('_', PL_nextwhite);
1274 S_curmad(pTHX_ char slot, SV *sv)
1280 if (PL_curforce < 0)
1281 where = &PL_thismad;
1283 where = &PL_nexttoke[PL_curforce].next_mad;
1286 sv_setpvn(sv, "", 0);
1289 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1291 else if (PL_encoding) {
1292 sv_recode_to_utf8(sv, PL_encoding);
1297 /* keep a slot open for the head of the list? */
1298 if (slot != '_' && *where && (*where)->mad_key == '^') {
1299 (*where)->mad_key = slot;
1300 sv_free((SV*)((*where)->mad_val));
1301 (*where)->mad_val = (void*)sv;
1304 addmad(newMADsv(slot, sv), where, 0);
1307 # define start_force(where) NOOP
1308 # define curmad(slot, sv) NOOP
1313 * When the lexer realizes it knows the next token (for instance,
1314 * it is reordering tokens for the parser) then it can call S_force_next
1315 * to know what token to return the next time the lexer is called. Caller
1316 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1317 * and possibly PL_expect to ensure the lexer handles the token correctly.
1321 S_force_next(pTHX_ I32 type)
1325 if (PL_curforce < 0)
1326 start_force(PL_lasttoke);
1327 PL_nexttoke[PL_curforce].next_type = type;
1328 if (PL_lex_state != LEX_KNOWNEXT)
1329 PL_lex_defer = PL_lex_state;
1330 PL_lex_state = LEX_KNOWNEXT;
1331 PL_lex_expect = PL_expect;
1334 PL_nexttype[PL_nexttoke] = type;
1336 if (PL_lex_state != LEX_KNOWNEXT) {
1337 PL_lex_defer = PL_lex_state;
1338 PL_lex_expect = PL_expect;
1339 PL_lex_state = LEX_KNOWNEXT;
1345 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1348 SV * const sv = newSVpvn_utf8(start, len,
1350 && is_utf8_string((const U8*)start, len));
1356 * When the lexer knows the next thing is a word (for instance, it has
1357 * just seen -> and it knows that the next char is a word char, then
1358 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1362 * char *start : buffer position (must be within PL_linestr)
1363 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1364 * int check_keyword : if true, Perl checks to make sure the word isn't
1365 * a keyword (do this if the word is a label, e.g. goto FOO)
1366 * int allow_pack : if true, : characters will also be allowed (require,
1367 * use, etc. do this)
1368 * int allow_initial_tick : used by the "sub" lexer only.
1372 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1378 start = SKIPSPACE1(start);
1380 if (isIDFIRST_lazy_if(s,UTF) ||
1381 (allow_pack && *s == ':') ||
1382 (allow_initial_tick && *s == '\'') )
1384 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1385 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1387 start_force(PL_curforce);
1389 curmad('X', newSVpvn(start,s-start));
1390 if (token == METHOD) {
1395 PL_expect = XOPERATOR;
1399 curmad('g', newSVpvs( "forced" ));
1400 NEXTVAL_NEXTTOKE.opval
1401 = (OP*)newSVOP(OP_CONST,0,
1402 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1403 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1411 * Called when the lexer wants $foo *foo &foo etc, but the program
1412 * text only contains the "foo" portion. The first argument is a pointer
1413 * to the "foo", and the second argument is the type symbol to prefix.
1414 * Forces the next token to be a "WORD".
1415 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1419 S_force_ident(pTHX_ register const char *s, int kind)
1423 const STRLEN len = strlen(s);
1424 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1425 start_force(PL_curforce);
1426 NEXTVAL_NEXTTOKE.opval = o;
1429 o->op_private = OPpCONST_ENTERED;
1430 /* XXX see note in pp_entereval() for why we forgo typo
1431 warnings if the symbol must be introduced in an eval.
1433 gv_fetchpvn_flags(s, len,
1434 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1436 kind == '$' ? SVt_PV :
1437 kind == '@' ? SVt_PVAV :
1438 kind == '%' ? SVt_PVHV :
1446 Perl_str_to_version(pTHX_ SV *sv)
1451 const char *start = SvPV_const(sv,len);
1452 const char * const end = start + len;
1453 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1454 while (start < end) {
1458 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1463 retval += ((NV)n)/nshift;
1472 * Forces the next token to be a version number.
1473 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1474 * and if "guessing" is TRUE, then no new token is created (and the caller
1475 * must use an alternative parsing method).
1479 S_force_version(pTHX_ char *s, int guessing)
1485 I32 startoff = s - SvPVX(PL_linestr);
1494 while (isDIGIT(*d) || *d == '_' || *d == '.')
1498 start_force(PL_curforce);
1499 curmad('X', newSVpvn(s,d-s));
1502 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1504 s = scan_num(s, &pl_yylval);
1505 version = pl_yylval.opval;
1506 ver = cSVOPx(version)->op_sv;
1507 if (SvPOK(ver) && !SvNIOK(ver)) {
1508 SvUPGRADE(ver, SVt_PVNV);
1509 SvNV_set(ver, str_to_version(ver));
1510 SvNOK_on(ver); /* hint that it is a version */
1513 else if (guessing) {
1516 sv_free(PL_nextwhite); /* let next token collect whitespace */
1518 s = SvPVX(PL_linestr) + startoff;
1526 if (PL_madskills && !version) {
1527 sv_free(PL_nextwhite); /* let next token collect whitespace */
1529 s = SvPVX(PL_linestr) + startoff;
1532 /* NOTE: The parser sees the package name and the VERSION swapped */
1533 start_force(PL_curforce);
1534 NEXTVAL_NEXTTOKE.opval = version;
1542 * Tokenize a quoted string passed in as an SV. It finds the next
1543 * chunk, up to end of string or a backslash. It may make a new
1544 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1549 S_tokeq(pTHX_ SV *sv)
1553 register char *send;
1561 s = SvPV_force(sv, len);
1562 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1565 while (s < send && *s != '\\')
1570 if ( PL_hints & HINT_NEW_STRING ) {
1571 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1575 if (s + 1 < send && (s[1] == '\\'))
1576 s++; /* all that, just for this */
1581 SvCUR_set(sv, d - SvPVX_const(sv));
1583 if ( PL_hints & HINT_NEW_STRING )
1584 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1589 * Now come three functions related to double-quote context,
1590 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1591 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1592 * interact with PL_lex_state, and create fake ( ... ) argument lists
1593 * to handle functions and concatenation.
1594 * They assume that whoever calls them will be setting up a fake
1595 * join call, because each subthing puts a ',' after it. This lets
1598 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1600 * (I'm not sure whether the spurious commas at the end of lcfirst's
1601 * arguments and join's arguments are created or not).
1606 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1608 * Pattern matching will set PL_lex_op to the pattern-matching op to
1609 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1611 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1613 * Everything else becomes a FUNC.
1615 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1616 * had an OP_CONST or OP_READLINE). This just sets us up for a
1617 * call to S_sublex_push().
1621 S_sublex_start(pTHX)
1624 register const I32 op_type = pl_yylval.ival;
1626 if (op_type == OP_NULL) {
1627 pl_yylval.opval = PL_lex_op;
1631 if (op_type == OP_CONST || op_type == OP_READLINE) {
1632 SV *sv = tokeq(PL_lex_stuff);
1634 if (SvTYPE(sv) == SVt_PVIV) {
1635 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1637 const char * const p = SvPV_const(sv, len);
1638 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1642 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1643 PL_lex_stuff = NULL;
1644 /* Allow <FH> // "foo" */
1645 if (op_type == OP_READLINE)
1646 PL_expect = XTERMORDORDOR;
1649 else if (op_type == OP_BACKTICK && PL_lex_op) {
1650 /* readpipe() vas overriden */
1651 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1652 pl_yylval.opval = PL_lex_op;
1654 PL_lex_stuff = NULL;
1658 PL_sublex_info.super_state = PL_lex_state;
1659 PL_sublex_info.sub_inwhat = (U16)op_type;
1660 PL_sublex_info.sub_op = PL_lex_op;
1661 PL_lex_state = LEX_INTERPPUSH;
1665 pl_yylval.opval = PL_lex_op;
1675 * Create a new scope to save the lexing state. The scope will be
1676 * ended in S_sublex_done. Returns a '(', starting the function arguments
1677 * to the uc, lc, etc. found before.
1678 * Sets PL_lex_state to LEX_INTERPCONCAT.
1687 PL_lex_state = PL_sublex_info.super_state;
1688 SAVEBOOL(PL_lex_dojoin);
1689 SAVEI32(PL_lex_brackets);
1690 SAVEI32(PL_lex_casemods);
1691 SAVEI32(PL_lex_starts);
1692 SAVEI8(PL_lex_state);
1693 SAVEVPTR(PL_lex_inpat);
1694 SAVEI16(PL_lex_inwhat);
1695 SAVECOPLINE(PL_curcop);
1696 SAVEPPTR(PL_bufptr);
1697 SAVEPPTR(PL_bufend);
1698 SAVEPPTR(PL_oldbufptr);
1699 SAVEPPTR(PL_oldoldbufptr);
1700 SAVEPPTR(PL_last_lop);
1701 SAVEPPTR(PL_last_uni);
1702 SAVEPPTR(PL_linestart);
1703 SAVESPTR(PL_linestr);
1704 SAVEGENERICPV(PL_lex_brackstack);
1705 SAVEGENERICPV(PL_lex_casestack);
1707 PL_linestr = PL_lex_stuff;
1708 PL_lex_stuff = NULL;
1710 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1711 = SvPVX(PL_linestr);
1712 PL_bufend += SvCUR(PL_linestr);
1713 PL_last_lop = PL_last_uni = NULL;
1714 SAVEFREESV(PL_linestr);
1716 PL_lex_dojoin = FALSE;
1717 PL_lex_brackets = 0;
1718 Newx(PL_lex_brackstack, 120, char);
1719 Newx(PL_lex_casestack, 12, char);
1720 PL_lex_casemods = 0;
1721 *PL_lex_casestack = '\0';
1723 PL_lex_state = LEX_INTERPCONCAT;
1724 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1726 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1727 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1728 PL_lex_inpat = PL_sublex_info.sub_op;
1730 PL_lex_inpat = NULL;
1737 * Restores lexer state after a S_sublex_push.
1744 if (!PL_lex_starts++) {
1745 SV * const sv = newSVpvs("");
1746 if (SvUTF8(PL_linestr))
1748 PL_expect = XOPERATOR;
1749 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1753 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1754 PL_lex_state = LEX_INTERPCASEMOD;
1758 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1759 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1760 PL_linestr = PL_lex_repl;
1762 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1763 PL_bufend += SvCUR(PL_linestr);
1764 PL_last_lop = PL_last_uni = NULL;
1765 SAVEFREESV(PL_linestr);
1766 PL_lex_dojoin = FALSE;
1767 PL_lex_brackets = 0;
1768 PL_lex_casemods = 0;
1769 *PL_lex_casestack = '\0';
1771 if (SvEVALED(PL_lex_repl)) {
1772 PL_lex_state = LEX_INTERPNORMAL;
1774 /* we don't clear PL_lex_repl here, so that we can check later
1775 whether this is an evalled subst; that means we rely on the
1776 logic to ensure sublex_done() is called again only via the
1777 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1780 PL_lex_state = LEX_INTERPCONCAT;
1790 PL_endwhite = newSVpvs("");
1791 sv_catsv(PL_endwhite, PL_thiswhite);
1795 sv_setpvn(PL_thistoken,"",0);
1797 PL_realtokenstart = -1;
1801 PL_bufend = SvPVX(PL_linestr);
1802 PL_bufend += SvCUR(PL_linestr);
1803 PL_expect = XOPERATOR;
1804 PL_sublex_info.sub_inwhat = 0;
1812 Extracts a pattern, double-quoted string, or transliteration. This
1815 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1816 processing a pattern (PL_lex_inpat is true), a transliteration
1817 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1819 Returns a pointer to the character scanned up to. If this is
1820 advanced from the start pointer supplied (i.e. if anything was
1821 successfully parsed), will leave an OP for the substring scanned
1822 in pl_yylval. Caller must intuit reason for not parsing further
1823 by looking at the next characters herself.
1827 double-quoted style: \r and \n
1828 regexp special ones: \D \s
1831 case and quoting: \U \Q \E
1832 stops on @ and $, but not for $ as tail anchor
1834 In transliterations:
1835 characters are VERY literal, except for - not at the start or end
1836 of the string, which indicates a range. If the range is in bytes,
1837 scan_const expands the range to the full set of intermediate
1838 characters. If the range is in utf8, the hyphen is replaced with
1839 a certain range mark which will be handled by pmtrans() in op.c.
1841 In double-quoted strings:
1843 double-quoted style: \r and \n
1845 deprecated backrefs: \1 (in substitution replacements)
1846 case and quoting: \U \Q \E
1849 scan_const does *not* construct ops to handle interpolated strings.
1850 It stops processing as soon as it finds an embedded $ or @ variable
1851 and leaves it to the caller to work out what's going on.
1853 embedded arrays (whether in pattern or not) could be:
1854 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1856 $ in double-quoted strings must be the symbol of an embedded scalar.
1858 $ in pattern could be $foo or could be tail anchor. Assumption:
1859 it's a tail anchor if $ is the last thing in the string, or if it's
1860 followed by one of "()| \r\n\t"
1862 \1 (backreferences) are turned into $1
1864 The structure of the code is
1865 while (there's a character to process) {
1866 handle transliteration ranges
1867 skip regexp comments /(?#comment)/ and codes /(?{code})/
1868 skip #-initiated comments in //x patterns
1869 check for embedded arrays
1870 check for embedded scalars
1872 leave intact backslashes from leaveit (below)
1873 deprecate \1 in substitution replacements
1874 handle string-changing backslashes \l \U \Q \E, etc.
1875 switch (what was escaped) {
1876 handle \- in a transliteration (becomes a literal -)
1877 handle \132 (octal characters)
1878 handle \x15 and \x{1234} (hex characters)
1879 handle \N{name} (named characters)
1880 handle \cV (control characters)
1881 handle printf-style backslashes (\f, \r, \n, etc)
1883 } (end if backslash)
1884 } (end while character to read)
1889 S_scan_const(pTHX_ char *start)
1892 register char *send = PL_bufend; /* end of the constant */
1893 SV *sv = newSV(send - start); /* sv for the constant */
1894 register char *s = start; /* start of the constant */
1895 register char *d = SvPVX(sv); /* destination for copies */
1896 bool dorange = FALSE; /* are we in a translit range? */
1897 bool didrange = FALSE; /* did we just finish a range? */
1898 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1899 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1902 UV literal_endpoint = 0;
1903 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1906 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1907 /* If we are doing a trans and we know we want UTF8 set expectation */
1908 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1909 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1913 while (s < send || dorange) {
1914 /* get transliterations out of the way (they're most literal) */
1915 if (PL_lex_inwhat == OP_TRANS) {
1916 /* expand a range A-Z to the full set of characters. AIE! */
1918 I32 i; /* current expanded character */
1919 I32 min; /* first character in range */
1920 I32 max; /* last character in range */
1931 char * const c = (char*)utf8_hop((U8*)d, -1);
1935 *c = (char)UTF_TO_NATIVE(0xff);
1936 /* mark the range as done, and continue */
1942 i = d - SvPVX_const(sv); /* remember current offset */
1945 SvLEN(sv) + (has_utf8 ?
1946 (512 - UTF_CONTINUATION_MARK +
1949 /* How many two-byte within 0..255: 128 in UTF-8,
1950 * 96 in UTF-8-mod. */
1952 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1954 d = SvPVX(sv) + i; /* refresh d after realloc */
1958 for (j = 0; j <= 1; j++) {
1959 char * const c = (char*)utf8_hop((U8*)d, -1);
1960 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1966 max = (U8)0xff; /* only to \xff */
1967 uvmax = uv; /* \x{100} to uvmax */
1969 d = c; /* eat endpoint chars */
1974 d -= 2; /* eat the first char and the - */
1975 min = (U8)*d; /* first char in range */
1976 max = (U8)d[1]; /* last char in range */
1983 "Invalid range \"%c-%c\" in transliteration operator",
1984 (char)min, (char)max);
1988 if (literal_endpoint == 2 &&
1989 ((isLOWER(min) && isLOWER(max)) ||
1990 (isUPPER(min) && isUPPER(max)))) {
1992 for (i = min; i <= max; i++)
1994 *d++ = NATIVE_TO_NEED(has_utf8,i);
1996 for (i = min; i <= max; i++)
1998 *d++ = NATIVE_TO_NEED(has_utf8,i);
2003 for (i = min; i <= max; i++)
2006 const U8 ch = (U8)NATIVE_TO_UTF(i);
2007 if (UNI_IS_INVARIANT(ch))
2010 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2011 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2020 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2022 *d++ = (char)UTF_TO_NATIVE(0xff);
2024 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2028 /* mark the range as done, and continue */
2032 literal_endpoint = 0;
2037 /* range begins (ignore - as first or last char) */
2038 else if (*s == '-' && s+1 < send && s != start) {
2040 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2047 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2057 literal_endpoint = 0;
2058 native_range = TRUE;
2063 /* if we get here, we're not doing a transliteration */
2065 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2066 except for the last char, which will be done separately. */
2067 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2069 while (s+1 < send && *s != ')')
2070 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2072 else if (s[2] == '{' /* This should match regcomp.c */
2073 || (s[2] == '?' && s[3] == '{'))
2076 char *regparse = s + (s[2] == '{' ? 3 : 4);
2079 while (count && (c = *regparse)) {
2080 if (c == '\\' && regparse[1])
2088 if (*regparse != ')')
2089 regparse--; /* Leave one char for continuation. */
2090 while (s < regparse)
2091 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2095 /* likewise skip #-initiated comments in //x patterns */
2096 else if (*s == '#' && PL_lex_inpat &&
2097 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2098 while (s+1 < send && *s != '\n')
2099 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2102 /* check for embedded arrays
2103 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2105 else if (*s == '@' && s[1]) {
2106 if (isALNUM_lazy_if(s+1,UTF))
2108 if (strchr(":'{$", s[1]))
2110 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2111 break; /* in regexp, neither @+ nor @- are interpolated */
2114 /* check for embedded scalars. only stop if we're sure it's a
2117 else if (*s == '$') {
2118 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2120 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2121 break; /* in regexp, $ might be tail anchor */
2124 /* End of else if chain - OP_TRANS rejoin rest */
2127 if (*s == '\\' && s+1 < send) {
2130 /* deprecate \1 in strings and substitution replacements */
2131 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2132 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2134 if (ckWARN(WARN_SYNTAX))
2135 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2140 /* string-change backslash escapes */
2141 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2145 /* skip any other backslash escapes in a pattern */
2146 else if (PL_lex_inpat) {
2147 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2148 goto default_action;
2151 /* if we get here, it's either a quoted -, or a digit */
2154 /* quoted - in transliterations */
2156 if (PL_lex_inwhat == OP_TRANS) {
2163 if ((isALPHA(*s) || isDIGIT(*s)) &&
2165 Perl_warner(aTHX_ packWARN(WARN_MISC),
2166 "Unrecognized escape \\%c passed through",
2168 /* default action is to copy the quoted character */
2169 goto default_action;
2172 /* \132 indicates an octal constant */
2173 case '0': case '1': case '2': case '3':
2174 case '4': case '5': case '6': case '7':
2178 uv = grok_oct(s, &len, &flags, NULL);
2181 goto NUM_ESCAPE_INSERT;
2183 /* \x24 indicates a hex constant */
2187 char* const e = strchr(s, '}');
2188 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2189 PERL_SCAN_DISALLOW_PREFIX;
2194 yyerror("Missing right brace on \\x{}");
2198 uv = grok_hex(s, &len, &flags, NULL);
2204 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2205 uv = grok_hex(s, &len, &flags, NULL);
2211 /* Insert oct or hex escaped character.
2212 * There will always enough room in sv since such
2213 * escapes will be longer than any UTF-8 sequence
2214 * they can end up as. */
2216 /* We need to map to chars to ASCII before doing the tests
2219 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2220 if (!has_utf8 && uv > 255) {
2221 /* Might need to recode whatever we have
2222 * accumulated so far if it contains any
2225 * (Can't we keep track of that and avoid
2226 * this rescan? --jhi)
2230 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2231 if (!NATIVE_IS_INVARIANT(*c)) {
2236 const STRLEN offset = d - SvPVX_const(sv);
2238 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2242 while (src >= (const U8 *)SvPVX_const(sv)) {
2243 if (!NATIVE_IS_INVARIANT(*src)) {
2244 const U8 ch = NATIVE_TO_ASCII(*src);
2245 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2246 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2256 if (has_utf8 || uv > 255) {
2257 d = (char*)uvchr_to_utf8((U8*)d, uv);
2259 if (PL_lex_inwhat == OP_TRANS &&
2260 PL_sublex_info.sub_op) {
2261 PL_sublex_info.sub_op->op_private |=
2262 (PL_lex_repl ? OPpTRANS_FROM_UTF
2266 if (uv > 255 && !dorange)
2267 native_range = FALSE;
2279 /* \N{LATIN SMALL LETTER A} is a named character */
2283 char* e = strchr(s, '}');
2289 yyerror("Missing right brace on \\N{}");
2293 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2295 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2296 PERL_SCAN_DISALLOW_PREFIX;
2299 uv = grok_hex(s, &len, &flags, NULL);
2300 if ( e > s && len != (STRLEN)(e - s) ) {
2304 goto NUM_ESCAPE_INSERT;
2306 res = newSVpvn(s + 1, e - s - 1);
2307 res = new_constant( NULL, 0, "charnames",
2308 res, NULL, s - 2, e - s + 3 );
2310 sv_utf8_upgrade(res);
2311 str = SvPV_const(res,len);
2312 #ifdef EBCDIC_NEVER_MIND
2313 /* charnames uses pack U and that has been
2314 * recently changed to do the below uni->native
2315 * mapping, so this would be redundant (and wrong,
2316 * the code point would be doubly converted).
2317 * But leave this in just in case the pack U change
2318 * gets revoked, but the semantics is still
2319 * desireable for charnames. --jhi */
2321 UV uv = utf8_to_uvchr((const U8*)str, 0);
2324 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2326 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2327 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2328 str = SvPV_const(res, len);
2332 if (!has_utf8 && SvUTF8(res)) {
2333 const char * const ostart = SvPVX_const(sv);
2334 SvCUR_set(sv, d - ostart);
2337 sv_utf8_upgrade(sv);
2338 /* this just broke our allocation above... */
2339 SvGROW(sv, (STRLEN)(send - start));
2340 d = SvPVX(sv) + SvCUR(sv);
2343 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2344 const char * const odest = SvPVX_const(sv);
2346 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2347 d = SvPVX(sv) + (d - odest);
2351 native_range = FALSE; /* \N{} is guessed to be Unicode */
2353 Copy(str, d, len, char);
2360 yyerror("Missing braces on \\N{}");
2363 /* \c is a control character */
2372 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2375 yyerror("Missing control char name in \\c");
2379 /* printf-style backslashes, formfeeds, newlines, etc */
2381 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2384 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2387 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2390 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2393 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2396 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2399 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2405 } /* end if (backslash) */
2412 /* If we started with encoded form, or already know we want it
2413 and then encode the next character */
2414 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2416 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2417 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2420 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2421 const STRLEN off = d - SvPVX_const(sv);
2422 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2424 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2427 if (uv > 255 && !dorange)
2428 native_range = FALSE;
2432 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2434 } /* while loop to process each character */
2436 /* terminate the string and set up the sv */
2438 SvCUR_set(sv, d - SvPVX_const(sv));
2439 if (SvCUR(sv) >= SvLEN(sv))
2440 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2443 if (PL_encoding && !has_utf8) {
2444 sv_recode_to_utf8(sv, PL_encoding);
2450 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2451 PL_sublex_info.sub_op->op_private |=
2452 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2456 /* shrink the sv if we allocated more than we used */
2457 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2458 SvPV_shrink_to_cur(sv);
2461 /* return the substring (via pl_yylval) only if we parsed anything */
2462 if (s > PL_bufptr) {
2463 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2464 const char *const key = PL_lex_inpat ? "qr" : "q";
2465 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2469 if (PL_lex_inwhat == OP_TRANS) {
2472 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2480 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2483 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2490 * Returns TRUE if there's more to the expression (e.g., a subscript),
2493 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2495 * ->[ and ->{ return TRUE
2496 * { and [ outside a pattern are always subscripts, so return TRUE
2497 * if we're outside a pattern and it's not { or [, then return FALSE
2498 * if we're in a pattern and the first char is a {
2499 * {4,5} (any digits around the comma) returns FALSE
2500 * if we're in a pattern and the first char is a [
2502 * [SOMETHING] has a funky algorithm to decide whether it's a
2503 * character class or not. It has to deal with things like
2504 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2505 * anything else returns TRUE
2508 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2511 S_intuit_more(pTHX_ register char *s)
2514 if (PL_lex_brackets)
2516 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2518 if (*s != '{' && *s != '[')
2523 /* In a pattern, so maybe we have {n,m}. */
2540 /* On the other hand, maybe we have a character class */
2543 if (*s == ']' || *s == '^')
2546 /* this is terrifying, and it works */
2547 int weight = 2; /* let's weigh the evidence */
2549 unsigned char un_char = 255, last_un_char;
2550 const char * const send = strchr(s,']');
2551 char tmpbuf[sizeof PL_tokenbuf * 4];
2553 if (!send) /* has to be an expression */
2556 Zero(seen,256,char);
2559 else if (isDIGIT(*s)) {
2561 if (isDIGIT(s[1]) && s[2] == ']')
2567 for (; s < send; s++) {
2568 last_un_char = un_char;
2569 un_char = (unsigned char)*s;
2574 weight -= seen[un_char] * 10;
2575 if (isALNUM_lazy_if(s+1,UTF)) {
2577 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2578 len = (int)strlen(tmpbuf);
2579 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2584 else if (*s == '$' && s[1] &&
2585 strchr("[#!%*<>()-=",s[1])) {
2586 if (/*{*/ strchr("])} =",s[2]))
2595 if (strchr("wds]",s[1]))
2597 else if (seen[(U8)'\''] || seen[(U8)'"'])
2599 else if (strchr("rnftbxcav",s[1]))
2601 else if (isDIGIT(s[1])) {
2603 while (s[1] && isDIGIT(s[1]))
2613 if (strchr("aA01! ",last_un_char))
2615 if (strchr("zZ79~",s[1]))
2617 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2618 weight -= 5; /* cope with negative subscript */
2621 if (!isALNUM(last_un_char)
2622 && !(last_un_char == '$' || last_un_char == '@'
2623 || last_un_char == '&')
2624 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2629 if (keyword(tmpbuf, d - tmpbuf, 0))
2632 if (un_char == last_un_char + 1)
2634 weight -= seen[un_char];
2639 if (weight >= 0) /* probably a character class */
2649 * Does all the checking to disambiguate
2651 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2652 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2654 * First argument is the stuff after the first token, e.g. "bar".
2656 * Not a method if bar is a filehandle.
2657 * Not a method if foo is a subroutine prototyped to take a filehandle.
2658 * Not a method if it's really "Foo $bar"
2659 * Method if it's "foo $bar"
2660 * Not a method if it's really "print foo $bar"
2661 * Method if it's really "foo package::" (interpreted as package->foo)
2662 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2663 * Not a method if bar is a filehandle or package, but is quoted with
2668 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2671 char *s = start + (*start == '$');
2672 char tmpbuf[sizeof PL_tokenbuf];
2680 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2684 const char *proto = SvPVX_const(cv);
2695 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2696 /* start is the beginning of the possible filehandle/object,
2697 * and s is the end of it
2698 * tmpbuf is a copy of it
2701 if (*start == '$') {
2702 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2703 isUPPER(*PL_tokenbuf))
2706 len = start - SvPVX(PL_linestr);
2710 start = SvPVX(PL_linestr) + len;
2714 return *s == '(' ? FUNCMETH : METHOD;
2716 if (!keyword(tmpbuf, len, 0)) {
2717 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2721 soff = s - SvPVX(PL_linestr);
2725 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2726 if (indirgv && GvCVu(indirgv))
2728 /* filehandle or package name makes it a method */
2729 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2731 soff = s - SvPVX(PL_linestr);
2734 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2735 return 0; /* no assumptions -- "=>" quotes bearword */
2737 start_force(PL_curforce);
2738 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2739 newSVpvn(tmpbuf,len));
2740 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2742 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2747 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2749 return *s == '(' ? FUNCMETH : METHOD;
2755 /* Encoded script support. filter_add() effectively inserts a
2756 * 'pre-processing' function into the current source input stream.
2757 * Note that the filter function only applies to the current source file
2758 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2760 * The datasv parameter (which may be NULL) can be used to pass
2761 * private data to this instance of the filter. The filter function
2762 * can recover the SV using the FILTER_DATA macro and use it to
2763 * store private buffers and state information.
2765 * The supplied datasv parameter is upgraded to a PVIO type
2766 * and the IoDIRP/IoANY field is used to store the function pointer,
2767 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2768 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2769 * private use must be set using malloc'd pointers.
2773 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2782 if (!PL_rsfp_filters)
2783 PL_rsfp_filters = newAV();
2786 SvUPGRADE(datasv, SVt_PVIO);
2787 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2788 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2789 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2790 FPTR2DPTR(void *, IoANY(datasv)),
2791 SvPV_nolen(datasv)));
2792 av_unshift(PL_rsfp_filters, 1);
2793 av_store(PL_rsfp_filters, 0, datasv) ;
2798 /* Delete most recently added instance of this filter function. */
2800 Perl_filter_del(pTHX_ filter_t funcp)
2806 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2807 FPTR2DPTR(void*, funcp)));
2809 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2811 /* if filter is on top of stack (usual case) just pop it off */
2812 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2813 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2814 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2815 IoANY(datasv) = (void *)NULL;
2816 sv_free(av_pop(PL_rsfp_filters));
2820 /* we need to search for the correct entry and clear it */
2821 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2825 /* Invoke the idxth filter function for the current rsfp. */
2826 /* maxlen 0 = read one text line */
2828 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2833 /* This API is bad. It should have been using unsigned int for maxlen.
2834 Not sure if we want to change the API, but if not we should sanity
2835 check the value here. */
2836 const unsigned int correct_length
2845 if (!PL_parser || !PL_rsfp_filters)
2847 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2848 /* Provide a default input filter to make life easy. */
2849 /* Note that we append to the line. This is handy. */
2850 DEBUG_P(PerlIO_printf(Perl_debug_log,
2851 "filter_read %d: from rsfp\n", idx));
2852 if (correct_length) {
2855 const int old_len = SvCUR(buf_sv);
2857 /* ensure buf_sv is large enough */
2858 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2859 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2860 correct_length)) <= 0) {
2861 if (PerlIO_error(PL_rsfp))
2862 return -1; /* error */
2864 return 0 ; /* end of file */
2866 SvCUR_set(buf_sv, old_len + len) ;
2869 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2870 if (PerlIO_error(PL_rsfp))
2871 return -1; /* error */
2873 return 0 ; /* end of file */
2876 return SvCUR(buf_sv);
2878 /* Skip this filter slot if filter has been deleted */
2879 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2880 DEBUG_P(PerlIO_printf(Perl_debug_log,
2881 "filter_read %d: skipped (filter deleted)\n",
2883 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2885 /* Get function pointer hidden within datasv */
2886 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2887 DEBUG_P(PerlIO_printf(Perl_debug_log,
2888 "filter_read %d: via function %p (%s)\n",
2889 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2890 /* Call function. The function is expected to */
2891 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2892 /* Return: <0:error, =0:eof, >0:not eof */
2893 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2897 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2900 #ifdef PERL_CR_FILTER
2901 if (!PL_rsfp_filters) {
2902 filter_add(S_cr_textfilter,NULL);
2905 if (PL_rsfp_filters) {
2907 SvCUR_set(sv, 0); /* start with empty line */
2908 if (FILTER_READ(0, sv, 0) > 0)
2909 return ( SvPVX(sv) ) ;
2914 return (sv_gets(sv, fp, append));
2918 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2923 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2927 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2928 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2930 return GvHV(gv); /* Foo:: */
2933 /* use constant CLASS => 'MyClass' */
2934 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2935 if (gv && GvCV(gv)) {
2936 SV * const sv = cv_const_sv(GvCV(gv));
2938 pkgname = SvPV_const(sv, len);
2941 return gv_stashpvn(pkgname, len, 0);
2945 * S_readpipe_override
2946 * Check whether readpipe() is overriden, and generates the appropriate
2947 * optree, provided sublex_start() is called afterwards.
2950 S_readpipe_override(pTHX)
2953 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2954 pl_yylval.ival = OP_BACKTICK;
2956 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2958 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2959 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2960 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2962 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2963 append_elem(OP_LIST,
2964 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2965 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2972 * The intent of this yylex wrapper is to minimize the changes to the
2973 * tokener when we aren't interested in collecting madprops. It remains
2974 * to be seen how successful this strategy will be...
2981 char *s = PL_bufptr;
2983 /* make sure PL_thiswhite is initialized */
2987 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2988 if (PL_pending_ident)
2989 return S_pending_ident(aTHX);
2991 /* previous token ate up our whitespace? */
2992 if (!PL_lasttoke && PL_nextwhite) {
2993 PL_thiswhite = PL_nextwhite;
2997 /* isolate the token, and figure out where it is without whitespace */
2998 PL_realtokenstart = -1;
3002 assert(PL_curforce < 0);
3004 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3005 if (!PL_thistoken) {
3006 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3007 PL_thistoken = newSVpvs("");
3009 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3010 PL_thistoken = newSVpvn(tstart, s - tstart);
3013 if (PL_thismad) /* install head */
3014 CURMAD('X', PL_thistoken);
3017 /* last whitespace of a sublex? */
3018 if (optype == ')' && PL_endwhite) {
3019 CURMAD('X', PL_endwhite);
3024 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3025 if (!PL_thiswhite && !PL_endwhite && !optype) {
3026 sv_free(PL_thistoken);
3031 /* put off final whitespace till peg */
3032 if (optype == ';' && !PL_rsfp) {
3033 PL_nextwhite = PL_thiswhite;
3036 else if (PL_thisopen) {
3037 CURMAD('q', PL_thisopen);
3039 sv_free(PL_thistoken);
3043 /* Store actual token text as madprop X */
3044 CURMAD('X', PL_thistoken);
3048 /* add preceding whitespace as madprop _ */
3049 CURMAD('_', PL_thiswhite);
3053 /* add quoted material as madprop = */
3054 CURMAD('=', PL_thisstuff);
3058 /* add terminating quote as madprop Q */
3059 CURMAD('Q', PL_thisclose);
3063 /* special processing based on optype */
3067 /* opval doesn't need a TOKEN since it can already store mp */
3077 if (pl_yylval.opval)
3078 append_madprops(PL_thismad, pl_yylval.opval, 0);
3086 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3095 /* remember any fake bracket that lexer is about to discard */
3096 if (PL_lex_brackets == 1 &&
3097 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3100 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3103 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3104 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3107 break; /* don't bother looking for trailing comment */
3116 /* attach a trailing comment to its statement instead of next token */
3120 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3122 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3124 if (*s == '\n' || *s == '#') {
3125 while (s < PL_bufend && *s != '\n')
3129 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3130 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3147 /* Create new token struct. Note: opvals return early above. */
3148 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3155 S_tokenize_use(pTHX_ int is_use, char *s) {
3157 if (PL_expect != XSTATE)
3158 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3159 is_use ? "use" : "no"));
3161 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3162 s = force_version(s, TRUE);
3163 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3164 start_force(PL_curforce);
3165 NEXTVAL_NEXTTOKE.opval = NULL;
3168 else if (*s == 'v') {
3169 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3170 s = force_version(s, FALSE);
3174 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3175 s = force_version(s, FALSE);
3177 pl_yylval.ival = is_use;
3181 static const char* const exp_name[] =
3182 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3183 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3190 Works out what to call the token just pulled out of the input
3191 stream. The yacc parser takes care of taking the ops we return and
3192 stitching them into a tree.
3198 if read an identifier
3199 if we're in a my declaration
3200 croak if they tried to say my($foo::bar)
3201 build the ops for a my() declaration
3202 if it's an access to a my() variable
3203 are we in a sort block?
3204 croak if my($a); $a <=> $b
3205 build ops for access to a my() variable
3206 if in a dq string, and they've said @foo and we can't find @foo
3208 build ops for a bareword
3209 if we already built the token before, use it.
3214 #pragma segment Perl_yylex
3220 register char *s = PL_bufptr;
3225 /* orig_keyword, gvp, and gv are initialized here because
3226 * jump to the label just_a_word_zero can bypass their
3227 * initialization later. */
3228 I32 orig_keyword = 0;
3233 SV* tmp = newSVpvs("");
3234 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3235 (IV)CopLINE(PL_curcop),
3236 lex_state_names[PL_lex_state],
3237 exp_name[PL_expect],
3238 pv_display(tmp, s, strlen(s), 0, 60));
3241 /* check if there's an identifier for us to look at */
3242 if (PL_pending_ident)
3243 return REPORT(S_pending_ident(aTHX));
3245 /* no identifier pending identification */
3247 switch (PL_lex_state) {
3249 case LEX_NORMAL: /* Some compilers will produce faster */
3250 case LEX_INTERPNORMAL: /* code if we comment these out. */
3254 /* when we've already built the next token, just pull it out of the queue */
3258 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3260 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3261 PL_nexttoke[PL_lasttoke].next_mad = 0;
3262 if (PL_thismad && PL_thismad->mad_key == '_') {
3263 PL_thiswhite = (SV*)PL_thismad->mad_val;
3264 PL_thismad->mad_val = 0;
3265 mad_free(PL_thismad);
3270 PL_lex_state = PL_lex_defer;
3271 PL_expect = PL_lex_expect;
3272 PL_lex_defer = LEX_NORMAL;
3273 if (!PL_nexttoke[PL_lasttoke].next_type)
3278 pl_yylval = PL_nextval[PL_nexttoke];
3280 PL_lex_state = PL_lex_defer;
3281 PL_expect = PL_lex_expect;
3282 PL_lex_defer = LEX_NORMAL;
3286 /* FIXME - can these be merged? */
3287 return(PL_nexttoke[PL_lasttoke].next_type);
3289 return REPORT(PL_nexttype[PL_nexttoke]);
3292 /* interpolated case modifiers like \L \U, including \Q and \E.
3293 when we get here, PL_bufptr is at the \
3295 case LEX_INTERPCASEMOD:
3297 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3298 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3300 /* handle \E or end of string */
3301 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3303 if (PL_lex_casemods) {
3304 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3305 PL_lex_casestack[PL_lex_casemods] = '\0';
3307 if (PL_bufptr != PL_bufend
3308 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3310 PL_lex_state = LEX_INTERPCONCAT;
3313 PL_thistoken = newSVpvs("\\E");
3319 while (PL_bufptr != PL_bufend &&
3320 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3322 PL_thiswhite = newSVpvs("");
3323 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3327 if (PL_bufptr != PL_bufend)
3330 PL_lex_state = LEX_INTERPCONCAT;
3334 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3335 "### Saw case modifier\n"); });
3337 if (s[1] == '\\' && s[2] == 'E') {
3340 PL_thiswhite = newSVpvs("");
3341 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3344 PL_lex_state = LEX_INTERPCONCAT;
3349 if (!PL_madskills) /* when just compiling don't need correct */
3350 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3351 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3352 if ((*s == 'L' || *s == 'U') &&
3353 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3354 PL_lex_casestack[--PL_lex_casemods] = '\0';
3357 if (PL_lex_casemods > 10)
3358 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3359 PL_lex_casestack[PL_lex_casemods++] = *s;
3360 PL_lex_casestack[PL_lex_casemods] = '\0';
3361 PL_lex_state = LEX_INTERPCONCAT;
3362 start_force(PL_curforce);
3363 NEXTVAL_NEXTTOKE.ival = 0;
3365 start_force(PL_curforce);
3367 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3369 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3371 NEXTVAL_NEXTTOKE.ival = OP_LC;
3373 NEXTVAL_NEXTTOKE.ival = OP_UC;
3375 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3377 Perl_croak(aTHX_ "panic: yylex");
3379 SV* const tmpsv = newSVpvs("\\ ");
3380 /* replace the space with the character we want to escape
3382 SvPVX(tmpsv)[1] = *s;
3388 if (PL_lex_starts) {
3394 sv_free(PL_thistoken);
3395 PL_thistoken = newSVpvs("");
3398 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3399 if (PL_lex_casemods == 1 && PL_lex_inpat)
3408 case LEX_INTERPPUSH:
3409 return REPORT(sublex_push());
3411 case LEX_INTERPSTART:
3412 if (PL_bufptr == PL_bufend)
3413 return REPORT(sublex_done());
3414 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3415 "### Interpolated variable\n"); });
3417 PL_lex_dojoin = (*PL_bufptr == '@');
3418 PL_lex_state = LEX_INTERPNORMAL;
3419 if (PL_lex_dojoin) {
3420 start_force(PL_curforce);
3421 NEXTVAL_NEXTTOKE.ival = 0;
3423 start_force(PL_curforce);
3424 force_ident("\"", '$');
3425 start_force(PL_curforce);
3426 NEXTVAL_NEXTTOKE.ival = 0;
3428 start_force(PL_curforce);
3429 NEXTVAL_NEXTTOKE.ival = 0;
3431 start_force(PL_curforce);
3432 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3435 if (PL_lex_starts++) {
3440 sv_free(PL_thistoken);
3441 PL_thistoken = newSVpvs("");
3444 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3445 if (!PL_lex_casemods && PL_lex_inpat)
3452 case LEX_INTERPENDMAYBE:
3453 if (intuit_more(PL_bufptr)) {
3454 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3460 if (PL_lex_dojoin) {
3461 PL_lex_dojoin = FALSE;
3462 PL_lex_state = LEX_INTERPCONCAT;
3466 sv_free(PL_thistoken);
3467 PL_thistoken = newSVpvs("");
3472 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3473 && SvEVALED(PL_lex_repl))
3475 if (PL_bufptr != PL_bufend)
3476 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3480 case LEX_INTERPCONCAT:
3482 if (PL_lex_brackets)
3483 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3485 if (PL_bufptr == PL_bufend)
3486 return REPORT(sublex_done());
3488 if (SvIVX(PL_linestr) == '\'') {
3489 SV *sv = newSVsv(PL_linestr);
3492 else if ( PL_hints & HINT_NEW_RE )
3493 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3494 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3498 s = scan_const(PL_bufptr);
3500 PL_lex_state = LEX_INTERPCASEMOD;
3502 PL_lex_state = LEX_INTERPSTART;
3505 if (s != PL_bufptr) {
3506 start_force(PL_curforce);
3508 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3510 NEXTVAL_NEXTTOKE = pl_yylval;
3513 if (PL_lex_starts++) {
3517 sv_free(PL_thistoken);
3518 PL_thistoken = newSVpvs("");
3521 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3522 if (!PL_lex_casemods && PL_lex_inpat)
3535 PL_lex_state = LEX_NORMAL;
3536 s = scan_formline(PL_bufptr);
3537 if (!PL_lex_formbrack)
3543 PL_oldoldbufptr = PL_oldbufptr;
3549 sv_free(PL_thistoken);
3552 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3556 if (isIDFIRST_lazy_if(s,UTF))
3558 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3559 Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
3562 goto fake_eof; /* emulate EOF on ^D or ^Z */
3571 if (PL_lex_brackets) {
3572 yyerror((const char *)
3574 ? "Format not terminated"
3575 : "Missing right curly or square bracket"));
3577 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3578 "### Tokener got EOF\n");
3582 if (s++ < PL_bufend)
3583 goto retry; /* ignore stray nulls */
3586 if (!PL_in_eval && !PL_preambled) {
3587 PL_preambled = TRUE;
3593 /* Generate a string of Perl code to load the debugger.
3594 * If PERL5DB is set, it will return the contents of that,
3595 * otherwise a compile-time require of perl5db.pl. */
3597 const char * const pdb = PerlEnv_getenv("PERL5DB");
3600 sv_setpv(PL_linestr, pdb);
3601 sv_catpvs(PL_linestr,";");
3603 SETERRNO(0,SS_NORMAL);
3604 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3607 sv_setpvs(PL_linestr,"");
3608 if (PL_preambleav) {
3609 SV **svp = AvARRAY(PL_preambleav);
3610 SV **const end = svp + AvFILLp(PL_preambleav);
3612 sv_catsv(PL_linestr, *svp);
3614 sv_catpvs(PL_linestr, ";");
3616 sv_free((SV*)PL_preambleav);
3617 PL_preambleav = NULL;
3619 if (PL_minus_n || PL_minus_p) {
3620 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3622 sv_catpvs(PL_linestr,"chomp;");
3625 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3626 || *PL_splitstr == '"')
3627 && strchr(PL_splitstr + 1, *PL_splitstr))
3628 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3630 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3631 bytes can be used as quoting characters. :-) */
3632 const char *splits = PL_splitstr;
3633 sv_catpvs(PL_linestr, "our @F=split(q\0");
3636 if (*splits == '\\')
3637 sv_catpvn(PL_linestr, splits, 1);
3638 sv_catpvn(PL_linestr, splits, 1);
3639 } while (*splits++);
3640 /* This loop will embed the trailing NUL of
3641 PL_linestr as the last thing it does before
3643 sv_catpvs(PL_linestr, ");");
3647 sv_catpvs(PL_linestr,"our @F=split(' ');");
3651 sv_catpvs(PL_linestr,
3652 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3653 sv_catpvs(PL_linestr, "\n");
3654 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3655 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3656 PL_last_lop = PL_last_uni = NULL;
3657 if (PERLDB_LINE && PL_curstash != PL_debstash)
3658 update_debugger_info(PL_linestr, NULL, 0);
3662 bof = PL_rsfp ? TRUE : FALSE;
3663 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3666 PL_realtokenstart = -1;
3669 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3670 PerlIO_clearerr(PL_rsfp);
3672 (void)PerlIO_close(PL_rsfp);
3674 PL_doextract = FALSE;
3676 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3682 sv_setpvs(PL_linestr, ";}continue{print;}");
3684 sv_setpvs(PL_linestr, ";}");
3685 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3686 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3687 PL_last_lop = PL_last_uni = NULL;
3688 PL_minus_n = PL_minus_p = 0;
3691 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3692 PL_last_lop = PL_last_uni = NULL;
3693 sv_setpvn(PL_linestr,"",0);
3694 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3696 /* If it looks like the start of a BOM or raw UTF-16,
3697 * check if it in fact is. */
3703 #ifdef PERLIO_IS_STDIO
3704 # ifdef __GNU_LIBRARY__
3705 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3706 # define FTELL_FOR_PIPE_IS_BROKEN
3710 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3711 # define FTELL_FOR_PIPE_IS_BROKEN
3716 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3718 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3719 s = swallow_bom((U8*)s);
3723 /* Incest with pod. */
3726 sv_catsv(PL_thiswhite, PL_linestr);
3728 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3729 sv_setpvn(PL_linestr, "", 0);
3730 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3732 PL_last_lop = PL_last_uni = NULL;
3733 PL_doextract = FALSE;
3737 } while (PL_doextract);
3738 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3739 if (PERLDB_LINE && PL_curstash != PL_debstash)
3740 update_debugger_info(PL_linestr, NULL, 0);
3741 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3742 PL_last_lop = PL_last_uni = NULL;
3743 if (CopLINE(PL_curcop) == 1) {
3744 while (s < PL_bufend && isSPACE(*s))
3746 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3750 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3754 if (*s == '#' && *(s+1) == '!')
3756 #ifdef ALTERNATE_SHEBANG
3758 static char const as[] = ALTERNATE_SHEBANG;
3759 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3760 d = s + (sizeof(as) - 1);
3762 #endif /* ALTERNATE_SHEBANG */
3771 while (*d && !isSPACE(*d))
3775 #ifdef ARG_ZERO_IS_SCRIPT
3776 if (ipathend > ipath) {
3778 * HP-UX (at least) sets argv[0] to the script name,
3779 * which makes $^X incorrect. And Digital UNIX and Linux,
3780 * at least, set argv[0] to the basename of the Perl
3781 * interpreter. So, having found "#!", we'll set it right.
3783 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3785 assert(SvPOK(x) || SvGMAGICAL(x));
3786 if (sv_eq(x, CopFILESV(PL_curcop))) {
3787 sv_setpvn(x, ipath, ipathend - ipath);
3793 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3794 const char * const lstart = SvPV_const(x,llen);
3796 bstart += blen - llen;
3797 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3798 sv_setpvn(x, ipath, ipathend - ipath);
3803 TAINT_NOT; /* $^X is always tainted, but that's OK */
3805 #endif /* ARG_ZERO_IS_SCRIPT */
3810 d = instr(s,"perl -");
3812 d = instr(s,"perl");
3814 /* avoid getting into infinite loops when shebang
3815 * line contains "Perl" rather than "perl" */
3817 for (d = ipathend-4; d >= ipath; --d) {
3818 if ((*d == 'p' || *d == 'P')
3819 && !ibcmp(d, "perl", 4))
3829 #ifdef ALTERNATE_SHEBANG
3831 * If the ALTERNATE_SHEBANG on this system starts with a
3832 * character that can be part of a Perl expression, then if
3833 * we see it but not "perl", we're probably looking at the
3834 * start of Perl code, not a request to hand off to some
3835 * other interpreter. Similarly, if "perl" is there, but
3836 * not in the first 'word' of the line, we assume the line
3837 * contains the start of the Perl program.
3839 if (d && *s != '#') {
3840 const char *c = ipath;
3841 while (*c && !strchr("; \t\r\n\f\v#", *c))
3844 d = NULL; /* "perl" not in first word; ignore */
3846 *s = '#'; /* Don't try to parse shebang line */
3848 #endif /* ALTERNATE_SHEBANG */
3849 #ifndef MACOS_TRADITIONAL
3854 !instr(s,"indir") &&
3855 instr(PL_origargv[0],"perl"))
3862 while (s < PL_bufend && isSPACE(*s))
3864 if (s < PL_bufend) {
3865 Newxz(newargv,PL_origargc+3,char*);
3867 while (s < PL_bufend && !isSPACE(*s))
3870 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3873 newargv = PL_origargv;
3876 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3878 Perl_croak(aTHX_ "Can't exec %s", ipath);
3882 while (*d && !isSPACE(*d))
3884 while (SPACE_OR_TAB(*d))
3888 const bool switches_done = PL_doswitches;
3889 const U32 oldpdb = PL_perldb;
3890 const bool oldn = PL_minus_n;
3891 const bool oldp = PL_minus_p;
3895 if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
3896 const char * const m = d1;
3897 while (*d1 && !isSPACE(*d1))
3899 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3902 d1 = moreswitches(d1);
3904 if (PL_doswitches && !switches_done) {
3905 int argc = PL_origargc;
3906 char **argv = PL_origargv;
3909 } while (argc && argv[0][0] == '-' && argv[0][1]);
3910 init_argv_symbols(argc,argv);
3912 if ((PERLDB_LINE && !oldpdb) ||
3913 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3914 /* if we have already added "LINE: while (<>) {",
3915 we must not do it again */
3917 sv_setpvn(PL_linestr, "", 0);
3918 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3919 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3920 PL_last_lop = PL_last_uni = NULL;
3921 PL_preambled = FALSE;
3923 (void)gv_fetchfile(PL_origfilename);
3930 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3932 PL_lex_state = LEX_FORMLINE;
3937 #ifdef PERL_STRICT_CR
3938 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3940 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3942 case ' ': case '\t': case '\f': case 013:
3943 #ifdef MACOS_TRADITIONAL
3947 PL_realtokenstart = -1;
3949 PL_thiswhite = newSVpvs("");
3950 sv_catpvn(PL_thiswhite, s, 1);
3957 PL_realtokenstart = -1;
3961 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3962 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3963 /* handle eval qq[#line 1 "foo"\n ...] */
3964 CopLINE_dec(PL_curcop);
3967 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3969 if (!PL_in_eval || PL_rsfp)
3974 while (d < PL_bufend && *d != '\n')
3978 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3979 Perl_croak(aTHX_ "panic: input overflow");
3982 PL_thiswhite = newSVpvn(s, d - s);
3987 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3989 PL_lex_state = LEX_FORMLINE;
3995 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3996 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3999 TOKEN(PEG); /* make sure any #! line is accessible */
4004 /* if (PL_madskills && PL_lex_formbrack) { */
4006 while (d < PL_bufend && *d != '\n')
4010 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4011 Perl_croak(aTHX_ "panic: input overflow");
4012 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4014 PL_thiswhite = newSVpvs("");
4015 if (CopLINE(PL_curcop) == 1) {
4016 sv_setpvn(PL_thiswhite, "", 0);
4019 sv_catpvn(PL_thiswhite, s, d - s);
4033 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4041 while (s < PL_bufend && SPACE_OR_TAB(*s))
4044 if (strnEQ(s,"=>",2)) {
4045 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4046 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4047 OPERATOR('-'); /* unary minus */
4049 PL_last_uni = PL_oldbufptr;
4051 case 'r': ftst = OP_FTEREAD; break;
4052 case 'w': ftst = OP_FTEWRITE; break;
4053 case 'x': ftst = OP_FTEEXEC; break;
4054 case 'o': ftst = OP_FTEOWNED; break;
4055 case 'R': ftst = OP_FTRREAD; break;
4056 case 'W': ftst = OP_FTRWRITE; break;
4057 case 'X': ftst = OP_FTREXEC; break;
4058 case 'O': ftst = OP_FTROWNED; break;
4059 case 'e': ftst = OP_FTIS; break;
4060 case 'z': ftst = OP_FTZERO; break;
4061 case 's': ftst = OP_FTSIZE; break;
4062 case 'f': ftst = OP_FTFILE; break;
4063 case 'd': ftst = OP_FTDIR; break;
4064 case 'l': ftst = OP_FTLINK; break;
4065 case 'p': ftst = OP_FTPIPE; break;
4066 case 'S': ftst = OP_FTSOCK; break;
4067 case 'u': ftst = OP_FTSUID; break;
4068 case 'g': ftst = OP_FTSGID; break;
4069 case 'k': ftst = OP_FTSVTX; break;
4070 case 'b': ftst = OP_FTBLK; break;
4071 case 'c': ftst = OP_FTCHR; break;
4072 case 't': ftst = OP_FTTTY; break;
4073 case 'T': ftst = OP_FTTEXT; break;
4074 case 'B': ftst = OP_FTBINARY; break;
4075 case 'M': case 'A': case 'C':
4076 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4078 case 'M': ftst = OP_FTMTIME; break;
4079 case 'A': ftst = OP_FTATIME; break;
4080 case 'C': ftst = OP_FTCTIME; break;
4088 PL_last_lop_op = (OPCODE)ftst;
4089 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4090 "### Saw file test %c\n", (int)tmp);
4095 /* Assume it was a minus followed by a one-letter named
4096 * subroutine call (or a -bareword), then. */
4097 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4098 "### '-%c' looked like a file test but was not\n",
4105 const char tmp = *s++;
4108 if (PL_expect == XOPERATOR)
4113 else if (*s == '>') {
4116 if (isIDFIRST_lazy_if(s,UTF)) {
4117 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4125 if (PL_expect == XOPERATOR)
4128 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4130 OPERATOR('-'); /* unary minus */
4136 const char tmp = *s++;
4139 if (PL_expect == XOPERATOR)
4144 if (PL_expect == XOPERATOR)
4147 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4154 if (PL_expect != XOPERATOR) {
4155 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4156 PL_expect = XOPERATOR;
4157 force_ident(PL_tokenbuf, '*');
4170 if (PL_expect == XOPERATOR) {
4174 PL_tokenbuf[0] = '%';
4175 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4176 sizeof PL_tokenbuf - 1, FALSE);
4177 if (!PL_tokenbuf[1]) {
4180 PL_pending_ident = '%';
4191 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4198 const char tmp = *s++;
4204 goto just_a_word_zero_gv;
4207 switch (PL_expect) {
4213 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4215 PL_bufptr = s; /* update in case we back off */
4221 PL_expect = XTERMBLOCK;
4224 stuffstart = s - SvPVX(PL_linestr) - 1;
4228 while (isIDFIRST_lazy_if(s,UTF)) {
4231 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4232 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4233 if (tmp < 0) tmp = -tmp;
4247 sv = newSVpvn(s, len);
4249 d = scan_str(d,TRUE,TRUE);
4251 /* MUST advance bufptr here to avoid bogus
4252 "at end of line" context messages from yyerror().
4254 PL_bufptr = s + len;
4255 yyerror("Unterminated attribute parameter in attribute list");
4259 return REPORT(0); /* EOF indicator */
4263 sv_catsv(sv, PL_lex_stuff);
4264 attrs = append_elem(OP_LIST, attrs,
4265 newSVOP(OP_CONST, 0, sv));
4266 SvREFCNT_dec(PL_lex_stuff);
4267 PL_lex_stuff = NULL;
4270 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4272 if (PL_in_my == KEY_our) {
4274 GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval));
4276 /* skip to avoid loading attributes.pm */
4278 deprecate(":unique");
4281 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4284 /* NOTE: any CV attrs applied here need to be part of
4285 the CVf_BUILTIN_ATTRS define in cv.h! */
4286 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4288 CvLVALUE_on(PL_compcv);
4290 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4292 CvLOCKED_on(PL_compcv);
4294 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4296 CvMETHOD_on(PL_compcv);
4298 /* After we've set the flags, it could be argued that
4299 we don't need to do the attributes.pm-based setting
4300 process, and shouldn't bother appending recognized
4301 flags. To experiment with that, uncomment the
4302 following "else". (Note that's already been
4303 uncommented. That keeps the above-applied built-in
4304 attributes from being intercepted (and possibly
4305 rejected) by a package's attribute routines, but is
4306 justified by the performance win for the common case
4307 of applying only built-in attributes.) */
4309 attrs = append_elem(OP_LIST, attrs,
4310 newSVOP(OP_CONST, 0,
4314 if (*s == ':' && s[1] != ':')
4317 break; /* require real whitespace or :'s */
4318 /* XXX losing whitespace on sequential attributes here */
4322 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4323 if (*s != ';' && *s != '}' && *s != tmp
4324 && (tmp != '=' || *s != ')')) {
4325 const char q = ((*s == '\'') ? '"' : '\'');
4326 /* If here for an expression, and parsed no attrs, back
4328 if (tmp == '=' && !attrs) {
4332 /* MUST advance bufptr here to avoid bogus "at end of line"
4333 context messages from yyerror().
4336 yyerror( (const char *)
4338 ? Perl_form(aTHX_ "Invalid separator character "
4339 "%c%c%c in attribute list", q, *s, q)
4340 : "Unterminated attribute list" ) );
4348 start_force(PL_curforce);
4349 NEXTVAL_NEXTTOKE.opval = attrs;
4350 CURMAD('_', PL_nextwhite);
4355 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4356 (s - SvPVX(PL_linestr)) - stuffstart);
4364 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4365 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4373 const char tmp = *s++;
4378 const char tmp = *s++;
4386 if (PL_lex_brackets <= 0)
4387 yyerror("Unmatched right square bracket");
4390 if (PL_lex_state == LEX_INTERPNORMAL) {
4391 if (PL_lex_brackets == 0) {
4392 if (*s == '-' && s[1] == '>')
4393 PL_lex_state = LEX_INTERPENDMAYBE;
4394 else if (*s != '[' && *s != '{')
4395 PL_lex_state = LEX_INTERPEND;
4402 if (PL_lex_brackets > 100) {
4403 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4405 switch (PL_expect) {
4407 if (PL_lex_formbrack) {
4411 if (PL_oldoldbufptr == PL_last_lop)
4412 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4414 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4415 OPERATOR(HASHBRACK);
4417 while (s < PL_bufend && SPACE_OR_TAB(*s))
4420 PL_tokenbuf[0] = '\0';
4421 if (d < PL_bufend && *d == '-') {
4422 PL_tokenbuf[0] = '-';
4424 while (d < PL_bufend && SPACE_OR_TAB(*d))
4427 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4428 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4430 while (d < PL_bufend && SPACE_OR_TAB(*d))
4433 const char minus = (PL_tokenbuf[0] == '-');
4434 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4442 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4447 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4452 if (PL_oldoldbufptr == PL_last_lop)
4453 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4455 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4458 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4460 /* This hack is to get the ${} in the message. */
4462 yyerror("syntax error");
4465 OPERATOR(HASHBRACK);
4467 /* This hack serves to disambiguate a pair of curlies
4468 * as being a block or an anon hash. Normally, expectation
4469 * determines that, but in cases where we're not in a
4470 * position to expect anything in particular (like inside
4471 * eval"") we have to resolve the ambiguity. This code
4472 * covers the case where the first term in the curlies is a
4473 * quoted string. Most other cases need to be explicitly
4474 * disambiguated by prepending a "+" before the opening
4475 * curly in order to force resolution as an anon hash.
4477 * XXX should probably propagate the outer expectation
4478 * into eval"" to rely less on this hack, but that could
4479 * potentially break current behavior of eval"".
4483 if (*s == '\'' || *s == '"' || *s == '`') {
4484 /* common case: get past first string, handling escapes */
4485 for (t++; t < PL_bufend && *t != *s;)
4486 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4490 else if (*s == 'q') {
4493 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4496 /* skip q//-like construct */
4498 char open, close, term;
4501 while (t < PL_bufend && isSPACE(*t))
4503 /* check for q => */
4504 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4505 OPERATOR(HASHBRACK);
4509 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4513 for (t++; t < PL_bufend; t++) {
4514 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4516 else if (*t == open)
4520 for (t++; t < PL_bufend; t++) {
4521 if (*t == '\\' && t+1 < PL_bufend)
4523 else if (*t == close && --brackets <= 0)
4525 else if (*t == open)
4532 /* skip plain q word */
4533 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4536 else if (isALNUM_lazy_if(t,UTF)) {
4538 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4541 while (t < PL_bufend && isSPACE(*t))
4543 /* if comma follows first term, call it an anon hash */
4544 /* XXX it could be a comma expression with loop modifiers */
4545 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4546 || (*t == '=' && t[1] == '>')))
4547 OPERATOR(HASHBRACK);
4548 if (PL_expect == XREF)
4551 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4557 pl_yylval.ival = CopLINE(PL_curcop);
4558 if (isSPACE(*s) || *s == '#')
4559 PL_copline = NOLINE; /* invalidate current command line number */
4564 if (PL_lex_brackets <= 0)
4565 yyerror("Unmatched right curly bracket");
4567 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4568 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4569 PL_lex_formbrack = 0;
4570 if (PL_lex_state == LEX_INTERPNORMAL) {
4571 if (PL_lex_brackets == 0) {
4572 if (PL_expect & XFAKEBRACK) {
4573 PL_expect &= XENUMMASK;
4574 PL_lex_state = LEX_INTERPEND;
4579 PL_thiswhite = newSVpvs("");
4580 sv_catpvn(PL_thiswhite,"}",1);
4583 return yylex(); /* ignore fake brackets */
4585 if (*s == '-' && s[1] == '>')
4586 PL_lex_state = LEX_INTERPENDMAYBE;
4587 else if (*s != '[' && *s != '{')
4588 PL_lex_state = LEX_INTERPEND;
4591 if (PL_expect & XFAKEBRACK) {
4592 PL_expect &= XENUMMASK;
4594 return yylex(); /* ignore fake brackets */
4596 start_force(PL_curforce);
4598 curmad('X', newSVpvn(s-1,1));
4599 CURMAD('_', PL_thiswhite);
4604 PL_thistoken = newSVpvs("");
4612 if (PL_expect == XOPERATOR) {
4613 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4614 && isIDFIRST_lazy_if(s,UTF))
4616 CopLINE_dec(PL_curcop);
4617 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4618 CopLINE_inc(PL_curcop);
4623 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4625 PL_expect = XOPERATOR;
4626 force_ident(PL_tokenbuf, '&');
4630 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4642 const char tmp = *s++;
4649 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4650 && strchr("+-*/%.^&|<",tmp))
4651 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4652 "Reversed %c= operator",(int)tmp);
4654 if (PL_expect == XSTATE && isALPHA(tmp) &&
4655 (s == PL_linestart+1 || s[-2] == '\n') )
4657 if (PL_in_eval && !PL_rsfp) {
4662 if (strnEQ(s,"=cut",4)) {
4678 PL_thiswhite = newSVpvs("");
4679 sv_catpvn(PL_thiswhite, PL_linestart,
4680 PL_bufend - PL_linestart);
4684 PL_doextract = TRUE;
4688 if (PL_lex_brackets < PL_lex_formbrack) {
4690 #ifdef PERL_STRICT_CR
4691 while (SPACE_OR_TAB(*t))
4693 while (SPACE_OR_TAB(*t) || *t == '\r')
4696 if (*t == '\n' || *t == '#') {
4707 const char tmp = *s++;
4709 /* was this !=~ where !~ was meant?
4710 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4712 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4713 const char *t = s+1;
4715 while (t < PL_bufend && isSPACE(*t))
4718 if (*t == '/' || *t == '?' ||
4719 ((*t == 'm' || *t == 's' || *t == 'y')
4720 && !isALNUM(t[1])) ||
4721 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4722 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4723 "!=~ should be !~");
4733 if (PL_expect != XOPERATOR) {
4734 if (s[1] != '<' && !strchr(s,'>'))
4737 s = scan_heredoc(s);
4739 s = scan_inputsymbol(s);
4740 TERM(sublex_start());
4746 SHop(OP_LEFT_SHIFT);
4760 const char tmp = *s++;
4762 SHop(OP_RIGHT_SHIFT);
4763 else if (tmp == '=')
4772 if (PL_expect == XOPERATOR) {
4773 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4775 deprecate_old(commaless_variable_list);
4776 return REPORT(','); /* grandfather non-comma-format format */
4780 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4781 PL_tokenbuf[0] = '@';
4782 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4783 sizeof PL_tokenbuf - 1, FALSE);
4784 if (PL_expect == XOPERATOR)
4785 no_op("Array length", s);
4786 if (!PL_tokenbuf[1])
4788 PL_expect = XOPERATOR;
4789 PL_pending_ident = '#';
4793 PL_tokenbuf[0] = '$';
4794 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4795 sizeof PL_tokenbuf - 1, FALSE);
4796 if (PL_expect == XOPERATOR)
4798 if (!PL_tokenbuf[1]) {
4800 yyerror("Final $ should be \\$ or $name");
4804 /* This kludge not intended to be bulletproof. */
4805 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4806 pl_yylval.opval = newSVOP(OP_CONST, 0,
4807 newSViv(CopARYBASE_get(&PL_compiling)));
4808 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4814 const char tmp = *s;
4815 if (PL_lex_state == LEX_NORMAL)
4818 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4819 && intuit_more(s)) {
4821 PL_tokenbuf[0] = '@';
4822 if (ckWARN(WARN_SYNTAX)) {
4825 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4828 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4829 while (t < PL_bufend && *t != ']')
4831 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4832 "Multidimensional syntax %.*s not supported",
4833 (int)((t - PL_bufptr) + 1), PL_bufptr);
4837 else if (*s == '{') {
4839 PL_tokenbuf[0] = '%';
4840 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4841 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4843 char tmpbuf[sizeof PL_tokenbuf];
4846 } while (isSPACE(*t));
4847 if (isIDFIRST_lazy_if(t,UTF)) {
4849 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4853 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4854 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4855 "You need to quote \"%s\"",
4862 PL_expect = XOPERATOR;
4863 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4864 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4865 if (!islop || PL_last_lop_op == OP_GREPSTART)
4866 PL_expect = XOPERATOR;
4867 else if (strchr("$@\"'`q", *s))
4868 PL_expect = XTERM; /* e.g. print $fh "foo" */
4869 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4870 PL_expect = XTERM; /* e.g. print $fh &sub */
4871 else if (isIDFIRST_lazy_if(s,UTF)) {
4872 char tmpbuf[sizeof PL_tokenbuf];
4874 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4875 if ((t2 = keyword(tmpbuf, len, 0))) {
4876 /* binary operators exclude handle interpretations */
4888 PL_expect = XTERM; /* e.g. print $fh length() */
4893 PL_expect = XTERM; /* e.g. print $fh subr() */
4896 else if (isDIGIT(*s))
4897 PL_expect = XTERM; /* e.g. print $fh 3 */
4898 else if (*s == '.' && isDIGIT(s[1]))
4899 PL_expect = XTERM; /* e.g. print $fh .3 */
4900 else if ((*s == '?' || *s == '-' || *s == '+')
4901 && !isSPACE(s[1]) && s[1] != '=')
4902 PL_expect = XTERM; /* e.g. print $fh -1 */
4903 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4905 PL_expect = XTERM; /* e.g. print $fh /.../
4906 XXX except DORDOR operator
4908 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4910 PL_expect = XTERM; /* print $fh <<"EOF" */
4913 PL_pending_ident = '$';
4917 if (PL_expect == XOPERATOR)
4919 PL_tokenbuf[0] = '@';
4920 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4921 if (!PL_tokenbuf[1]) {
4924 if (PL_lex_state == LEX_NORMAL)
4926 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4928 PL_tokenbuf[0] = '%';
4930 /* Warn about @ where they meant $. */
4931 if (*s == '[' || *s == '{') {
4932 if (ckWARN(WARN_SYNTAX)) {
4933 const char *t = s + 1;
4934 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4936 if (*t == '}' || *t == ']') {
4938 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4939 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4940 "Scalar value %.*s better written as $%.*s",
4941 (int)(t-PL_bufptr), PL_bufptr,
4942 (int)(t-PL_bufptr-1), PL_bufptr+1);
4947 PL_pending_ident = '@';
4950 case '/': /* may be division, defined-or, or pattern */
4951 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4955 case '?': /* may either be conditional or pattern */
4956 if(PL_expect == XOPERATOR) {
4964 /* A // operator. */
4974 /* Disable warning on "study /blah/" */
4975 if (PL_oldoldbufptr == PL_last_uni
4976 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4977 || memNE(PL_last_uni, "study", 5)
4978 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4981 s = scan_pat(s,OP_MATCH);
4982 TERM(sublex_start());
4986 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4987 #ifdef PERL_STRICT_CR
4990 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4992 && (s == PL_linestart || s[-1] == '\n') )
4994 PL_lex_formbrack = 0;
4998 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5004 pl_yylval.ival = OPf_SPECIAL;
5010 if (PL_expect != XOPERATOR)
5015 case '0': case '1': case '2': case '3': case '4':
5016 case '5': case '6': case '7': case '8': case '9':
5017 s = scan_num(s, &pl_yylval);
5018 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5019 if (PL_expect == XOPERATOR)
5024 s = scan_str(s,!!PL_madskills,FALSE);
5025 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5026 if (PL_expect == XOPERATOR) {
5027 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5029 deprecate_old(commaless_variable_list);
5030 return REPORT(','); /* grandfather non-comma-format format */
5037 pl_yylval.ival = OP_CONST;
5038 TERM(sublex_start());
5041 s = scan_str(s,!!PL_madskills,FALSE);
5042 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5043 if (PL_expect == XOPERATOR) {
5044 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5046 deprecate_old(commaless_variable_list);
5047 return REPORT(','); /* grandfather non-comma-format format */
5054 pl_yylval.ival = OP_CONST;
5055 /* FIXME. I think that this can be const if char *d is replaced by
5056 more localised variables. */
5057 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5058 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5059 pl_yylval.ival = OP_STRINGIFY;
5063 TERM(sublex_start());
5066 s = scan_str(s,!!PL_madskills,FALSE);
5067 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5068 if (PL_expect == XOPERATOR)
5069 no_op("Backticks",s);
5072 readpipe_override();
5073 TERM(sublex_start());
5077 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5078 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5080 if (PL_expect == XOPERATOR)
5081 no_op("Backslash",s);
5085 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5086 char *start = s + 2;
5087 while (isDIGIT(*start) || *start == '_')
5089 if (*start == '.' && isDIGIT(start[1])) {
5090 s = scan_num(s, &pl_yylval);
5093 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5094 else if (!isALPHA(*start) && (PL_expect == XTERM
5095 || PL_expect == XREF || PL_expect == XSTATE
5096 || PL_expect == XTERMORDORDOR)) {
5097 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5099 s = scan_num(s, &pl_yylval);
5106 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5148 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5150 /* Some keywords can be followed by any delimiter, including ':' */
5151 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5152 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5153 (PL_tokenbuf[0] == 'q' &&
5154 strchr("qwxr", PL_tokenbuf[1])))));
5156 /* x::* is just a word, unless x is "CORE" */
5157 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5161 while (d < PL_bufend && isSPACE(*d))
5162 d++; /* no comments skipped here, or s### is misparsed */
5164 /* Is this a label? */
5165 if (!tmp && PL_expect == XSTATE
5166 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5168 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5173 /* Check for keywords */
5174 tmp = keyword(PL_tokenbuf, len, 0);
5176 /* Is this a word before a => operator? */
5177 if (*d == '=' && d[1] == '>') {
5180 = (OP*)newSVOP(OP_CONST, 0,
5181 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5182 pl_yylval.opval->op_private = OPpCONST_BARE;
5186 if (tmp < 0) { /* second-class keyword? */
5187 GV *ogv = NULL; /* override (winner) */
5188 GV *hgv = NULL; /* hidden (loser) */
5189 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5191 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5194 if (GvIMPORTED_CV(gv))
5196 else if (! CvMETHOD(cv))
5200 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5201 (gv = *gvp) && isGV_with_GP(gv) &&
5202 GvCVu(gv) && GvIMPORTED_CV(gv))
5209 tmp = 0; /* overridden by import or by GLOBAL */
5212 && -tmp==KEY_lock /* XXX generalizable kludge */
5215 tmp = 0; /* any sub overrides "weak" keyword */
5217 else { /* no override */
5219 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5220 Perl_warner(aTHX_ packWARN(WARN_MISC),
5221 "dump() better written as CORE::dump()");
5225 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5226 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5227 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5228 "Ambiguous call resolved as CORE::%s(), %s",
5229 GvENAME(hgv), "qualify as such or use &");
5236 default: /* not a keyword */
5237 /* Trade off - by using this evil construction we can pull the
5238 variable gv into the block labelled keylookup. If not, then
5239 we have to give it function scope so that the goto from the
5240 earlier ':' case doesn't bypass the initialisation. */
5242 just_a_word_zero_gv:
5250 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5253 SV *nextPL_nextwhite = 0;
5257 /* Get the rest if it looks like a package qualifier */
5259 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5261 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5264 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5265 *s == '\'' ? "'" : "::");
5270 if (PL_expect == XOPERATOR) {
5271 if (PL_bufptr == PL_linestart) {
5272 CopLINE_dec(PL_curcop);
5273 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5274 CopLINE_inc(PL_curcop);
5277 no_op("Bareword",s);
5280 /* Look for a subroutine with this name in current package,
5281 unless name is "Foo::", in which case Foo is a bearword
5282 (and a package name). */
5284 if (len > 2 && !PL_madskills &&
5285 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5287 if (ckWARN(WARN_BAREWORD)
5288 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5289 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5290 "Bareword \"%s\" refers to nonexistent package",
5293 PL_tokenbuf[len] = '\0';
5299 /* Mustn't actually add anything to a symbol table.
5300 But also don't want to "initialise" any placeholder
5301 constants that might already be there into full
5302 blown PVGVs with attached PVCV. */
5303 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5304 GV_NOADD_NOINIT, SVt_PVCV);
5309 /* if we saw a global override before, get the right name */
5312 sv = newSVpvs("CORE::GLOBAL::");
5313 sv_catpv(sv,PL_tokenbuf);
5316 /* If len is 0, newSVpv does strlen(), which is correct.
5317 If len is non-zero, then it will be the true length,
5318 and so the scalar will be created correctly. */
5319 sv = newSVpv(PL_tokenbuf,len);
5322 if (PL_madskills && !PL_thistoken) {
5323 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5324 PL_thistoken = newSVpvn(start,s - start);
5325 PL_realtokenstart = s - SvPVX(PL_linestr);
5329 /* Presume this is going to be a bareword of some sort. */
5332 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5333 pl_yylval.opval->op_private = OPpCONST_BARE;
5334 /* UTF-8 package name? */
5335 if (UTF && !IN_BYTES &&
5336 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5339 /* And if "Foo::", then that's what it certainly is. */
5344 /* Do the explicit type check so that we don't need to force
5345 the initialisation of the symbol table to have a real GV.
5346 Beware - gv may not really be a PVGV, cv may not really be
5347 a PVCV, (because of the space optimisations that gv_init
5348 understands) But they're true if for this symbol there is
5349 respectively a typeglob and a subroutine.
5351 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5352 /* Real typeglob, so get the real subroutine: */
5354 /* A proxy for a subroutine in this package? */
5355 : SvOK(gv) ? (CV *) gv : NULL)
5358 /* See if it's the indirect object for a list operator. */
5360 if (PL_oldoldbufptr &&
5361 PL_oldoldbufptr < PL_bufptr &&
5362 (PL_oldoldbufptr == PL_last_lop
5363 || PL_oldoldbufptr == PL_last_uni) &&
5364 /* NO SKIPSPACE BEFORE HERE! */
5365 (PL_expect == XREF ||
5366 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5368 bool immediate_paren = *s == '(';
5370 /* (Now we can afford to cross potential line boundary.) */
5371 s = SKIPSPACE2(s,nextPL_nextwhite);
5373 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5376 /* Two barewords in a row may indicate method call. */
5378 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5379 (tmp = intuit_method(s, gv, cv)))
5382 /* If not a declared subroutine, it's an indirect object. */
5383 /* (But it's an indir obj regardless for sort.) */
5384 /* Also, if "_" follows a filetest operator, it's a bareword */
5387 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5389 (PL_last_lop_op != OP_MAPSTART &&
5390 PL_last_lop_op != OP_GREPSTART))))
5391 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5392 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5395 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5400 PL_expect = XOPERATOR;
5403 s = SKIPSPACE2(s,nextPL_nextwhite);
5404 PL_nextwhite = nextPL_nextwhite;
5409 /* Is this a word before a => operator? */
5410 if (*s == '=' && s[1] == '>' && !pkgname) {
5412 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5413 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5414 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5418 /* If followed by a paren, it's certainly a subroutine. */
5423 while (SPACE_OR_TAB(*d))
5425 if (*d == ')' && (sv = gv_const_sv(gv))) {
5432 PL_nextwhite = PL_thiswhite;
5435 start_force(PL_curforce);
5437 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5438 PL_expect = XOPERATOR;
5441 PL_nextwhite = nextPL_nextwhite;
5442 curmad('X', PL_thistoken);
5443 PL_thistoken = newSVpvs("");
5451 /* If followed by var or block, call it a method (unless sub) */
5453 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5454 PL_last_lop = PL_oldbufptr;
5455 PL_last_lop_op = OP_METHOD;
5459 /* If followed by a bareword, see if it looks like indir obj. */
5462 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5463 && (tmp = intuit_method(s, gv, cv)))
5466 /* Not a method, so call it a subroutine (if defined) */
5469 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5470 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5471 "Ambiguous use of -%s resolved as -&%s()",
5472 PL_tokenbuf, PL_tokenbuf);
5473 /* Check for a constant sub */
5474 if ((sv = gv_const_sv(gv))) {
5476 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5477 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5478 pl_yylval.opval->op_private = 0;
5482 /* Resolve to GV now. */
5483 if (SvTYPE(gv) != SVt_PVGV) {
5484 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5485 assert (SvTYPE(gv) == SVt_PVGV);
5486 /* cv must have been some sort of placeholder, so
5487 now needs replacing with a real code reference. */
5491 op_free(pl_yylval.opval);
5492 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5493 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5494 PL_last_lop = PL_oldbufptr;
5495 PL_last_lop_op = OP_ENTERSUB;
5496 /* Is there a prototype? */
5504 const char *proto = SvPV_const((SV*)cv, protolen);
5507 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5509 while (*proto == ';')
5511 if (*proto == '&' && *s == '{') {
5513 sv_setpvs(PL_subname, "__ANON__");
5515 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5522 PL_nextwhite = PL_thiswhite;
5525 start_force(PL_curforce);
5526 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5529 PL_nextwhite = nextPL_nextwhite;
5530 curmad('X', PL_thistoken);
5531 PL_thistoken = newSVpvs("");
5538 /* Guess harder when madskills require "best effort". */
5539 if (PL_madskills && (!gv || !GvCVu(gv))) {
5540 int probable_sub = 0;
5541 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5543 else if (isALPHA(*s)) {
5547 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5548 if (!keyword(tmpbuf, tmplen, 0))
5551 while (d < PL_bufend && isSPACE(*d))
5553 if (*d == '=' && d[1] == '>')
5558 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5559 op_free(pl_yylval.opval);
5560 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5561 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5562 PL_last_lop = PL_oldbufptr;
5563 PL_last_lop_op = OP_ENTERSUB;
5564 PL_nextwhite = PL_thiswhite;
5566 start_force(PL_curforce);
5567 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5569 PL_nextwhite = nextPL_nextwhite;
5570 curmad('X', PL_thistoken);
5571 PL_thistoken = newSVpvs("");
5576 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5583 /* Call it a bare word */
5585 if (PL_hints & HINT_STRICT_SUBS)
5586 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5589 if (lastchar != '-') {
5590 if (ckWARN(WARN_RESERVED)) {
5594 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5595 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5602 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5603 && ckWARN_d(WARN_AMBIGUOUS)) {
5604 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5605 "Operator or semicolon missing before %c%s",
5606 lastchar, PL_tokenbuf);
5607 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5608 "Ambiguous use of %c resolved as operator %c",
5609 lastchar, lastchar);
5615 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5616 newSVpv(CopFILE(PL_curcop),0));
5620 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5621 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5624 case KEY___PACKAGE__:
5625 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5627 ? newSVhek(HvNAME_HEK(PL_curstash))
5634 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5635 const char *pname = "main";
5636 if (PL_tokenbuf[2] == 'D')
5637 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5638 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5642 GvIOp(gv) = newIO();
5643 IoIFP(GvIOp(gv)) = PL_rsfp;
5644 #if defined(HAS_FCNTL) && defined(F_SETFD)
5646 const int fd = PerlIO_fileno(PL_rsfp);
5647 fcntl(fd,F_SETFD,fd >= 3);
5650 /* Mark this internal pseudo-handle as clean */
5651 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5652 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5653 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5655 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5656 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5657 /* if the script was opened in binmode, we need to revert
5658 * it to text mode for compatibility; but only iff it has CRs
5659 * XXX this is a questionable hack at best. */
5660 if (PL_bufend-PL_bufptr > 2
5661 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5664 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5665 loc = PerlIO_tell(PL_rsfp);
5666 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5669 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5671 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5672 #endif /* NETWARE */
5673 #ifdef PERLIO_IS_STDIO /* really? */
5674 # if defined(__BORLANDC__)
5675 /* XXX see note in do_binmode() */
5676 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5680 PerlIO_seek(PL_rsfp, loc, 0);
5684 #ifdef PERLIO_LAYERS
5687 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5688 else if (PL_encoding) {
5695 XPUSHs(PL_encoding);
5697 call_method("name", G_SCALAR);
5701 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5702 Perl_form(aTHX_ ":encoding(%"SVf")",
5711 if (PL_realtokenstart >= 0) {
5712 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5714 PL_endwhite = newSVpvs("");
5715 sv_catsv(PL_endwhite, PL_thiswhite);
5717 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5718 PL_realtokenstart = -1;
5720 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5721 SvCUR(PL_endwhite))) != NULL) ;
5736 if (PL_expect == XSTATE) {
5743 if (*s == ':' && s[1] == ':') {
5746 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5747 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5748 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5751 else if (tmp == KEY_require || tmp == KEY_do)
5752 /* that's a way to remember we saw "CORE::" */
5765 LOP(OP_ACCEPT,XTERM);
5771 LOP(OP_ATAN2,XTERM);
5777 LOP(OP_BINMODE,XTERM);
5780 LOP(OP_BLESS,XTERM);
5789 /* When 'use switch' is in effect, continue has a dual
5790 life as a control operator. */
5792 if (!FEATURE_IS_ENABLED("switch"))
5795 /* We have to disambiguate the two senses of
5796 "continue". If the next token is a '{' then
5797 treat it as the start of a continue block;
5798 otherwise treat it as a control operator.
5810 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5827 if (!PL_cryptseen) {
5828 PL_cryptseen = TRUE;
5832 LOP(OP_CRYPT,XTERM);
5835 LOP(OP_CHMOD,XTERM);
5838 LOP(OP_CHOWN,XTERM);
5841 LOP(OP_CONNECT,XTERM);
5860 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5861 if (orig_keyword == KEY_do) {
5870 PL_hints |= HINT_BLOCK_SCOPE;
5880 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5881 LOP(OP_DBMOPEN,XTERM);
5887 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5894 pl_yylval.ival = CopLINE(PL_curcop);
5910 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5911 UNIBRACK(OP_ENTEREVAL);
5925 case KEY_endhostent:
5931 case KEY_endservent:
5934 case KEY_endprotoent:
5945 pl_yylval.ival = CopLINE(PL_curcop);
5947 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5950 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5953 if ((PL_bufend - p) >= 3 &&
5954 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5956 else if ((PL_bufend - p) >= 4 &&
5957 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5960 if (isIDFIRST_lazy_if(p,UTF)) {
5961 p = scan_ident(p, PL_bufend,
5962 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5966 Perl_croak(aTHX_ "Missing $ on loop variable");
5968 s = SvPVX(PL_linestr) + soff;
5974 LOP(OP_FORMLINE,XTERM);
5980 LOP(OP_FCNTL,XTERM);
5986 LOP(OP_FLOCK,XTERM);
5995 LOP(OP_GREPSTART, XREF);
5998 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6013 case KEY_getpriority:
6014 LOP(OP_GETPRIORITY,XTERM);
6016 case KEY_getprotobyname:
6019 case KEY_getprotobynumber:
6020 LOP(OP_GPBYNUMBER,XTERM);
6022 case KEY_getprotoent:
6034 case KEY_getpeername:
6035 UNI(OP_GETPEERNAME);
6037 case KEY_gethostbyname:
6040 case KEY_gethostbyaddr:
6041 LOP(OP_GHBYADDR,XTERM);
6043 case KEY_gethostent:
6046 case KEY_getnetbyname:
6049 case KEY_getnetbyaddr:
6050 LOP(OP_GNBYADDR,XTERM);
6055 case KEY_getservbyname:
6056 LOP(OP_GSBYNAME,XTERM);
6058 case KEY_getservbyport:
6059 LOP(OP_GSBYPORT,XTERM);
6061 case KEY_getservent:
6064 case KEY_getsockname:
6065 UNI(OP_GETSOCKNAME);
6067 case KEY_getsockopt:
6068 LOP(OP_GSOCKOPT,XTERM);
6083 pl_yylval.ival = CopLINE(PL_curcop);
6093 pl_yylval.ival = CopLINE(PL_curcop);
6097 LOP(OP_INDEX,XTERM);
6103 LOP(OP_IOCTL,XTERM);
6115 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6147 LOP(OP_LISTEN,XTERM);
6156 s = scan_pat(s,OP_MATCH);
6157 TERM(sublex_start());
6160 LOP(OP_MAPSTART, XREF);
6163 LOP(OP_MKDIR,XTERM);
6166 LOP(OP_MSGCTL,XTERM);
6169 LOP(OP_MSGGET,XTERM);
6172 LOP(OP_MSGRCV,XTERM);
6175 LOP(OP_MSGSND,XTERM);
6180 PL_in_my = (U16)tmp;
6182 if (isIDFIRST_lazy_if(s,UTF)) {
6186 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6187 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6189 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6190 if (!PL_in_my_stash) {
6193 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6197 if (PL_madskills) { /* just add type to declarator token */
6198 sv_catsv(PL_thistoken, PL_nextwhite);
6200 sv_catpvn(PL_thistoken, start, s - start);
6208 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6215 s = tokenize_use(0, s);
6219 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6226 if (isIDFIRST_lazy_if(s,UTF)) {
6228 for (d = s; isALNUM_lazy_if(d,UTF);)
6230 for (t=d; isSPACE(*t);)
6232 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6234 && !(t[0] == '=' && t[1] == '>')
6236 int parms_len = (int)(d-s);
6237 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6238 "Precedence problem: open %.*s should be open(%.*s)",
6239 parms_len, s, parms_len, s);
6245 pl_yylval.ival = OP_OR;
6255 LOP(OP_OPEN_DIR,XTERM);
6258 checkcomma(s,PL_tokenbuf,"filehandle");
6262 checkcomma(s,PL_tokenbuf,"filehandle");
6281 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6285 LOP(OP_PIPE_OP,XTERM);
6288 s = scan_str(s,!!PL_madskills,FALSE);
6291 pl_yylval.ival = OP_CONST;
6292 TERM(sublex_start());
6298 s = scan_str(s,!!PL_madskills,FALSE);
6301 PL_expect = XOPERATOR;
6303 if (SvCUR(PL_lex_stuff)) {
6306 d = SvPV_force(PL_lex_stuff, len);
6308 for (; isSPACE(*d) && len; --len, ++d)
6313 if (!warned && ckWARN(WARN_QW)) {
6314 for (; !isSPACE(*d) && len; --len, ++d) {
6316 Perl_warner(aTHX_ packWARN(WARN_QW),
6317 "Possible attempt to separate words with commas");
6320 else if (*d == '#') {
6321 Perl_warner(aTHX_ packWARN(WARN_QW),
6322 "Possible attempt to put comments in qw() list");
6328 for (; !isSPACE(*d) && len; --len, ++d)
6331 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6332 words = append_elem(OP_LIST, words,
6333 newSVOP(OP_CONST, 0, tokeq(sv)));
6337 start_force(PL_curforce);
6338 NEXTVAL_NEXTTOKE.opval = words;
6343 SvREFCNT_dec(PL_lex_stuff);
6344 PL_lex_stuff = NULL;
6350 s = scan_str(s,!!PL_madskills,FALSE);
6353 pl_yylval.ival = OP_STRINGIFY;
6354 if (SvIVX(PL_lex_stuff) == '\'')
6355 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6356 TERM(sublex_start());
6359 s = scan_pat(s,OP_QR);
6360 TERM(sublex_start());
6363 s = scan_str(s,!!PL_madskills,FALSE);
6366 readpipe_override();
6367 TERM(sublex_start());
6375 s = force_version(s, FALSE);
6377 else if (*s != 'v' || !isDIGIT(s[1])
6378 || (s = force_version(s, TRUE), *s == 'v'))
6380 *PL_tokenbuf = '\0';
6381 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6382 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6383 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6385 yyerror("<> should be quotes");
6387 if (orig_keyword == KEY_require) {
6395 PL_last_uni = PL_oldbufptr;
6396 PL_last_lop_op = OP_REQUIRE;
6398 return REPORT( (int)REQUIRE );
6404 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6408 LOP(OP_RENAME,XTERM);
6417 LOP(OP_RINDEX,XTERM);
6426 UNIDOR(OP_READLINE);
6429 UNIDOR(OP_BACKTICK);
6438 LOP(OP_REVERSE,XTERM);
6441 UNIDOR(OP_READLINK);
6448 if (pl_yylval.opval)
6449 TERM(sublex_start());
6451 TOKEN(1); /* force error */
6454 checkcomma(s,PL_tokenbuf,"filehandle");
6464 LOP(OP_SELECT,XTERM);
6470 LOP(OP_SEMCTL,XTERM);
6473 LOP(OP_SEMGET,XTERM);
6476 LOP(OP_SEMOP,XTERM);
6482 LOP(OP_SETPGRP,XTERM);
6484 case KEY_setpriority:
6485 LOP(OP_SETPRIORITY,XTERM);
6487 case KEY_sethostent:
6493 case KEY_setservent:
6496 case KEY_setprotoent:
6506 LOP(OP_SEEKDIR,XTERM);
6508 case KEY_setsockopt:
6509 LOP(OP_SSOCKOPT,XTERM);
6515 LOP(OP_SHMCTL,XTERM);
6518 LOP(OP_SHMGET,XTERM);
6521 LOP(OP_SHMREAD,XTERM);
6524 LOP(OP_SHMWRITE,XTERM);
6527 LOP(OP_SHUTDOWN,XTERM);
6536 LOP(OP_SOCKET,XTERM);
6538 case KEY_socketpair:
6539 LOP(OP_SOCKPAIR,XTERM);
6542 checkcomma(s,PL_tokenbuf,"subroutine name");
6544 if (*s == ';' || *s == ')') /* probably a close */
6545 Perl_croak(aTHX_ "sort is now a reserved word");
6547 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6551 LOP(OP_SPLIT,XTERM);
6554 LOP(OP_SPRINTF,XTERM);
6557 LOP(OP_SPLICE,XTERM);
6572 LOP(OP_SUBSTR,XTERM);
6578 char tmpbuf[sizeof PL_tokenbuf];
6579 SSize_t tboffset = 0;
6580 expectation attrful;
6581 bool have_name, have_proto;
6582 const int key = tmp;
6587 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6588 SV *subtoken = newSVpvn(tstart, s - tstart);
6592 s = SKIPSPACE2(s,tmpwhite);
6597 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6598 (*s == ':' && s[1] == ':'))
6605 attrful = XATTRBLOCK;
6606 /* remember buffer pos'n for later force_word */
6607 tboffset = s - PL_oldbufptr;
6608 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6611 nametoke = newSVpvn(s, d - s);
6613 if (memchr(tmpbuf, ':', len))
6614 sv_setpvn(PL_subname, tmpbuf, len);
6616 sv_setsv(PL_subname,PL_curstname);
6617 sv_catpvs(PL_subname,"::");
6618 sv_catpvn(PL_subname,tmpbuf,len);
6625 CURMAD('X', nametoke);
6626 CURMAD('_', tmpwhite);
6627 (void) force_word(PL_oldbufptr + tboffset, WORD,
6630 s = SKIPSPACE2(d,tmpwhite);
6637 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6638 PL_expect = XTERMBLOCK;
6639 attrful = XATTRTERM;
6640 sv_setpvn(PL_subname,"?",1);
6644 if (key == KEY_format) {
6646 PL_lex_formbrack = PL_lex_brackets + 1;
6648 PL_thistoken = subtoken;
6652 (void) force_word(PL_oldbufptr + tboffset, WORD,
6658 /* Look for a prototype */
6661 bool bad_proto = FALSE;
6662 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6664 s = scan_str(s,!!PL_madskills,FALSE);
6666 Perl_croak(aTHX_ "Prototype not terminated");
6667 /* strip spaces and check for bad characters */
6668 d = SvPVX(PL_lex_stuff);
6670 for (p = d; *p; ++p) {
6673 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6679 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6680 "Illegal character in prototype for %"SVf" : %s",
6681 SVfARG(PL_subname), d);
6682 SvCUR_set(PL_lex_stuff, tmp);
6687 CURMAD('q', PL_thisopen);
6688 CURMAD('_', tmpwhite);
6689 CURMAD('=', PL_thisstuff);
6690 CURMAD('Q', PL_thisclose);
6691 NEXTVAL_NEXTTOKE.opval =
6692 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6693 PL_lex_stuff = NULL;
6696 s = SKIPSPACE2(s,tmpwhite);
6704 if (*s == ':' && s[1] != ':')
6705 PL_expect = attrful;
6706 else if (*s != '{' && key == KEY_sub) {
6708 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6710 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6717 curmad('^', newSVpvs(""));
6718 CURMAD('_', tmpwhite);
6722 PL_thistoken = subtoken;
6725 NEXTVAL_NEXTTOKE.opval =
6726 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6727 PL_lex_stuff = NULL;
6733 sv_setpvs(PL_subname, "__ANON__");
6735 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6739 (void) force_word(PL_oldbufptr + tboffset, WORD,
6748 LOP(OP_SYSTEM,XREF);
6751 LOP(OP_SYMLINK,XTERM);
6754 LOP(OP_SYSCALL,XTERM);
6757 LOP(OP_SYSOPEN,XTERM);
6760 LOP(OP_SYSSEEK,XTERM);
6763 LOP(OP_SYSREAD,XTERM);
6766 LOP(OP_SYSWRITE,XTERM);
6770 TERM(sublex_start());
6791 LOP(OP_TRUNCATE,XTERM);
6803 pl_yylval.ival = CopLINE(PL_curcop);
6807 pl_yylval.ival = CopLINE(PL_curcop);
6811 LOP(OP_UNLINK,XTERM);
6817 LOP(OP_UNPACK,XTERM);
6820 LOP(OP_UTIME,XTERM);
6826 LOP(OP_UNSHIFT,XTERM);
6829 s = tokenize_use(1, s);
6839 pl_yylval.ival = CopLINE(PL_curcop);
6843 pl_yylval.ival = CopLINE(PL_curcop);
6847 PL_hints |= HINT_BLOCK_SCOPE;
6854 LOP(OP_WAITPID,XTERM);
6863 ctl_l[0] = toCTRL('L');
6865 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6868 /* Make sure $^L is defined */
6869 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6874 if (PL_expect == XOPERATOR)
6880 pl_yylval.ival = OP_XOR;
6885 TERM(sublex_start());
6890 #pragma segment Main
6894 S_pending_ident(pTHX)
6899 /* pit holds the identifier we read and pending_ident is reset */
6900 char pit = PL_pending_ident;
6901 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
6902 /* All routes through this function want to know if there is a colon. */
6903 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
6904 PL_pending_ident = 0;
6906 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6907 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6908 "### Pending identifier '%s'\n", PL_tokenbuf); });
6910 /* if we're in a my(), we can't allow dynamics here.
6911 $foo'bar has already been turned into $foo::bar, so
6912 just check for colons.
6914 if it's a legal name, the OP is a PADANY.
6917 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6919 yyerror(Perl_form(aTHX_ "No package name allowed for "
6920 "variable %s in \"our\"",
6922 tmp = allocmy(PL_tokenbuf);
6926 yyerror(Perl_form(aTHX_ PL_no_myglob,
6927 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6929 pl_yylval.opval = newOP(OP_PADANY, 0);
6930 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
6936 build the ops for accesses to a my() variable.
6938 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6939 then used in a comparison. This catches most, but not
6940 all cases. For instance, it catches
6941 sort { my($a); $a <=> $b }
6943 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6944 (although why you'd do that is anyone's guess).
6949 tmp = pad_findmy(PL_tokenbuf);
6950 if (tmp != NOT_IN_PAD) {
6951 /* might be an "our" variable" */
6952 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6953 /* build ops for a bareword */
6954 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6955 HEK * const stashname = HvNAME_HEK(stash);
6956 SV * const sym = newSVhek(stashname);
6957 sv_catpvs(sym, "::");
6958 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6959 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6960 pl_yylval.opval->op_private = OPpCONST_ENTERED;
6963 ? (GV_ADDMULTI | GV_ADDINEVAL)
6966 ((PL_tokenbuf[0] == '$') ? SVt_PV
6967 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6972 /* if it's a sort block and they're naming $a or $b */
6973 if (PL_last_lop_op == OP_SORT &&
6974 PL_tokenbuf[0] == '$' &&
6975 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6978 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6979 d < PL_bufend && *d != '\n';
6982 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6983 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6989 pl_yylval.opval = newOP(OP_PADANY, 0);
6990 pl_yylval.opval->op_targ = tmp;
6996 Whine if they've said @foo in a doublequoted string,
6997 and @foo isn't a variable we can find in the symbol
7000 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7001 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7003 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7004 && ckWARN(WARN_AMBIGUOUS)
7005 /* DO NOT warn for @- and @+ */
7006 && !( PL_tokenbuf[2] == '\0' &&
7007 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7010 /* Downgraded from fatal to warning 20000522 mjd */
7011 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7012 "Possible unintended interpolation of %s in string",
7017 /* build ops for a bareword */
7018 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7020 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7022 PL_tokenbuf + 1, tokenbuf_len - 1,
7023 /* If the identifier refers to a stash, don't autovivify it.
7024 * Change 24660 had the side effect of causing symbol table
7025 * hashes to always be defined, even if they were freshly
7026 * created and the only reference in the entire program was
7027 * the single statement with the defined %foo::bar:: test.
7028 * It appears that all code in the wild doing this actually
7029 * wants to know whether sub-packages have been loaded, so
7030 * by avoiding auto-vivifying symbol tables, we ensure that
7031 * defined %foo::bar:: continues to be false, and the existing
7032 * tests still give the expected answers, even though what
7033 * they're actually testing has now changed subtly.
7035 (*PL_tokenbuf == '%'
7036 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7039 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7040 ((PL_tokenbuf[0] == '$') ? SVt_PV
7041 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7047 * The following code was generated by perl_keyword.pl.
7051 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7056 case 1: /* 5 tokens of length 1 */
7088 case 2: /* 18 tokens of length 2 */
7234 case 3: /* 29 tokens of length 3 */
7238 if (name[1] == 'N' &&
7301 if (name[1] == 'i' &&
7333 if (name[1] == 'o' &&
7342 if (name[1] == 'e' &&
7351 if (name[1] == 'n' &&
7360 if (name[1] == 'o' &&
7369 if (name[1] == 'a' &&
7378 if (name[1] == 'o' &&
7440 if (name[1] == 'e' &&
7454 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7480 if (name[1] == 'i' &&
7489 if (name[1] == 's' &&
7498 if (name[1] == 'e' &&
7507 if (name[1] == 'o' &&
7519 case 4: /* 41 tokens of length 4 */
7523 if (name[1] == 'O' &&
7533 if (name[1] == 'N' &&
7543 if (name[1] == 'i' &&
7553 if (name[1] == 'h' &&
7563 if (name[1] == 'u' &&
7576 if (name[2] == 'c' &&
7585 if (name[2] == 's' &&
7594 if (name[2] == 'a' &&
7630 if (name[1] == 'o' &&
7643 if (name[2] == 't' &&
7652 if (name[2] == 'o' &&
7661 if (name[2] == 't' &&
7670 if (name[2] == 'e' &&
7683 if (name[1] == 'o' &&
7696 if (name[2] == 'y' &&
7705 if (name[2] == 'l' &&
7721 if (name[2] == 's' &&
7730 if (name[2] == 'n' &&
7739 if (name[2] == 'c' &&
7752 if (name[1] == 'e' &&
7762 if (name[1] == 'p' &&
7775 if (name[2] == 'c' &&
7784 if (name[2] == 'p' &&
7793 if (name[2] == 's' &&
7809 if (name[2] == 'n' &&
7879 if (name[2] == 'r' &&
7888 if (name[2] == 'r' &&
7897 if (name[2] == 'a' &&
7913 if (name[2] == 'l' &&
7975 if (name[2] == 'e' &&
7978 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7991 case 5: /* 39 tokens of length 5 */
7995 if (name[1] == 'E' &&
8006 if (name[1] == 'H' &&
8020 if (name[2] == 'a' &&
8030 if (name[2] == 'a' &&
8047 if (name[2] == 'e' &&
8057 if (name[2] == 'e' &&
8061 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8077 if (name[3] == 'i' &&
8086 if (name[3] == 'o' &&
8122 if (name[2] == 'o' &&
8132 if (name[2] == 'y' &&
8146 if (name[1] == 'l' &&
8160 if (name[2] == 'n' &&
8170 if (name[2] == 'o' &&
8184 if (name[1] == 'i' &&
8189 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8198 if (name[2] == 'd' &&
8208 if (name[2] == 'c' &&
8225 if (name[2] == 'c' &&
8235 if (name[2] == 't' &&
8249 if (name[1] == 'k' &&
8260 if (name[1] == 'r' &&
8274 if (name[2] == 's' &&
8284 if (name[2] == 'd' &&
8301 if (name[2] == 'm' &&
8311 if (name[2] == 'i' &&
8321 if (name[2] == 'e' &&
8331 if (name[2] == 'l' &&
8341 if (name[2] == 'a' &&
8354 if (name[3] == 't' &&
8357 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8363 if (name[3] == 'd' &&
8380 if (name[1] == 'i' &&
8394 if (name[2] == 'a' &&
8407 if (name[3] == 'e' &&
8442 if (name[2] == 'i' &&
8459 if (name[2] == 'i' &&
8469 if (name[2] == 'i' &&
8486 case 6: /* 33 tokens of length 6 */
8490 if (name[1] == 'c' &&
8505 if (name[2] == 'l' &&
8516 if (name[2] == 'r' &&
8531 if (name[1] == 'e' &&
8546 if (name[2] == 's' &&
8551 if(ckWARN_d(WARN_SYNTAX))
8552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8558 if (name[2] == 'i' &&
8576 if (name[2] == 'l' &&
8587 if (name[2] == 'r' &&
8602 if (name[1] == 'm' &&
8617 if (name[2] == 'n' &&
8628 if (name[2] == 's' &&
8643 if (name[1] == 's' &&
8649 if (name[4] == 't' &&
8658 if (name[4] == 'e' &&
8667 if (name[4] == 'c' &&
8676 if (name[4] == 'n' &&
8692 if (name[1] == 'r' &&
8710 if (name[3] == 'a' &&
8720 if (name[3] == 'u' &&
8734 if (name[2] == 'n' &&
8752 if (name[2] == 'a' &&
8766 if (name[3] == 'e' &&
8779 if (name[4] == 't' &&
8788 if (name[4] == 'e' &&
8810 if (name[4] == 't' &&
8819 if (name[4] == 'e' &&
8835 if (name[2] == 'c' &&
8846 if (name[2] == 'l' &&
8857 if (name[2] == 'b' &&
8868 if (name[2] == 's' &&
8891 if (name[4] == 's' &&
8900 if (name[4] == 'n' &&
8913 if (name[3] == 'a' &&
8930 if (name[1] == 'a' &&
8945 case 7: /* 29 tokens of length 7 */
8949 if (name[1] == 'E' &&
8962 if (name[1] == '_' &&
8975 if (name[1] == 'i' &&
8982 return -KEY_binmode;
8988 if (name[1] == 'o' &&
8995 return -KEY_connect;
9004 if (name[2] == 'm' &&
9010 return -KEY_dbmopen;
9021 if (name[4] == 'u' &&
9025 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9031 if (name[4] == 'n' &&
9052 if (name[1] == 'o' &&
9065 if (name[1] == 'e' &&
9072 if (name[5] == 'r' &&
9075 return -KEY_getpgrp;
9081 if (name[5] == 'i' &&
9084 return -KEY_getppid;
9097 if (name[1] == 'c' &&
9104 return -KEY_lcfirst;
9110 if (name[1] == 'p' &&
9117 return -KEY_opendir;
9123 if (name[1] == 'a' &&
9141 if (name[3] == 'd' &&
9146 return -KEY_readdir;
9152 if (name[3] == 'u' &&
9163 if (name[3] == 'e' &&
9168 return -KEY_reverse;
9187 if (name[3] == 'k' &&
9192 return -KEY_seekdir;
9198 if (name[3] == 'p' &&
9203 return -KEY_setpgrp;
9213 if (name[2] == 'm' &&
9219 return -KEY_shmread;
9225 if (name[2] == 'r' &&
9231 return -KEY_sprintf;
9240 if (name[3] == 'l' &&
9245 return -KEY_symlink;
9254 if (name[4] == 'a' &&
9258 return -KEY_syscall;
9264 if (name[4] == 'p' &&
9268 return -KEY_sysopen;
9274 if (name[4] == 'e' &&
9278 return -KEY_sysread;
9284 if (name[4] == 'e' &&
9288 return -KEY_sysseek;
9306 if (name[1] == 'e' &&
9313 return -KEY_telldir;
9322 if (name[2] == 'f' &&
9328 return -KEY_ucfirst;
9334 if (name[2] == 's' &&
9340 return -KEY_unshift;
9350 if (name[1] == 'a' &&
9357 return -KEY_waitpid;
9366 case 8: /* 26 tokens of length 8 */
9370 if (name[1] == 'U' &&
9378 return KEY_AUTOLOAD;
9389 if (name[3] == 'A' &&
9395 return KEY___DATA__;
9401 if (name[3] == 'I' &&
9407 return -KEY___FILE__;
9413 if (name[3] == 'I' &&
9419 return -KEY___LINE__;
9435 if (name[2] == 'o' &&
9442 return -KEY_closedir;
9448 if (name[2] == 'n' &&
9455 return -KEY_continue;
9465 if (name[1] == 'b' &&
9473 return -KEY_dbmclose;
9479 if (name[1] == 'n' &&
9485 if (name[4] == 'r' &&
9490 return -KEY_endgrent;
9496 if (name[4] == 'w' &&
9501 return -KEY_endpwent;
9514 if (name[1] == 'o' &&
9522 return -KEY_formline;
9528 if (name[1] == 'e' &&
9539 if (name[6] == 'n' &&
9542 return -KEY_getgrent;
9548 if (name[6] == 'i' &&
9551 return -KEY_getgrgid;
9557 if (name[6] == 'a' &&
9560 return -KEY_getgrnam;
9573 if (name[4] == 'o' &&
9578 return -KEY_getlogin;
9589 if (name[6] == 'n' &&
9592 return -KEY_getpwent;
9598 if (name[6] == 'a' &&
9601 return -KEY_getpwnam;
9607 if (name[6] == 'i' &&
9610 return -KEY_getpwuid;
9630 if (name[1] == 'e' &&
9637 if (name[5] == 'i' &&
9644 return -KEY_readline;
9649 return -KEY_readlink;
9660 if (name[5] == 'i' &&
9664 return -KEY_readpipe;
9685 if (name[4] == 'r' &&
9690 return -KEY_setgrent;
9696 if (name[4] == 'w' &&
9701 return -KEY_setpwent;
9717 if (name[3] == 'w' &&
9723 return -KEY_shmwrite;
9729 if (name[3] == 't' &&
9735 return -KEY_shutdown;
9745 if (name[2] == 's' &&
9752 return -KEY_syswrite;
9762 if (name[1] == 'r' &&
9770 return -KEY_truncate;
9779 case 9: /* 9 tokens of length 9 */
9783 if (name[1] == 'N' &&
9792 return KEY_UNITCHECK;
9798 if (name[1] == 'n' &&
9807 return -KEY_endnetent;
9813 if (name[1] == 'e' &&
9822 return -KEY_getnetent;
9828 if (name[1] == 'o' &&
9837 return -KEY_localtime;
9843 if (name[1] == 'r' &&
9852 return KEY_prototype;
9858 if (name[1] == 'u' &&
9867 return -KEY_quotemeta;
9873 if (name[1] == 'e' &&
9882 return -KEY_rewinddir;
9888 if (name[1] == 'e' &&
9897 return -KEY_setnetent;
9903 if (name[1] == 'a' &&
9912 return -KEY_wantarray;
9921 case 10: /* 9 tokens of length 10 */
9925 if (name[1] == 'n' &&
9931 if (name[4] == 'o' &&
9938 return -KEY_endhostent;
9944 if (name[4] == 'e' &&
9951 return -KEY_endservent;
9964 if (name[1] == 'e' &&
9970 if (name[4] == 'o' &&
9977 return -KEY_gethostent;
9986 if (name[5] == 'r' &&
9992 return -KEY_getservent;
9998 if (name[5] == 'c' &&
10004 return -KEY_getsockopt;
10024 if (name[2] == 't')
10029 if (name[4] == 'o' &&
10036 return -KEY_sethostent;
10045 if (name[5] == 'r' &&
10051 return -KEY_setservent;
10057 if (name[5] == 'c' &&
10063 return -KEY_setsockopt;
10080 if (name[2] == 'c' &&
10089 return -KEY_socketpair;
10102 case 11: /* 8 tokens of length 11 */
10106 if (name[1] == '_' &&
10116 { /* __PACKAGE__ */
10117 return -KEY___PACKAGE__;
10123 if (name[1] == 'n' &&
10133 { /* endprotoent */
10134 return -KEY_endprotoent;
10140 if (name[1] == 'e' &&
10149 if (name[5] == 'e' &&
10155 { /* getpeername */
10156 return -KEY_getpeername;
10165 if (name[6] == 'o' &&
10170 { /* getpriority */
10171 return -KEY_getpriority;
10177 if (name[6] == 't' &&
10182 { /* getprotoent */
10183 return -KEY_getprotoent;
10197 if (name[4] == 'o' &&
10204 { /* getsockname */
10205 return -KEY_getsockname;
10218 if (name[1] == 'e' &&
10226 if (name[6] == 'o' &&
10231 { /* setpriority */
10232 return -KEY_setpriority;
10238 if (name[6] == 't' &&
10243 { /* setprotoent */
10244 return -KEY_setprotoent;
10260 case 12: /* 2 tokens of length 12 */
10261 if (name[0] == 'g' &&
10273 if (name[9] == 'd' &&
10276 { /* getnetbyaddr */
10277 return -KEY_getnetbyaddr;
10283 if (name[9] == 'a' &&
10286 { /* getnetbyname */
10287 return -KEY_getnetbyname;
10299 case 13: /* 4 tokens of length 13 */
10300 if (name[0] == 'g' &&
10307 if (name[4] == 'o' &&
10316 if (name[10] == 'd' &&
10319 { /* gethostbyaddr */
10320 return -KEY_gethostbyaddr;
10326 if (name[10] == 'a' &&
10329 { /* gethostbyname */
10330 return -KEY_gethostbyname;
10343 if (name[4] == 'e' &&
10352 if (name[10] == 'a' &&
10355 { /* getservbyname */
10356 return -KEY_getservbyname;
10362 if (name[10] == 'o' &&
10365 { /* getservbyport */
10366 return -KEY_getservbyport;
10385 case 14: /* 1 tokens of length 14 */
10386 if (name[0] == 'g' &&
10400 { /* getprotobyname */
10401 return -KEY_getprotobyname;
10406 case 16: /* 1 tokens of length 16 */
10407 if (name[0] == 'g' &&
10423 { /* getprotobynumber */
10424 return -KEY_getprotobynumber;
10438 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10442 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10443 if (ckWARN(WARN_SYNTAX)) {
10446 for (w = s+2; *w && level; w++) {
10449 else if (*w == ')')
10452 while (isSPACE(*w))
10454 /* the list of chars below is for end of statements or
10455 * block / parens, boolean operators (&&, ||, //) and branch
10456 * constructs (or, and, if, until, unless, while, err, for).
10457 * Not a very solid hack... */
10458 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10459 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10460 "%s (...) interpreted as function",name);
10463 while (s < PL_bufend && isSPACE(*s))
10467 while (s < PL_bufend && isSPACE(*s))
10469 if (isIDFIRST_lazy_if(s,UTF)) {
10470 const char * const w = s++;
10471 while (isALNUM_lazy_if(s,UTF))
10473 while (s < PL_bufend && isSPACE(*s))
10477 if (keyword(w, s - w, 0))
10480 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10481 if (gv && GvCVu(gv))
10483 Perl_croak(aTHX_ "No comma allowed after %s", what);
10488 /* Either returns sv, or mortalizes sv and returns a new SV*.
10489 Best used as sv=new_constant(..., sv, ...).
10490 If s, pv are NULL, calls subroutine with one argument,
10491 and type is used with error messages only. */
10494 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10495 SV *sv, SV *pv, const char *type, STRLEN typelen)
10498 HV * const table = GvHV(PL_hintgv); /* ^H */
10502 const char *why1 = "", *why2 = "", *why3 = "";
10504 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10507 why2 = (const char *)
10508 (strEQ(key,"charnames")
10509 ? "(possibly a missing \"use charnames ...\")"
10511 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10512 (type ? type: "undef"), why2);
10514 /* This is convoluted and evil ("goto considered harmful")
10515 * but I do not understand the intricacies of all the different
10516 * failure modes of %^H in here. The goal here is to make
10517 * the most probable error message user-friendly. --jhi */
10522 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10523 (type ? type: "undef"), why1, why2, why3);
10525 yyerror(SvPVX_const(msg));
10529 cvp = hv_fetch(table, key, keylen, FALSE);
10530 if (!cvp || !SvOK(*cvp)) {
10533 why3 = "} is not defined";
10536 sv_2mortal(sv); /* Parent created it permanently */
10539 pv = newSVpvn_flags(s, len, SVs_TEMP);
10541 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10543 typesv = &PL_sv_undef;
10545 PUSHSTACKi(PERLSI_OVERLOAD);
10557 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10561 /* Check the eval first */
10562 if (!PL_in_eval && SvTRUE(ERRSV)) {
10563 sv_catpvs(ERRSV, "Propagated");
10564 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10566 res = SvREFCNT_inc_simple(sv);
10570 SvREFCNT_inc_simple_void(res);
10579 why1 = "Call to &{$^H{";
10581 why3 = "}} did not return a defined value";
10589 /* Returns a NUL terminated string, with the length of the string written to
10593 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10596 register char *d = dest;
10597 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10600 Perl_croak(aTHX_ ident_too_long);
10601 if (isALNUM(*s)) /* UTF handled below */
10603 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10608 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10612 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10613 char *t = s + UTF8SKIP(s);
10615 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10619 Perl_croak(aTHX_ ident_too_long);
10620 Copy(s, d, len, char);
10633 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10636 char *bracket = NULL;
10638 register char *d = dest;
10639 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10644 while (isDIGIT(*s)) {
10646 Perl_croak(aTHX_ ident_too_long);
10653 Perl_croak(aTHX_ ident_too_long);
10654 if (isALNUM(*s)) /* UTF handled below */
10656 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10661 else if (*s == ':' && s[1] == ':') {
10665 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10666 char *t = s + UTF8SKIP(s);
10667 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10669 if (d + (t - s) > e)
10670 Perl_croak(aTHX_ ident_too_long);
10671 Copy(s, d, t - s, char);
10682 if (PL_lex_state != LEX_NORMAL)
10683 PL_lex_state = LEX_INTERPENDMAYBE;
10686 if (*s == '$' && s[1] &&
10687 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10700 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10705 if (isSPACE(s[-1])) {
10707 const char ch = *s++;
10708 if (!SPACE_OR_TAB(ch)) {
10714 if (isIDFIRST_lazy_if(d,UTF)) {
10718 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10719 end += UTF8SKIP(end);
10720 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10721 end += UTF8SKIP(end);
10723 Copy(s, d, end - s, char);
10728 while ((isALNUM(*s) || *s == ':') && d < e)
10731 Perl_croak(aTHX_ ident_too_long);
10734 while (s < send && SPACE_OR_TAB(*s))
10736 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10737 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10738 const char * const brack =
10740 ((*s == '[') ? "[...]" : "{...}");
10741 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10742 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10743 funny, dest, brack, funny, dest, brack);
10746 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10750 /* Handle extended ${^Foo} variables
10751 * 1999-02-27 mjd-perl-patch@plover.com */
10752 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10756 while (isALNUM(*s) && d < e) {
10760 Perl_croak(aTHX_ ident_too_long);
10765 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10766 PL_lex_state = LEX_INTERPEND;
10769 if (PL_lex_state == LEX_NORMAL) {
10770 if (ckWARN(WARN_AMBIGUOUS) &&
10771 (keyword(dest, d - dest, 0)
10772 || get_cvn_flags(dest, d - dest, 0)))
10776 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10777 "Ambiguous use of %c{%s} resolved to %c%s",
10778 funny, dest, funny, dest);
10783 s = bracket; /* let the parser handle it */
10787 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10788 PL_lex_state = LEX_INTERPEND;
10793 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10795 PERL_UNUSED_CONTEXT;
10799 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10800 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10801 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10802 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10803 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10809 S_scan_pat(pTHX_ char *start, I32 type)
10813 char *s = scan_str(start,!!PL_madskills,FALSE);
10814 const char * const valid_flags =
10815 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10822 const char * const delimiter = skipspace(start);
10826 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10827 : "Search pattern not terminated" ));
10830 pm = (PMOP*)newPMOP(type, 0);
10831 if (PL_multi_open == '?') {
10832 /* This is the only point in the code that sets PMf_ONCE: */
10833 pm->op_pmflags |= PMf_ONCE;
10835 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10836 allows us to restrict the list needed by reset to just the ??
10838 assert(type != OP_TRANS);
10840 MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10843 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10846 elements = mg->mg_len / sizeof(PMOP**);
10847 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10848 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10849 mg->mg_len = elements * sizeof(PMOP**);
10850 PmopSTASH_set(pm,PL_curstash);
10856 while (*s && strchr(valid_flags, *s))
10857 pmflag(&pm->op_pmflags,*s++);
10859 if (PL_madskills && modstart != s) {
10860 SV* tmptoken = newSVpvn(modstart, s - modstart);
10861 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10864 /* issue a warning if /c is specified,but /g is not */
10865 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10866 && ckWARN(WARN_REGEXP))
10868 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10869 "Use of /c modifier is meaningless without /g" );
10872 PL_lex_op = (OP*)pm;
10873 pl_yylval.ival = OP_MATCH;
10878 S_scan_subst(pTHX_ char *start)
10889 pl_yylval.ival = OP_NULL;
10891 s = scan_str(start,!!PL_madskills,FALSE);
10894 Perl_croak(aTHX_ "Substitution pattern not terminated");
10896 if (s[-1] == PL_multi_open)
10899 if (PL_madskills) {
10900 CURMAD('q', PL_thisopen);
10901 CURMAD('_', PL_thiswhite);
10902 CURMAD('E', PL_thisstuff);
10903 CURMAD('Q', PL_thisclose);
10904 PL_realtokenstart = s - SvPVX(PL_linestr);
10908 first_start = PL_multi_start;
10909 s = scan_str(s,!!PL_madskills,FALSE);
10911 if (PL_lex_stuff) {
10912 SvREFCNT_dec(PL_lex_stuff);
10913 PL_lex_stuff = NULL;
10915 Perl_croak(aTHX_ "Substitution replacement not terminated");
10917 PL_multi_start = first_start; /* so whole substitution is taken together */
10919 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10922 if (PL_madskills) {
10923 CURMAD('z', PL_thisopen);
10924 CURMAD('R', PL_thisstuff);
10925 CURMAD('Z', PL_thisclose);
10931 if (*s == EXEC_PAT_MOD) {
10935 else if (strchr(S_PAT_MODS, *s))
10936 pmflag(&pm->op_pmflags,*s++);
10942 if (PL_madskills) {
10944 curmad('m', newSVpvn(modstart, s - modstart));
10945 append_madprops(PL_thismad, (OP*)pm, 0);
10949 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10950 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10954 SV * const repl = newSVpvs("");
10956 PL_sublex_info.super_bufptr = s;
10957 PL_sublex_info.super_bufend = PL_bufend;
10959 pm->op_pmflags |= PMf_EVAL;
10962 sv_catpvs(repl, "eval ");
10964 sv_catpvs(repl, "do ");
10966 sv_catpvs(repl, "{");
10967 sv_catsv(repl, PL_lex_repl);
10968 if (strchr(SvPVX(PL_lex_repl), '#'))
10969 sv_catpvs(repl, "\n");
10970 sv_catpvs(repl, "}");
10972 SvREFCNT_dec(PL_lex_repl);
10973 PL_lex_repl = repl;
10976 PL_lex_op = (OP*)pm;
10977 pl_yylval.ival = OP_SUBST;
10982 S_scan_trans(pTHX_ char *start)
10995 pl_yylval.ival = OP_NULL;
10997 s = scan_str(start,!!PL_madskills,FALSE);
10999 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11001 if (s[-1] == PL_multi_open)
11004 if (PL_madskills) {
11005 CURMAD('q', PL_thisopen);
11006 CURMAD('_', PL_thiswhite);
11007 CURMAD('E', PL_thisstuff);
11008 CURMAD('Q', PL_thisclose);
11009 PL_realtokenstart = s - SvPVX(PL_linestr);
11013 s = scan_str(s,!!PL_madskills,FALSE);
11015 if (PL_lex_stuff) {
11016 SvREFCNT_dec(PL_lex_stuff);
11017 PL_lex_stuff = NULL;
11019 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11021 if (PL_madskills) {
11022 CURMAD('z', PL_thisopen);
11023 CURMAD('R', PL_thisstuff);
11024 CURMAD('Z', PL_thisclose);
11027 complement = del = squash = 0;
11034 complement = OPpTRANS_COMPLEMENT;
11037 del = OPpTRANS_DELETE;
11040 squash = OPpTRANS_SQUASH;
11049 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11050 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11051 o->op_private &= ~OPpTRANS_ALL;
11052 o->op_private |= del|squash|complement|
11053 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11054 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11057 pl_yylval.ival = OP_TRANS;
11060 if (PL_madskills) {
11062 curmad('m', newSVpvn(modstart, s - modstart));
11063 append_madprops(PL_thismad, o, 0);
11072 S_scan_heredoc(pTHX_ register char *s)
11076 I32 op_type = OP_SCALAR;
11080 const char *found_newline;
11084 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11086 I32 stuffstart = s - SvPVX(PL_linestr);
11089 PL_realtokenstart = -1;
11094 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11098 while (SPACE_OR_TAB(*peek))
11100 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11103 s = delimcpy(d, e, s, PL_bufend, term, &len);
11113 if (!isALNUM_lazy_if(s,UTF))
11114 deprecate_old("bare << to mean <<\"\"");
11115 for (; isALNUM_lazy_if(s,UTF); s++) {
11120 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11121 Perl_croak(aTHX_ "Delimiter for here document is too long");
11124 len = d - PL_tokenbuf;
11127 if (PL_madskills) {
11128 tstart = PL_tokenbuf + !outer;
11129 PL_thisclose = newSVpvn(tstart, len - !outer);
11130 tstart = SvPVX(PL_linestr) + stuffstart;
11131 PL_thisopen = newSVpvn(tstart, s - tstart);
11132 stuffstart = s - SvPVX(PL_linestr);
11135 #ifndef PERL_STRICT_CR
11136 d = strchr(s, '\r');
11138 char * const olds = s;
11140 while (s < PL_bufend) {
11146 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11155 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11162 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11163 herewas = newSVpvn(s,PL_bufend-s);
11167 herewas = newSVpvn(s-1,found_newline-s+1);
11170 herewas = newSVpvn(s,found_newline-s);
11174 if (PL_madskills) {
11175 tstart = SvPVX(PL_linestr) + stuffstart;
11177 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11179 PL_thisstuff = newSVpvn(tstart, s - tstart);
11182 s += SvCUR(herewas);
11185 stuffstart = s - SvPVX(PL_linestr);
11191 tmpstr = newSV_type(SVt_PVIV);
11192 SvGROW(tmpstr, 80);
11193 if (term == '\'') {
11194 op_type = OP_CONST;
11195 SvIV_set(tmpstr, -1);
11197 else if (term == '`') {
11198 op_type = OP_BACKTICK;
11199 SvIV_set(tmpstr, '\\');
11203 PL_multi_start = CopLINE(PL_curcop);
11204 PL_multi_open = PL_multi_close = '<';
11205 term = *PL_tokenbuf;
11206 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11207 char * const bufptr = PL_sublex_info.super_bufptr;
11208 char * const bufend = PL_sublex_info.super_bufend;
11209 char * const olds = s - SvCUR(herewas);
11210 s = strchr(bufptr, '\n');
11214 while (s < bufend &&
11215 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11217 CopLINE_inc(PL_curcop);
11220 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11221 missingterm(PL_tokenbuf);
11223 sv_setpvn(herewas,bufptr,d-bufptr+1);
11224 sv_setpvn(tmpstr,d+1,s-d);
11226 sv_catpvn(herewas,s,bufend-s);
11227 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11234 while (s < PL_bufend &&
11235 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11237 CopLINE_inc(PL_curcop);
11239 if (s >= PL_bufend) {
11240 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11241 missingterm(PL_tokenbuf);
11243 sv_setpvn(tmpstr,d+1,s-d);
11245 if (PL_madskills) {
11247 sv_catpvn(PL_thisstuff, d + 1, s - d);
11249 PL_thisstuff = newSVpvn(d + 1, s - d);
11250 stuffstart = s - SvPVX(PL_linestr);
11254 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11256 sv_catpvn(herewas,s,PL_bufend-s);
11257 sv_setsv(PL_linestr,herewas);
11258 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11259 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11260 PL_last_lop = PL_last_uni = NULL;
11263 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11264 while (s >= PL_bufend) { /* multiple line string? */
11266 if (PL_madskills) {
11267 tstart = SvPVX(PL_linestr) + stuffstart;
11269 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11271 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11275 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11276 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11277 missingterm(PL_tokenbuf);
11280 stuffstart = s - SvPVX(PL_linestr);
11282 CopLINE_inc(PL_curcop);
11283 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11284 PL_last_lop = PL_last_uni = NULL;
11285 #ifndef PERL_STRICT_CR
11286 if (PL_bufend - PL_linestart >= 2) {
11287 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11288 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11290 PL_bufend[-2] = '\n';
11292 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11294 else if (PL_bufend[-1] == '\r')
11295 PL_bufend[-1] = '\n';
11297 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11298 PL_bufend[-1] = '\n';
11300 if (PERLDB_LINE && PL_curstash != PL_debstash)
11301 update_debugger_info(PL_linestr, NULL, 0);
11302 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11303 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11304 *(SvPVX(PL_linestr) + off ) = ' ';
11305 sv_catsv(PL_linestr,herewas);
11306 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11307 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11311 sv_catsv(tmpstr,PL_linestr);
11316 PL_multi_end = CopLINE(PL_curcop);
11317 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11318 SvPV_shrink_to_cur(tmpstr);
11320 SvREFCNT_dec(herewas);
11322 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11324 else if (PL_encoding)
11325 sv_recode_to_utf8(tmpstr, PL_encoding);
11327 PL_lex_stuff = tmpstr;
11328 pl_yylval.ival = op_type;
11332 /* scan_inputsymbol
11333 takes: current position in input buffer
11334 returns: new position in input buffer
11335 side-effects: pl_yylval and lex_op are set.
11340 <FH> read from filehandle
11341 <pkg::FH> read from package qualified filehandle
11342 <pkg'FH> read from package qualified filehandle
11343 <$fh> read from filehandle in $fh
11344 <*.h> filename glob
11349 S_scan_inputsymbol(pTHX_ char *start)
11352 register char *s = start; /* current position in buffer */
11356 char *d = PL_tokenbuf; /* start of temp holding space */
11357 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11359 end = strchr(s, '\n');
11362 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11364 /* die if we didn't have space for the contents of the <>,
11365 or if it didn't end, or if we see a newline
11368 if (len >= (I32)sizeof PL_tokenbuf)
11369 Perl_croak(aTHX_ "Excessively long <> operator");
11371 Perl_croak(aTHX_ "Unterminated <> operator");
11376 Remember, only scalar variables are interpreted as filehandles by
11377 this code. Anything more complex (e.g., <$fh{$num}>) will be
11378 treated as a glob() call.
11379 This code makes use of the fact that except for the $ at the front,
11380 a scalar variable and a filehandle look the same.
11382 if (*d == '$' && d[1]) d++;
11384 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11385 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11388 /* If we've tried to read what we allow filehandles to look like, and
11389 there's still text left, then it must be a glob() and not a getline.
11390 Use scan_str to pull out the stuff between the <> and treat it
11391 as nothing more than a string.
11394 if (d - PL_tokenbuf != len) {
11395 pl_yylval.ival = OP_GLOB;
11396 s = scan_str(start,!!PL_madskills,FALSE);
11398 Perl_croak(aTHX_ "Glob not terminated");
11402 bool readline_overriden = FALSE;
11405 /* we're in a filehandle read situation */
11408 /* turn <> into <ARGV> */
11410 Copy("ARGV",d,5,char);
11412 /* Check whether readline() is overriden */
11413 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11415 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11417 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11418 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11419 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11420 readline_overriden = TRUE;
11422 /* if <$fh>, create the ops to turn the variable into a
11426 /* try to find it in the pad for this block, otherwise find
11427 add symbol table ops
11429 const PADOFFSET tmp = pad_findmy(d);
11430 if (tmp != NOT_IN_PAD) {
11431 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11432 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11433 HEK * const stashname = HvNAME_HEK(stash);
11434 SV * const sym = sv_2mortal(newSVhek(stashname));
11435 sv_catpvs(sym, "::");
11436 sv_catpv(sym, d+1);
11441 OP * const o = newOP(OP_PADSV, 0);
11443 PL_lex_op = readline_overriden
11444 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11445 append_elem(OP_LIST, o,
11446 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11447 : (OP*)newUNOP(OP_READLINE, 0, o);
11456 ? (GV_ADDMULTI | GV_ADDINEVAL)
11459 PL_lex_op = readline_overriden
11460 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11461 append_elem(OP_LIST,
11462 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11463 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11464 : (OP*)newUNOP(OP_READLINE, 0,
11465 newUNOP(OP_RV2SV, 0,
11466 newGVOP(OP_GV, 0, gv)));
11468 if (!readline_overriden)
11469 PL_lex_op->op_flags |= OPf_SPECIAL;
11470 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11471 pl_yylval.ival = OP_NULL;
11474 /* If it's none of the above, it must be a literal filehandle
11475 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11477 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11478 PL_lex_op = readline_overriden
11479 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11480 append_elem(OP_LIST,
11481 newGVOP(OP_GV, 0, gv),
11482 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11483 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11484 pl_yylval.ival = OP_NULL;
11493 takes: start position in buffer
11494 keep_quoted preserve \ on the embedded delimiter(s)
11495 keep_delims preserve the delimiters around the string
11496 returns: position to continue reading from buffer
11497 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11498 updates the read buffer.
11500 This subroutine pulls a string out of the input. It is called for:
11501 q single quotes q(literal text)
11502 ' single quotes 'literal text'
11503 qq double quotes qq(interpolate $here please)
11504 " double quotes "interpolate $here please"
11505 qx backticks qx(/bin/ls -l)
11506 ` backticks `/bin/ls -l`
11507 qw quote words @EXPORT_OK = qw( func() $spam )
11508 m// regexp match m/this/
11509 s/// regexp substitute s/this/that/
11510 tr/// string transliterate tr/this/that/
11511 y/// string transliterate y/this/that/
11512 ($*@) sub prototypes sub foo ($)
11513 (stuff) sub attr parameters sub foo : attr(stuff)
11514 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11516 In most of these cases (all but <>, patterns and transliterate)
11517 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11518 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11519 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11522 It skips whitespace before the string starts, and treats the first
11523 character as the delimiter. If the delimiter is one of ([{< then
11524 the corresponding "close" character )]}> is used as the closing
11525 delimiter. It allows quoting of delimiters, and if the string has
11526 balanced delimiters ([{<>}]) it allows nesting.
11528 On success, the SV with the resulting string is put into lex_stuff or,
11529 if that is already non-NULL, into lex_repl. The second case occurs only
11530 when parsing the RHS of the special constructs s/// and tr/// (y///).
11531 For convenience, the terminating delimiter character is stuffed into
11536 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11539 SV *sv; /* scalar value: string */
11540 const char *tmps; /* temp string, used for delimiter matching */
11541 register char *s = start; /* current position in the buffer */
11542 register char term; /* terminating character */
11543 register char *to; /* current position in the sv's data */
11544 I32 brackets = 1; /* bracket nesting level */
11545 bool has_utf8 = FALSE; /* is there any utf8 content? */
11546 I32 termcode; /* terminating char. code */
11547 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11548 STRLEN termlen; /* length of terminating string */
11549 int last_off = 0; /* last position for nesting bracket */
11555 /* skip space before the delimiter */
11561 if (PL_realtokenstart >= 0) {
11562 stuffstart = PL_realtokenstart;
11563 PL_realtokenstart = -1;
11566 stuffstart = start - SvPVX(PL_linestr);
11568 /* mark where we are, in case we need to report errors */
11571 /* after skipping whitespace, the next character is the terminator */
11574 termcode = termstr[0] = term;
11578 termcode = utf8_to_uvchr((U8*)s, &termlen);
11579 Copy(s, termstr, termlen, U8);
11580 if (!UTF8_IS_INVARIANT(term))
11584 /* mark where we are */
11585 PL_multi_start = CopLINE(PL_curcop);
11586 PL_multi_open = term;
11588 /* find corresponding closing delimiter */
11589 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11590 termcode = termstr[0] = term = tmps[5];
11592 PL_multi_close = term;
11594 /* create a new SV to hold the contents. 79 is the SV's initial length.
11595 What a random number. */
11596 sv = newSV_type(SVt_PVIV);
11598 SvIV_set(sv, termcode);
11599 (void)SvPOK_only(sv); /* validate pointer */
11601 /* move past delimiter and try to read a complete string */
11603 sv_catpvn(sv, s, termlen);
11606 tstart = SvPVX(PL_linestr) + stuffstart;
11607 if (!PL_thisopen && !keep_delims) {
11608 PL_thisopen = newSVpvn(tstart, s - tstart);
11609 stuffstart = s - SvPVX(PL_linestr);
11613 if (PL_encoding && !UTF) {
11617 int offset = s - SvPVX_const(PL_linestr);
11618 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11619 &offset, (char*)termstr, termlen);
11620 const char * const ns = SvPVX_const(PL_linestr) + offset;
11621 char * const svlast = SvEND(sv) - 1;
11623 for (; s < ns; s++) {
11624 if (*s == '\n' && !PL_rsfp)
11625 CopLINE_inc(PL_curcop);
11628 goto read_more_line;
11630 /* handle quoted delimiters */
11631 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11633 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11635 if ((svlast-1 - t) % 2) {
11636 if (!keep_quoted) {
11637 *(svlast-1) = term;
11639 SvCUR_set(sv, SvCUR(sv) - 1);
11644 if (PL_multi_open == PL_multi_close) {
11650 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11651 /* At here, all closes are "was quoted" one,
11652 so we don't check PL_multi_close. */
11654 if (!keep_quoted && *(t+1) == PL_multi_open)
11659 else if (*t == PL_multi_open)
11667 SvCUR_set(sv, w - SvPVX_const(sv));
11669 last_off = w - SvPVX(sv);
11670 if (--brackets <= 0)
11675 if (!keep_delims) {
11676 SvCUR_set(sv, SvCUR(sv) - 1);
11682 /* extend sv if need be */
11683 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11684 /* set 'to' to the next character in the sv's string */
11685 to = SvPVX(sv)+SvCUR(sv);
11687 /* if open delimiter is the close delimiter read unbridle */
11688 if (PL_multi_open == PL_multi_close) {
11689 for (; s < PL_bufend; s++,to++) {
11690 /* embedded newlines increment the current line number */
11691 if (*s == '\n' && !PL_rsfp)
11692 CopLINE_inc(PL_curcop);
11693 /* handle quoted delimiters */
11694 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11695 if (!keep_quoted && s[1] == term)
11697 /* any other quotes are simply copied straight through */
11701 /* terminate when run out of buffer (the for() condition), or
11702 have found the terminator */
11703 else if (*s == term) {
11706 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11709 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11715 /* if the terminator isn't the same as the start character (e.g.,
11716 matched brackets), we have to allow more in the quoting, and
11717 be prepared for nested brackets.
11720 /* read until we run out of string, or we find the terminator */
11721 for (; s < PL_bufend; s++,to++) {
11722 /* embedded newlines increment the line count */
11723 if (*s == '\n' && !PL_rsfp)
11724 CopLINE_inc(PL_curcop);
11725 /* backslashes can escape the open or closing characters */
11726 if (*s == '\\' && s+1 < PL_bufend) {
11727 if (!keep_quoted &&
11728 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11733 /* allow nested opens and closes */
11734 else if (*s == PL_multi_close && --brackets <= 0)
11736 else if (*s == PL_multi_open)
11738 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11743 /* terminate the copied string and update the sv's end-of-string */
11745 SvCUR_set(sv, to - SvPVX_const(sv));
11748 * this next chunk reads more into the buffer if we're not done yet
11752 break; /* handle case where we are done yet :-) */
11754 #ifndef PERL_STRICT_CR
11755 if (to - SvPVX_const(sv) >= 2) {
11756 if ((to[-2] == '\r' && to[-1] == '\n') ||
11757 (to[-2] == '\n' && to[-1] == '\r'))
11761 SvCUR_set(sv, to - SvPVX_const(sv));
11763 else if (to[-1] == '\r')
11766 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11771 /* if we're out of file, or a read fails, bail and reset the current
11772 line marker so we can report where the unterminated string began
11775 if (PL_madskills) {
11776 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11778 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11780 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11784 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11786 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11792 /* we read a line, so increment our line counter */
11793 CopLINE_inc(PL_curcop);
11795 /* update debugger info */
11796 if (PERLDB_LINE && PL_curstash != PL_debstash)
11797 update_debugger_info(PL_linestr, NULL, 0);
11799 /* having changed the buffer, we must update PL_bufend */
11800 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11801 PL_last_lop = PL_last_uni = NULL;
11804 /* at this point, we have successfully read the delimited string */
11806 if (!PL_encoding || UTF) {
11808 if (PL_madskills) {
11809 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11810 const int len = s - tstart;
11812 sv_catpvn(PL_thisstuff, tstart, len);
11814 PL_thisstuff = newSVpvn(tstart, len);
11815 if (!PL_thisclose && !keep_delims)
11816 PL_thisclose = newSVpvn(s,termlen);
11821 sv_catpvn(sv, s, termlen);
11826 if (PL_madskills) {
11827 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11828 const int len = s - tstart - termlen;
11830 sv_catpvn(PL_thisstuff, tstart, len);
11832 PL_thisstuff = newSVpvn(tstart, len);
11833 if (!PL_thisclose && !keep_delims)
11834 PL_thisclose = newSVpvn(s - termlen,termlen);
11838 if (has_utf8 || PL_encoding)
11841 PL_multi_end = CopLINE(PL_curcop);
11843 /* if we allocated too much space, give some back */
11844 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11845 SvLEN_set(sv, SvCUR(sv) + 1);
11846 SvPV_renew(sv, SvLEN(sv));
11849 /* decide whether this is the first or second quoted string we've read
11862 takes: pointer to position in buffer
11863 returns: pointer to new position in buffer
11864 side-effects: builds ops for the constant in pl_yylval.op
11866 Read a number in any of the formats that Perl accepts:
11868 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11869 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11872 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11874 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11877 If it reads a number without a decimal point or an exponent, it will
11878 try converting the number to an integer and see if it can do so
11879 without loss of precision.
11883 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11886 register const char *s = start; /* current position in buffer */
11887 register char *d; /* destination in temp buffer */
11888 register char *e; /* end of temp buffer */
11889 NV nv; /* number read, as a double */
11890 SV *sv = NULL; /* place to put the converted number */
11891 bool floatit; /* boolean: int or float? */
11892 const char *lastub = NULL; /* position of last underbar */
11893 static char const number_too_long[] = "Number too long";
11895 /* We use the first character to decide what type of number this is */
11899 Perl_croak(aTHX_ "panic: scan_num");
11901 /* if it starts with a 0, it could be an octal number, a decimal in
11902 0.13 disguise, or a hexadecimal number, or a binary number. */
11906 u holds the "number so far"
11907 shift the power of 2 of the base
11908 (hex == 4, octal == 3, binary == 1)
11909 overflowed was the number more than we can hold?
11911 Shift is used when we add a digit. It also serves as an "are
11912 we in octal/hex/binary?" indicator to disallow hex characters
11913 when in octal mode.
11918 bool overflowed = FALSE;
11919 bool just_zero = TRUE; /* just plain 0 or binary number? */
11920 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11921 static const char* const bases[5] =
11922 { "", "binary", "", "octal", "hexadecimal" };
11923 static const char* const Bases[5] =
11924 { "", "Binary", "", "Octal", "Hexadecimal" };
11925 static const char* const maxima[5] =
11927 "0b11111111111111111111111111111111",
11931 const char *base, *Base, *max;
11933 /* check for hex */
11938 } else if (s[1] == 'b') {
11943 /* check for a decimal in disguise */
11944 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11946 /* so it must be octal */
11953 if (ckWARN(WARN_SYNTAX))
11954 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11955 "Misplaced _ in number");
11959 base = bases[shift];
11960 Base = Bases[shift];
11961 max = maxima[shift];
11963 /* read the rest of the number */
11965 /* x is used in the overflow test,
11966 b is the digit we're adding on. */
11971 /* if we don't mention it, we're done */
11975 /* _ are ignored -- but warned about if consecutive */
11977 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11978 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11979 "Misplaced _ in number");
11983 /* 8 and 9 are not octal */
11984 case '8': case '9':
11986 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11990 case '2': case '3': case '4':
11991 case '5': case '6': case '7':
11993 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11996 case '0': case '1':
11997 b = *s++ & 15; /* ASCII digit -> value of digit */
12001 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12002 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12003 /* make sure they said 0x */
12006 b = (*s++ & 7) + 9;
12008 /* Prepare to put the digit we have onto the end
12009 of the number so far. We check for overflows.
12015 x = u << shift; /* make room for the digit */
12017 if ((x >> shift) != u
12018 && !(PL_hints & HINT_NEW_BINARY)) {
12021 if (ckWARN_d(WARN_OVERFLOW))
12022 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12023 "Integer overflow in %s number",
12026 u = x | b; /* add the digit to the end */
12029 n *= nvshift[shift];
12030 /* If an NV has not enough bits in its
12031 * mantissa to represent an UV this summing of
12032 * small low-order numbers is a waste of time
12033 * (because the NV cannot preserve the
12034 * low-order bits anyway): we could just
12035 * remember when did we overflow and in the
12036 * end just multiply n by the right
12044 /* if we get here, we had success: make a scalar value from
12049 /* final misplaced underbar check */
12050 if (s[-1] == '_') {
12051 if (ckWARN(WARN_SYNTAX))
12052 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12057 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12058 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12059 "%s number > %s non-portable",
12065 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12066 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12067 "%s number > %s non-portable",
12072 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12073 sv = new_constant(start, s - start, "integer",
12074 sv, NULL, NULL, 0);
12075 else if (PL_hints & HINT_NEW_BINARY)
12076 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12081 handle decimal numbers.
12082 we're also sent here when we read a 0 as the first digit
12084 case '1': case '2': case '3': case '4': case '5':
12085 case '6': case '7': case '8': case '9': case '.':
12088 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12091 /* read next group of digits and _ and copy into d */
12092 while (isDIGIT(*s) || *s == '_') {
12093 /* skip underscores, checking for misplaced ones
12097 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12098 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12099 "Misplaced _ in number");
12103 /* check for end of fixed-length buffer */
12105 Perl_croak(aTHX_ number_too_long);
12106 /* if we're ok, copy the character */
12111 /* final misplaced underbar check */
12112 if (lastub && s == lastub + 1) {
12113 if (ckWARN(WARN_SYNTAX))
12114 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12117 /* read a decimal portion if there is one. avoid
12118 3..5 being interpreted as the number 3. followed
12121 if (*s == '.' && s[1] != '.') {
12126 if (ckWARN(WARN_SYNTAX))
12127 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12128 "Misplaced _ in number");
12132 /* copy, ignoring underbars, until we run out of digits.
12134 for (; isDIGIT(*s) || *s == '_'; s++) {
12135 /* fixed length buffer check */
12137 Perl_croak(aTHX_ number_too_long);
12139 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12140 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12141 "Misplaced _ in number");
12147 /* fractional part ending in underbar? */
12148 if (s[-1] == '_') {
12149 if (ckWARN(WARN_SYNTAX))
12150 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12151 "Misplaced _ in number");
12153 if (*s == '.' && isDIGIT(s[1])) {
12154 /* oops, it's really a v-string, but without the "v" */
12160 /* read exponent part, if present */
12161 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12165 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12166 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12168 /* stray preinitial _ */
12170 if (ckWARN(WARN_SYNTAX))
12171 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12172 "Misplaced _ in number");
12176 /* allow positive or negative exponent */
12177 if (*s == '+' || *s == '-')
12180 /* stray initial _ */
12182 if (ckWARN(WARN_SYNTAX))
12183 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12184 "Misplaced _ in number");
12188 /* read digits of exponent */
12189 while (isDIGIT(*s) || *s == '_') {
12192 Perl_croak(aTHX_ number_too_long);
12196 if (((lastub && s == lastub + 1) ||
12197 (!isDIGIT(s[1]) && s[1] != '_'))
12198 && ckWARN(WARN_SYNTAX))
12199 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12200 "Misplaced _ in number");
12207 /* make an sv from the string */
12211 We try to do an integer conversion first if no characters
12212 indicating "float" have been found.
12217 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12219 if (flags == IS_NUMBER_IN_UV) {
12221 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12224 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12225 if (uv <= (UV) IV_MIN)
12226 sv_setiv(sv, -(IV)uv);
12233 /* terminate the string */
12235 nv = Atof(PL_tokenbuf);
12240 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12241 const char *const key = floatit ? "float" : "integer";
12242 const STRLEN keylen = floatit ? 5 : 7;
12243 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12244 key, keylen, sv, NULL, NULL, 0);
12248 /* if it starts with a v, it could be a v-string */
12251 sv = newSV(5); /* preallocate storage space */
12252 s = scan_vstring(s, PL_bufend, sv);
12256 /* make the op for the constant and return */
12259 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12261 lvalp->opval = NULL;
12267 S_scan_formline(pTHX_ register char *s)
12270 register char *eol;
12272 SV * const stuff = newSVpvs("");
12273 bool needargs = FALSE;
12274 bool eofmt = FALSE;
12276 char *tokenstart = s;
12279 if (PL_madskills) {
12280 savewhite = PL_thiswhite;
12285 while (!needargs) {
12288 #ifdef PERL_STRICT_CR
12289 while (SPACE_OR_TAB(*t))
12292 while (SPACE_OR_TAB(*t) || *t == '\r')
12295 if (*t == '\n' || t == PL_bufend) {
12300 if (PL_in_eval && !PL_rsfp) {
12301 eol = (char *) memchr(s,'\n',PL_bufend-s);
12306 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12308 for (t = s; t < eol; t++) {
12309 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12311 goto enough; /* ~~ must be first line in formline */
12313 if (*t == '@' || *t == '^')
12317 sv_catpvn(stuff, s, eol-s);
12318 #ifndef PERL_STRICT_CR
12319 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12320 char *end = SvPVX(stuff) + SvCUR(stuff);
12323 SvCUR_set(stuff, SvCUR(stuff) - 1);
12333 if (PL_madskills) {
12335 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12337 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12340 s = filter_gets(PL_linestr, PL_rsfp, 0);
12342 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12344 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12346 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12347 PL_last_lop = PL_last_uni = NULL;
12356 if (SvCUR(stuff)) {
12359 PL_lex_state = LEX_NORMAL;
12360 start_force(PL_curforce);
12361 NEXTVAL_NEXTTOKE.ival = 0;
12365 PL_lex_state = LEX_FORMLINE;
12367 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12369 else if (PL_encoding)
12370 sv_recode_to_utf8(stuff, PL_encoding);
12372 start_force(PL_curforce);
12373 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12375 start_force(PL_curforce);
12376 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12380 SvREFCNT_dec(stuff);
12382 PL_lex_formbrack = 0;
12386 if (PL_madskills) {
12388 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12390 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12391 PL_thiswhite = savewhite;
12398 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12401 const I32 oldsavestack_ix = PL_savestack_ix;
12402 CV* const outsidecv = PL_compcv;
12405 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12407 SAVEI32(PL_subline);
12408 save_item(PL_subname);
12409 SAVESPTR(PL_compcv);
12411 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12412 CvFLAGS(PL_compcv) |= flags;
12414 PL_subline = CopLINE(PL_curcop);
12415 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12416 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12417 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12419 return oldsavestack_ix;
12423 #pragma segment Perl_yylex
12426 Perl_yywarn(pTHX_ const char *s)
12429 PL_in_eval |= EVAL_WARNONLY;
12431 PL_in_eval &= ~EVAL_WARNONLY;
12436 Perl_yyerror(pTHX_ const char *s)
12439 const char *where = NULL;
12440 const char *context = NULL;
12443 int yychar = PL_parser->yychar;
12445 if (!yychar || (yychar == ';' && !PL_rsfp))
12447 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12448 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12449 PL_oldbufptr != PL_bufptr) {
12452 The code below is removed for NetWare because it abends/crashes on NetWare
12453 when the script has error such as not having the closing quotes like:
12454 if ($var eq "value)
12455 Checking of white spaces is anyway done in NetWare code.
12458 while (isSPACE(*PL_oldoldbufptr))
12461 context = PL_oldoldbufptr;
12462 contlen = PL_bufptr - PL_oldoldbufptr;
12464 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12465 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12468 The code below is removed for NetWare because it abends/crashes on NetWare
12469 when the script has error such as not having the closing quotes like:
12470 if ($var eq "value)
12471 Checking of white spaces is anyway done in NetWare code.
12474 while (isSPACE(*PL_oldbufptr))
12477 context = PL_oldbufptr;
12478 contlen = PL_bufptr - PL_oldbufptr;
12480 else if (yychar > 255)
12481 where = "next token ???";
12482 else if (yychar == -2) { /* YYEMPTY */
12483 if (PL_lex_state == LEX_NORMAL ||
12484 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12485 where = "at end of line";
12486 else if (PL_lex_inpat)
12487 where = "within pattern";
12489 where = "within string";
12492 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12494 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12495 else if (isPRINT_LC(yychar)) {
12496 const char string = yychar;
12497 sv_catpvn(where_sv, &string, 1);
12500 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12501 where = SvPVX_const(where_sv);
12503 msg = sv_2mortal(newSVpv(s, 0));
12504 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12505 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12507 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12509 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12510 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12511 Perl_sv_catpvf(aTHX_ msg,
12512 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12513 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12516 if (PL_in_eval & EVAL_WARNONLY) {
12517 if (ckWARN_d(WARN_SYNTAX))
12518 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12522 if (PL_error_count >= 10) {
12523 if (PL_in_eval && SvCUR(ERRSV))
12524 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12525 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12527 Perl_croak(aTHX_ "%s has too many errors.\n",
12528 OutCopFILE(PL_curcop));
12531 PL_in_my_stash = NULL;
12535 #pragma segment Main
12539 S_swallow_bom(pTHX_ U8 *s)
12542 const STRLEN slen = SvCUR(PL_linestr);
12545 if (s[1] == 0xFE) {
12546 /* UTF-16 little-endian? (or UTF32-LE?) */
12547 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12548 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12549 #ifndef PERL_NO_UTF16_FILTER
12550 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12553 if (PL_bufend > (char*)s) {
12557 filter_add(utf16rev_textfilter, NULL);
12558 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12559 utf16_to_utf8_reversed(s, news,
12560 PL_bufend - (char*)s - 1,
12562 sv_setpvn(PL_linestr, (const char*)news, newlen);
12564 s = (U8*)SvPVX(PL_linestr);
12565 Copy(news, s, newlen, U8);
12569 SvUTF8_on(PL_linestr);
12570 s = (U8*)SvPVX(PL_linestr);
12572 /* FIXME - is this a general bug fix? */
12575 PL_bufend = SvPVX(PL_linestr) + newlen;
12578 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12583 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12584 #ifndef PERL_NO_UTF16_FILTER
12585 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12588 if (PL_bufend > (char *)s) {
12592 filter_add(utf16_textfilter, NULL);
12593 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12594 utf16_to_utf8(s, news,
12595 PL_bufend - (char*)s,
12597 sv_setpvn(PL_linestr, (const char*)news, newlen);
12599 SvUTF8_on(PL_linestr);
12600 s = (U8*)SvPVX(PL_linestr);
12601 PL_bufend = SvPVX(PL_linestr) + newlen;
12604 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12609 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12610 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12611 s += 3; /* UTF-8 */
12617 if (s[2] == 0xFE && s[3] == 0xFF) {
12618 /* UTF-32 big-endian */
12619 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12622 else if (s[2] == 0 && s[3] != 0) {
12625 * are a good indicator of UTF-16BE. */
12626 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12632 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12633 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12634 s += 4; /* UTF-8 */
12640 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12643 * are a good indicator of UTF-16LE. */
12644 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12652 #ifndef PERL_NO_UTF16_FILTER
12654 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12657 const STRLEN old = SvCUR(sv);
12658 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12659 DEBUG_P(PerlIO_printf(Perl_debug_log,
12660 "utf16_textfilter(%p): %d %d (%d)\n",
12661 FPTR2DPTR(void *, utf16_textfilter),
12662 idx, maxlen, (int) count));
12666 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12667 Copy(SvPVX_const(sv), tmps, old, char);
12668 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12669 SvCUR(sv) - old, &newlen);
12670 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12672 DEBUG_P({sv_dump(sv);});
12677 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12680 const STRLEN old = SvCUR(sv);
12681 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12682 DEBUG_P(PerlIO_printf(Perl_debug_log,
12683 "utf16rev_textfilter(%p): %d %d (%d)\n",
12684 FPTR2DPTR(void *, utf16rev_textfilter),
12685 idx, maxlen, (int) count));
12689 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12690 Copy(SvPVX_const(sv), tmps, old, char);
12691 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12692 SvCUR(sv) - old, &newlen);
12693 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12695 DEBUG_P({ sv_dump(sv); });
12701 Returns a pointer to the next character after the parsed
12702 vstring, as well as updating the passed in sv.
12704 Function must be called like
12707 s = scan_vstring(s,e,sv);
12709 where s and e are the start and end of the string.
12710 The sv should already be large enough to store the vstring
12711 passed in, for performance reasons.
12716 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12719 const char *pos = s;
12720 const char *start = s;
12721 if (*pos == 'v') pos++; /* get past 'v' */
12722 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12724 if ( *pos != '.') {
12725 /* this may not be a v-string if followed by => */
12726 const char *next = pos;
12727 while (next < e && isSPACE(*next))
12729 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12730 /* return string not v-string */
12731 sv_setpvn(sv,(char *)s,pos-s);
12732 return (char *)pos;
12736 if (!isALPHA(*pos)) {
12737 U8 tmpbuf[UTF8_MAXBYTES+1];
12740 s++; /* get past 'v' */
12742 sv_setpvn(sv, "", 0);
12745 /* this is atoi() that tolerates underscores */
12748 const char *end = pos;
12750 while (--end >= s) {
12752 const UV orev = rev;
12753 rev += (*end - '0') * mult;
12755 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12756 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12757 "Integer overflow in decimal number");
12761 if (rev > 0x7FFFFFFF)
12762 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12764 /* Append native character for the rev point */
12765 tmpend = uvchr_to_utf8(tmpbuf, rev);
12766 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12767 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12769 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12775 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12779 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12787 * c-indentation-style: bsd
12788 * c-basic-offset: 4
12789 * indent-tabs-mode: t
12792 * ex: set ts=8 sts=4 sw=4 noet: