3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
29 #define pl_yylval (PL_parser->yylval)
31 /* YYINITDEPTH -- initial size of the parser's stacks. */
32 #define YYINITDEPTH 200
34 /* XXX temporary backwards compatibility */
35 #define PL_lex_brackets (PL_parser->lex_brackets)
36 #define PL_lex_brackstack (PL_parser->lex_brackstack)
37 #define PL_lex_casemods (PL_parser->lex_casemods)
38 #define PL_lex_casestack (PL_parser->lex_casestack)
39 #define PL_lex_defer (PL_parser->lex_defer)
40 #define PL_lex_dojoin (PL_parser->lex_dojoin)
41 #define PL_lex_expect (PL_parser->lex_expect)
42 #define PL_lex_formbrack (PL_parser->lex_formbrack)
43 #define PL_lex_inpat (PL_parser->lex_inpat)
44 #define PL_lex_inwhat (PL_parser->lex_inwhat)
45 #define PL_lex_op (PL_parser->lex_op)
46 #define PL_lex_repl (PL_parser->lex_repl)
47 #define PL_lex_starts (PL_parser->lex_starts)
48 #define PL_lex_stuff (PL_parser->lex_stuff)
49 #define PL_multi_start (PL_parser->multi_start)
50 #define PL_multi_open (PL_parser->multi_open)
51 #define PL_multi_close (PL_parser->multi_close)
52 #define PL_pending_ident (PL_parser->pending_ident)
53 #define PL_preambled (PL_parser->preambled)
54 #define PL_sublex_info (PL_parser->sublex_info)
55 #define PL_linestr (PL_parser->linestr)
56 #define PL_expect (PL_parser->expect)
57 #define PL_copline (PL_parser->copline)
58 #define PL_bufptr (PL_parser->bufptr)
59 #define PL_oldbufptr (PL_parser->oldbufptr)
60 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61 #define PL_linestart (PL_parser->linestart)
62 #define PL_bufend (PL_parser->bufend)
63 #define PL_last_uni (PL_parser->last_uni)
64 #define PL_last_lop (PL_parser->last_lop)
65 #define PL_last_lop_op (PL_parser->last_lop_op)
66 #define PL_lex_state (PL_parser->lex_state)
67 #define PL_rsfp (PL_parser->rsfp)
68 #define PL_rsfp_filters (PL_parser->rsfp_filters)
69 #define PL_in_my (PL_parser->in_my)
70 #define PL_in_my_stash (PL_parser->in_my_stash)
71 #define PL_tokenbuf (PL_parser->tokenbuf)
72 #define PL_multi_end (PL_parser->multi_end)
73 #define PL_error_count (PL_parser->error_count)
76 # define PL_endwhite (PL_parser->endwhite)
77 # define PL_faketokens (PL_parser->faketokens)
78 # define PL_lasttoke (PL_parser->lasttoke)
79 # define PL_nextwhite (PL_parser->nextwhite)
80 # define PL_realtokenstart (PL_parser->realtokenstart)
81 # define PL_skipwhite (PL_parser->skipwhite)
82 # define PL_thisclose (PL_parser->thisclose)
83 # define PL_thismad (PL_parser->thismad)
84 # define PL_thisopen (PL_parser->thisopen)
85 # define PL_thisstuff (PL_parser->thisstuff)
86 # define PL_thistoken (PL_parser->thistoken)
87 # define PL_thiswhite (PL_parser->thiswhite)
88 # define PL_thiswhite (PL_parser->thiswhite)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_curforce (PL_parser->curforce)
92 # define PL_nexttoke (PL_parser->nexttoke)
93 # define PL_nexttype (PL_parser->nexttype)
94 # define PL_nextval (PL_parser->nextval)
98 S_pending_ident(pTHX);
100 static const char ident_too_long[] = "Identifier too long";
101 static const char commaless_variable_list[] = "comma-less variable list";
103 #ifndef PERL_NO_UTF16_FILTER
104 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
109 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
110 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
112 # define CURMAD(slot,sv)
113 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
116 #define XFAKEBRACK 128
117 #define XENUMMASK 127
119 #ifdef USE_UTF8_SCRIPTS
120 # define UTF (!IN_BYTES)
122 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
125 /* In variables named $^X, these are the legal values for X.
126 * 1999-02-27 mjd-perl-patch@plover.com */
127 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
129 /* On MacOS, respect nonbreaking spaces */
130 #ifdef MACOS_TRADITIONAL
131 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
133 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136 /* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
138 * can get by with a single comparison (if the compiler is smart enough).
141 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
144 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149 /* at end of code, eg "$x" followed by: */
150 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155 #define LEX_INTERPCONST 2 /* NOT USED */
156 #define LEX_FORMLINE 1 /* expecting a format line */
157 #define LEX_KNOWNEXT 0 /* next token known; just return it */
161 static const char* const lex_state_names[] = {
180 #include "keywords.h"
182 /* CLINE is a macro that ensures PL_copline has a sane value */
187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 # define SKIPSPACE0(s) skipspace0(s)
191 # define SKIPSPACE1(s) skipspace1(s)
192 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193 # define PEEKSPACE(s) skipspace2(s,0)
195 # define SKIPSPACE0(s) skipspace(s)
196 # define SKIPSPACE1(s) skipspace(s)
197 # define SKIPSPACE2(s,tsv) skipspace(s)
198 # define PEEKSPACE(s) skipspace(s)
202 * Convenience functions to return different tokens and prime the
203 * lexer for the next token. They all take an argument.
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
215 * FUN1 : not used, except for not, which isn't a UNIOP
216 * BOop : bitwise or or xor
218 * SHop : shift operator
219 * PWop : power operator
220 * PMop : pattern-matching operator
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
224 * Rop : relational operator <= != gt
226 * Also see LOP and lop() below.
229 #ifdef DEBUGGING /* Serve -DT. */
230 # define REPORT(retval) tokereport((I32)retval)
232 # define REPORT(retval) (retval)
235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
256 /* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
261 #define UNI2(f,x) { \
262 pl_yylval.ival = f; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
268 return REPORT( (int)FUNC1 ); \
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
272 #define UNI(f) UNI2(f,XTERM)
273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
275 #define UNIBRACK(f) { \
276 pl_yylval.ival = f; \
278 PL_last_uni = PL_oldbufptr; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 /* grandfather return to old style */
286 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
290 /* how to interpret the pl_yylval associated with the token */
294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
300 static struct debug_tokens {
302 enum token_type type;
304 } const debug_tokens[] =
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
371 { 0, TOKENTYPE_NONE, NULL }
374 /* dump the returned token in rv, plus any optional arg in pl_yylval */
377 S_tokereport(pTHX_ I32 rv)
381 const char *name = NULL;
382 enum token_type type = TOKENTYPE_NONE;
383 const struct debug_tokens *p;
384 SV* const report = newSVpvs("<== ");
386 for (p = debug_tokens; p->token; p++) {
387 if (p->token == (int)rv) {
394 Perl_sv_catpv(aTHX_ report, name);
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
398 sv_catpvs(report, "EOF");
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)pl_yylval.ival);
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[pl_yylval.ival]);
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", pl_yylval.pval);
415 case TOKENTYPE_OPVAL:
416 if (pl_yylval.opval) {
417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
418 PL_op_name[pl_yylval.opval->op_type]);
419 if (pl_yylval.opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(pl_yylval.opval)));
426 sv_catpvs(report, "(opval=null)");
429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
435 /* print the buffer with suitable escapes */
438 S_printbuf(pTHX_ const char* fmt, const char* s)
440 SV* const tmp = newSVpvs("");
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
455 S_ao(pTHX_ int toketype)
458 if (*PL_bufptr == '=') {
460 if (toketype == ANDAND)
461 pl_yylval.ival = OP_ANDASSIGN;
462 else if (toketype == OROR)
463 pl_yylval.ival = OP_ORASSIGN;
464 else if (toketype == DORDOR)
465 pl_yylval.ival = OP_DORASSIGN;
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
485 S_no_op(pTHX_ const char *what, char *s)
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
496 if (ckWARN_d(WARN_SYNTAX)) {
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
520 * Complain about missing quote/regexp/heredoc terminator.
521 * If it's called with NULL then it cauterizes the line buffer.
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
528 S_missingterm(pTHX_ char *s)
534 char * const nl = strrchr(s,'\n');
540 iscntrl(PL_multi_close)
542 PL_multi_close < 32 || PL_multi_close == 127
546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
551 *tmpbuf = (char)PL_multi_close;
555 q = strchr(s,'"') ? '\'' : '"';
556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
559 #define FEATURE_IS_ENABLED(name) \
560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
562 /* The longest string we pass in. */
563 #define MAX_FEATURE_LEN (sizeof("switch")-1)
566 * S_feature_is_enabled
567 * Check whether the named feature is enabled.
570 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
573 HV * const hinthv = GvHV(PL_hintgv);
574 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
575 assert(namelen <= MAX_FEATURE_LEN);
576 memcpy(&he_name[8], name, namelen);
578 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
586 Perl_deprecate(pTHX_ const char *s)
588 if (ckWARN(WARN_DEPRECATED))
589 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
593 Perl_deprecate_old(pTHX_ const char *s)
595 /* This function should NOT be called for any new deprecated warnings */
596 /* Use Perl_deprecate instead */
598 /* It is here to maintain backward compatibility with the pre-5.8 */
599 /* warnings category hierarchy. The "deprecated" category used to */
600 /* live under the "syntax" category. It is now a top-level category */
601 /* in its own right. */
603 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
604 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
605 "Use of %s is deprecated", s);
609 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
610 * utf16-to-utf8-reversed.
613 #ifdef PERL_CR_FILTER
617 register const char *s = SvPVX_const(sv);
618 register const char * const e = s + SvCUR(sv);
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
677 parser->ps = parser->stack;
678 parser->stack_size = YYINITDEPTH;
680 parser->stack->state = 0;
681 parser->yyerrstatus = 0;
682 parser->yychar = YYEMPTY; /* Cause a token to be read. */
684 /* on scope exit, free this parser and restore any outer one */
686 parser->saved_curcop = PL_curcop;
688 /* initialise lexer state */
691 parser->curforce = -1;
693 parser->nexttoke = 0;
695 parser->copline = NOLINE;
696 parser->lex_state = LEX_NORMAL;
697 parser->expect = XSTATE;
699 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
700 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
702 Newx(parser->lex_brackstack, 120, char);
703 Newx(parser->lex_casestack, 12, char);
704 *parser->lex_casestack = '\0';
707 s = SvPV_const(line, len);
713 parser->linestr = newSVpvs("\n;");
714 } else if (SvREADONLY(line) || s[len-1] != ';') {
715 parser->linestr = newSVsv(line);
717 sv_catpvs(parser->linestr, "\n;");
720 SvREFCNT_inc_simple_void_NN(line);
721 parser->linestr = line;
723 parser->oldoldbufptr =
726 parser->linestart = SvPVX(parser->linestr);
727 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
728 parser->last_lop = parser->last_uni = NULL;
732 /* delete a parser object */
735 Perl_parser_free(pTHX_ const yy_parser *parser)
737 PL_curcop = parser->saved_curcop;
738 SvREFCNT_dec(parser->linestr);
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && parser->old_parser
743 && parser->rsfp != parser->old_parser->rsfp)
744 PerlIO_close(parser->rsfp);
745 SvREFCNT_dec(parser->rsfp_filters);
747 Safefree(parser->stack);
748 Safefree(parser->lex_brackstack);
749 Safefree(parser->lex_casestack);
750 PL_parser = parser->old_parser;
757 * Finalizer for lexing operations. Must be called when the parser is
758 * done with the lexer.
765 PL_doextract = FALSE;
770 * This subroutine has nothing to do with tilting, whether at windmills
771 * or pinball tables. Its name is short for "increment line". It
772 * increments the current line number in CopLINE(PL_curcop) and checks
773 * to see whether the line starts with a comment of the form
774 * # line 500 "foo.pm"
775 * If so, it sets the current line number and file to the values in the comment.
779 S_incline(pTHX_ const char *s)
786 CopLINE_inc(PL_curcop);
789 while (SPACE_OR_TAB(*s))
791 if (strnEQ(s, "line", 4))
795 if (SPACE_OR_TAB(*s))
799 while (SPACE_OR_TAB(*s))
807 while (SPACE_OR_TAB(*s))
809 if (*s == '"' && (t = strchr(s+1, '"'))) {
819 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
821 if (*e != '\n' && *e != '\0')
822 return; /* false alarm */
825 const STRLEN len = t - s;
827 SV *const temp_sv = CopFILESV(PL_curcop);
833 tmplen = SvCUR(temp_sv);
839 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
840 /* must copy *{"::_<(eval N)[oldfilename:L]"}
841 * to *{"::_<newfilename"} */
842 /* However, the long form of evals is only turned on by the
843 debugger - usually they're "(eval %lu)" */
847 STRLEN tmplen2 = len;
848 if (tmplen + 2 <= sizeof smallbuf)
851 Newx(tmpbuf, tmplen + 2, char);
854 memcpy(tmpbuf + 2, cf, tmplen);
856 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
861 if (tmplen2 + 2 <= sizeof smallbuf)
864 Newx(tmpbuf2, tmplen2 + 2, char);
866 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
867 /* Either they malloc'd it, or we malloc'd it,
868 so no prefix is present in ours. */
873 memcpy(tmpbuf2 + 2, s, tmplen2);
876 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
878 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
879 /* adjust ${"::_<newfilename"} to store the new file name */
880 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
881 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
882 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
885 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
887 if (tmpbuf != smallbuf) Safefree(tmpbuf);
890 CopFILE_free(PL_curcop);
891 CopFILE_setn(PL_curcop, s, len);
893 CopLINE_set(PL_curcop, atoi(n)-1);
897 /* skip space before PL_thistoken */
900 S_skipspace0(pTHX_ register char *s)
907 PL_thiswhite = newSVpvs("");
908 sv_catsv(PL_thiswhite, PL_skipwhite);
909 sv_free(PL_skipwhite);
912 PL_realtokenstart = s - SvPVX(PL_linestr);
916 /* skip space after PL_thistoken */
919 S_skipspace1(pTHX_ register char *s)
921 const char *start = s;
922 I32 startoff = start - SvPVX(PL_linestr);
927 start = SvPVX(PL_linestr) + startoff;
928 if (!PL_thistoken && PL_realtokenstart >= 0) {
929 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
930 PL_thistoken = newSVpvn(tstart, start - tstart);
932 PL_realtokenstart = -1;
935 PL_nextwhite = newSVpvs("");
936 sv_catsv(PL_nextwhite, PL_skipwhite);
937 sv_free(PL_skipwhite);
944 S_skipspace2(pTHX_ register char *s, SV **svp)
947 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
948 const I32 startoff = s - SvPVX(PL_linestr);
951 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
952 if (!PL_madskills || !svp)
954 start = SvPVX(PL_linestr) + startoff;
955 if (!PL_thistoken && PL_realtokenstart >= 0) {
956 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
957 PL_thistoken = newSVpvn(tstart, start - tstart);
958 PL_realtokenstart = -1;
963 sv_setsv(*svp, PL_skipwhite);
964 sv_free(PL_skipwhite);
973 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
975 AV *av = CopFILEAVx(PL_curcop);
977 SV * const sv = newSV_type(SVt_PVMG);
979 sv_setsv(sv, orig_sv);
981 sv_setpvn(sv, buf, len);
984 av_store(av, (I32)CopLINE(PL_curcop), sv);
990 * Called to gobble the appropriate amount and type of whitespace.
991 * Skips comments as well.
995 S_skipspace(pTHX_ register char *s)
1000 int startoff = s - SvPVX(PL_linestr);
1003 sv_free(PL_skipwhite);
1008 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1009 while (s < PL_bufend && SPACE_OR_TAB(*s))
1019 SSize_t oldprevlen, oldoldprevlen;
1020 SSize_t oldloplen = 0, oldunilen = 0;
1021 while (s < PL_bufend && isSPACE(*s)) {
1022 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1027 if (s < PL_bufend && *s == '#') {
1028 while (s < PL_bufend && *s != '\n')
1030 if (s < PL_bufend) {
1032 if (PL_in_eval && !PL_rsfp) {
1039 /* only continue to recharge the buffer if we're at the end
1040 * of the buffer, we're not reading from a source filter, and
1041 * we're in normal lexing mode
1043 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1044 PL_lex_state == LEX_FORMLINE)
1051 /* try to recharge the buffer */
1053 curoff = s - SvPVX(PL_linestr);
1056 if ((s = filter_gets(PL_linestr, PL_rsfp,
1057 (prevlen = SvCUR(PL_linestr)))) == NULL)
1060 if (PL_madskills && curoff != startoff) {
1062 PL_skipwhite = newSVpvs("");
1063 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1067 /* mustn't throw out old stuff yet if madpropping */
1068 SvCUR(PL_linestr) = curoff;
1069 s = SvPVX(PL_linestr) + curoff;
1071 if (curoff && s[-1] == '\n')
1075 /* end of file. Add on the -p or -n magic */
1076 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1079 sv_catpvs(PL_linestr,
1080 ";}continue{print or die qq(-p destination: $!\\n);}");
1082 sv_setpvs(PL_linestr,
1083 ";}continue{print or die qq(-p destination: $!\\n);}");
1085 PL_minus_n = PL_minus_p = 0;
1087 else if (PL_minus_n) {
1089 sv_catpvn(PL_linestr, ";}", 2);
1091 sv_setpvn(PL_linestr, ";}", 2);
1097 sv_catpvn(PL_linestr,";", 1);
1099 sv_setpvn(PL_linestr,";", 1);
1102 /* reset variables for next time we lex */
1103 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1109 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1110 PL_last_lop = PL_last_uni = NULL;
1112 /* Close the filehandle. Could be from -P preprocessor,
1113 * STDIN, or a regular file. If we were reading code from
1114 * STDIN (because the commandline held no -e or filename)
1115 * then we don't close it, we reset it so the code can
1116 * read from STDIN too.
1119 if (PL_preprocess && !PL_in_eval)
1120 (void)PerlProc_pclose(PL_rsfp);
1121 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1122 PerlIO_clearerr(PL_rsfp);
1124 (void)PerlIO_close(PL_rsfp);
1129 /* not at end of file, so we only read another line */
1130 /* make corresponding updates to old pointers, for yyerror() */
1131 oldprevlen = PL_oldbufptr - PL_bufend;
1132 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1134 oldunilen = PL_last_uni - PL_bufend;
1136 oldloplen = PL_last_lop - PL_bufend;
1137 PL_linestart = PL_bufptr = s + prevlen;
1138 PL_bufend = s + SvCUR(PL_linestr);
1140 PL_oldbufptr = s + oldprevlen;
1141 PL_oldoldbufptr = s + oldoldprevlen;
1143 PL_last_uni = s + oldunilen;
1145 PL_last_lop = s + oldloplen;
1148 /* debugger active and we're not compiling the debugger code,
1149 * so store the line into the debugger's array of lines
1151 if (PERLDB_LINE && PL_curstash != PL_debstash)
1152 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1159 PL_skipwhite = newSVpvs("");
1160 curoff = s - SvPVX(PL_linestr);
1161 if (curoff - startoff)
1162 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1171 * Check the unary operators to ensure there's no ambiguity in how they're
1172 * used. An ambiguous piece of code would be:
1174 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1175 * the +5 is its argument.
1185 if (PL_oldoldbufptr != PL_last_uni)
1187 while (isSPACE(*PL_last_uni))
1190 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1192 if ((t = strchr(s, '(')) && t < PL_bufptr)
1195 if (ckWARN_d(WARN_AMBIGUOUS)){
1196 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1197 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1198 (int)(s - PL_last_uni), PL_last_uni);
1203 * LOP : macro to build a list operator. Its behaviour has been replaced
1204 * with a subroutine, S_lop() for which LOP is just another name.
1207 #define LOP(f,x) return lop(f,x,s)
1211 * Build a list operator (or something that might be one). The rules:
1212 * - if we have a next token, then it's a list operator [why?]
1213 * - if the next thing is an opening paren, then it's a function
1214 * - else it's a list operator
1218 S_lop(pTHX_ I32 f, int x, char *s)
1225 PL_last_lop = PL_oldbufptr;
1226 PL_last_lop_op = (OPCODE)f;
1229 return REPORT(LSTOP);
1232 return REPORT(LSTOP);
1235 return REPORT(FUNC);
1238 return REPORT(FUNC);
1240 return REPORT(LSTOP);
1246 * Sets up for an eventual force_next(). start_force(0) basically does
1247 * an unshift, while start_force(-1) does a push. yylex removes items
1252 S_start_force(pTHX_ int where)
1256 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1257 where = PL_lasttoke;
1258 assert(PL_curforce < 0 || PL_curforce == where);
1259 if (PL_curforce != where) {
1260 for (i = PL_lasttoke; i > where; --i) {
1261 PL_nexttoke[i] = PL_nexttoke[i-1];
1265 if (PL_curforce < 0) /* in case of duplicate start_force() */
1266 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1267 PL_curforce = where;
1270 curmad('^', newSVpvs(""));
1271 CURMAD('_', PL_nextwhite);
1276 S_curmad(pTHX_ char slot, SV *sv)
1282 if (PL_curforce < 0)
1283 where = &PL_thismad;
1285 where = &PL_nexttoke[PL_curforce].next_mad;
1288 sv_setpvn(sv, "", 0);
1291 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1293 else if (PL_encoding) {
1294 sv_recode_to_utf8(sv, PL_encoding);
1299 /* keep a slot open for the head of the list? */
1300 if (slot != '_' && *where && (*where)->mad_key == '^') {
1301 (*where)->mad_key = slot;
1302 sv_free((SV*)((*where)->mad_val));
1303 (*where)->mad_val = (void*)sv;
1306 addmad(newMADsv(slot, sv), where, 0);
1309 # define start_force(where) NOOP
1310 # define curmad(slot, sv) NOOP
1315 * When the lexer realizes it knows the next token (for instance,
1316 * it is reordering tokens for the parser) then it can call S_force_next
1317 * to know what token to return the next time the lexer is called. Caller
1318 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1319 * and possibly PL_expect to ensure the lexer handles the token correctly.
1323 S_force_next(pTHX_ I32 type)
1327 if (PL_curforce < 0)
1328 start_force(PL_lasttoke);
1329 PL_nexttoke[PL_curforce].next_type = type;
1330 if (PL_lex_state != LEX_KNOWNEXT)
1331 PL_lex_defer = PL_lex_state;
1332 PL_lex_state = LEX_KNOWNEXT;
1333 PL_lex_expect = PL_expect;
1336 PL_nexttype[PL_nexttoke] = type;
1338 if (PL_lex_state != LEX_KNOWNEXT) {
1339 PL_lex_defer = PL_lex_state;
1340 PL_lex_expect = PL_expect;
1341 PL_lex_state = LEX_KNOWNEXT;
1347 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1350 SV * const sv = newSVpvn_utf8(start, len,
1352 && is_utf8_string((const U8*)start, len));
1358 * When the lexer knows the next thing is a word (for instance, it has
1359 * just seen -> and it knows that the next char is a word char, then
1360 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1364 * char *start : buffer position (must be within PL_linestr)
1365 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1366 * int check_keyword : if true, Perl checks to make sure the word isn't
1367 * a keyword (do this if the word is a label, e.g. goto FOO)
1368 * int allow_pack : if true, : characters will also be allowed (require,
1369 * use, etc. do this)
1370 * int allow_initial_tick : used by the "sub" lexer only.
1374 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1380 start = SKIPSPACE1(start);
1382 if (isIDFIRST_lazy_if(s,UTF) ||
1383 (allow_pack && *s == ':') ||
1384 (allow_initial_tick && *s == '\'') )
1386 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1387 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1389 start_force(PL_curforce);
1391 curmad('X', newSVpvn(start,s-start));
1392 if (token == METHOD) {
1397 PL_expect = XOPERATOR;
1401 curmad('g', newSVpvs( "forced" ));
1402 NEXTVAL_NEXTTOKE.opval
1403 = (OP*)newSVOP(OP_CONST,0,
1404 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1405 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1413 * Called when the lexer wants $foo *foo &foo etc, but the program
1414 * text only contains the "foo" portion. The first argument is a pointer
1415 * to the "foo", and the second argument is the type symbol to prefix.
1416 * Forces the next token to be a "WORD".
1417 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1421 S_force_ident(pTHX_ register const char *s, int kind)
1425 const STRLEN len = strlen(s);
1426 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1427 start_force(PL_curforce);
1428 NEXTVAL_NEXTTOKE.opval = o;
1431 o->op_private = OPpCONST_ENTERED;
1432 /* XXX see note in pp_entereval() for why we forgo typo
1433 warnings if the symbol must be introduced in an eval.
1435 gv_fetchpvn_flags(s, len,
1436 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1438 kind == '$' ? SVt_PV :
1439 kind == '@' ? SVt_PVAV :
1440 kind == '%' ? SVt_PVHV :
1448 Perl_str_to_version(pTHX_ SV *sv)
1453 const char *start = SvPV_const(sv,len);
1454 const char * const end = start + len;
1455 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1456 while (start < end) {
1460 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1465 retval += ((NV)n)/nshift;
1474 * Forces the next token to be a version number.
1475 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1476 * and if "guessing" is TRUE, then no new token is created (and the caller
1477 * must use an alternative parsing method).
1481 S_force_version(pTHX_ char *s, int guessing)
1487 I32 startoff = s - SvPVX(PL_linestr);
1496 while (isDIGIT(*d) || *d == '_' || *d == '.')
1500 start_force(PL_curforce);
1501 curmad('X', newSVpvn(s,d-s));
1504 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1506 s = scan_num(s, &pl_yylval);
1507 version = pl_yylval.opval;
1508 ver = cSVOPx(version)->op_sv;
1509 if (SvPOK(ver) && !SvNIOK(ver)) {
1510 SvUPGRADE(ver, SVt_PVNV);
1511 SvNV_set(ver, str_to_version(ver));
1512 SvNOK_on(ver); /* hint that it is a version */
1515 else if (guessing) {
1518 sv_free(PL_nextwhite); /* let next token collect whitespace */
1520 s = SvPVX(PL_linestr) + startoff;
1528 if (PL_madskills && !version) {
1529 sv_free(PL_nextwhite); /* let next token collect whitespace */
1531 s = SvPVX(PL_linestr) + startoff;
1534 /* NOTE: The parser sees the package name and the VERSION swapped */
1535 start_force(PL_curforce);
1536 NEXTVAL_NEXTTOKE.opval = version;
1544 * Tokenize a quoted string passed in as an SV. It finds the next
1545 * chunk, up to end of string or a backslash. It may make a new
1546 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1551 S_tokeq(pTHX_ SV *sv)
1555 register char *send;
1563 s = SvPV_force(sv, len);
1564 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1567 while (s < send && *s != '\\')
1572 if ( PL_hints & HINT_NEW_STRING ) {
1573 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1577 if (s + 1 < send && (s[1] == '\\'))
1578 s++; /* all that, just for this */
1583 SvCUR_set(sv, d - SvPVX_const(sv));
1585 if ( PL_hints & HINT_NEW_STRING )
1586 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1591 * Now come three functions related to double-quote context,
1592 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1593 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1594 * interact with PL_lex_state, and create fake ( ... ) argument lists
1595 * to handle functions and concatenation.
1596 * They assume that whoever calls them will be setting up a fake
1597 * join call, because each subthing puts a ',' after it. This lets
1600 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1602 * (I'm not sure whether the spurious commas at the end of lcfirst's
1603 * arguments and join's arguments are created or not).
1608 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1610 * Pattern matching will set PL_lex_op to the pattern-matching op to
1611 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1613 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1615 * Everything else becomes a FUNC.
1617 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1618 * had an OP_CONST or OP_READLINE). This just sets us up for a
1619 * call to S_sublex_push().
1623 S_sublex_start(pTHX)
1626 register const I32 op_type = pl_yylval.ival;
1628 if (op_type == OP_NULL) {
1629 pl_yylval.opval = PL_lex_op;
1633 if (op_type == OP_CONST || op_type == OP_READLINE) {
1634 SV *sv = tokeq(PL_lex_stuff);
1636 if (SvTYPE(sv) == SVt_PVIV) {
1637 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1639 const char * const p = SvPV_const(sv, len);
1640 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1644 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1645 PL_lex_stuff = NULL;
1646 /* Allow <FH> // "foo" */
1647 if (op_type == OP_READLINE)
1648 PL_expect = XTERMORDORDOR;
1651 else if (op_type == OP_BACKTICK && PL_lex_op) {
1652 /* readpipe() vas overriden */
1653 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1654 pl_yylval.opval = PL_lex_op;
1656 PL_lex_stuff = NULL;
1660 PL_sublex_info.super_state = PL_lex_state;
1661 PL_sublex_info.sub_inwhat = (U16)op_type;
1662 PL_sublex_info.sub_op = PL_lex_op;
1663 PL_lex_state = LEX_INTERPPUSH;
1667 pl_yylval.opval = PL_lex_op;
1677 * Create a new scope to save the lexing state. The scope will be
1678 * ended in S_sublex_done. Returns a '(', starting the function arguments
1679 * to the uc, lc, etc. found before.
1680 * Sets PL_lex_state to LEX_INTERPCONCAT.
1689 PL_lex_state = PL_sublex_info.super_state;
1690 SAVEBOOL(PL_lex_dojoin);
1691 SAVEI32(PL_lex_brackets);
1692 SAVEI32(PL_lex_casemods);
1693 SAVEI32(PL_lex_starts);
1694 SAVEI8(PL_lex_state);
1695 SAVEVPTR(PL_lex_inpat);
1696 SAVEI16(PL_lex_inwhat);
1697 SAVECOPLINE(PL_curcop);
1698 SAVEPPTR(PL_bufptr);
1699 SAVEPPTR(PL_bufend);
1700 SAVEPPTR(PL_oldbufptr);
1701 SAVEPPTR(PL_oldoldbufptr);
1702 SAVEPPTR(PL_last_lop);
1703 SAVEPPTR(PL_last_uni);
1704 SAVEPPTR(PL_linestart);
1705 SAVESPTR(PL_linestr);
1706 SAVEGENERICPV(PL_lex_brackstack);
1707 SAVEGENERICPV(PL_lex_casestack);
1709 PL_linestr = PL_lex_stuff;
1710 PL_lex_stuff = NULL;
1712 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1713 = SvPVX(PL_linestr);
1714 PL_bufend += SvCUR(PL_linestr);
1715 PL_last_lop = PL_last_uni = NULL;
1716 SAVEFREESV(PL_linestr);
1718 PL_lex_dojoin = FALSE;
1719 PL_lex_brackets = 0;
1720 Newx(PL_lex_brackstack, 120, char);
1721 Newx(PL_lex_casestack, 12, char);
1722 PL_lex_casemods = 0;
1723 *PL_lex_casestack = '\0';
1725 PL_lex_state = LEX_INTERPCONCAT;
1726 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1728 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1729 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1730 PL_lex_inpat = PL_sublex_info.sub_op;
1732 PL_lex_inpat = NULL;
1739 * Restores lexer state after a S_sublex_push.
1746 if (!PL_lex_starts++) {
1747 SV * const sv = newSVpvs("");
1748 if (SvUTF8(PL_linestr))
1750 PL_expect = XOPERATOR;
1751 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1755 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1756 PL_lex_state = LEX_INTERPCASEMOD;
1760 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1761 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1762 PL_linestr = PL_lex_repl;
1764 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1765 PL_bufend += SvCUR(PL_linestr);
1766 PL_last_lop = PL_last_uni = NULL;
1767 SAVEFREESV(PL_linestr);
1768 PL_lex_dojoin = FALSE;
1769 PL_lex_brackets = 0;
1770 PL_lex_casemods = 0;
1771 *PL_lex_casestack = '\0';
1773 if (SvEVALED(PL_lex_repl)) {
1774 PL_lex_state = LEX_INTERPNORMAL;
1776 /* we don't clear PL_lex_repl here, so that we can check later
1777 whether this is an evalled subst; that means we rely on the
1778 logic to ensure sublex_done() is called again only via the
1779 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1782 PL_lex_state = LEX_INTERPCONCAT;
1792 PL_endwhite = newSVpvs("");
1793 sv_catsv(PL_endwhite, PL_thiswhite);
1797 sv_setpvn(PL_thistoken,"",0);
1799 PL_realtokenstart = -1;
1803 PL_bufend = SvPVX(PL_linestr);
1804 PL_bufend += SvCUR(PL_linestr);
1805 PL_expect = XOPERATOR;
1806 PL_sublex_info.sub_inwhat = 0;
1814 Extracts a pattern, double-quoted string, or transliteration. This
1817 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1818 processing a pattern (PL_lex_inpat is true), a transliteration
1819 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1821 Returns a pointer to the character scanned up to. If this is
1822 advanced from the start pointer supplied (i.e. if anything was
1823 successfully parsed), will leave an OP for the substring scanned
1824 in pl_yylval. Caller must intuit reason for not parsing further
1825 by looking at the next characters herself.
1829 double-quoted style: \r and \n
1830 regexp special ones: \D \s
1833 case and quoting: \U \Q \E
1834 stops on @ and $, but not for $ as tail anchor
1836 In transliterations:
1837 characters are VERY literal, except for - not at the start or end
1838 of the string, which indicates a range. If the range is in bytes,
1839 scan_const expands the range to the full set of intermediate
1840 characters. If the range is in utf8, the hyphen is replaced with
1841 a certain range mark which will be handled by pmtrans() in op.c.
1843 In double-quoted strings:
1845 double-quoted style: \r and \n
1847 deprecated backrefs: \1 (in substitution replacements)
1848 case and quoting: \U \Q \E
1851 scan_const does *not* construct ops to handle interpolated strings.
1852 It stops processing as soon as it finds an embedded $ or @ variable
1853 and leaves it to the caller to work out what's going on.
1855 embedded arrays (whether in pattern or not) could be:
1856 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1858 $ in double-quoted strings must be the symbol of an embedded scalar.
1860 $ in pattern could be $foo or could be tail anchor. Assumption:
1861 it's a tail anchor if $ is the last thing in the string, or if it's
1862 followed by one of "()| \r\n\t"
1864 \1 (backreferences) are turned into $1
1866 The structure of the code is
1867 while (there's a character to process) {
1868 handle transliteration ranges
1869 skip regexp comments /(?#comment)/ and codes /(?{code})/
1870 skip #-initiated comments in //x patterns
1871 check for embedded arrays
1872 check for embedded scalars
1874 leave intact backslashes from leaveit (below)
1875 deprecate \1 in substitution replacements
1876 handle string-changing backslashes \l \U \Q \E, etc.
1877 switch (what was escaped) {
1878 handle \- in a transliteration (becomes a literal -)
1879 handle \132 (octal characters)
1880 handle \x15 and \x{1234} (hex characters)
1881 handle \N{name} (named characters)
1882 handle \cV (control characters)
1883 handle printf-style backslashes (\f, \r, \n, etc)
1885 } (end if backslash)
1886 } (end while character to read)
1891 S_scan_const(pTHX_ char *start)
1894 register char *send = PL_bufend; /* end of the constant */
1895 SV *sv = newSV(send - start); /* sv for the constant */
1896 register char *s = start; /* start of the constant */
1897 register char *d = SvPVX(sv); /* destination for copies */
1898 bool dorange = FALSE; /* are we in a translit range? */
1899 bool didrange = FALSE; /* did we just finish a range? */
1900 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1901 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1904 UV literal_endpoint = 0;
1905 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1908 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1909 /* If we are doing a trans and we know we want UTF8 set expectation */
1910 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1911 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1915 while (s < send || dorange) {
1916 /* get transliterations out of the way (they're most literal) */
1917 if (PL_lex_inwhat == OP_TRANS) {
1918 /* expand a range A-Z to the full set of characters. AIE! */
1920 I32 i; /* current expanded character */
1921 I32 min; /* first character in range */
1922 I32 max; /* last character in range */
1933 char * const c = (char*)utf8_hop((U8*)d, -1);
1937 *c = (char)UTF_TO_NATIVE(0xff);
1938 /* mark the range as done, and continue */
1944 i = d - SvPVX_const(sv); /* remember current offset */
1947 SvLEN(sv) + (has_utf8 ?
1948 (512 - UTF_CONTINUATION_MARK +
1951 /* How many two-byte within 0..255: 128 in UTF-8,
1952 * 96 in UTF-8-mod. */
1954 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1956 d = SvPVX(sv) + i; /* refresh d after realloc */
1960 for (j = 0; j <= 1; j++) {
1961 char * const c = (char*)utf8_hop((U8*)d, -1);
1962 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1968 max = (U8)0xff; /* only to \xff */
1969 uvmax = uv; /* \x{100} to uvmax */
1971 d = c; /* eat endpoint chars */
1976 d -= 2; /* eat the first char and the - */
1977 min = (U8)*d; /* first char in range */
1978 max = (U8)d[1]; /* last char in range */
1985 "Invalid range \"%c-%c\" in transliteration operator",
1986 (char)min, (char)max);
1990 if (literal_endpoint == 2 &&
1991 ((isLOWER(min) && isLOWER(max)) ||
1992 (isUPPER(min) && isUPPER(max)))) {
1994 for (i = min; i <= max; i++)
1996 *d++ = NATIVE_TO_NEED(has_utf8,i);
1998 for (i = min; i <= max; i++)
2000 *d++ = NATIVE_TO_NEED(has_utf8,i);
2005 for (i = min; i <= max; i++)
2008 const U8 ch = (U8)NATIVE_TO_UTF(i);
2009 if (UNI_IS_INVARIANT(ch))
2012 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2013 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2022 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2024 *d++ = (char)UTF_TO_NATIVE(0xff);
2026 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2030 /* mark the range as done, and continue */
2034 literal_endpoint = 0;
2039 /* range begins (ignore - as first or last char) */
2040 else if (*s == '-' && s+1 < send && s != start) {
2042 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2049 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2059 literal_endpoint = 0;
2060 native_range = TRUE;
2065 /* if we get here, we're not doing a transliteration */
2067 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2068 except for the last char, which will be done separately. */
2069 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2071 while (s+1 < send && *s != ')')
2072 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2074 else if (s[2] == '{' /* This should match regcomp.c */
2075 || (s[2] == '?' && s[3] == '{'))
2078 char *regparse = s + (s[2] == '{' ? 3 : 4);
2081 while (count && (c = *regparse)) {
2082 if (c == '\\' && regparse[1])
2090 if (*regparse != ')')
2091 regparse--; /* Leave one char for continuation. */
2092 while (s < regparse)
2093 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2097 /* likewise skip #-initiated comments in //x patterns */
2098 else if (*s == '#' && PL_lex_inpat &&
2099 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2100 while (s+1 < send && *s != '\n')
2101 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2104 /* check for embedded arrays
2105 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2107 else if (*s == '@' && s[1]) {
2108 if (isALNUM_lazy_if(s+1,UTF))
2110 if (strchr(":'{$", s[1]))
2112 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2113 break; /* in regexp, neither @+ nor @- are interpolated */
2116 /* check for embedded scalars. only stop if we're sure it's a
2119 else if (*s == '$') {
2120 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2122 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2123 break; /* in regexp, $ might be tail anchor */
2126 /* End of else if chain - OP_TRANS rejoin rest */
2129 if (*s == '\\' && s+1 < send) {
2132 /* deprecate \1 in strings and substitution replacements */
2133 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2134 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2136 if (ckWARN(WARN_SYNTAX))
2137 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2142 /* string-change backslash escapes */
2143 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2147 /* skip any other backslash escapes in a pattern */
2148 else if (PL_lex_inpat) {
2149 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2150 goto default_action;
2153 /* if we get here, it's either a quoted -, or a digit */
2156 /* quoted - in transliterations */
2158 if (PL_lex_inwhat == OP_TRANS) {
2165 if ((isALPHA(*s) || isDIGIT(*s)) &&
2167 Perl_warner(aTHX_ packWARN(WARN_MISC),
2168 "Unrecognized escape \\%c passed through",
2170 /* default action is to copy the quoted character */
2171 goto default_action;
2174 /* \132 indicates an octal constant */
2175 case '0': case '1': case '2': case '3':
2176 case '4': case '5': case '6': case '7':
2180 uv = grok_oct(s, &len, &flags, NULL);
2183 goto NUM_ESCAPE_INSERT;
2185 /* \x24 indicates a hex constant */
2189 char* const e = strchr(s, '}');
2190 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2191 PERL_SCAN_DISALLOW_PREFIX;
2196 yyerror("Missing right brace on \\x{}");
2200 uv = grok_hex(s, &len, &flags, NULL);
2206 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2207 uv = grok_hex(s, &len, &flags, NULL);
2213 /* Insert oct or hex escaped character.
2214 * There will always enough room in sv since such
2215 * escapes will be longer than any UTF-8 sequence
2216 * they can end up as. */
2218 /* We need to map to chars to ASCII before doing the tests
2221 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2222 if (!has_utf8 && uv > 255) {
2223 /* Might need to recode whatever we have
2224 * accumulated so far if it contains any
2227 * (Can't we keep track of that and avoid
2228 * this rescan? --jhi)
2232 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2233 if (!NATIVE_IS_INVARIANT(*c)) {
2238 const STRLEN offset = d - SvPVX_const(sv);
2240 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2244 while (src >= (const U8 *)SvPVX_const(sv)) {
2245 if (!NATIVE_IS_INVARIANT(*src)) {
2246 const U8 ch = NATIVE_TO_ASCII(*src);
2247 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2248 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2258 if (has_utf8 || uv > 255) {
2259 d = (char*)uvchr_to_utf8((U8*)d, uv);
2261 if (PL_lex_inwhat == OP_TRANS &&
2262 PL_sublex_info.sub_op) {
2263 PL_sublex_info.sub_op->op_private |=
2264 (PL_lex_repl ? OPpTRANS_FROM_UTF
2268 if (uv > 255 && !dorange)
2269 native_range = FALSE;
2281 /* \N{LATIN SMALL LETTER A} is a named character */
2285 char* e = strchr(s, '}');
2291 yyerror("Missing right brace on \\N{}");
2295 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2297 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2298 PERL_SCAN_DISALLOW_PREFIX;
2301 uv = grok_hex(s, &len, &flags, NULL);
2302 if ( e > s && len != (STRLEN)(e - s) ) {
2306 goto NUM_ESCAPE_INSERT;
2308 res = newSVpvn(s + 1, e - s - 1);
2309 res = new_constant( NULL, 0, "charnames",
2310 res, NULL, s - 2, e - s + 3 );
2312 sv_utf8_upgrade(res);
2313 str = SvPV_const(res,len);
2314 #ifdef EBCDIC_NEVER_MIND
2315 /* charnames uses pack U and that has been
2316 * recently changed to do the below uni->native
2317 * mapping, so this would be redundant (and wrong,
2318 * the code point would be doubly converted).
2319 * But leave this in just in case the pack U change
2320 * gets revoked, but the semantics is still
2321 * desireable for charnames. --jhi */
2323 UV uv = utf8_to_uvchr((const U8*)str, 0);
2326 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2328 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2329 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2330 str = SvPV_const(res, len);
2334 if (!has_utf8 && SvUTF8(res)) {
2335 const char * const ostart = SvPVX_const(sv);
2336 SvCUR_set(sv, d - ostart);
2339 sv_utf8_upgrade(sv);
2340 /* this just broke our allocation above... */
2341 SvGROW(sv, (STRLEN)(send - start));
2342 d = SvPVX(sv) + SvCUR(sv);
2345 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2346 const char * const odest = SvPVX_const(sv);
2348 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2349 d = SvPVX(sv) + (d - odest);
2353 native_range = FALSE; /* \N{} is guessed to be Unicode */
2355 Copy(str, d, len, char);
2362 yyerror("Missing braces on \\N{}");
2365 /* \c is a control character */
2374 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2377 yyerror("Missing control char name in \\c");
2381 /* printf-style backslashes, formfeeds, newlines, etc */
2383 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2386 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2389 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2392 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2395 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2398 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2401 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2407 } /* end if (backslash) */
2414 /* If we started with encoded form, or already know we want it
2415 and then encode the next character */
2416 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2418 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2419 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2422 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2423 const STRLEN off = d - SvPVX_const(sv);
2424 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2426 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2429 if (uv > 255 && !dorange)
2430 native_range = FALSE;
2434 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2436 } /* while loop to process each character */
2438 /* terminate the string and set up the sv */
2440 SvCUR_set(sv, d - SvPVX_const(sv));
2441 if (SvCUR(sv) >= SvLEN(sv))
2442 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2445 if (PL_encoding && !has_utf8) {
2446 sv_recode_to_utf8(sv, PL_encoding);
2452 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2453 PL_sublex_info.sub_op->op_private |=
2454 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2458 /* shrink the sv if we allocated more than we used */
2459 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2460 SvPV_shrink_to_cur(sv);
2463 /* return the substring (via pl_yylval) only if we parsed anything */
2464 if (s > PL_bufptr) {
2465 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2466 const char *const key = PL_lex_inpat ? "qr" : "q";
2467 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2471 if (PL_lex_inwhat == OP_TRANS) {
2474 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2482 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2485 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2492 * Returns TRUE if there's more to the expression (e.g., a subscript),
2495 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2497 * ->[ and ->{ return TRUE
2498 * { and [ outside a pattern are always subscripts, so return TRUE
2499 * if we're outside a pattern and it's not { or [, then return FALSE
2500 * if we're in a pattern and the first char is a {
2501 * {4,5} (any digits around the comma) returns FALSE
2502 * if we're in a pattern and the first char is a [
2504 * [SOMETHING] has a funky algorithm to decide whether it's a
2505 * character class or not. It has to deal with things like
2506 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2507 * anything else returns TRUE
2510 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2513 S_intuit_more(pTHX_ register char *s)
2516 if (PL_lex_brackets)
2518 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2520 if (*s != '{' && *s != '[')
2525 /* In a pattern, so maybe we have {n,m}. */
2542 /* On the other hand, maybe we have a character class */
2545 if (*s == ']' || *s == '^')
2548 /* this is terrifying, and it works */
2549 int weight = 2; /* let's weigh the evidence */
2551 unsigned char un_char = 255, last_un_char;
2552 const char * const send = strchr(s,']');
2553 char tmpbuf[sizeof PL_tokenbuf * 4];
2555 if (!send) /* has to be an expression */
2558 Zero(seen,256,char);
2561 else if (isDIGIT(*s)) {
2563 if (isDIGIT(s[1]) && s[2] == ']')
2569 for (; s < send; s++) {
2570 last_un_char = un_char;
2571 un_char = (unsigned char)*s;
2576 weight -= seen[un_char] * 10;
2577 if (isALNUM_lazy_if(s+1,UTF)) {
2579 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2580 len = (int)strlen(tmpbuf);
2581 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2586 else if (*s == '$' && s[1] &&
2587 strchr("[#!%*<>()-=",s[1])) {
2588 if (/*{*/ strchr("])} =",s[2]))
2597 if (strchr("wds]",s[1]))
2599 else if (seen[(U8)'\''] || seen[(U8)'"'])
2601 else if (strchr("rnftbxcav",s[1]))
2603 else if (isDIGIT(s[1])) {
2605 while (s[1] && isDIGIT(s[1]))
2615 if (strchr("aA01! ",last_un_char))
2617 if (strchr("zZ79~",s[1]))
2619 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2620 weight -= 5; /* cope with negative subscript */
2623 if (!isALNUM(last_un_char)
2624 && !(last_un_char == '$' || last_un_char == '@'
2625 || last_un_char == '&')
2626 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2631 if (keyword(tmpbuf, d - tmpbuf, 0))
2634 if (un_char == last_un_char + 1)
2636 weight -= seen[un_char];
2641 if (weight >= 0) /* probably a character class */
2651 * Does all the checking to disambiguate
2653 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2654 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2656 * First argument is the stuff after the first token, e.g. "bar".
2658 * Not a method if bar is a filehandle.
2659 * Not a method if foo is a subroutine prototyped to take a filehandle.
2660 * Not a method if it's really "Foo $bar"
2661 * Method if it's "foo $bar"
2662 * Not a method if it's really "print foo $bar"
2663 * Method if it's really "foo package::" (interpreted as package->foo)
2664 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2665 * Not a method if bar is a filehandle or package, but is quoted with
2670 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2673 char *s = start + (*start == '$');
2674 char tmpbuf[sizeof PL_tokenbuf];
2682 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2686 const char *proto = SvPVX_const(cv);
2697 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2698 /* start is the beginning of the possible filehandle/object,
2699 * and s is the end of it
2700 * tmpbuf is a copy of it
2703 if (*start == '$') {
2704 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2705 isUPPER(*PL_tokenbuf))
2708 len = start - SvPVX(PL_linestr);
2712 start = SvPVX(PL_linestr) + len;
2716 return *s == '(' ? FUNCMETH : METHOD;
2718 if (!keyword(tmpbuf, len, 0)) {
2719 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2723 soff = s - SvPVX(PL_linestr);
2727 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2728 if (indirgv && GvCVu(indirgv))
2730 /* filehandle or package name makes it a method */
2731 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2733 soff = s - SvPVX(PL_linestr);
2736 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2737 return 0; /* no assumptions -- "=>" quotes bearword */
2739 start_force(PL_curforce);
2740 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2741 newSVpvn(tmpbuf,len));
2742 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2744 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2749 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2751 return *s == '(' ? FUNCMETH : METHOD;
2757 /* Encoded script support. filter_add() effectively inserts a
2758 * 'pre-processing' function into the current source input stream.
2759 * Note that the filter function only applies to the current source file
2760 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2762 * The datasv parameter (which may be NULL) can be used to pass
2763 * private data to this instance of the filter. The filter function
2764 * can recover the SV using the FILTER_DATA macro and use it to
2765 * store private buffers and state information.
2767 * The supplied datasv parameter is upgraded to a PVIO type
2768 * and the IoDIRP/IoANY field is used to store the function pointer,
2769 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2770 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2771 * private use must be set using malloc'd pointers.
2775 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2784 if (!PL_rsfp_filters)
2785 PL_rsfp_filters = newAV();
2788 SvUPGRADE(datasv, SVt_PVIO);
2789 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2790 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2791 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2792 FPTR2DPTR(void *, IoANY(datasv)),
2793 SvPV_nolen(datasv)));
2794 av_unshift(PL_rsfp_filters, 1);
2795 av_store(PL_rsfp_filters, 0, datasv) ;
2800 /* Delete most recently added instance of this filter function. */
2802 Perl_filter_del(pTHX_ filter_t funcp)
2808 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2809 FPTR2DPTR(void*, funcp)));
2811 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2813 /* if filter is on top of stack (usual case) just pop it off */
2814 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2815 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2816 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2817 IoANY(datasv) = (void *)NULL;
2818 sv_free(av_pop(PL_rsfp_filters));
2822 /* we need to search for the correct entry and clear it */
2823 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2827 /* Invoke the idxth filter function for the current rsfp. */
2828 /* maxlen 0 = read one text line */
2830 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2835 /* This API is bad. It should have been using unsigned int for maxlen.
2836 Not sure if we want to change the API, but if not we should sanity
2837 check the value here. */
2838 const unsigned int correct_length
2847 if (!PL_parser || !PL_rsfp_filters)
2849 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2850 /* Provide a default input filter to make life easy. */
2851 /* Note that we append to the line. This is handy. */
2852 DEBUG_P(PerlIO_printf(Perl_debug_log,
2853 "filter_read %d: from rsfp\n", idx));
2854 if (correct_length) {
2857 const int old_len = SvCUR(buf_sv);
2859 /* ensure buf_sv is large enough */
2860 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2861 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2862 correct_length)) <= 0) {
2863 if (PerlIO_error(PL_rsfp))
2864 return -1; /* error */
2866 return 0 ; /* end of file */
2868 SvCUR_set(buf_sv, old_len + len) ;
2871 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2872 if (PerlIO_error(PL_rsfp))
2873 return -1; /* error */
2875 return 0 ; /* end of file */
2878 return SvCUR(buf_sv);
2880 /* Skip this filter slot if filter has been deleted */
2881 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2882 DEBUG_P(PerlIO_printf(Perl_debug_log,
2883 "filter_read %d: skipped (filter deleted)\n",
2885 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2887 /* Get function pointer hidden within datasv */
2888 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2889 DEBUG_P(PerlIO_printf(Perl_debug_log,
2890 "filter_read %d: via function %p (%s)\n",
2891 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2892 /* Call function. The function is expected to */
2893 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2894 /* Return: <0:error, =0:eof, >0:not eof */
2895 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2899 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2902 #ifdef PERL_CR_FILTER
2903 if (!PL_rsfp_filters) {
2904 filter_add(S_cr_textfilter,NULL);
2907 if (PL_rsfp_filters) {
2909 SvCUR_set(sv, 0); /* start with empty line */
2910 if (FILTER_READ(0, sv, 0) > 0)
2911 return ( SvPVX(sv) ) ;
2916 return (sv_gets(sv, fp, append));
2920 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2925 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2929 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2930 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2932 return GvHV(gv); /* Foo:: */
2935 /* use constant CLASS => 'MyClass' */
2936 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2937 if (gv && GvCV(gv)) {
2938 SV * const sv = cv_const_sv(GvCV(gv));
2940 pkgname = SvPV_const(sv, len);
2943 return gv_stashpvn(pkgname, len, 0);
2947 * S_readpipe_override
2948 * Check whether readpipe() is overriden, and generates the appropriate
2949 * optree, provided sublex_start() is called afterwards.
2952 S_readpipe_override(pTHX)
2955 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2956 pl_yylval.ival = OP_BACKTICK;
2958 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2960 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2961 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2962 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2964 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2965 append_elem(OP_LIST,
2966 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2967 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2974 * The intent of this yylex wrapper is to minimize the changes to the
2975 * tokener when we aren't interested in collecting madprops. It remains
2976 * to be seen how successful this strategy will be...
2983 char *s = PL_bufptr;
2985 /* make sure PL_thiswhite is initialized */
2989 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2990 if (PL_pending_ident)
2991 return S_pending_ident(aTHX);
2993 /* previous token ate up our whitespace? */
2994 if (!PL_lasttoke && PL_nextwhite) {
2995 PL_thiswhite = PL_nextwhite;
2999 /* isolate the token, and figure out where it is without whitespace */
3000 PL_realtokenstart = -1;
3004 assert(PL_curforce < 0);
3006 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3007 if (!PL_thistoken) {
3008 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3009 PL_thistoken = newSVpvs("");
3011 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3012 PL_thistoken = newSVpvn(tstart, s - tstart);
3015 if (PL_thismad) /* install head */
3016 CURMAD('X', PL_thistoken);
3019 /* last whitespace of a sublex? */
3020 if (optype == ')' && PL_endwhite) {
3021 CURMAD('X', PL_endwhite);
3026 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3027 if (!PL_thiswhite && !PL_endwhite && !optype) {
3028 sv_free(PL_thistoken);
3033 /* put off final whitespace till peg */
3034 if (optype == ';' && !PL_rsfp) {
3035 PL_nextwhite = PL_thiswhite;
3038 else if (PL_thisopen) {
3039 CURMAD('q', PL_thisopen);
3041 sv_free(PL_thistoken);
3045 /* Store actual token text as madprop X */
3046 CURMAD('X', PL_thistoken);
3050 /* add preceding whitespace as madprop _ */
3051 CURMAD('_', PL_thiswhite);
3055 /* add quoted material as madprop = */
3056 CURMAD('=', PL_thisstuff);
3060 /* add terminating quote as madprop Q */
3061 CURMAD('Q', PL_thisclose);
3065 /* special processing based on optype */
3069 /* opval doesn't need a TOKEN since it can already store mp */
3079 if (pl_yylval.opval)
3080 append_madprops(PL_thismad, pl_yylval.opval, 0);
3088 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3097 /* remember any fake bracket that lexer is about to discard */
3098 if (PL_lex_brackets == 1 &&
3099 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3102 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3105 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3106 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3109 break; /* don't bother looking for trailing comment */
3118 /* attach a trailing comment to its statement instead of next token */
3122 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3124 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3126 if (*s == '\n' || *s == '#') {
3127 while (s < PL_bufend && *s != '\n')
3131 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3132 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3149 /* Create new token struct. Note: opvals return early above. */
3150 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3157 S_tokenize_use(pTHX_ int is_use, char *s) {
3159 if (PL_expect != XSTATE)
3160 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3161 is_use ? "use" : "no"));
3163 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3164 s = force_version(s, TRUE);
3165 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3166 start_force(PL_curforce);
3167 NEXTVAL_NEXTTOKE.opval = NULL;
3170 else if (*s == 'v') {
3171 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3172 s = force_version(s, FALSE);
3176 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3177 s = force_version(s, FALSE);
3179 pl_yylval.ival = is_use;
3183 static const char* const exp_name[] =
3184 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3185 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3192 Works out what to call the token just pulled out of the input
3193 stream. The yacc parser takes care of taking the ops we return and
3194 stitching them into a tree.
3200 if read an identifier
3201 if we're in a my declaration
3202 croak if they tried to say my($foo::bar)
3203 build the ops for a my() declaration
3204 if it's an access to a my() variable
3205 are we in a sort block?
3206 croak if my($a); $a <=> $b
3207 build ops for access to a my() variable
3208 if in a dq string, and they've said @foo and we can't find @foo
3210 build ops for a bareword
3211 if we already built the token before, use it.
3216 #pragma segment Perl_yylex
3222 register char *s = PL_bufptr;
3227 /* orig_keyword, gvp, and gv are initialized here because
3228 * jump to the label just_a_word_zero can bypass their
3229 * initialization later. */
3230 I32 orig_keyword = 0;
3235 SV* tmp = newSVpvs("");
3236 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3237 (IV)CopLINE(PL_curcop),
3238 lex_state_names[PL_lex_state],
3239 exp_name[PL_expect],
3240 pv_display(tmp, s, strlen(s), 0, 60));
3243 /* check if there's an identifier for us to look at */
3244 if (PL_pending_ident)
3245 return REPORT(S_pending_ident(aTHX));
3247 /* no identifier pending identification */
3249 switch (PL_lex_state) {
3251 case LEX_NORMAL: /* Some compilers will produce faster */
3252 case LEX_INTERPNORMAL: /* code if we comment these out. */
3256 /* when we've already built the next token, just pull it out of the queue */
3260 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3262 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3263 PL_nexttoke[PL_lasttoke].next_mad = 0;
3264 if (PL_thismad && PL_thismad->mad_key == '_') {
3265 PL_thiswhite = (SV*)PL_thismad->mad_val;
3266 PL_thismad->mad_val = 0;
3267 mad_free(PL_thismad);
3272 PL_lex_state = PL_lex_defer;
3273 PL_expect = PL_lex_expect;
3274 PL_lex_defer = LEX_NORMAL;
3275 if (!PL_nexttoke[PL_lasttoke].next_type)
3280 pl_yylval = PL_nextval[PL_nexttoke];
3282 PL_lex_state = PL_lex_defer;
3283 PL_expect = PL_lex_expect;
3284 PL_lex_defer = LEX_NORMAL;
3288 /* FIXME - can these be merged? */
3289 return(PL_nexttoke[PL_lasttoke].next_type);
3291 return REPORT(PL_nexttype[PL_nexttoke]);
3294 /* interpolated case modifiers like \L \U, including \Q and \E.
3295 when we get here, PL_bufptr is at the \
3297 case LEX_INTERPCASEMOD:
3299 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3300 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3302 /* handle \E or end of string */
3303 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3305 if (PL_lex_casemods) {
3306 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3307 PL_lex_casestack[PL_lex_casemods] = '\0';
3309 if (PL_bufptr != PL_bufend
3310 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3312 PL_lex_state = LEX_INTERPCONCAT;
3315 PL_thistoken = newSVpvs("\\E");
3321 while (PL_bufptr != PL_bufend &&
3322 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3324 PL_thiswhite = newSVpvs("");
3325 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3329 if (PL_bufptr != PL_bufend)
3332 PL_lex_state = LEX_INTERPCONCAT;
3336 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3337 "### Saw case modifier\n"); });
3339 if (s[1] == '\\' && s[2] == 'E') {
3342 PL_thiswhite = newSVpvs("");
3343 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3346 PL_lex_state = LEX_INTERPCONCAT;
3351 if (!PL_madskills) /* when just compiling don't need correct */
3352 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3353 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3354 if ((*s == 'L' || *s == 'U') &&
3355 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3356 PL_lex_casestack[--PL_lex_casemods] = '\0';
3359 if (PL_lex_casemods > 10)
3360 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3361 PL_lex_casestack[PL_lex_casemods++] = *s;
3362 PL_lex_casestack[PL_lex_casemods] = '\0';
3363 PL_lex_state = LEX_INTERPCONCAT;
3364 start_force(PL_curforce);
3365 NEXTVAL_NEXTTOKE.ival = 0;
3367 start_force(PL_curforce);
3369 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3371 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3373 NEXTVAL_NEXTTOKE.ival = OP_LC;
3375 NEXTVAL_NEXTTOKE.ival = OP_UC;
3377 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3379 Perl_croak(aTHX_ "panic: yylex");
3381 SV* const tmpsv = newSVpvs("\\ ");
3382 /* replace the space with the character we want to escape
3384 SvPVX(tmpsv)[1] = *s;
3390 if (PL_lex_starts) {
3396 sv_free(PL_thistoken);
3397 PL_thistoken = newSVpvs("");
3400 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3401 if (PL_lex_casemods == 1 && PL_lex_inpat)
3410 case LEX_INTERPPUSH:
3411 return REPORT(sublex_push());
3413 case LEX_INTERPSTART:
3414 if (PL_bufptr == PL_bufend)
3415 return REPORT(sublex_done());
3416 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3417 "### Interpolated variable\n"); });
3419 PL_lex_dojoin = (*PL_bufptr == '@');
3420 PL_lex_state = LEX_INTERPNORMAL;
3421 if (PL_lex_dojoin) {
3422 start_force(PL_curforce);
3423 NEXTVAL_NEXTTOKE.ival = 0;
3425 start_force(PL_curforce);
3426 force_ident("\"", '$');
3427 start_force(PL_curforce);
3428 NEXTVAL_NEXTTOKE.ival = 0;
3430 start_force(PL_curforce);
3431 NEXTVAL_NEXTTOKE.ival = 0;
3433 start_force(PL_curforce);
3434 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3437 if (PL_lex_starts++) {
3442 sv_free(PL_thistoken);
3443 PL_thistoken = newSVpvs("");
3446 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3447 if (!PL_lex_casemods && PL_lex_inpat)
3454 case LEX_INTERPENDMAYBE:
3455 if (intuit_more(PL_bufptr)) {
3456 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3462 if (PL_lex_dojoin) {
3463 PL_lex_dojoin = FALSE;
3464 PL_lex_state = LEX_INTERPCONCAT;
3468 sv_free(PL_thistoken);
3469 PL_thistoken = newSVpvs("");
3474 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3475 && SvEVALED(PL_lex_repl))
3477 if (PL_bufptr != PL_bufend)
3478 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3482 case LEX_INTERPCONCAT:
3484 if (PL_lex_brackets)
3485 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3487 if (PL_bufptr == PL_bufend)
3488 return REPORT(sublex_done());
3490 if (SvIVX(PL_linestr) == '\'') {
3491 SV *sv = newSVsv(PL_linestr);
3494 else if ( PL_hints & HINT_NEW_RE )
3495 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3496 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3500 s = scan_const(PL_bufptr);
3502 PL_lex_state = LEX_INTERPCASEMOD;
3504 PL_lex_state = LEX_INTERPSTART;
3507 if (s != PL_bufptr) {
3508 start_force(PL_curforce);
3510 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3512 NEXTVAL_NEXTTOKE = pl_yylval;
3515 if (PL_lex_starts++) {
3519 sv_free(PL_thistoken);
3520 PL_thistoken = newSVpvs("");
3523 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3524 if (!PL_lex_casemods && PL_lex_inpat)
3537 PL_lex_state = LEX_NORMAL;
3538 s = scan_formline(PL_bufptr);
3539 if (!PL_lex_formbrack)
3545 PL_oldoldbufptr = PL_oldbufptr;
3551 sv_free(PL_thistoken);
3554 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3558 if (isIDFIRST_lazy_if(s,UTF))
3560 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3561 Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
3564 goto fake_eof; /* emulate EOF on ^D or ^Z */
3573 if (PL_lex_brackets) {
3574 yyerror((const char *)
3576 ? "Format not terminated"
3577 : "Missing right curly or square bracket"));
3579 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3580 "### Tokener got EOF\n");
3584 if (s++ < PL_bufend)
3585 goto retry; /* ignore stray nulls */
3588 if (!PL_in_eval && !PL_preambled) {
3589 PL_preambled = TRUE;
3595 /* Generate a string of Perl code to load the debugger.
3596 * If PERL5DB is set, it will return the contents of that,
3597 * otherwise a compile-time require of perl5db.pl. */
3599 const char * const pdb = PerlEnv_getenv("PERL5DB");
3602 sv_setpv(PL_linestr, pdb);
3603 sv_catpvs(PL_linestr,";");
3605 SETERRNO(0,SS_NORMAL);
3606 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3609 sv_setpvs(PL_linestr,"");
3610 if (PL_preambleav) {
3611 SV **svp = AvARRAY(PL_preambleav);
3612 SV **const end = svp + AvFILLp(PL_preambleav);
3614 sv_catsv(PL_linestr, *svp);
3616 sv_catpvs(PL_linestr, ";");
3618 sv_free((SV*)PL_preambleav);
3619 PL_preambleav = NULL;
3621 if (PL_minus_n || PL_minus_p) {
3622 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3624 sv_catpvs(PL_linestr,"chomp;");
3627 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3628 || *PL_splitstr == '"')
3629 && strchr(PL_splitstr + 1, *PL_splitstr))
3630 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3632 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3633 bytes can be used as quoting characters. :-) */
3634 const char *splits = PL_splitstr;
3635 sv_catpvs(PL_linestr, "our @F=split(q\0");
3638 if (*splits == '\\')
3639 sv_catpvn(PL_linestr, splits, 1);
3640 sv_catpvn(PL_linestr, splits, 1);
3641 } while (*splits++);
3642 /* This loop will embed the trailing NUL of
3643 PL_linestr as the last thing it does before
3645 sv_catpvs(PL_linestr, ");");
3649 sv_catpvs(PL_linestr,"our @F=split(' ');");
3653 sv_catpvs(PL_linestr,
3654 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3655 sv_catpvs(PL_linestr, "\n");
3656 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3658 PL_last_lop = PL_last_uni = NULL;
3659 if (PERLDB_LINE && PL_curstash != PL_debstash)
3660 update_debugger_info(PL_linestr, NULL, 0);
3664 bof = PL_rsfp ? TRUE : FALSE;
3665 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3668 PL_realtokenstart = -1;
3671 if (PL_preprocess && !PL_in_eval)
3672 (void)PerlProc_pclose(PL_rsfp);
3673 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3674 PerlIO_clearerr(PL_rsfp);
3676 (void)PerlIO_close(PL_rsfp);
3678 PL_doextract = FALSE;
3680 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3686 sv_setpvs(PL_linestr, ";}continue{print;}");
3688 sv_setpvs(PL_linestr, ";}");
3689 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3690 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3691 PL_last_lop = PL_last_uni = NULL;
3692 PL_minus_n = PL_minus_p = 0;
3695 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3696 PL_last_lop = PL_last_uni = NULL;
3697 sv_setpvn(PL_linestr,"",0);
3698 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3700 /* If it looks like the start of a BOM or raw UTF-16,
3701 * check if it in fact is. */
3707 #ifdef PERLIO_IS_STDIO
3708 # ifdef __GNU_LIBRARY__
3709 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3710 # define FTELL_FOR_PIPE_IS_BROKEN
3714 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3715 # define FTELL_FOR_PIPE_IS_BROKEN
3720 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3721 /* This loses the possibility to detect the bof
3722 * situation on perl -P when the libc5 is being used.
3723 * Workaround? Maybe attach some extra state to PL_rsfp?
3726 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3728 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3732 s = swallow_bom((U8*)s);
3736 /* Incest with pod. */
3739 sv_catsv(PL_thiswhite, PL_linestr);
3741 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3742 sv_setpvn(PL_linestr, "", 0);
3743 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3744 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3745 PL_last_lop = PL_last_uni = NULL;
3746 PL_doextract = FALSE;
3750 } while (PL_doextract);
3751 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3752 if (PERLDB_LINE && PL_curstash != PL_debstash)
3753 update_debugger_info(PL_linestr, NULL, 0);
3754 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3755 PL_last_lop = PL_last_uni = NULL;
3756 if (CopLINE(PL_curcop) == 1) {
3757 while (s < PL_bufend && isSPACE(*s))
3759 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3763 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3767 if (*s == '#' && *(s+1) == '!')
3769 #ifdef ALTERNATE_SHEBANG
3771 static char const as[] = ALTERNATE_SHEBANG;
3772 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3773 d = s + (sizeof(as) - 1);
3775 #endif /* ALTERNATE_SHEBANG */
3784 while (*d && !isSPACE(*d))
3788 #ifdef ARG_ZERO_IS_SCRIPT
3789 if (ipathend > ipath) {
3791 * HP-UX (at least) sets argv[0] to the script name,
3792 * which makes $^X incorrect. And Digital UNIX and Linux,
3793 * at least, set argv[0] to the basename of the Perl
3794 * interpreter. So, having found "#!", we'll set it right.
3796 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3798 assert(SvPOK(x) || SvGMAGICAL(x));
3799 if (sv_eq(x, CopFILESV(PL_curcop))) {
3800 sv_setpvn(x, ipath, ipathend - ipath);
3806 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3807 const char * const lstart = SvPV_const(x,llen);
3809 bstart += blen - llen;
3810 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3811 sv_setpvn(x, ipath, ipathend - ipath);
3816 TAINT_NOT; /* $^X is always tainted, but that's OK */
3818 #endif /* ARG_ZERO_IS_SCRIPT */
3823 d = instr(s,"perl -");
3825 d = instr(s,"perl");
3827 /* avoid getting into infinite loops when shebang
3828 * line contains "Perl" rather than "perl" */
3830 for (d = ipathend-4; d >= ipath; --d) {
3831 if ((*d == 'p' || *d == 'P')
3832 && !ibcmp(d, "perl", 4))
3842 #ifdef ALTERNATE_SHEBANG
3844 * If the ALTERNATE_SHEBANG on this system starts with a
3845 * character that can be part of a Perl expression, then if
3846 * we see it but not "perl", we're probably looking at the
3847 * start of Perl code, not a request to hand off to some
3848 * other interpreter. Similarly, if "perl" is there, but
3849 * not in the first 'word' of the line, we assume the line
3850 * contains the start of the Perl program.
3852 if (d && *s != '#') {
3853 const char *c = ipath;
3854 while (*c && !strchr("; \t\r\n\f\v#", *c))
3857 d = NULL; /* "perl" not in first word; ignore */
3859 *s = '#'; /* Don't try to parse shebang line */
3861 #endif /* ALTERNATE_SHEBANG */
3862 #ifndef MACOS_TRADITIONAL
3867 !instr(s,"indir") &&
3868 instr(PL_origargv[0],"perl"))
3875 while (s < PL_bufend && isSPACE(*s))
3877 if (s < PL_bufend) {
3878 Newxz(newargv,PL_origargc+3,char*);
3880 while (s < PL_bufend && !isSPACE(*s))
3883 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3886 newargv = PL_origargv;
3889 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3891 Perl_croak(aTHX_ "Can't exec %s", ipath);
3895 while (*d && !isSPACE(*d))
3897 while (SPACE_OR_TAB(*d))
3901 const bool switches_done = PL_doswitches;
3902 const U32 oldpdb = PL_perldb;
3903 const bool oldn = PL_minus_n;
3904 const bool oldp = PL_minus_p;
3908 if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
3909 const char * const m = d1;
3910 while (*d1 && !isSPACE(*d1))
3912 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3915 d1 = moreswitches(d1);
3917 if (PL_doswitches && !switches_done) {
3918 int argc = PL_origargc;
3919 char **argv = PL_origargv;
3922 } while (argc && argv[0][0] == '-' && argv[0][1]);
3923 init_argv_symbols(argc,argv);
3925 if ((PERLDB_LINE && !oldpdb) ||
3926 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3927 /* if we have already added "LINE: while (<>) {",
3928 we must not do it again */
3930 sv_setpvn(PL_linestr, "", 0);
3931 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3932 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3933 PL_last_lop = PL_last_uni = NULL;
3934 PL_preambled = FALSE;
3936 (void)gv_fetchfile(PL_origfilename);
3943 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3945 PL_lex_state = LEX_FORMLINE;
3950 #ifdef PERL_STRICT_CR
3951 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3953 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3955 case ' ': case '\t': case '\f': case 013:
3956 #ifdef MACOS_TRADITIONAL
3960 PL_realtokenstart = -1;
3962 PL_thiswhite = newSVpvs("");
3963 sv_catpvn(PL_thiswhite, s, 1);
3970 PL_realtokenstart = -1;
3974 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3975 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3976 /* handle eval qq[#line 1 "foo"\n ...] */
3977 CopLINE_dec(PL_curcop);
3980 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3982 if (!PL_in_eval || PL_rsfp)
3987 while (d < PL_bufend && *d != '\n')
3991 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3992 Perl_croak(aTHX_ "panic: input overflow");
3995 PL_thiswhite = newSVpvn(s, d - s);
4000 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4002 PL_lex_state = LEX_FORMLINE;
4008 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4009 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4012 TOKEN(PEG); /* make sure any #! line is accessible */
4017 /* if (PL_madskills && PL_lex_formbrack) { */
4019 while (d < PL_bufend && *d != '\n')
4023 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4024 Perl_croak(aTHX_ "panic: input overflow");
4025 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4027 PL_thiswhite = newSVpvs("");
4028 if (CopLINE(PL_curcop) == 1) {
4029 sv_setpvn(PL_thiswhite, "", 0);
4032 sv_catpvn(PL_thiswhite, s, d - s);
4046 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4054 while (s < PL_bufend && SPACE_OR_TAB(*s))
4057 if (strnEQ(s,"=>",2)) {
4058 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4059 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4060 OPERATOR('-'); /* unary minus */
4062 PL_last_uni = PL_oldbufptr;
4064 case 'r': ftst = OP_FTEREAD; break;
4065 case 'w': ftst = OP_FTEWRITE; break;
4066 case 'x': ftst = OP_FTEEXEC; break;
4067 case 'o': ftst = OP_FTEOWNED; break;
4068 case 'R': ftst = OP_FTRREAD; break;
4069 case 'W': ftst = OP_FTRWRITE; break;
4070 case 'X': ftst = OP_FTREXEC; break;
4071 case 'O': ftst = OP_FTROWNED; break;
4072 case 'e': ftst = OP_FTIS; break;
4073 case 'z': ftst = OP_FTZERO; break;
4074 case 's': ftst = OP_FTSIZE; break;
4075 case 'f': ftst = OP_FTFILE; break;
4076 case 'd': ftst = OP_FTDIR; break;
4077 case 'l': ftst = OP_FTLINK; break;
4078 case 'p': ftst = OP_FTPIPE; break;
4079 case 'S': ftst = OP_FTSOCK; break;
4080 case 'u': ftst = OP_FTSUID; break;
4081 case 'g': ftst = OP_FTSGID; break;
4082 case 'k': ftst = OP_FTSVTX; break;
4083 case 'b': ftst = OP_FTBLK; break;
4084 case 'c': ftst = OP_FTCHR; break;
4085 case 't': ftst = OP_FTTTY; break;
4086 case 'T': ftst = OP_FTTEXT; break;
4087 case 'B': ftst = OP_FTBINARY; break;
4088 case 'M': case 'A': case 'C':
4089 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4091 case 'M': ftst = OP_FTMTIME; break;
4092 case 'A': ftst = OP_FTATIME; break;
4093 case 'C': ftst = OP_FTCTIME; break;
4101 PL_last_lop_op = (OPCODE)ftst;
4102 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4103 "### Saw file test %c\n", (int)tmp);
4108 /* Assume it was a minus followed by a one-letter named
4109 * subroutine call (or a -bareword), then. */
4110 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4111 "### '-%c' looked like a file test but was not\n",
4118 const char tmp = *s++;
4121 if (PL_expect == XOPERATOR)
4126 else if (*s == '>') {
4129 if (isIDFIRST_lazy_if(s,UTF)) {
4130 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4138 if (PL_expect == XOPERATOR)
4141 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4143 OPERATOR('-'); /* unary minus */
4149 const char tmp = *s++;
4152 if (PL_expect == XOPERATOR)
4157 if (PL_expect == XOPERATOR)
4160 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4167 if (PL_expect != XOPERATOR) {
4168 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4169 PL_expect = XOPERATOR;
4170 force_ident(PL_tokenbuf, '*');
4183 if (PL_expect == XOPERATOR) {
4187 PL_tokenbuf[0] = '%';
4188 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4189 sizeof PL_tokenbuf - 1, FALSE);
4190 if (!PL_tokenbuf[1]) {
4193 PL_pending_ident = '%';
4204 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4211 const char tmp = *s++;
4217 goto just_a_word_zero_gv;
4220 switch (PL_expect) {
4226 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4228 PL_bufptr = s; /* update in case we back off */
4234 PL_expect = XTERMBLOCK;
4237 stuffstart = s - SvPVX(PL_linestr) - 1;
4241 while (isIDFIRST_lazy_if(s,UTF)) {
4244 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4245 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4246 if (tmp < 0) tmp = -tmp;
4260 sv = newSVpvn(s, len);
4262 d = scan_str(d,TRUE,TRUE);
4264 /* MUST advance bufptr here to avoid bogus
4265 "at end of line" context messages from yyerror().
4267 PL_bufptr = s + len;
4268 yyerror("Unterminated attribute parameter in attribute list");
4272 return REPORT(0); /* EOF indicator */
4276 sv_catsv(sv, PL_lex_stuff);
4277 attrs = append_elem(OP_LIST, attrs,
4278 newSVOP(OP_CONST, 0, sv));
4279 SvREFCNT_dec(PL_lex_stuff);
4280 PL_lex_stuff = NULL;
4283 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4285 if (PL_in_my == KEY_our) {
4287 GvUNIQUE_on(cGVOPx_gv(pl_yylval.opval));
4289 /* skip to avoid loading attributes.pm */
4291 deprecate(":unique");
4294 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4297 /* NOTE: any CV attrs applied here need to be part of
4298 the CVf_BUILTIN_ATTRS define in cv.h! */
4299 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4301 CvLVALUE_on(PL_compcv);
4303 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4305 CvLOCKED_on(PL_compcv);
4307 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4309 CvMETHOD_on(PL_compcv);
4311 /* After we've set the flags, it could be argued that
4312 we don't need to do the attributes.pm-based setting
4313 process, and shouldn't bother appending recognized
4314 flags. To experiment with that, uncomment the
4315 following "else". (Note that's already been
4316 uncommented. That keeps the above-applied built-in
4317 attributes from being intercepted (and possibly
4318 rejected) by a package's attribute routines, but is
4319 justified by the performance win for the common case
4320 of applying only built-in attributes.) */
4322 attrs = append_elem(OP_LIST, attrs,
4323 newSVOP(OP_CONST, 0,
4327 if (*s == ':' && s[1] != ':')
4330 break; /* require real whitespace or :'s */
4331 /* XXX losing whitespace on sequential attributes here */
4335 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4336 if (*s != ';' && *s != '}' && *s != tmp
4337 && (tmp != '=' || *s != ')')) {
4338 const char q = ((*s == '\'') ? '"' : '\'');
4339 /* If here for an expression, and parsed no attrs, back
4341 if (tmp == '=' && !attrs) {
4345 /* MUST advance bufptr here to avoid bogus "at end of line"
4346 context messages from yyerror().
4349 yyerror( (const char *)
4351 ? Perl_form(aTHX_ "Invalid separator character "
4352 "%c%c%c in attribute list", q, *s, q)
4353 : "Unterminated attribute list" ) );
4361 start_force(PL_curforce);
4362 NEXTVAL_NEXTTOKE.opval = attrs;
4363 CURMAD('_', PL_nextwhite);
4368 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4369 (s - SvPVX(PL_linestr)) - stuffstart);
4377 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4378 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4386 const char tmp = *s++;
4391 const char tmp = *s++;
4399 if (PL_lex_brackets <= 0)
4400 yyerror("Unmatched right square bracket");
4403 if (PL_lex_state == LEX_INTERPNORMAL) {
4404 if (PL_lex_brackets == 0) {
4405 if (*s == '-' && s[1] == '>')
4406 PL_lex_state = LEX_INTERPENDMAYBE;
4407 else if (*s != '[' && *s != '{')
4408 PL_lex_state = LEX_INTERPEND;
4415 if (PL_lex_brackets > 100) {
4416 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4418 switch (PL_expect) {
4420 if (PL_lex_formbrack) {
4424 if (PL_oldoldbufptr == PL_last_lop)
4425 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4427 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4428 OPERATOR(HASHBRACK);
4430 while (s < PL_bufend && SPACE_OR_TAB(*s))
4433 PL_tokenbuf[0] = '\0';
4434 if (d < PL_bufend && *d == '-') {
4435 PL_tokenbuf[0] = '-';
4437 while (d < PL_bufend && SPACE_OR_TAB(*d))
4440 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4441 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4443 while (d < PL_bufend && SPACE_OR_TAB(*d))
4446 const char minus = (PL_tokenbuf[0] == '-');
4447 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4455 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4460 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4465 if (PL_oldoldbufptr == PL_last_lop)
4466 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4468 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4471 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4473 /* This hack is to get the ${} in the message. */
4475 yyerror("syntax error");
4478 OPERATOR(HASHBRACK);
4480 /* This hack serves to disambiguate a pair of curlies
4481 * as being a block or an anon hash. Normally, expectation
4482 * determines that, but in cases where we're not in a
4483 * position to expect anything in particular (like inside
4484 * eval"") we have to resolve the ambiguity. This code
4485 * covers the case where the first term in the curlies is a
4486 * quoted string. Most other cases need to be explicitly
4487 * disambiguated by prepending a "+" before the opening
4488 * curly in order to force resolution as an anon hash.
4490 * XXX should probably propagate the outer expectation
4491 * into eval"" to rely less on this hack, but that could
4492 * potentially break current behavior of eval"".
4496 if (*s == '\'' || *s == '"' || *s == '`') {
4497 /* common case: get past first string, handling escapes */
4498 for (t++; t < PL_bufend && *t != *s;)
4499 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4503 else if (*s == 'q') {
4506 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4509 /* skip q//-like construct */
4511 char open, close, term;
4514 while (t < PL_bufend && isSPACE(*t))
4516 /* check for q => */
4517 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4518 OPERATOR(HASHBRACK);
4522 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4526 for (t++; t < PL_bufend; t++) {
4527 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4529 else if (*t == open)
4533 for (t++; t < PL_bufend; t++) {
4534 if (*t == '\\' && t+1 < PL_bufend)
4536 else if (*t == close && --brackets <= 0)
4538 else if (*t == open)
4545 /* skip plain q word */
4546 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4549 else if (isALNUM_lazy_if(t,UTF)) {
4551 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4554 while (t < PL_bufend && isSPACE(*t))
4556 /* if comma follows first term, call it an anon hash */
4557 /* XXX it could be a comma expression with loop modifiers */
4558 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4559 || (*t == '=' && t[1] == '>')))
4560 OPERATOR(HASHBRACK);
4561 if (PL_expect == XREF)
4564 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4570 pl_yylval.ival = CopLINE(PL_curcop);
4571 if (isSPACE(*s) || *s == '#')
4572 PL_copline = NOLINE; /* invalidate current command line number */
4577 if (PL_lex_brackets <= 0)
4578 yyerror("Unmatched right curly bracket");
4580 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4581 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4582 PL_lex_formbrack = 0;
4583 if (PL_lex_state == LEX_INTERPNORMAL) {
4584 if (PL_lex_brackets == 0) {
4585 if (PL_expect & XFAKEBRACK) {
4586 PL_expect &= XENUMMASK;
4587 PL_lex_state = LEX_INTERPEND;
4592 PL_thiswhite = newSVpvs("");
4593 sv_catpvn(PL_thiswhite,"}",1);
4596 return yylex(); /* ignore fake brackets */
4598 if (*s == '-' && s[1] == '>')
4599 PL_lex_state = LEX_INTERPENDMAYBE;
4600 else if (*s != '[' && *s != '{')
4601 PL_lex_state = LEX_INTERPEND;
4604 if (PL_expect & XFAKEBRACK) {
4605 PL_expect &= XENUMMASK;
4607 return yylex(); /* ignore fake brackets */
4609 start_force(PL_curforce);
4611 curmad('X', newSVpvn(s-1,1));
4612 CURMAD('_', PL_thiswhite);
4617 PL_thistoken = newSVpvs("");
4625 if (PL_expect == XOPERATOR) {
4626 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4627 && isIDFIRST_lazy_if(s,UTF))
4629 CopLINE_dec(PL_curcop);
4630 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4631 CopLINE_inc(PL_curcop);
4636 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4638 PL_expect = XOPERATOR;
4639 force_ident(PL_tokenbuf, '&');
4643 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4655 const char tmp = *s++;
4662 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4663 && strchr("+-*/%.^&|<",tmp))
4664 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4665 "Reversed %c= operator",(int)tmp);
4667 if (PL_expect == XSTATE && isALPHA(tmp) &&
4668 (s == PL_linestart+1 || s[-2] == '\n') )
4670 if (PL_in_eval && !PL_rsfp) {
4675 if (strnEQ(s,"=cut",4)) {
4691 PL_thiswhite = newSVpvs("");
4692 sv_catpvn(PL_thiswhite, PL_linestart,
4693 PL_bufend - PL_linestart);
4697 PL_doextract = TRUE;
4701 if (PL_lex_brackets < PL_lex_formbrack) {
4703 #ifdef PERL_STRICT_CR
4704 while (SPACE_OR_TAB(*t))
4706 while (SPACE_OR_TAB(*t) || *t == '\r')
4709 if (*t == '\n' || *t == '#') {
4720 const char tmp = *s++;
4722 /* was this !=~ where !~ was meant?
4723 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4725 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4726 const char *t = s+1;
4728 while (t < PL_bufend && isSPACE(*t))
4731 if (*t == '/' || *t == '?' ||
4732 ((*t == 'm' || *t == 's' || *t == 'y')
4733 && !isALNUM(t[1])) ||
4734 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4735 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4736 "!=~ should be !~");
4746 if (PL_expect != XOPERATOR) {
4747 if (s[1] != '<' && !strchr(s,'>'))
4750 s = scan_heredoc(s);
4752 s = scan_inputsymbol(s);
4753 TERM(sublex_start());
4759 SHop(OP_LEFT_SHIFT);
4773 const char tmp = *s++;
4775 SHop(OP_RIGHT_SHIFT);
4776 else if (tmp == '=')
4785 if (PL_expect == XOPERATOR) {
4786 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4788 deprecate_old(commaless_variable_list);
4789 return REPORT(','); /* grandfather non-comma-format format */
4793 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4794 PL_tokenbuf[0] = '@';
4795 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4796 sizeof PL_tokenbuf - 1, FALSE);
4797 if (PL_expect == XOPERATOR)
4798 no_op("Array length", s);
4799 if (!PL_tokenbuf[1])
4801 PL_expect = XOPERATOR;
4802 PL_pending_ident = '#';
4806 PL_tokenbuf[0] = '$';
4807 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4808 sizeof PL_tokenbuf - 1, FALSE);
4809 if (PL_expect == XOPERATOR)
4811 if (!PL_tokenbuf[1]) {
4813 yyerror("Final $ should be \\$ or $name");
4817 /* This kludge not intended to be bulletproof. */
4818 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4819 pl_yylval.opval = newSVOP(OP_CONST, 0,
4820 newSViv(CopARYBASE_get(&PL_compiling)));
4821 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4827 const char tmp = *s;
4828 if (PL_lex_state == LEX_NORMAL)
4831 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4832 && intuit_more(s)) {
4834 PL_tokenbuf[0] = '@';
4835 if (ckWARN(WARN_SYNTAX)) {
4838 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4841 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4842 while (t < PL_bufend && *t != ']')
4844 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4845 "Multidimensional syntax %.*s not supported",
4846 (int)((t - PL_bufptr) + 1), PL_bufptr);
4850 else if (*s == '{') {
4852 PL_tokenbuf[0] = '%';
4853 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4854 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4856 char tmpbuf[sizeof PL_tokenbuf];
4859 } while (isSPACE(*t));
4860 if (isIDFIRST_lazy_if(t,UTF)) {
4862 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4866 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4867 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4868 "You need to quote \"%s\"",
4875 PL_expect = XOPERATOR;
4876 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4877 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4878 if (!islop || PL_last_lop_op == OP_GREPSTART)
4879 PL_expect = XOPERATOR;
4880 else if (strchr("$@\"'`q", *s))
4881 PL_expect = XTERM; /* e.g. print $fh "foo" */
4882 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4883 PL_expect = XTERM; /* e.g. print $fh &sub */
4884 else if (isIDFIRST_lazy_if(s,UTF)) {
4885 char tmpbuf[sizeof PL_tokenbuf];
4887 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4888 if ((t2 = keyword(tmpbuf, len, 0))) {
4889 /* binary operators exclude handle interpretations */
4901 PL_expect = XTERM; /* e.g. print $fh length() */
4906 PL_expect = XTERM; /* e.g. print $fh subr() */
4909 else if (isDIGIT(*s))
4910 PL_expect = XTERM; /* e.g. print $fh 3 */
4911 else if (*s == '.' && isDIGIT(s[1]))
4912 PL_expect = XTERM; /* e.g. print $fh .3 */
4913 else if ((*s == '?' || *s == '-' || *s == '+')
4914 && !isSPACE(s[1]) && s[1] != '=')
4915 PL_expect = XTERM; /* e.g. print $fh -1 */
4916 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4918 PL_expect = XTERM; /* e.g. print $fh /.../
4919 XXX except DORDOR operator
4921 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4923 PL_expect = XTERM; /* print $fh <<"EOF" */
4926 PL_pending_ident = '$';
4930 if (PL_expect == XOPERATOR)
4932 PL_tokenbuf[0] = '@';
4933 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4934 if (!PL_tokenbuf[1]) {
4937 if (PL_lex_state == LEX_NORMAL)
4939 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4941 PL_tokenbuf[0] = '%';
4943 /* Warn about @ where they meant $. */
4944 if (*s == '[' || *s == '{') {
4945 if (ckWARN(WARN_SYNTAX)) {
4946 const char *t = s + 1;
4947 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4949 if (*t == '}' || *t == ']') {
4951 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4952 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4953 "Scalar value %.*s better written as $%.*s",
4954 (int)(t-PL_bufptr), PL_bufptr,
4955 (int)(t-PL_bufptr-1), PL_bufptr+1);
4960 PL_pending_ident = '@';
4963 case '/': /* may be division, defined-or, or pattern */
4964 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4968 case '?': /* may either be conditional or pattern */
4969 if(PL_expect == XOPERATOR) {
4977 /* A // operator. */
4987 /* Disable warning on "study /blah/" */
4988 if (PL_oldoldbufptr == PL_last_uni
4989 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4990 || memNE(PL_last_uni, "study", 5)
4991 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4994 s = scan_pat(s,OP_MATCH);
4995 TERM(sublex_start());
4999 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5000 #ifdef PERL_STRICT_CR
5003 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5005 && (s == PL_linestart || s[-1] == '\n') )
5007 PL_lex_formbrack = 0;
5011 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5017 pl_yylval.ival = OPf_SPECIAL;
5023 if (PL_expect != XOPERATOR)
5028 case '0': case '1': case '2': case '3': case '4':
5029 case '5': case '6': case '7': case '8': case '9':
5030 s = scan_num(s, &pl_yylval);
5031 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5032 if (PL_expect == XOPERATOR)
5037 s = scan_str(s,!!PL_madskills,FALSE);
5038 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5039 if (PL_expect == XOPERATOR) {
5040 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5042 deprecate_old(commaless_variable_list);
5043 return REPORT(','); /* grandfather non-comma-format format */
5050 pl_yylval.ival = OP_CONST;
5051 TERM(sublex_start());
5054 s = scan_str(s,!!PL_madskills,FALSE);
5055 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5056 if (PL_expect == XOPERATOR) {
5057 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5059 deprecate_old(commaless_variable_list);
5060 return REPORT(','); /* grandfather non-comma-format format */
5067 pl_yylval.ival = OP_CONST;
5068 /* FIXME. I think that this can be const if char *d is replaced by
5069 more localised variables. */
5070 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5071 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5072 pl_yylval.ival = OP_STRINGIFY;
5076 TERM(sublex_start());
5079 s = scan_str(s,!!PL_madskills,FALSE);
5080 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5081 if (PL_expect == XOPERATOR)
5082 no_op("Backticks",s);
5085 readpipe_override();
5086 TERM(sublex_start());
5090 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5091 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5093 if (PL_expect == XOPERATOR)
5094 no_op("Backslash",s);
5098 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5099 char *start = s + 2;
5100 while (isDIGIT(*start) || *start == '_')
5102 if (*start == '.' && isDIGIT(start[1])) {
5103 s = scan_num(s, &pl_yylval);
5106 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5107 else if (!isALPHA(*start) && (PL_expect == XTERM
5108 || PL_expect == XREF || PL_expect == XSTATE
5109 || PL_expect == XTERMORDORDOR)) {
5110 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5112 s = scan_num(s, &pl_yylval);
5119 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5161 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5163 /* Some keywords can be followed by any delimiter, including ':' */
5164 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5165 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5166 (PL_tokenbuf[0] == 'q' &&
5167 strchr("qwxr", PL_tokenbuf[1])))));
5169 /* x::* is just a word, unless x is "CORE" */
5170 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5174 while (d < PL_bufend && isSPACE(*d))
5175 d++; /* no comments skipped here, or s### is misparsed */
5177 /* Is this a label? */
5178 if (!tmp && PL_expect == XSTATE
5179 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5181 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5186 /* Check for keywords */
5187 tmp = keyword(PL_tokenbuf, len, 0);
5189 /* Is this a word before a => operator? */
5190 if (*d == '=' && d[1] == '>') {
5193 = (OP*)newSVOP(OP_CONST, 0,
5194 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5195 pl_yylval.opval->op_private = OPpCONST_BARE;
5199 if (tmp < 0) { /* second-class keyword? */
5200 GV *ogv = NULL; /* override (winner) */
5201 GV *hgv = NULL; /* hidden (loser) */
5202 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5204 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5207 if (GvIMPORTED_CV(gv))
5209 else if (! CvMETHOD(cv))
5213 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5214 (gv = *gvp) && isGV_with_GP(gv) &&
5215 GvCVu(gv) && GvIMPORTED_CV(gv))
5222 tmp = 0; /* overridden by import or by GLOBAL */
5225 && -tmp==KEY_lock /* XXX generalizable kludge */
5228 tmp = 0; /* any sub overrides "weak" keyword */
5230 else { /* no override */
5232 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5233 Perl_warner(aTHX_ packWARN(WARN_MISC),
5234 "dump() better written as CORE::dump()");
5238 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5239 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5240 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5241 "Ambiguous call resolved as CORE::%s(), %s",
5242 GvENAME(hgv), "qualify as such or use &");
5249 default: /* not a keyword */
5250 /* Trade off - by using this evil construction we can pull the
5251 variable gv into the block labelled keylookup. If not, then
5252 we have to give it function scope so that the goto from the
5253 earlier ':' case doesn't bypass the initialisation. */
5255 just_a_word_zero_gv:
5263 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5266 SV *nextPL_nextwhite = 0;
5270 /* Get the rest if it looks like a package qualifier */
5272 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5274 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5277 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5278 *s == '\'' ? "'" : "::");
5283 if (PL_expect == XOPERATOR) {
5284 if (PL_bufptr == PL_linestart) {
5285 CopLINE_dec(PL_curcop);
5286 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5287 CopLINE_inc(PL_curcop);
5290 no_op("Bareword",s);
5293 /* Look for a subroutine with this name in current package,
5294 unless name is "Foo::", in which case Foo is a bearword
5295 (and a package name). */
5297 if (len > 2 && !PL_madskills &&
5298 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5300 if (ckWARN(WARN_BAREWORD)
5301 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5302 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5303 "Bareword \"%s\" refers to nonexistent package",
5306 PL_tokenbuf[len] = '\0';
5312 /* Mustn't actually add anything to a symbol table.
5313 But also don't want to "initialise" any placeholder
5314 constants that might already be there into full
5315 blown PVGVs with attached PVCV. */
5316 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5317 GV_NOADD_NOINIT, SVt_PVCV);
5322 /* if we saw a global override before, get the right name */
5325 sv = newSVpvs("CORE::GLOBAL::");
5326 sv_catpv(sv,PL_tokenbuf);
5329 /* If len is 0, newSVpv does strlen(), which is correct.
5330 If len is non-zero, then it will be the true length,
5331 and so the scalar will be created correctly. */
5332 sv = newSVpv(PL_tokenbuf,len);
5335 if (PL_madskills && !PL_thistoken) {
5336 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5337 PL_thistoken = newSVpvn(start,s - start);
5338 PL_realtokenstart = s - SvPVX(PL_linestr);
5342 /* Presume this is going to be a bareword of some sort. */
5345 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5346 pl_yylval.opval->op_private = OPpCONST_BARE;
5347 /* UTF-8 package name? */
5348 if (UTF && !IN_BYTES &&
5349 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5352 /* And if "Foo::", then that's what it certainly is. */
5357 /* Do the explicit type check so that we don't need to force
5358 the initialisation of the symbol table to have a real GV.
5359 Beware - gv may not really be a PVGV, cv may not really be
5360 a PVCV, (because of the space optimisations that gv_init
5361 understands) But they're true if for this symbol there is
5362 respectively a typeglob and a subroutine.
5364 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5365 /* Real typeglob, so get the real subroutine: */
5367 /* A proxy for a subroutine in this package? */
5368 : SvOK(gv) ? (CV *) gv : NULL)
5371 /* See if it's the indirect object for a list operator. */
5373 if (PL_oldoldbufptr &&
5374 PL_oldoldbufptr < PL_bufptr &&
5375 (PL_oldoldbufptr == PL_last_lop
5376 || PL_oldoldbufptr == PL_last_uni) &&
5377 /* NO SKIPSPACE BEFORE HERE! */
5378 (PL_expect == XREF ||
5379 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5381 bool immediate_paren = *s == '(';
5383 /* (Now we can afford to cross potential line boundary.) */
5384 s = SKIPSPACE2(s,nextPL_nextwhite);
5386 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5389 /* Two barewords in a row may indicate method call. */
5391 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5392 (tmp = intuit_method(s, gv, cv)))
5395 /* If not a declared subroutine, it's an indirect object. */
5396 /* (But it's an indir obj regardless for sort.) */
5397 /* Also, if "_" follows a filetest operator, it's a bareword */
5400 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5402 (PL_last_lop_op != OP_MAPSTART &&
5403 PL_last_lop_op != OP_GREPSTART))))
5404 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5405 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5408 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5413 PL_expect = XOPERATOR;
5416 s = SKIPSPACE2(s,nextPL_nextwhite);
5417 PL_nextwhite = nextPL_nextwhite;
5422 /* Is this a word before a => operator? */
5423 if (*s == '=' && s[1] == '>' && !pkgname) {
5425 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5426 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5427 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5431 /* If followed by a paren, it's certainly a subroutine. */
5436 while (SPACE_OR_TAB(*d))
5438 if (*d == ')' && (sv = gv_const_sv(gv))) {
5445 PL_nextwhite = PL_thiswhite;
5448 start_force(PL_curforce);
5450 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5451 PL_expect = XOPERATOR;
5454 PL_nextwhite = nextPL_nextwhite;
5455 curmad('X', PL_thistoken);
5456 PL_thistoken = newSVpvs("");
5464 /* If followed by var or block, call it a method (unless sub) */
5466 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5467 PL_last_lop = PL_oldbufptr;
5468 PL_last_lop_op = OP_METHOD;
5472 /* If followed by a bareword, see if it looks like indir obj. */
5475 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5476 && (tmp = intuit_method(s, gv, cv)))
5479 /* Not a method, so call it a subroutine (if defined) */
5482 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5483 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5484 "Ambiguous use of -%s resolved as -&%s()",
5485 PL_tokenbuf, PL_tokenbuf);
5486 /* Check for a constant sub */
5487 if ((sv = gv_const_sv(gv))) {
5489 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5490 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5491 pl_yylval.opval->op_private = 0;
5495 /* Resolve to GV now. */
5496 if (SvTYPE(gv) != SVt_PVGV) {
5497 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5498 assert (SvTYPE(gv) == SVt_PVGV);
5499 /* cv must have been some sort of placeholder, so
5500 now needs replacing with a real code reference. */
5504 op_free(pl_yylval.opval);
5505 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5506 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5507 PL_last_lop = PL_oldbufptr;
5508 PL_last_lop_op = OP_ENTERSUB;
5509 /* Is there a prototype? */
5517 const char *proto = SvPV_const((SV*)cv, protolen);
5520 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5522 while (*proto == ';')
5524 if (*proto == '&' && *s == '{') {
5526 sv_setpvs(PL_subname, "__ANON__");
5528 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5535 PL_nextwhite = PL_thiswhite;
5538 start_force(PL_curforce);
5539 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5542 PL_nextwhite = nextPL_nextwhite;
5543 curmad('X', PL_thistoken);
5544 PL_thistoken = newSVpvs("");
5551 /* Guess harder when madskills require "best effort". */
5552 if (PL_madskills && (!gv || !GvCVu(gv))) {
5553 int probable_sub = 0;
5554 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5556 else if (isALPHA(*s)) {
5560 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5561 if (!keyword(tmpbuf, tmplen, 0))
5564 while (d < PL_bufend && isSPACE(*d))
5566 if (*d == '=' && d[1] == '>')
5571 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5572 op_free(pl_yylval.opval);
5573 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5574 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5575 PL_last_lop = PL_oldbufptr;
5576 PL_last_lop_op = OP_ENTERSUB;
5577 PL_nextwhite = PL_thiswhite;
5579 start_force(PL_curforce);
5580 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5582 PL_nextwhite = nextPL_nextwhite;
5583 curmad('X', PL_thistoken);
5584 PL_thistoken = newSVpvs("");
5589 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5596 /* Call it a bare word */
5598 if (PL_hints & HINT_STRICT_SUBS)
5599 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5602 if (lastchar != '-') {
5603 if (ckWARN(WARN_RESERVED)) {
5607 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5608 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5615 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5616 && ckWARN_d(WARN_AMBIGUOUS)) {
5617 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5618 "Operator or semicolon missing before %c%s",
5619 lastchar, PL_tokenbuf);
5620 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5621 "Ambiguous use of %c resolved as operator %c",
5622 lastchar, lastchar);
5628 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5629 newSVpv(CopFILE(PL_curcop),0));
5633 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5634 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5637 case KEY___PACKAGE__:
5638 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5640 ? newSVhek(HvNAME_HEK(PL_curstash))
5647 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5648 const char *pname = "main";
5649 if (PL_tokenbuf[2] == 'D')
5650 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5651 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5655 GvIOp(gv) = newIO();
5656 IoIFP(GvIOp(gv)) = PL_rsfp;
5657 #if defined(HAS_FCNTL) && defined(F_SETFD)
5659 const int fd = PerlIO_fileno(PL_rsfp);
5660 fcntl(fd,F_SETFD,fd >= 3);
5663 /* Mark this internal pseudo-handle as clean */
5664 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5666 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5667 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5668 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5670 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5671 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5672 /* if the script was opened in binmode, we need to revert
5673 * it to text mode for compatibility; but only iff it has CRs
5674 * XXX this is a questionable hack at best. */
5675 if (PL_bufend-PL_bufptr > 2
5676 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5679 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5680 loc = PerlIO_tell(PL_rsfp);
5681 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5684 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5686 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5687 #endif /* NETWARE */
5688 #ifdef PERLIO_IS_STDIO /* really? */
5689 # if defined(__BORLANDC__)
5690 /* XXX see note in do_binmode() */
5691 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5695 PerlIO_seek(PL_rsfp, loc, 0);
5699 #ifdef PERLIO_LAYERS
5702 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5703 else if (PL_encoding) {
5710 XPUSHs(PL_encoding);
5712 call_method("name", G_SCALAR);
5716 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5717 Perl_form(aTHX_ ":encoding(%"SVf")",
5726 if (PL_realtokenstart >= 0) {
5727 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5729 PL_endwhite = newSVpvs("");
5730 sv_catsv(PL_endwhite, PL_thiswhite);
5732 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5733 PL_realtokenstart = -1;
5735 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5736 SvCUR(PL_endwhite))) != NULL) ;
5751 if (PL_expect == XSTATE) {
5758 if (*s == ':' && s[1] == ':') {
5761 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5762 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5763 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5766 else if (tmp == KEY_require || tmp == KEY_do)
5767 /* that's a way to remember we saw "CORE::" */
5780 LOP(OP_ACCEPT,XTERM);
5786 LOP(OP_ATAN2,XTERM);
5792 LOP(OP_BINMODE,XTERM);
5795 LOP(OP_BLESS,XTERM);
5804 /* When 'use switch' is in effect, continue has a dual
5805 life as a control operator. */
5807 if (!FEATURE_IS_ENABLED("switch"))
5810 /* We have to disambiguate the two senses of
5811 "continue". If the next token is a '{' then
5812 treat it as the start of a continue block;
5813 otherwise treat it as a control operator.
5825 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5842 if (!PL_cryptseen) {
5843 PL_cryptseen = TRUE;
5847 LOP(OP_CRYPT,XTERM);
5850 LOP(OP_CHMOD,XTERM);
5853 LOP(OP_CHOWN,XTERM);
5856 LOP(OP_CONNECT,XTERM);
5875 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5876 if (orig_keyword == KEY_do) {
5885 PL_hints |= HINT_BLOCK_SCOPE;
5895 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5896 LOP(OP_DBMOPEN,XTERM);
5902 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5909 pl_yylval.ival = CopLINE(PL_curcop);
5925 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5926 UNIBRACK(OP_ENTEREVAL);
5940 case KEY_endhostent:
5946 case KEY_endservent:
5949 case KEY_endprotoent:
5960 pl_yylval.ival = CopLINE(PL_curcop);
5962 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5965 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5968 if ((PL_bufend - p) >= 3 &&
5969 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5971 else if ((PL_bufend - p) >= 4 &&
5972 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5975 if (isIDFIRST_lazy_if(p,UTF)) {
5976 p = scan_ident(p, PL_bufend,
5977 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5981 Perl_croak(aTHX_ "Missing $ on loop variable");
5983 s = SvPVX(PL_linestr) + soff;
5989 LOP(OP_FORMLINE,XTERM);
5995 LOP(OP_FCNTL,XTERM);
6001 LOP(OP_FLOCK,XTERM);
6010 LOP(OP_GREPSTART, XREF);
6013 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6028 case KEY_getpriority:
6029 LOP(OP_GETPRIORITY,XTERM);
6031 case KEY_getprotobyname:
6034 case KEY_getprotobynumber:
6035 LOP(OP_GPBYNUMBER,XTERM);
6037 case KEY_getprotoent:
6049 case KEY_getpeername:
6050 UNI(OP_GETPEERNAME);
6052 case KEY_gethostbyname:
6055 case KEY_gethostbyaddr:
6056 LOP(OP_GHBYADDR,XTERM);
6058 case KEY_gethostent:
6061 case KEY_getnetbyname:
6064 case KEY_getnetbyaddr:
6065 LOP(OP_GNBYADDR,XTERM);
6070 case KEY_getservbyname:
6071 LOP(OP_GSBYNAME,XTERM);
6073 case KEY_getservbyport:
6074 LOP(OP_GSBYPORT,XTERM);
6076 case KEY_getservent:
6079 case KEY_getsockname:
6080 UNI(OP_GETSOCKNAME);
6082 case KEY_getsockopt:
6083 LOP(OP_GSOCKOPT,XTERM);
6098 pl_yylval.ival = CopLINE(PL_curcop);
6108 pl_yylval.ival = CopLINE(PL_curcop);
6112 LOP(OP_INDEX,XTERM);
6118 LOP(OP_IOCTL,XTERM);
6130 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6162 LOP(OP_LISTEN,XTERM);
6171 s = scan_pat(s,OP_MATCH);
6172 TERM(sublex_start());
6175 LOP(OP_MAPSTART, XREF);
6178 LOP(OP_MKDIR,XTERM);
6181 LOP(OP_MSGCTL,XTERM);
6184 LOP(OP_MSGGET,XTERM);
6187 LOP(OP_MSGRCV,XTERM);
6190 LOP(OP_MSGSND,XTERM);
6195 PL_in_my = (U16)tmp;
6197 if (isIDFIRST_lazy_if(s,UTF)) {
6201 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6202 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6204 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6205 if (!PL_in_my_stash) {
6208 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6212 if (PL_madskills) { /* just add type to declarator token */
6213 sv_catsv(PL_thistoken, PL_nextwhite);
6215 sv_catpvn(PL_thistoken, start, s - start);
6223 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6230 s = tokenize_use(0, s);
6234 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6241 if (isIDFIRST_lazy_if(s,UTF)) {
6243 for (d = s; isALNUM_lazy_if(d,UTF);)
6245 for (t=d; isSPACE(*t);)
6247 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6249 && !(t[0] == '=' && t[1] == '>')
6251 int parms_len = (int)(d-s);
6252 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6253 "Precedence problem: open %.*s should be open(%.*s)",
6254 parms_len, s, parms_len, s);
6260 pl_yylval.ival = OP_OR;
6270 LOP(OP_OPEN_DIR,XTERM);
6273 checkcomma(s,PL_tokenbuf,"filehandle");
6277 checkcomma(s,PL_tokenbuf,"filehandle");
6296 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6300 LOP(OP_PIPE_OP,XTERM);
6303 s = scan_str(s,!!PL_madskills,FALSE);
6306 pl_yylval.ival = OP_CONST;
6307 TERM(sublex_start());
6313 s = scan_str(s,!!PL_madskills,FALSE);
6316 PL_expect = XOPERATOR;
6318 if (SvCUR(PL_lex_stuff)) {
6321 d = SvPV_force(PL_lex_stuff, len);
6323 for (; isSPACE(*d) && len; --len, ++d)
6328 if (!warned && ckWARN(WARN_QW)) {
6329 for (; !isSPACE(*d) && len; --len, ++d) {
6331 Perl_warner(aTHX_ packWARN(WARN_QW),
6332 "Possible attempt to separate words with commas");
6335 else if (*d == '#') {
6336 Perl_warner(aTHX_ packWARN(WARN_QW),
6337 "Possible attempt to put comments in qw() list");
6343 for (; !isSPACE(*d) && len; --len, ++d)
6346 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6347 words = append_elem(OP_LIST, words,
6348 newSVOP(OP_CONST, 0, tokeq(sv)));
6352 start_force(PL_curforce);
6353 NEXTVAL_NEXTTOKE.opval = words;
6358 SvREFCNT_dec(PL_lex_stuff);
6359 PL_lex_stuff = NULL;
6365 s = scan_str(s,!!PL_madskills,FALSE);
6368 pl_yylval.ival = OP_STRINGIFY;
6369 if (SvIVX(PL_lex_stuff) == '\'')
6370 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6371 TERM(sublex_start());
6374 s = scan_pat(s,OP_QR);
6375 TERM(sublex_start());
6378 s = scan_str(s,!!PL_madskills,FALSE);
6381 readpipe_override();
6382 TERM(sublex_start());
6390 s = force_version(s, FALSE);
6392 else if (*s != 'v' || !isDIGIT(s[1])
6393 || (s = force_version(s, TRUE), *s == 'v'))
6395 *PL_tokenbuf = '\0';
6396 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6397 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6398 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6400 yyerror("<> should be quotes");
6402 if (orig_keyword == KEY_require) {
6410 PL_last_uni = PL_oldbufptr;
6411 PL_last_lop_op = OP_REQUIRE;
6413 return REPORT( (int)REQUIRE );
6419 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6423 LOP(OP_RENAME,XTERM);
6432 LOP(OP_RINDEX,XTERM);
6441 UNIDOR(OP_READLINE);
6444 UNIDOR(OP_BACKTICK);
6453 LOP(OP_REVERSE,XTERM);
6456 UNIDOR(OP_READLINK);
6463 if (pl_yylval.opval)
6464 TERM(sublex_start());
6466 TOKEN(1); /* force error */
6469 checkcomma(s,PL_tokenbuf,"filehandle");
6479 LOP(OP_SELECT,XTERM);
6485 LOP(OP_SEMCTL,XTERM);
6488 LOP(OP_SEMGET,XTERM);
6491 LOP(OP_SEMOP,XTERM);
6497 LOP(OP_SETPGRP,XTERM);
6499 case KEY_setpriority:
6500 LOP(OP_SETPRIORITY,XTERM);
6502 case KEY_sethostent:
6508 case KEY_setservent:
6511 case KEY_setprotoent:
6521 LOP(OP_SEEKDIR,XTERM);
6523 case KEY_setsockopt:
6524 LOP(OP_SSOCKOPT,XTERM);
6530 LOP(OP_SHMCTL,XTERM);
6533 LOP(OP_SHMGET,XTERM);
6536 LOP(OP_SHMREAD,XTERM);
6539 LOP(OP_SHMWRITE,XTERM);
6542 LOP(OP_SHUTDOWN,XTERM);
6551 LOP(OP_SOCKET,XTERM);
6553 case KEY_socketpair:
6554 LOP(OP_SOCKPAIR,XTERM);
6557 checkcomma(s,PL_tokenbuf,"subroutine name");
6559 if (*s == ';' || *s == ')') /* probably a close */
6560 Perl_croak(aTHX_ "sort is now a reserved word");
6562 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6566 LOP(OP_SPLIT,XTERM);
6569 LOP(OP_SPRINTF,XTERM);
6572 LOP(OP_SPLICE,XTERM);
6587 LOP(OP_SUBSTR,XTERM);
6593 char tmpbuf[sizeof PL_tokenbuf];
6594 SSize_t tboffset = 0;
6595 expectation attrful;
6596 bool have_name, have_proto;
6597 const int key = tmp;
6602 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6603 SV *subtoken = newSVpvn(tstart, s - tstart);
6607 s = SKIPSPACE2(s,tmpwhite);
6612 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6613 (*s == ':' && s[1] == ':'))
6620 attrful = XATTRBLOCK;
6621 /* remember buffer pos'n for later force_word */
6622 tboffset = s - PL_oldbufptr;
6623 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6626 nametoke = newSVpvn(s, d - s);
6628 if (memchr(tmpbuf, ':', len))
6629 sv_setpvn(PL_subname, tmpbuf, len);
6631 sv_setsv(PL_subname,PL_curstname);
6632 sv_catpvs(PL_subname,"::");
6633 sv_catpvn(PL_subname,tmpbuf,len);
6640 CURMAD('X', nametoke);
6641 CURMAD('_', tmpwhite);
6642 (void) force_word(PL_oldbufptr + tboffset, WORD,
6645 s = SKIPSPACE2(d,tmpwhite);
6652 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6653 PL_expect = XTERMBLOCK;
6654 attrful = XATTRTERM;
6655 sv_setpvn(PL_subname,"?",1);
6659 if (key == KEY_format) {
6661 PL_lex_formbrack = PL_lex_brackets + 1;
6663 PL_thistoken = subtoken;
6667 (void) force_word(PL_oldbufptr + tboffset, WORD,
6673 /* Look for a prototype */
6676 bool bad_proto = FALSE;
6677 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6679 s = scan_str(s,!!PL_madskills,FALSE);
6681 Perl_croak(aTHX_ "Prototype not terminated");
6682 /* strip spaces and check for bad characters */
6683 d = SvPVX(PL_lex_stuff);
6685 for (p = d; *p; ++p) {
6688 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6694 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6695 "Illegal character in prototype for %"SVf" : %s",
6696 SVfARG(PL_subname), d);
6697 SvCUR_set(PL_lex_stuff, tmp);
6702 CURMAD('q', PL_thisopen);
6703 CURMAD('_', tmpwhite);
6704 CURMAD('=', PL_thisstuff);
6705 CURMAD('Q', PL_thisclose);
6706 NEXTVAL_NEXTTOKE.opval =
6707 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6708 PL_lex_stuff = NULL;
6711 s = SKIPSPACE2(s,tmpwhite);
6719 if (*s == ':' && s[1] != ':')
6720 PL_expect = attrful;
6721 else if (*s != '{' && key == KEY_sub) {
6723 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6725 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6732 curmad('^', newSVpvs(""));
6733 CURMAD('_', tmpwhite);
6737 PL_thistoken = subtoken;
6740 NEXTVAL_NEXTTOKE.opval =
6741 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6742 PL_lex_stuff = NULL;
6748 sv_setpvs(PL_subname, "__ANON__");
6750 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6754 (void) force_word(PL_oldbufptr + tboffset, WORD,
6763 LOP(OP_SYSTEM,XREF);
6766 LOP(OP_SYMLINK,XTERM);
6769 LOP(OP_SYSCALL,XTERM);
6772 LOP(OP_SYSOPEN,XTERM);
6775 LOP(OP_SYSSEEK,XTERM);
6778 LOP(OP_SYSREAD,XTERM);
6781 LOP(OP_SYSWRITE,XTERM);
6785 TERM(sublex_start());
6806 LOP(OP_TRUNCATE,XTERM);
6818 pl_yylval.ival = CopLINE(PL_curcop);
6822 pl_yylval.ival = CopLINE(PL_curcop);
6826 LOP(OP_UNLINK,XTERM);
6832 LOP(OP_UNPACK,XTERM);
6835 LOP(OP_UTIME,XTERM);
6841 LOP(OP_UNSHIFT,XTERM);
6844 s = tokenize_use(1, s);
6854 pl_yylval.ival = CopLINE(PL_curcop);
6858 pl_yylval.ival = CopLINE(PL_curcop);
6862 PL_hints |= HINT_BLOCK_SCOPE;
6869 LOP(OP_WAITPID,XTERM);
6878 ctl_l[0] = toCTRL('L');
6880 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6883 /* Make sure $^L is defined */
6884 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6889 if (PL_expect == XOPERATOR)
6895 pl_yylval.ival = OP_XOR;
6900 TERM(sublex_start());
6905 #pragma segment Main
6909 S_pending_ident(pTHX)
6914 /* pit holds the identifier we read and pending_ident is reset */
6915 char pit = PL_pending_ident;
6916 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
6917 /* All routes through this function want to know if there is a colon. */
6918 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
6919 PL_pending_ident = 0;
6921 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6922 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6923 "### Pending identifier '%s'\n", PL_tokenbuf); });
6925 /* if we're in a my(), we can't allow dynamics here.
6926 $foo'bar has already been turned into $foo::bar, so
6927 just check for colons.
6929 if it's a legal name, the OP is a PADANY.
6932 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6934 yyerror(Perl_form(aTHX_ "No package name allowed for "
6935 "variable %s in \"our\"",
6937 tmp = allocmy(PL_tokenbuf);
6941 yyerror(Perl_form(aTHX_ PL_no_myglob,
6942 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6944 pl_yylval.opval = newOP(OP_PADANY, 0);
6945 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
6951 build the ops for accesses to a my() variable.
6953 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6954 then used in a comparison. This catches most, but not
6955 all cases. For instance, it catches
6956 sort { my($a); $a <=> $b }
6958 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6959 (although why you'd do that is anyone's guess).
6964 tmp = pad_findmy(PL_tokenbuf);
6965 if (tmp != NOT_IN_PAD) {
6966 /* might be an "our" variable" */
6967 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6968 /* build ops for a bareword */
6969 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6970 HEK * const stashname = HvNAME_HEK(stash);
6971 SV * const sym = newSVhek(stashname);
6972 sv_catpvs(sym, "::");
6973 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6974 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6975 pl_yylval.opval->op_private = OPpCONST_ENTERED;
6978 ? (GV_ADDMULTI | GV_ADDINEVAL)
6981 ((PL_tokenbuf[0] == '$') ? SVt_PV
6982 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6987 /* if it's a sort block and they're naming $a or $b */
6988 if (PL_last_lop_op == OP_SORT &&
6989 PL_tokenbuf[0] == '$' &&
6990 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6993 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6994 d < PL_bufend && *d != '\n';
6997 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6998 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7004 pl_yylval.opval = newOP(OP_PADANY, 0);
7005 pl_yylval.opval->op_targ = tmp;
7011 Whine if they've said @foo in a doublequoted string,
7012 and @foo isn't a variable we can find in the symbol
7015 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7016 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7018 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7019 && ckWARN(WARN_AMBIGUOUS)
7020 /* DO NOT warn for @- and @+ */
7021 && !( PL_tokenbuf[2] == '\0' &&
7022 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7025 /* Downgraded from fatal to warning 20000522 mjd */
7026 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7027 "Possible unintended interpolation of %s in string",
7032 /* build ops for a bareword */
7033 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7035 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7037 PL_tokenbuf + 1, tokenbuf_len - 1,
7038 /* If the identifier refers to a stash, don't autovivify it.
7039 * Change 24660 had the side effect of causing symbol table
7040 * hashes to always be defined, even if they were freshly
7041 * created and the only reference in the entire program was
7042 * the single statement with the defined %foo::bar:: test.
7043 * It appears that all code in the wild doing this actually
7044 * wants to know whether sub-packages have been loaded, so
7045 * by avoiding auto-vivifying symbol tables, we ensure that
7046 * defined %foo::bar:: continues to be false, and the existing
7047 * tests still give the expected answers, even though what
7048 * they're actually testing has now changed subtly.
7050 (*PL_tokenbuf == '%'
7051 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7054 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7055 ((PL_tokenbuf[0] == '$') ? SVt_PV
7056 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7062 * The following code was generated by perl_keyword.pl.
7066 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7071 case 1: /* 5 tokens of length 1 */
7103 case 2: /* 18 tokens of length 2 */
7249 case 3: /* 29 tokens of length 3 */
7253 if (name[1] == 'N' &&
7316 if (name[1] == 'i' &&
7348 if (name[1] == 'o' &&
7357 if (name[1] == 'e' &&
7366 if (name[1] == 'n' &&
7375 if (name[1] == 'o' &&
7384 if (name[1] == 'a' &&
7393 if (name[1] == 'o' &&
7455 if (name[1] == 'e' &&
7469 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7495 if (name[1] == 'i' &&
7504 if (name[1] == 's' &&
7513 if (name[1] == 'e' &&
7522 if (name[1] == 'o' &&
7534 case 4: /* 41 tokens of length 4 */
7538 if (name[1] == 'O' &&
7548 if (name[1] == 'N' &&
7558 if (name[1] == 'i' &&
7568 if (name[1] == 'h' &&
7578 if (name[1] == 'u' &&
7591 if (name[2] == 'c' &&
7600 if (name[2] == 's' &&
7609 if (name[2] == 'a' &&
7645 if (name[1] == 'o' &&
7658 if (name[2] == 't' &&
7667 if (name[2] == 'o' &&
7676 if (name[2] == 't' &&
7685 if (name[2] == 'e' &&
7698 if (name[1] == 'o' &&
7711 if (name[2] == 'y' &&
7720 if (name[2] == 'l' &&
7736 if (name[2] == 's' &&
7745 if (name[2] == 'n' &&
7754 if (name[2] == 'c' &&
7767 if (name[1] == 'e' &&
7777 if (name[1] == 'p' &&
7790 if (name[2] == 'c' &&
7799 if (name[2] == 'p' &&
7808 if (name[2] == 's' &&
7824 if (name[2] == 'n' &&
7894 if (name[2] == 'r' &&
7903 if (name[2] == 'r' &&
7912 if (name[2] == 'a' &&
7928 if (name[2] == 'l' &&
7990 if (name[2] == 'e' &&
7993 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8006 case 5: /* 39 tokens of length 5 */
8010 if (name[1] == 'E' &&
8021 if (name[1] == 'H' &&
8035 if (name[2] == 'a' &&
8045 if (name[2] == 'a' &&
8062 if (name[2] == 'e' &&
8072 if (name[2] == 'e' &&
8076 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8092 if (name[3] == 'i' &&
8101 if (name[3] == 'o' &&
8137 if (name[2] == 'o' &&
8147 if (name[2] == 'y' &&
8161 if (name[1] == 'l' &&
8175 if (name[2] == 'n' &&
8185 if (name[2] == 'o' &&
8199 if (name[1] == 'i' &&
8204 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8213 if (name[2] == 'd' &&
8223 if (name[2] == 'c' &&
8240 if (name[2] == 'c' &&
8250 if (name[2] == 't' &&
8264 if (name[1] == 'k' &&
8275 if (name[1] == 'r' &&
8289 if (name[2] == 's' &&
8299 if (name[2] == 'd' &&
8316 if (name[2] == 'm' &&
8326 if (name[2] == 'i' &&
8336 if (name[2] == 'e' &&
8346 if (name[2] == 'l' &&
8356 if (name[2] == 'a' &&
8369 if (name[3] == 't' &&
8372 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8378 if (name[3] == 'd' &&
8395 if (name[1] == 'i' &&
8409 if (name[2] == 'a' &&
8422 if (name[3] == 'e' &&
8457 if (name[2] == 'i' &&
8474 if (name[2] == 'i' &&
8484 if (name[2] == 'i' &&
8501 case 6: /* 33 tokens of length 6 */
8505 if (name[1] == 'c' &&
8520 if (name[2] == 'l' &&
8531 if (name[2] == 'r' &&
8546 if (name[1] == 'e' &&
8561 if (name[2] == 's' &&
8566 if(ckWARN_d(WARN_SYNTAX))
8567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8573 if (name[2] == 'i' &&
8591 if (name[2] == 'l' &&
8602 if (name[2] == 'r' &&
8617 if (name[1] == 'm' &&
8632 if (name[2] == 'n' &&
8643 if (name[2] == 's' &&
8658 if (name[1] == 's' &&
8664 if (name[4] == 't' &&
8673 if (name[4] == 'e' &&
8682 if (name[4] == 'c' &&
8691 if (name[4] == 'n' &&
8707 if (name[1] == 'r' &&
8725 if (name[3] == 'a' &&
8735 if (name[3] == 'u' &&
8749 if (name[2] == 'n' &&
8767 if (name[2] == 'a' &&
8781 if (name[3] == 'e' &&
8794 if (name[4] == 't' &&
8803 if (name[4] == 'e' &&
8825 if (name[4] == 't' &&
8834 if (name[4] == 'e' &&
8850 if (name[2] == 'c' &&
8861 if (name[2] == 'l' &&
8872 if (name[2] == 'b' &&
8883 if (name[2] == 's' &&
8906 if (name[4] == 's' &&
8915 if (name[4] == 'n' &&
8928 if (name[3] == 'a' &&
8945 if (name[1] == 'a' &&
8960 case 7: /* 29 tokens of length 7 */
8964 if (name[1] == 'E' &&
8977 if (name[1] == '_' &&
8990 if (name[1] == 'i' &&
8997 return -KEY_binmode;
9003 if (name[1] == 'o' &&
9010 return -KEY_connect;
9019 if (name[2] == 'm' &&
9025 return -KEY_dbmopen;
9036 if (name[4] == 'u' &&
9040 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9046 if (name[4] == 'n' &&
9067 if (name[1] == 'o' &&
9080 if (name[1] == 'e' &&
9087 if (name[5] == 'r' &&
9090 return -KEY_getpgrp;
9096 if (name[5] == 'i' &&
9099 return -KEY_getppid;
9112 if (name[1] == 'c' &&
9119 return -KEY_lcfirst;
9125 if (name[1] == 'p' &&
9132 return -KEY_opendir;
9138 if (name[1] == 'a' &&
9156 if (name[3] == 'd' &&
9161 return -KEY_readdir;
9167 if (name[3] == 'u' &&
9178 if (name[3] == 'e' &&
9183 return -KEY_reverse;
9202 if (name[3] == 'k' &&
9207 return -KEY_seekdir;
9213 if (name[3] == 'p' &&
9218 return -KEY_setpgrp;
9228 if (name[2] == 'm' &&
9234 return -KEY_shmread;
9240 if (name[2] == 'r' &&
9246 return -KEY_sprintf;
9255 if (name[3] == 'l' &&
9260 return -KEY_symlink;
9269 if (name[4] == 'a' &&
9273 return -KEY_syscall;
9279 if (name[4] == 'p' &&
9283 return -KEY_sysopen;
9289 if (name[4] == 'e' &&
9293 return -KEY_sysread;
9299 if (name[4] == 'e' &&
9303 return -KEY_sysseek;
9321 if (name[1] == 'e' &&
9328 return -KEY_telldir;
9337 if (name[2] == 'f' &&
9343 return -KEY_ucfirst;
9349 if (name[2] == 's' &&
9355 return -KEY_unshift;
9365 if (name[1] == 'a' &&
9372 return -KEY_waitpid;
9381 case 8: /* 26 tokens of length 8 */
9385 if (name[1] == 'U' &&
9393 return KEY_AUTOLOAD;
9404 if (name[3] == 'A' &&
9410 return KEY___DATA__;
9416 if (name[3] == 'I' &&
9422 return -KEY___FILE__;
9428 if (name[3] == 'I' &&
9434 return -KEY___LINE__;
9450 if (name[2] == 'o' &&
9457 return -KEY_closedir;
9463 if (name[2] == 'n' &&
9470 return -KEY_continue;
9480 if (name[1] == 'b' &&
9488 return -KEY_dbmclose;
9494 if (name[1] == 'n' &&
9500 if (name[4] == 'r' &&
9505 return -KEY_endgrent;
9511 if (name[4] == 'w' &&
9516 return -KEY_endpwent;
9529 if (name[1] == 'o' &&
9537 return -KEY_formline;
9543 if (name[1] == 'e' &&
9554 if (name[6] == 'n' &&
9557 return -KEY_getgrent;
9563 if (name[6] == 'i' &&
9566 return -KEY_getgrgid;
9572 if (name[6] == 'a' &&
9575 return -KEY_getgrnam;
9588 if (name[4] == 'o' &&
9593 return -KEY_getlogin;
9604 if (name[6] == 'n' &&
9607 return -KEY_getpwent;
9613 if (name[6] == 'a' &&
9616 return -KEY_getpwnam;
9622 if (name[6] == 'i' &&
9625 return -KEY_getpwuid;
9645 if (name[1] == 'e' &&
9652 if (name[5] == 'i' &&
9659 return -KEY_readline;
9664 return -KEY_readlink;
9675 if (name[5] == 'i' &&
9679 return -KEY_readpipe;
9700 if (name[4] == 'r' &&
9705 return -KEY_setgrent;
9711 if (name[4] == 'w' &&
9716 return -KEY_setpwent;
9732 if (name[3] == 'w' &&
9738 return -KEY_shmwrite;
9744 if (name[3] == 't' &&
9750 return -KEY_shutdown;
9760 if (name[2] == 's' &&
9767 return -KEY_syswrite;
9777 if (name[1] == 'r' &&
9785 return -KEY_truncate;
9794 case 9: /* 9 tokens of length 9 */
9798 if (name[1] == 'N' &&
9807 return KEY_UNITCHECK;
9813 if (name[1] == 'n' &&
9822 return -KEY_endnetent;
9828 if (name[1] == 'e' &&
9837 return -KEY_getnetent;
9843 if (name[1] == 'o' &&
9852 return -KEY_localtime;
9858 if (name[1] == 'r' &&
9867 return KEY_prototype;
9873 if (name[1] == 'u' &&
9882 return -KEY_quotemeta;
9888 if (name[1] == 'e' &&
9897 return -KEY_rewinddir;
9903 if (name[1] == 'e' &&
9912 return -KEY_setnetent;
9918 if (name[1] == 'a' &&
9927 return -KEY_wantarray;
9936 case 10: /* 9 tokens of length 10 */
9940 if (name[1] == 'n' &&
9946 if (name[4] == 'o' &&
9953 return -KEY_endhostent;
9959 if (name[4] == 'e' &&
9966 return -KEY_endservent;
9979 if (name[1] == 'e' &&
9985 if (name[4] == 'o' &&
9992 return -KEY_gethostent;
10001 if (name[5] == 'r' &&
10007 return -KEY_getservent;
10013 if (name[5] == 'c' &&
10019 return -KEY_getsockopt;
10039 if (name[2] == 't')
10044 if (name[4] == 'o' &&
10051 return -KEY_sethostent;
10060 if (name[5] == 'r' &&
10066 return -KEY_setservent;
10072 if (name[5] == 'c' &&
10078 return -KEY_setsockopt;
10095 if (name[2] == 'c' &&
10104 return -KEY_socketpair;
10117 case 11: /* 8 tokens of length 11 */
10121 if (name[1] == '_' &&
10131 { /* __PACKAGE__ */
10132 return -KEY___PACKAGE__;
10138 if (name[1] == 'n' &&
10148 { /* endprotoent */
10149 return -KEY_endprotoent;
10155 if (name[1] == 'e' &&
10164 if (name[5] == 'e' &&
10170 { /* getpeername */
10171 return -KEY_getpeername;
10180 if (name[6] == 'o' &&
10185 { /* getpriority */
10186 return -KEY_getpriority;
10192 if (name[6] == 't' &&
10197 { /* getprotoent */
10198 return -KEY_getprotoent;
10212 if (name[4] == 'o' &&
10219 { /* getsockname */
10220 return -KEY_getsockname;
10233 if (name[1] == 'e' &&
10241 if (name[6] == 'o' &&
10246 { /* setpriority */
10247 return -KEY_setpriority;
10253 if (name[6] == 't' &&
10258 { /* setprotoent */
10259 return -KEY_setprotoent;
10275 case 12: /* 2 tokens of length 12 */
10276 if (name[0] == 'g' &&
10288 if (name[9] == 'd' &&
10291 { /* getnetbyaddr */
10292 return -KEY_getnetbyaddr;
10298 if (name[9] == 'a' &&
10301 { /* getnetbyname */
10302 return -KEY_getnetbyname;
10314 case 13: /* 4 tokens of length 13 */
10315 if (name[0] == 'g' &&
10322 if (name[4] == 'o' &&
10331 if (name[10] == 'd' &&
10334 { /* gethostbyaddr */
10335 return -KEY_gethostbyaddr;
10341 if (name[10] == 'a' &&
10344 { /* gethostbyname */
10345 return -KEY_gethostbyname;
10358 if (name[4] == 'e' &&
10367 if (name[10] == 'a' &&
10370 { /* getservbyname */
10371 return -KEY_getservbyname;
10377 if (name[10] == 'o' &&
10380 { /* getservbyport */
10381 return -KEY_getservbyport;
10400 case 14: /* 1 tokens of length 14 */
10401 if (name[0] == 'g' &&
10415 { /* getprotobyname */
10416 return -KEY_getprotobyname;
10421 case 16: /* 1 tokens of length 16 */
10422 if (name[0] == 'g' &&
10438 { /* getprotobynumber */
10439 return -KEY_getprotobynumber;
10453 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10457 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10458 if (ckWARN(WARN_SYNTAX)) {
10461 for (w = s+2; *w && level; w++) {
10464 else if (*w == ')')
10467 while (isSPACE(*w))
10469 /* the list of chars below is for end of statements or
10470 * block / parens, boolean operators (&&, ||, //) and branch
10471 * constructs (or, and, if, until, unless, while, err, for).
10472 * Not a very solid hack... */
10473 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10474 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10475 "%s (...) interpreted as function",name);
10478 while (s < PL_bufend && isSPACE(*s))
10482 while (s < PL_bufend && isSPACE(*s))
10484 if (isIDFIRST_lazy_if(s,UTF)) {
10485 const char * const w = s++;
10486 while (isALNUM_lazy_if(s,UTF))
10488 while (s < PL_bufend && isSPACE(*s))
10492 if (keyword(w, s - w, 0))
10495 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10496 if (gv && GvCVu(gv))
10498 Perl_croak(aTHX_ "No comma allowed after %s", what);
10503 /* Either returns sv, or mortalizes sv and returns a new SV*.
10504 Best used as sv=new_constant(..., sv, ...).
10505 If s, pv are NULL, calls subroutine with one argument,
10506 and type is used with error messages only. */
10509 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10510 SV *sv, SV *pv, const char *type, STRLEN typelen)
10513 HV * const table = GvHV(PL_hintgv); /* ^H */
10517 const char *why1 = "", *why2 = "", *why3 = "";
10519 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10522 why2 = (const char *)
10523 (strEQ(key,"charnames")
10524 ? "(possibly a missing \"use charnames ...\")"
10526 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10527 (type ? type: "undef"), why2);
10529 /* This is convoluted and evil ("goto considered harmful")
10530 * but I do not understand the intricacies of all the different
10531 * failure modes of %^H in here. The goal here is to make
10532 * the most probable error message user-friendly. --jhi */
10537 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10538 (type ? type: "undef"), why1, why2, why3);
10540 yyerror(SvPVX_const(msg));
10544 cvp = hv_fetch(table, key, keylen, FALSE);
10545 if (!cvp || !SvOK(*cvp)) {
10548 why3 = "} is not defined";
10551 sv_2mortal(sv); /* Parent created it permanently */
10554 pv = newSVpvn_flags(s, len, SVs_TEMP);
10556 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10558 typesv = &PL_sv_undef;
10560 PUSHSTACKi(PERLSI_OVERLOAD);
10572 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10576 /* Check the eval first */
10577 if (!PL_in_eval && SvTRUE(ERRSV)) {
10578 sv_catpvs(ERRSV, "Propagated");
10579 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10581 res = SvREFCNT_inc_simple(sv);
10585 SvREFCNT_inc_simple_void(res);
10594 why1 = "Call to &{$^H{";
10596 why3 = "}} did not return a defined value";
10604 /* Returns a NUL terminated string, with the length of the string written to
10608 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10611 register char *d = dest;
10612 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10615 Perl_croak(aTHX_ ident_too_long);
10616 if (isALNUM(*s)) /* UTF handled below */
10618 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10623 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10627 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10628 char *t = s + UTF8SKIP(s);
10630 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10634 Perl_croak(aTHX_ ident_too_long);
10635 Copy(s, d, len, char);
10648 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10651 char *bracket = NULL;
10653 register char *d = dest;
10654 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10659 while (isDIGIT(*s)) {
10661 Perl_croak(aTHX_ ident_too_long);
10668 Perl_croak(aTHX_ ident_too_long);
10669 if (isALNUM(*s)) /* UTF handled below */
10671 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10676 else if (*s == ':' && s[1] == ':') {
10680 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10681 char *t = s + UTF8SKIP(s);
10682 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10684 if (d + (t - s) > e)
10685 Perl_croak(aTHX_ ident_too_long);
10686 Copy(s, d, t - s, char);
10697 if (PL_lex_state != LEX_NORMAL)
10698 PL_lex_state = LEX_INTERPENDMAYBE;
10701 if (*s == '$' && s[1] &&
10702 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10715 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10720 if (isSPACE(s[-1])) {
10722 const char ch = *s++;
10723 if (!SPACE_OR_TAB(ch)) {
10729 if (isIDFIRST_lazy_if(d,UTF)) {
10733 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10734 end += UTF8SKIP(end);
10735 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10736 end += UTF8SKIP(end);
10738 Copy(s, d, end - s, char);
10743 while ((isALNUM(*s) || *s == ':') && d < e)
10746 Perl_croak(aTHX_ ident_too_long);
10749 while (s < send && SPACE_OR_TAB(*s))
10751 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10752 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10753 const char * const brack =
10755 ((*s == '[') ? "[...]" : "{...}");
10756 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10757 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10758 funny, dest, brack, funny, dest, brack);
10761 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10765 /* Handle extended ${^Foo} variables
10766 * 1999-02-27 mjd-perl-patch@plover.com */
10767 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10771 while (isALNUM(*s) && d < e) {
10775 Perl_croak(aTHX_ ident_too_long);
10780 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10781 PL_lex_state = LEX_INTERPEND;
10784 if (PL_lex_state == LEX_NORMAL) {
10785 if (ckWARN(WARN_AMBIGUOUS) &&
10786 (keyword(dest, d - dest, 0)
10787 || get_cvn_flags(dest, d - dest, 0)))
10791 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10792 "Ambiguous use of %c{%s} resolved to %c%s",
10793 funny, dest, funny, dest);
10798 s = bracket; /* let the parser handle it */
10802 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10803 PL_lex_state = LEX_INTERPEND;
10808 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10810 PERL_UNUSED_CONTEXT;
10814 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10815 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10816 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10817 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10818 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10824 S_scan_pat(pTHX_ char *start, I32 type)
10828 char *s = scan_str(start,!!PL_madskills,FALSE);
10829 const char * const valid_flags =
10830 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10837 const char * const delimiter = skipspace(start);
10841 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10842 : "Search pattern not terminated" ));
10845 pm = (PMOP*)newPMOP(type, 0);
10846 if (PL_multi_open == '?') {
10847 /* This is the only point in the code that sets PMf_ONCE: */
10848 pm->op_pmflags |= PMf_ONCE;
10850 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10851 allows us to restrict the list needed by reset to just the ??
10853 assert(type != OP_TRANS);
10855 MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10858 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10861 elements = mg->mg_len / sizeof(PMOP**);
10862 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10863 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10864 mg->mg_len = elements * sizeof(PMOP**);
10865 PmopSTASH_set(pm,PL_curstash);
10871 while (*s && strchr(valid_flags, *s))
10872 pmflag(&pm->op_pmflags,*s++);
10874 if (PL_madskills && modstart != s) {
10875 SV* tmptoken = newSVpvn(modstart, s - modstart);
10876 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10879 /* issue a warning if /c is specified,but /g is not */
10880 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10881 && ckWARN(WARN_REGEXP))
10883 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10884 "Use of /c modifier is meaningless without /g" );
10887 PL_lex_op = (OP*)pm;
10888 pl_yylval.ival = OP_MATCH;
10893 S_scan_subst(pTHX_ char *start)
10904 pl_yylval.ival = OP_NULL;
10906 s = scan_str(start,!!PL_madskills,FALSE);
10909 Perl_croak(aTHX_ "Substitution pattern not terminated");
10911 if (s[-1] == PL_multi_open)
10914 if (PL_madskills) {
10915 CURMAD('q', PL_thisopen);
10916 CURMAD('_', PL_thiswhite);
10917 CURMAD('E', PL_thisstuff);
10918 CURMAD('Q', PL_thisclose);
10919 PL_realtokenstart = s - SvPVX(PL_linestr);
10923 first_start = PL_multi_start;
10924 s = scan_str(s,!!PL_madskills,FALSE);
10926 if (PL_lex_stuff) {
10927 SvREFCNT_dec(PL_lex_stuff);
10928 PL_lex_stuff = NULL;
10930 Perl_croak(aTHX_ "Substitution replacement not terminated");
10932 PL_multi_start = first_start; /* so whole substitution is taken together */
10934 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10937 if (PL_madskills) {
10938 CURMAD('z', PL_thisopen);
10939 CURMAD('R', PL_thisstuff);
10940 CURMAD('Z', PL_thisclose);
10946 if (*s == EXEC_PAT_MOD) {
10950 else if (strchr(S_PAT_MODS, *s))
10951 pmflag(&pm->op_pmflags,*s++);
10957 if (PL_madskills) {
10959 curmad('m', newSVpvn(modstart, s - modstart));
10960 append_madprops(PL_thismad, (OP*)pm, 0);
10964 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10965 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10969 SV * const repl = newSVpvs("");
10971 PL_sublex_info.super_bufptr = s;
10972 PL_sublex_info.super_bufend = PL_bufend;
10974 pm->op_pmflags |= PMf_EVAL;
10977 sv_catpvs(repl, "eval ");
10979 sv_catpvs(repl, "do ");
10981 sv_catpvs(repl, "{");
10982 sv_catsv(repl, PL_lex_repl);
10983 if (strchr(SvPVX(PL_lex_repl), '#'))
10984 sv_catpvs(repl, "\n");
10985 sv_catpvs(repl, "}");
10987 SvREFCNT_dec(PL_lex_repl);
10988 PL_lex_repl = repl;
10991 PL_lex_op = (OP*)pm;
10992 pl_yylval.ival = OP_SUBST;
10997 S_scan_trans(pTHX_ char *start)
11010 pl_yylval.ival = OP_NULL;
11012 s = scan_str(start,!!PL_madskills,FALSE);
11014 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11016 if (s[-1] == PL_multi_open)
11019 if (PL_madskills) {
11020 CURMAD('q', PL_thisopen);
11021 CURMAD('_', PL_thiswhite);
11022 CURMAD('E', PL_thisstuff);
11023 CURMAD('Q', PL_thisclose);
11024 PL_realtokenstart = s - SvPVX(PL_linestr);
11028 s = scan_str(s,!!PL_madskills,FALSE);
11030 if (PL_lex_stuff) {
11031 SvREFCNT_dec(PL_lex_stuff);
11032 PL_lex_stuff = NULL;
11034 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11036 if (PL_madskills) {
11037 CURMAD('z', PL_thisopen);
11038 CURMAD('R', PL_thisstuff);
11039 CURMAD('Z', PL_thisclose);
11042 complement = del = squash = 0;
11049 complement = OPpTRANS_COMPLEMENT;
11052 del = OPpTRANS_DELETE;
11055 squash = OPpTRANS_SQUASH;
11064 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11065 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11066 o->op_private &= ~OPpTRANS_ALL;
11067 o->op_private |= del|squash|complement|
11068 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11069 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11072 pl_yylval.ival = OP_TRANS;
11075 if (PL_madskills) {
11077 curmad('m', newSVpvn(modstart, s - modstart));
11078 append_madprops(PL_thismad, o, 0);
11087 S_scan_heredoc(pTHX_ register char *s)
11091 I32 op_type = OP_SCALAR;
11095 const char *found_newline;
11099 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11101 I32 stuffstart = s - SvPVX(PL_linestr);
11104 PL_realtokenstart = -1;
11109 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11113 while (SPACE_OR_TAB(*peek))
11115 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11118 s = delimcpy(d, e, s, PL_bufend, term, &len);
11128 if (!isALNUM_lazy_if(s,UTF))
11129 deprecate_old("bare << to mean <<\"\"");
11130 for (; isALNUM_lazy_if(s,UTF); s++) {
11135 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11136 Perl_croak(aTHX_ "Delimiter for here document is too long");
11139 len = d - PL_tokenbuf;
11142 if (PL_madskills) {
11143 tstart = PL_tokenbuf + !outer;
11144 PL_thisclose = newSVpvn(tstart, len - !outer);
11145 tstart = SvPVX(PL_linestr) + stuffstart;
11146 PL_thisopen = newSVpvn(tstart, s - tstart);
11147 stuffstart = s - SvPVX(PL_linestr);
11150 #ifndef PERL_STRICT_CR
11151 d = strchr(s, '\r');
11153 char * const olds = s;
11155 while (s < PL_bufend) {
11161 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11170 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11177 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11178 herewas = newSVpvn(s,PL_bufend-s);
11182 herewas = newSVpvn(s-1,found_newline-s+1);
11185 herewas = newSVpvn(s,found_newline-s);
11189 if (PL_madskills) {
11190 tstart = SvPVX(PL_linestr) + stuffstart;
11192 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11194 PL_thisstuff = newSVpvn(tstart, s - tstart);
11197 s += SvCUR(herewas);
11200 stuffstart = s - SvPVX(PL_linestr);
11206 tmpstr = newSV_type(SVt_PVIV);
11207 SvGROW(tmpstr, 80);
11208 if (term == '\'') {
11209 op_type = OP_CONST;
11210 SvIV_set(tmpstr, -1);
11212 else if (term == '`') {
11213 op_type = OP_BACKTICK;
11214 SvIV_set(tmpstr, '\\');
11218 PL_multi_start = CopLINE(PL_curcop);
11219 PL_multi_open = PL_multi_close = '<';
11220 term = *PL_tokenbuf;
11221 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11222 char * const bufptr = PL_sublex_info.super_bufptr;
11223 char * const bufend = PL_sublex_info.super_bufend;
11224 char * const olds = s - SvCUR(herewas);
11225 s = strchr(bufptr, '\n');
11229 while (s < bufend &&
11230 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11232 CopLINE_inc(PL_curcop);
11235 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11236 missingterm(PL_tokenbuf);
11238 sv_setpvn(herewas,bufptr,d-bufptr+1);
11239 sv_setpvn(tmpstr,d+1,s-d);
11241 sv_catpvn(herewas,s,bufend-s);
11242 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11249 while (s < PL_bufend &&
11250 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11252 CopLINE_inc(PL_curcop);
11254 if (s >= PL_bufend) {
11255 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11256 missingterm(PL_tokenbuf);
11258 sv_setpvn(tmpstr,d+1,s-d);
11260 if (PL_madskills) {
11262 sv_catpvn(PL_thisstuff, d + 1, s - d);
11264 PL_thisstuff = newSVpvn(d + 1, s - d);
11265 stuffstart = s - SvPVX(PL_linestr);
11269 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11271 sv_catpvn(herewas,s,PL_bufend-s);
11272 sv_setsv(PL_linestr,herewas);
11273 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11274 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11275 PL_last_lop = PL_last_uni = NULL;
11278 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11279 while (s >= PL_bufend) { /* multiple line string? */
11281 if (PL_madskills) {
11282 tstart = SvPVX(PL_linestr) + stuffstart;
11284 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11286 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11290 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11291 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11292 missingterm(PL_tokenbuf);
11295 stuffstart = s - SvPVX(PL_linestr);
11297 CopLINE_inc(PL_curcop);
11298 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11299 PL_last_lop = PL_last_uni = NULL;
11300 #ifndef PERL_STRICT_CR
11301 if (PL_bufend - PL_linestart >= 2) {
11302 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11303 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11305 PL_bufend[-2] = '\n';
11307 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11309 else if (PL_bufend[-1] == '\r')
11310 PL_bufend[-1] = '\n';
11312 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11313 PL_bufend[-1] = '\n';
11315 if (PERLDB_LINE && PL_curstash != PL_debstash)
11316 update_debugger_info(PL_linestr, NULL, 0);
11317 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11318 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11319 *(SvPVX(PL_linestr) + off ) = ' ';
11320 sv_catsv(PL_linestr,herewas);
11321 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11322 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11326 sv_catsv(tmpstr,PL_linestr);
11331 PL_multi_end = CopLINE(PL_curcop);
11332 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11333 SvPV_shrink_to_cur(tmpstr);
11335 SvREFCNT_dec(herewas);
11337 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11339 else if (PL_encoding)
11340 sv_recode_to_utf8(tmpstr, PL_encoding);
11342 PL_lex_stuff = tmpstr;
11343 pl_yylval.ival = op_type;
11347 /* scan_inputsymbol
11348 takes: current position in input buffer
11349 returns: new position in input buffer
11350 side-effects: pl_yylval and lex_op are set.
11355 <FH> read from filehandle
11356 <pkg::FH> read from package qualified filehandle
11357 <pkg'FH> read from package qualified filehandle
11358 <$fh> read from filehandle in $fh
11359 <*.h> filename glob
11364 S_scan_inputsymbol(pTHX_ char *start)
11367 register char *s = start; /* current position in buffer */
11371 char *d = PL_tokenbuf; /* start of temp holding space */
11372 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11374 end = strchr(s, '\n');
11377 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11379 /* die if we didn't have space for the contents of the <>,
11380 or if it didn't end, or if we see a newline
11383 if (len >= (I32)sizeof PL_tokenbuf)
11384 Perl_croak(aTHX_ "Excessively long <> operator");
11386 Perl_croak(aTHX_ "Unterminated <> operator");
11391 Remember, only scalar variables are interpreted as filehandles by
11392 this code. Anything more complex (e.g., <$fh{$num}>) will be
11393 treated as a glob() call.
11394 This code makes use of the fact that except for the $ at the front,
11395 a scalar variable and a filehandle look the same.
11397 if (*d == '$' && d[1]) d++;
11399 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11400 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11403 /* If we've tried to read what we allow filehandles to look like, and
11404 there's still text left, then it must be a glob() and not a getline.
11405 Use scan_str to pull out the stuff between the <> and treat it
11406 as nothing more than a string.
11409 if (d - PL_tokenbuf != len) {
11410 pl_yylval.ival = OP_GLOB;
11411 s = scan_str(start,!!PL_madskills,FALSE);
11413 Perl_croak(aTHX_ "Glob not terminated");
11417 bool readline_overriden = FALSE;
11420 /* we're in a filehandle read situation */
11423 /* turn <> into <ARGV> */
11425 Copy("ARGV",d,5,char);
11427 /* Check whether readline() is overriden */
11428 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11430 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11432 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11433 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11434 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11435 readline_overriden = TRUE;
11437 /* if <$fh>, create the ops to turn the variable into a
11441 /* try to find it in the pad for this block, otherwise find
11442 add symbol table ops
11444 const PADOFFSET tmp = pad_findmy(d);
11445 if (tmp != NOT_IN_PAD) {
11446 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11447 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11448 HEK * const stashname = HvNAME_HEK(stash);
11449 SV * const sym = sv_2mortal(newSVhek(stashname));
11450 sv_catpvs(sym, "::");
11451 sv_catpv(sym, d+1);
11456 OP * const o = newOP(OP_PADSV, 0);
11458 PL_lex_op = readline_overriden
11459 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11460 append_elem(OP_LIST, o,
11461 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11462 : (OP*)newUNOP(OP_READLINE, 0, o);
11471 ? (GV_ADDMULTI | GV_ADDINEVAL)
11474 PL_lex_op = readline_overriden
11475 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11476 append_elem(OP_LIST,
11477 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11478 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11479 : (OP*)newUNOP(OP_READLINE, 0,
11480 newUNOP(OP_RV2SV, 0,
11481 newGVOP(OP_GV, 0, gv)));
11483 if (!readline_overriden)
11484 PL_lex_op->op_flags |= OPf_SPECIAL;
11485 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11486 pl_yylval.ival = OP_NULL;
11489 /* If it's none of the above, it must be a literal filehandle
11490 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11492 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11493 PL_lex_op = readline_overriden
11494 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11495 append_elem(OP_LIST,
11496 newGVOP(OP_GV, 0, gv),
11497 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11498 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11499 pl_yylval.ival = OP_NULL;
11508 takes: start position in buffer
11509 keep_quoted preserve \ on the embedded delimiter(s)
11510 keep_delims preserve the delimiters around the string
11511 returns: position to continue reading from buffer
11512 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11513 updates the read buffer.
11515 This subroutine pulls a string out of the input. It is called for:
11516 q single quotes q(literal text)
11517 ' single quotes 'literal text'
11518 qq double quotes qq(interpolate $here please)
11519 " double quotes "interpolate $here please"
11520 qx backticks qx(/bin/ls -l)
11521 ` backticks `/bin/ls -l`
11522 qw quote words @EXPORT_OK = qw( func() $spam )
11523 m// regexp match m/this/
11524 s/// regexp substitute s/this/that/
11525 tr/// string transliterate tr/this/that/
11526 y/// string transliterate y/this/that/
11527 ($*@) sub prototypes sub foo ($)
11528 (stuff) sub attr parameters sub foo : attr(stuff)
11529 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11531 In most of these cases (all but <>, patterns and transliterate)
11532 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11533 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11534 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11537 It skips whitespace before the string starts, and treats the first
11538 character as the delimiter. If the delimiter is one of ([{< then
11539 the corresponding "close" character )]}> is used as the closing
11540 delimiter. It allows quoting of delimiters, and if the string has
11541 balanced delimiters ([{<>}]) it allows nesting.
11543 On success, the SV with the resulting string is put into lex_stuff or,
11544 if that is already non-NULL, into lex_repl. The second case occurs only
11545 when parsing the RHS of the special constructs s/// and tr/// (y///).
11546 For convenience, the terminating delimiter character is stuffed into
11551 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11554 SV *sv; /* scalar value: string */
11555 const char *tmps; /* temp string, used for delimiter matching */
11556 register char *s = start; /* current position in the buffer */
11557 register char term; /* terminating character */
11558 register char *to; /* current position in the sv's data */
11559 I32 brackets = 1; /* bracket nesting level */
11560 bool has_utf8 = FALSE; /* is there any utf8 content? */
11561 I32 termcode; /* terminating char. code */
11562 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11563 STRLEN termlen; /* length of terminating string */
11564 int last_off = 0; /* last position for nesting bracket */
11570 /* skip space before the delimiter */
11576 if (PL_realtokenstart >= 0) {
11577 stuffstart = PL_realtokenstart;
11578 PL_realtokenstart = -1;
11581 stuffstart = start - SvPVX(PL_linestr);
11583 /* mark where we are, in case we need to report errors */
11586 /* after skipping whitespace, the next character is the terminator */
11589 termcode = termstr[0] = term;
11593 termcode = utf8_to_uvchr((U8*)s, &termlen);
11594 Copy(s, termstr, termlen, U8);
11595 if (!UTF8_IS_INVARIANT(term))
11599 /* mark where we are */
11600 PL_multi_start = CopLINE(PL_curcop);
11601 PL_multi_open = term;
11603 /* find corresponding closing delimiter */
11604 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11605 termcode = termstr[0] = term = tmps[5];
11607 PL_multi_close = term;
11609 /* create a new SV to hold the contents. 79 is the SV's initial length.
11610 What a random number. */
11611 sv = newSV_type(SVt_PVIV);
11613 SvIV_set(sv, termcode);
11614 (void)SvPOK_only(sv); /* validate pointer */
11616 /* move past delimiter and try to read a complete string */
11618 sv_catpvn(sv, s, termlen);
11621 tstart = SvPVX(PL_linestr) + stuffstart;
11622 if (!PL_thisopen && !keep_delims) {
11623 PL_thisopen = newSVpvn(tstart, s - tstart);
11624 stuffstart = s - SvPVX(PL_linestr);
11628 if (PL_encoding && !UTF) {
11632 int offset = s - SvPVX_const(PL_linestr);
11633 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11634 &offset, (char*)termstr, termlen);
11635 const char * const ns = SvPVX_const(PL_linestr) + offset;
11636 char * const svlast = SvEND(sv) - 1;
11638 for (; s < ns; s++) {
11639 if (*s == '\n' && !PL_rsfp)
11640 CopLINE_inc(PL_curcop);
11643 goto read_more_line;
11645 /* handle quoted delimiters */
11646 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11648 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11650 if ((svlast-1 - t) % 2) {
11651 if (!keep_quoted) {
11652 *(svlast-1) = term;
11654 SvCUR_set(sv, SvCUR(sv) - 1);
11659 if (PL_multi_open == PL_multi_close) {
11665 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11666 /* At here, all closes are "was quoted" one,
11667 so we don't check PL_multi_close. */
11669 if (!keep_quoted && *(t+1) == PL_multi_open)
11674 else if (*t == PL_multi_open)
11682 SvCUR_set(sv, w - SvPVX_const(sv));
11684 last_off = w - SvPVX(sv);
11685 if (--brackets <= 0)
11690 if (!keep_delims) {
11691 SvCUR_set(sv, SvCUR(sv) - 1);
11697 /* extend sv if need be */
11698 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11699 /* set 'to' to the next character in the sv's string */
11700 to = SvPVX(sv)+SvCUR(sv);
11702 /* if open delimiter is the close delimiter read unbridle */
11703 if (PL_multi_open == PL_multi_close) {
11704 for (; s < PL_bufend; s++,to++) {
11705 /* embedded newlines increment the current line number */
11706 if (*s == '\n' && !PL_rsfp)
11707 CopLINE_inc(PL_curcop);
11708 /* handle quoted delimiters */
11709 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11710 if (!keep_quoted && s[1] == term)
11712 /* any other quotes are simply copied straight through */
11716 /* terminate when run out of buffer (the for() condition), or
11717 have found the terminator */
11718 else if (*s == term) {
11721 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11724 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11730 /* if the terminator isn't the same as the start character (e.g.,
11731 matched brackets), we have to allow more in the quoting, and
11732 be prepared for nested brackets.
11735 /* read until we run out of string, or we find the terminator */
11736 for (; s < PL_bufend; s++,to++) {
11737 /* embedded newlines increment the line count */
11738 if (*s == '\n' && !PL_rsfp)
11739 CopLINE_inc(PL_curcop);
11740 /* backslashes can escape the open or closing characters */
11741 if (*s == '\\' && s+1 < PL_bufend) {
11742 if (!keep_quoted &&
11743 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11748 /* allow nested opens and closes */
11749 else if (*s == PL_multi_close && --brackets <= 0)
11751 else if (*s == PL_multi_open)
11753 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11758 /* terminate the copied string and update the sv's end-of-string */
11760 SvCUR_set(sv, to - SvPVX_const(sv));
11763 * this next chunk reads more into the buffer if we're not done yet
11767 break; /* handle case where we are done yet :-) */
11769 #ifndef PERL_STRICT_CR
11770 if (to - SvPVX_const(sv) >= 2) {
11771 if ((to[-2] == '\r' && to[-1] == '\n') ||
11772 (to[-2] == '\n' && to[-1] == '\r'))
11776 SvCUR_set(sv, to - SvPVX_const(sv));
11778 else if (to[-1] == '\r')
11781 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11786 /* if we're out of file, or a read fails, bail and reset the current
11787 line marker so we can report where the unterminated string began
11790 if (PL_madskills) {
11791 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11793 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11795 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11799 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11801 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11807 /* we read a line, so increment our line counter */
11808 CopLINE_inc(PL_curcop);
11810 /* update debugger info */
11811 if (PERLDB_LINE && PL_curstash != PL_debstash)
11812 update_debugger_info(PL_linestr, NULL, 0);
11814 /* having changed the buffer, we must update PL_bufend */
11815 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11816 PL_last_lop = PL_last_uni = NULL;
11819 /* at this point, we have successfully read the delimited string */
11821 if (!PL_encoding || UTF) {
11823 if (PL_madskills) {
11824 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11825 const int len = s - tstart;
11827 sv_catpvn(PL_thisstuff, tstart, len);
11829 PL_thisstuff = newSVpvn(tstart, len);
11830 if (!PL_thisclose && !keep_delims)
11831 PL_thisclose = newSVpvn(s,termlen);
11836 sv_catpvn(sv, s, termlen);
11841 if (PL_madskills) {
11842 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11843 const int len = s - tstart - termlen;
11845 sv_catpvn(PL_thisstuff, tstart, len);
11847 PL_thisstuff = newSVpvn(tstart, len);
11848 if (!PL_thisclose && !keep_delims)
11849 PL_thisclose = newSVpvn(s - termlen,termlen);
11853 if (has_utf8 || PL_encoding)
11856 PL_multi_end = CopLINE(PL_curcop);
11858 /* if we allocated too much space, give some back */
11859 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11860 SvLEN_set(sv, SvCUR(sv) + 1);
11861 SvPV_renew(sv, SvLEN(sv));
11864 /* decide whether this is the first or second quoted string we've read
11877 takes: pointer to position in buffer
11878 returns: pointer to new position in buffer
11879 side-effects: builds ops for the constant in pl_yylval.op
11881 Read a number in any of the formats that Perl accepts:
11883 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11884 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11887 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11889 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11892 If it reads a number without a decimal point or an exponent, it will
11893 try converting the number to an integer and see if it can do so
11894 without loss of precision.
11898 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11901 register const char *s = start; /* current position in buffer */
11902 register char *d; /* destination in temp buffer */
11903 register char *e; /* end of temp buffer */
11904 NV nv; /* number read, as a double */
11905 SV *sv = NULL; /* place to put the converted number */
11906 bool floatit; /* boolean: int or float? */
11907 const char *lastub = NULL; /* position of last underbar */
11908 static char const number_too_long[] = "Number too long";
11910 /* We use the first character to decide what type of number this is */
11914 Perl_croak(aTHX_ "panic: scan_num");
11916 /* if it starts with a 0, it could be an octal number, a decimal in
11917 0.13 disguise, or a hexadecimal number, or a binary number. */
11921 u holds the "number so far"
11922 shift the power of 2 of the base
11923 (hex == 4, octal == 3, binary == 1)
11924 overflowed was the number more than we can hold?
11926 Shift is used when we add a digit. It also serves as an "are
11927 we in octal/hex/binary?" indicator to disallow hex characters
11928 when in octal mode.
11933 bool overflowed = FALSE;
11934 bool just_zero = TRUE; /* just plain 0 or binary number? */
11935 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11936 static const char* const bases[5] =
11937 { "", "binary", "", "octal", "hexadecimal" };
11938 static const char* const Bases[5] =
11939 { "", "Binary", "", "Octal", "Hexadecimal" };
11940 static const char* const maxima[5] =
11942 "0b11111111111111111111111111111111",
11946 const char *base, *Base, *max;
11948 /* check for hex */
11953 } else if (s[1] == 'b') {
11958 /* check for a decimal in disguise */
11959 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11961 /* so it must be octal */
11968 if (ckWARN(WARN_SYNTAX))
11969 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11970 "Misplaced _ in number");
11974 base = bases[shift];
11975 Base = Bases[shift];
11976 max = maxima[shift];
11978 /* read the rest of the number */
11980 /* x is used in the overflow test,
11981 b is the digit we're adding on. */
11986 /* if we don't mention it, we're done */
11990 /* _ are ignored -- but warned about if consecutive */
11992 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11993 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11994 "Misplaced _ in number");
11998 /* 8 and 9 are not octal */
11999 case '8': case '9':
12001 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12005 case '2': case '3': case '4':
12006 case '5': case '6': case '7':
12008 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12011 case '0': case '1':
12012 b = *s++ & 15; /* ASCII digit -> value of digit */
12016 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12017 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12018 /* make sure they said 0x */
12021 b = (*s++ & 7) + 9;
12023 /* Prepare to put the digit we have onto the end
12024 of the number so far. We check for overflows.
12030 x = u << shift; /* make room for the digit */
12032 if ((x >> shift) != u
12033 && !(PL_hints & HINT_NEW_BINARY)) {
12036 if (ckWARN_d(WARN_OVERFLOW))
12037 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12038 "Integer overflow in %s number",
12041 u = x | b; /* add the digit to the end */
12044 n *= nvshift[shift];
12045 /* If an NV has not enough bits in its
12046 * mantissa to represent an UV this summing of
12047 * small low-order numbers is a waste of time
12048 * (because the NV cannot preserve the
12049 * low-order bits anyway): we could just
12050 * remember when did we overflow and in the
12051 * end just multiply n by the right
12059 /* if we get here, we had success: make a scalar value from
12064 /* final misplaced underbar check */
12065 if (s[-1] == '_') {
12066 if (ckWARN(WARN_SYNTAX))
12067 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12072 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12073 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12074 "%s number > %s non-portable",
12080 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12081 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12082 "%s number > %s non-portable",
12087 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12088 sv = new_constant(start, s - start, "integer",
12089 sv, NULL, NULL, 0);
12090 else if (PL_hints & HINT_NEW_BINARY)
12091 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12096 handle decimal numbers.
12097 we're also sent here when we read a 0 as the first digit
12099 case '1': case '2': case '3': case '4': case '5':
12100 case '6': case '7': case '8': case '9': case '.':
12103 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12106 /* read next group of digits and _ and copy into d */
12107 while (isDIGIT(*s) || *s == '_') {
12108 /* skip underscores, checking for misplaced ones
12112 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12113 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12114 "Misplaced _ in number");
12118 /* check for end of fixed-length buffer */
12120 Perl_croak(aTHX_ number_too_long);
12121 /* if we're ok, copy the character */
12126 /* final misplaced underbar check */
12127 if (lastub && s == lastub + 1) {
12128 if (ckWARN(WARN_SYNTAX))
12129 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12132 /* read a decimal portion if there is one. avoid
12133 3..5 being interpreted as the number 3. followed
12136 if (*s == '.' && s[1] != '.') {
12141 if (ckWARN(WARN_SYNTAX))
12142 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12143 "Misplaced _ in number");
12147 /* copy, ignoring underbars, until we run out of digits.
12149 for (; isDIGIT(*s) || *s == '_'; s++) {
12150 /* fixed length buffer check */
12152 Perl_croak(aTHX_ number_too_long);
12154 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12155 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12156 "Misplaced _ in number");
12162 /* fractional part ending in underbar? */
12163 if (s[-1] == '_') {
12164 if (ckWARN(WARN_SYNTAX))
12165 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12166 "Misplaced _ in number");
12168 if (*s == '.' && isDIGIT(s[1])) {
12169 /* oops, it's really a v-string, but without the "v" */
12175 /* read exponent part, if present */
12176 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12180 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12181 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12183 /* stray preinitial _ */
12185 if (ckWARN(WARN_SYNTAX))
12186 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12187 "Misplaced _ in number");
12191 /* allow positive or negative exponent */
12192 if (*s == '+' || *s == '-')
12195 /* stray initial _ */
12197 if (ckWARN(WARN_SYNTAX))
12198 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12199 "Misplaced _ in number");
12203 /* read digits of exponent */
12204 while (isDIGIT(*s) || *s == '_') {
12207 Perl_croak(aTHX_ number_too_long);
12211 if (((lastub && s == lastub + 1) ||
12212 (!isDIGIT(s[1]) && s[1] != '_'))
12213 && ckWARN(WARN_SYNTAX))
12214 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12215 "Misplaced _ in number");
12222 /* make an sv from the string */
12226 We try to do an integer conversion first if no characters
12227 indicating "float" have been found.
12232 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12234 if (flags == IS_NUMBER_IN_UV) {
12236 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12239 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12240 if (uv <= (UV) IV_MIN)
12241 sv_setiv(sv, -(IV)uv);
12248 /* terminate the string */
12250 nv = Atof(PL_tokenbuf);
12255 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12256 const char *const key = floatit ? "float" : "integer";
12257 const STRLEN keylen = floatit ? 5 : 7;
12258 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12259 key, keylen, sv, NULL, NULL, 0);
12263 /* if it starts with a v, it could be a v-string */
12266 sv = newSV(5); /* preallocate storage space */
12267 s = scan_vstring(s, PL_bufend, sv);
12271 /* make the op for the constant and return */
12274 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12276 lvalp->opval = NULL;
12282 S_scan_formline(pTHX_ register char *s)
12285 register char *eol;
12287 SV * const stuff = newSVpvs("");
12288 bool needargs = FALSE;
12289 bool eofmt = FALSE;
12291 char *tokenstart = s;
12294 if (PL_madskills) {
12295 savewhite = PL_thiswhite;
12300 while (!needargs) {
12303 #ifdef PERL_STRICT_CR
12304 while (SPACE_OR_TAB(*t))
12307 while (SPACE_OR_TAB(*t) || *t == '\r')
12310 if (*t == '\n' || t == PL_bufend) {
12315 if (PL_in_eval && !PL_rsfp) {
12316 eol = (char *) memchr(s,'\n',PL_bufend-s);
12321 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12323 for (t = s; t < eol; t++) {
12324 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12326 goto enough; /* ~~ must be first line in formline */
12328 if (*t == '@' || *t == '^')
12332 sv_catpvn(stuff, s, eol-s);
12333 #ifndef PERL_STRICT_CR
12334 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12335 char *end = SvPVX(stuff) + SvCUR(stuff);
12338 SvCUR_set(stuff, SvCUR(stuff) - 1);
12348 if (PL_madskills) {
12350 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12352 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12355 s = filter_gets(PL_linestr, PL_rsfp, 0);
12357 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12359 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12361 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12362 PL_last_lop = PL_last_uni = NULL;
12371 if (SvCUR(stuff)) {
12374 PL_lex_state = LEX_NORMAL;
12375 start_force(PL_curforce);
12376 NEXTVAL_NEXTTOKE.ival = 0;
12380 PL_lex_state = LEX_FORMLINE;
12382 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12384 else if (PL_encoding)
12385 sv_recode_to_utf8(stuff, PL_encoding);
12387 start_force(PL_curforce);
12388 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12390 start_force(PL_curforce);
12391 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12395 SvREFCNT_dec(stuff);
12397 PL_lex_formbrack = 0;
12401 if (PL_madskills) {
12403 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12405 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12406 PL_thiswhite = savewhite;
12413 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12416 const I32 oldsavestack_ix = PL_savestack_ix;
12417 CV* const outsidecv = PL_compcv;
12420 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12422 SAVEI32(PL_subline);
12423 save_item(PL_subname);
12424 SAVESPTR(PL_compcv);
12426 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12427 CvFLAGS(PL_compcv) |= flags;
12429 PL_subline = CopLINE(PL_curcop);
12430 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12431 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12432 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12434 return oldsavestack_ix;
12438 #pragma segment Perl_yylex
12441 Perl_yywarn(pTHX_ const char *s)
12444 PL_in_eval |= EVAL_WARNONLY;
12446 PL_in_eval &= ~EVAL_WARNONLY;
12451 Perl_yyerror(pTHX_ const char *s)
12454 const char *where = NULL;
12455 const char *context = NULL;
12458 int yychar = PL_parser->yychar;
12460 if (!yychar || (yychar == ';' && !PL_rsfp))
12462 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12463 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12464 PL_oldbufptr != PL_bufptr) {
12467 The code below is removed for NetWare because it abends/crashes on NetWare
12468 when the script has error such as not having the closing quotes like:
12469 if ($var eq "value)
12470 Checking of white spaces is anyway done in NetWare code.
12473 while (isSPACE(*PL_oldoldbufptr))
12476 context = PL_oldoldbufptr;
12477 contlen = PL_bufptr - PL_oldoldbufptr;
12479 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12480 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12483 The code below is removed for NetWare because it abends/crashes on NetWare
12484 when the script has error such as not having the closing quotes like:
12485 if ($var eq "value)
12486 Checking of white spaces is anyway done in NetWare code.
12489 while (isSPACE(*PL_oldbufptr))
12492 context = PL_oldbufptr;
12493 contlen = PL_bufptr - PL_oldbufptr;
12495 else if (yychar > 255)
12496 where = "next token ???";
12497 else if (yychar == -2) { /* YYEMPTY */
12498 if (PL_lex_state == LEX_NORMAL ||
12499 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12500 where = "at end of line";
12501 else if (PL_lex_inpat)
12502 where = "within pattern";
12504 where = "within string";
12507 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12509 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12510 else if (isPRINT_LC(yychar)) {
12511 const char string = yychar;
12512 sv_catpvn(where_sv, &string, 1);
12515 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12516 where = SvPVX_const(where_sv);
12518 msg = sv_2mortal(newSVpv(s, 0));
12519 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12520 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12522 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12524 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12525 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12526 Perl_sv_catpvf(aTHX_ msg,
12527 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12528 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12531 if (PL_in_eval & EVAL_WARNONLY) {
12532 if (ckWARN_d(WARN_SYNTAX))
12533 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12537 if (PL_error_count >= 10) {
12538 if (PL_in_eval && SvCUR(ERRSV))
12539 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12540 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12542 Perl_croak(aTHX_ "%s has too many errors.\n",
12543 OutCopFILE(PL_curcop));
12546 PL_in_my_stash = NULL;
12550 #pragma segment Main
12554 S_swallow_bom(pTHX_ U8 *s)
12557 const STRLEN slen = SvCUR(PL_linestr);
12560 if (s[1] == 0xFE) {
12561 /* UTF-16 little-endian? (or UTF32-LE?) */
12562 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12563 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12564 #ifndef PERL_NO_UTF16_FILTER
12565 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12568 if (PL_bufend > (char*)s) {
12572 filter_add(utf16rev_textfilter, NULL);
12573 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12574 utf16_to_utf8_reversed(s, news,
12575 PL_bufend - (char*)s - 1,
12577 sv_setpvn(PL_linestr, (const char*)news, newlen);
12579 s = (U8*)SvPVX(PL_linestr);
12580 Copy(news, s, newlen, U8);
12584 SvUTF8_on(PL_linestr);
12585 s = (U8*)SvPVX(PL_linestr);
12587 /* FIXME - is this a general bug fix? */
12590 PL_bufend = SvPVX(PL_linestr) + newlen;
12593 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12598 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12599 #ifndef PERL_NO_UTF16_FILTER
12600 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12603 if (PL_bufend > (char *)s) {
12607 filter_add(utf16_textfilter, NULL);
12608 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12609 utf16_to_utf8(s, news,
12610 PL_bufend - (char*)s,
12612 sv_setpvn(PL_linestr, (const char*)news, newlen);
12614 SvUTF8_on(PL_linestr);
12615 s = (U8*)SvPVX(PL_linestr);
12616 PL_bufend = SvPVX(PL_linestr) + newlen;
12619 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12624 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12625 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12626 s += 3; /* UTF-8 */
12632 if (s[2] == 0xFE && s[3] == 0xFF) {
12633 /* UTF-32 big-endian */
12634 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12637 else if (s[2] == 0 && s[3] != 0) {
12640 * are a good indicator of UTF-16BE. */
12641 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12647 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12648 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12649 s += 4; /* UTF-8 */
12655 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12658 * are a good indicator of UTF-16LE. */
12659 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12667 #ifndef PERL_NO_UTF16_FILTER
12669 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12672 const STRLEN old = SvCUR(sv);
12673 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12674 DEBUG_P(PerlIO_printf(Perl_debug_log,
12675 "utf16_textfilter(%p): %d %d (%d)\n",
12676 FPTR2DPTR(void *, utf16_textfilter),
12677 idx, maxlen, (int) count));
12681 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12682 Copy(SvPVX_const(sv), tmps, old, char);
12683 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12684 SvCUR(sv) - old, &newlen);
12685 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12687 DEBUG_P({sv_dump(sv);});
12692 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12695 const STRLEN old = SvCUR(sv);
12696 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12697 DEBUG_P(PerlIO_printf(Perl_debug_log,
12698 "utf16rev_textfilter(%p): %d %d (%d)\n",
12699 FPTR2DPTR(void *, utf16rev_textfilter),
12700 idx, maxlen, (int) count));
12704 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12705 Copy(SvPVX_const(sv), tmps, old, char);
12706 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12707 SvCUR(sv) - old, &newlen);
12708 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12710 DEBUG_P({ sv_dump(sv); });
12716 Returns a pointer to the next character after the parsed
12717 vstring, as well as updating the passed in sv.
12719 Function must be called like
12722 s = scan_vstring(s,e,sv);
12724 where s and e are the start and end of the string.
12725 The sv should already be large enough to store the vstring
12726 passed in, for performance reasons.
12731 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12734 const char *pos = s;
12735 const char *start = s;
12736 if (*pos == 'v') pos++; /* get past 'v' */
12737 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12739 if ( *pos != '.') {
12740 /* this may not be a v-string if followed by => */
12741 const char *next = pos;
12742 while (next < e && isSPACE(*next))
12744 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12745 /* return string not v-string */
12746 sv_setpvn(sv,(char *)s,pos-s);
12747 return (char *)pos;
12751 if (!isALPHA(*pos)) {
12752 U8 tmpbuf[UTF8_MAXBYTES+1];
12755 s++; /* get past 'v' */
12757 sv_setpvn(sv, "", 0);
12760 /* this is atoi() that tolerates underscores */
12763 const char *end = pos;
12765 while (--end >= s) {
12767 const UV orev = rev;
12768 rev += (*end - '0') * mult;
12770 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12771 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12772 "Integer overflow in decimal number");
12776 if (rev > 0x7FFFFFFF)
12777 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12779 /* Append native character for the rev point */
12780 tmpend = uvchr_to_utf8(tmpbuf, rev);
12781 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12782 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12784 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12790 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12794 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12802 * c-indentation-style: bsd
12803 * c-basic-offset: 4
12804 * indent-tabs-mode: t
12807 * ex: set ts=8 sts=4 sw=4 noet: