3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 /* XXX temporary backwards compatibility */
32 #define PL_lex_brackets (PL_parser->lex_brackets)
33 #define PL_lex_brackstack (PL_parser->lex_brackstack)
34 #define PL_lex_casemods (PL_parser->lex_casemods)
35 #define PL_lex_casestack (PL_parser->lex_casestack)
36 #define PL_lex_defer (PL_parser->lex_defer)
37 #define PL_lex_dojoin (PL_parser->lex_dojoin)
38 #define PL_lex_expect (PL_parser->lex_expect)
39 #define PL_lex_formbrack (PL_parser->lex_formbrack)
40 #define PL_lex_inpat (PL_parser->lex_inpat)
41 #define PL_lex_inwhat (PL_parser->lex_inwhat)
42 #define PL_lex_op (PL_parser->lex_op)
43 #define PL_lex_repl (PL_parser->lex_repl)
44 #define PL_lex_starts (PL_parser->lex_starts)
45 #define PL_lex_stuff (PL_parser->lex_stuff)
46 #define PL_multi_start (PL_parser->multi_start)
47 #define PL_multi_open (PL_parser->multi_open)
48 #define PL_multi_close (PL_parser->multi_close)
49 #define PL_pending_ident (PL_parser->pending_ident)
50 #define PL_preambled (PL_parser->preambled)
51 #define PL_sublex_info (PL_parser->sublex_info)
54 # define PL_endwhite (PL_parser->endwhite)
55 # define PL_faketokens (PL_parser->faketokens)
56 # define PL_lasttoke (PL_parser->lasttoke)
57 # define PL_nextwhite (PL_parser->nextwhite)
58 # define PL_realtokenstart (PL_parser->realtokenstart)
59 # define PL_skipwhite (PL_parser->skipwhite)
60 # define PL_thisclose (PL_parser->thisclose)
61 # define PL_thismad (PL_parser->thismad)
62 # define PL_thisopen (PL_parser->thisopen)
63 # define PL_thisstuff (PL_parser->thisstuff)
64 # define PL_thistoken (PL_parser->thistoken)
65 # define PL_thiswhite (PL_parser->thiswhite)
69 S_pending_ident(pTHX);
71 static const char ident_too_long[] = "Identifier too long";
72 static const char commaless_variable_list[] = "comma-less variable list";
74 static void restore_rsfp(pTHX_ void *f);
75 #ifndef PERL_NO_UTF16_FILTER
76 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
77 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
81 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
82 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
84 # define CURMAD(slot,sv)
85 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
88 #define XFAKEBRACK 128
91 #ifdef USE_UTF8_SCRIPTS
92 # define UTF (!IN_BYTES)
94 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
97 /* In variables named $^X, these are the legal values for X.
98 * 1999-02-27 mjd-perl-patch@plover.com */
99 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
101 /* On MacOS, respect nonbreaking spaces */
102 #ifdef MACOS_TRADITIONAL
103 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
105 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
108 /* LEX_* are values for PL_lex_state, the state of the lexer.
109 * They are arranged oddly so that the guard on the switch statement
110 * can get by with a single comparison (if the compiler is smart enough).
113 /* #define LEX_NOTPARSING 11 is done in perl.h. */
115 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
116 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
117 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
118 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
119 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
121 /* at end of code, eg "$x" followed by: */
122 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
123 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
125 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
126 string or after \E, $foo, etc */
127 #define LEX_INTERPCONST 2 /* NOT USED */
128 #define LEX_FORMLINE 1 /* expecting a format line */
129 #define LEX_KNOWNEXT 0 /* next token known; just return it */
133 static const char* const lex_state_names[] = {
152 #include "keywords.h"
154 /* CLINE is a macro that ensures PL_copline has a sane value */
159 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
162 # define SKIPSPACE0(s) skipspace0(s)
163 # define SKIPSPACE1(s) skipspace1(s)
164 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
165 # define PEEKSPACE(s) skipspace2(s,0)
167 # define SKIPSPACE0(s) skipspace(s)
168 # define SKIPSPACE1(s) skipspace(s)
169 # define SKIPSPACE2(s,tsv) skipspace(s)
170 # define PEEKSPACE(s) skipspace(s)
174 * Convenience functions to return different tokens and prime the
175 * lexer for the next token. They all take an argument.
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
187 * FUN1 : not used, except for not, which isn't a UNIOP
188 * BOop : bitwise or or xor
190 * SHop : shift operator
191 * PWop : power operator
192 * PMop : pattern-matching operator
193 * Aop : addition-level operator
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
196 * Rop : relational operator <= != gt
198 * Also see LOP and lop() below.
201 #ifdef DEBUGGING /* Serve -DT. */
202 # define REPORT(retval) tokereport((I32)retval)
204 # define REPORT(retval) (retval)
207 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
210 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
215 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
216 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
217 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
218 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
219 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
220 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
221 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
222 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
223 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
224 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
225 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
226 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
228 /* This bit of chicanery makes a unary function followed by
229 * a parenthesis into a function with one argument, highest precedence.
230 * The UNIDOR macro is for unary functions that can be followed by the //
231 * operator (such as C<shift // 0>).
233 #define UNI2(f,x) { \
237 PL_last_uni = PL_oldbufptr; \
238 PL_last_lop_op = f; \
240 return REPORT( (int)FUNC1 ); \
242 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
244 #define UNI(f) UNI2(f,XTERM)
245 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
247 #define UNIBRACK(f) { \
250 PL_last_uni = PL_oldbufptr; \
252 return REPORT( (int)FUNC1 ); \
254 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
257 /* grandfather return to old style */
258 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
262 /* how to interpret the yylval associated with the token */
266 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
272 static struct debug_tokens {
274 enum token_type type;
276 } const debug_tokens[] =
278 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
279 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
280 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
281 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
282 { ARROW, TOKENTYPE_NONE, "ARROW" },
283 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
284 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
285 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
286 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
287 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
288 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
289 { DO, TOKENTYPE_NONE, "DO" },
290 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
291 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
292 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
293 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
294 { ELSE, TOKENTYPE_NONE, "ELSE" },
295 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
296 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
297 { FOR, TOKENTYPE_IVAL, "FOR" },
298 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
299 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
300 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
301 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
302 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
303 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
304 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
305 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
306 { IF, TOKENTYPE_IVAL, "IF" },
307 { LABEL, TOKENTYPE_PVAL, "LABEL" },
308 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
309 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
310 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
311 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
312 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
313 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
314 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
315 { MY, TOKENTYPE_IVAL, "MY" },
316 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
317 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
318 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
319 { OROP, TOKENTYPE_IVAL, "OROP" },
320 { OROR, TOKENTYPE_NONE, "OROR" },
321 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
322 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
323 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
324 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
325 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
326 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
327 { PREINC, TOKENTYPE_NONE, "PREINC" },
328 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
329 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
330 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
331 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
332 { SUB, TOKENTYPE_NONE, "SUB" },
333 { THING, TOKENTYPE_OPVAL, "THING" },
334 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
335 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
336 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
337 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
338 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
339 { USE, TOKENTYPE_IVAL, "USE" },
340 { WHEN, TOKENTYPE_IVAL, "WHEN" },
341 { WHILE, TOKENTYPE_IVAL, "WHILE" },
342 { WORD, TOKENTYPE_OPVAL, "WORD" },
343 { 0, TOKENTYPE_NONE, NULL }
346 /* dump the returned token in rv, plus any optional arg in yylval */
349 S_tokereport(pTHX_ I32 rv)
353 const char *name = NULL;
354 enum token_type type = TOKENTYPE_NONE;
355 const struct debug_tokens *p;
356 SV* const report = newSVpvs("<== ");
358 for (p = debug_tokens; p->token; p++) {
359 if (p->token == (int)rv) {
366 Perl_sv_catpv(aTHX_ report, name);
367 else if ((char)rv > ' ' && (char)rv < '~')
368 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
370 sv_catpvs(report, "EOF");
372 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
375 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
378 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
380 case TOKENTYPE_OPNUM:
381 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
382 PL_op_name[yylval.ival]);
385 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
387 case TOKENTYPE_OPVAL:
389 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
390 PL_op_name[yylval.opval->op_type]);
391 if (yylval.opval->op_type == OP_CONST) {
392 Perl_sv_catpvf(aTHX_ report, " %s",
393 SvPEEK(cSVOPx_sv(yylval.opval)));
398 sv_catpvs(report, "(opval=null)");
401 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
407 /* print the buffer with suitable escapes */
410 S_printbuf(pTHX_ const char* fmt, const char* s)
412 SV* const tmp = newSVpvs("");
413 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
422 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
423 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
427 S_ao(pTHX_ int toketype)
430 if (*PL_bufptr == '=') {
432 if (toketype == ANDAND)
433 yylval.ival = OP_ANDASSIGN;
434 else if (toketype == OROR)
435 yylval.ival = OP_ORASSIGN;
436 else if (toketype == DORDOR)
437 yylval.ival = OP_DORASSIGN;
445 * When Perl expects an operator and finds something else, no_op
446 * prints the warning. It always prints "<something> found where
447 * operator expected. It prints "Missing semicolon on previous line?"
448 * if the surprise occurs at the start of the line. "do you need to
449 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
450 * where the compiler doesn't know if foo is a method call or a function.
451 * It prints "Missing operator before end of line" if there's nothing
452 * after the missing operator, or "... before <...>" if there is something
453 * after the missing operator.
457 S_no_op(pTHX_ const char *what, char *s)
460 char * const oldbp = PL_bufptr;
461 const bool is_first = (PL_oldbufptr == PL_linestart);
467 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
468 if (ckWARN_d(WARN_SYNTAX)) {
470 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
471 "\t(Missing semicolon on previous line?)\n");
472 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
474 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
476 if (t < PL_bufptr && isSPACE(*t))
477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
478 "\t(Do you need to predeclare %.*s?)\n",
479 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
484 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
492 * Complain about missing quote/regexp/heredoc terminator.
493 * If it's called with NULL then it cauterizes the line buffer.
494 * If we're in a delimited string and the delimiter is a control
495 * character, it's reformatted into a two-char sequence like ^C.
500 S_missingterm(pTHX_ char *s)
506 char * const nl = strrchr(s,'\n');
512 iscntrl(PL_multi_close)
514 PL_multi_close < 32 || PL_multi_close == 127
518 tmpbuf[1] = (char)toCTRL(PL_multi_close);
523 *tmpbuf = (char)PL_multi_close;
527 q = strchr(s,'"') ? '\'' : '"';
528 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
531 #define FEATURE_IS_ENABLED(name) \
532 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
533 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
535 * S_feature_is_enabled
536 * Check whether the named feature is enabled.
539 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
542 HV * const hinthv = GvHV(PL_hintgv);
543 char he_name[32] = "feature_";
544 (void) my_strlcpy(&he_name[8], name, 24);
546 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
554 Perl_deprecate(pTHX_ const char *s)
556 if (ckWARN(WARN_DEPRECATED))
557 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
561 Perl_deprecate_old(pTHX_ const char *s)
563 /* This function should NOT be called for any new deprecated warnings */
564 /* Use Perl_deprecate instead */
566 /* It is here to maintain backward compatibility with the pre-5.8 */
567 /* warnings category hierarchy. The "deprecated" category used to */
568 /* live under the "syntax" category. It is now a top-level category */
569 /* in its own right. */
571 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
572 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
573 "Use of %s is deprecated", s);
577 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
578 * utf16-to-utf8-reversed.
581 #ifdef PERL_CR_FILTER
585 register const char *s = SvPVX_const(sv);
586 register const char * const e = s + SvCUR(sv);
587 /* outer loop optimized to do nothing if there are no CR-LFs */
589 if (*s++ == '\r' && *s == '\n') {
590 /* hit a CR-LF, need to copy the rest */
591 register char *d = s - 1;
594 if (*s == '\r' && s[1] == '\n')
605 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
607 const I32 count = FILTER_READ(idx+1, sv, maxlen);
608 if (count > 0 && !maxlen)
618 * Initialize variables. Uses the Perl save_stack to save its state (for
619 * recursive calls to the parser).
623 Perl_lex_start(pTHX_ SV *line)
626 const char *s = NULL;
630 /* create and initialise a parser */
632 Newxz(parser, 1, yy_parser);
633 parser->old_parser = PL_parser;
636 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
637 parser->ps = parser->stack;
638 parser->stack_size = YYINITDEPTH;
640 parser->stack->state = 0;
641 parser->yyerrstatus = 0;
642 parser->yychar = YYEMPTY; /* Cause a token to be read. */
644 /* initialise lexer state */
646 SAVEI32(PL_lex_state);
648 if (PL_lex_state == LEX_KNOWNEXT) {
649 I32 toke = parser->old_parser->lasttoke;
650 while (--toke >= 0) {
651 SAVEI32(PL_nexttoke[toke].next_type);
652 SAVEVPTR(PL_nexttoke[toke].next_val);
654 SAVEVPTR(PL_nexttoke[toke].next_mad);
657 SAVEI32(PL_curforce);
659 if (PL_lex_state == LEX_KNOWNEXT) {
660 I32 toke = PL_nexttoke;
661 while (--toke >= 0) {
662 SAVEI32(PL_nexttype[toke]);
663 SAVEVPTR(PL_nextval[toke]);
665 SAVEI32(PL_nexttoke);
668 SAVECOPLINE(PL_curcop);
671 SAVEPPTR(PL_oldbufptr);
672 SAVEPPTR(PL_oldoldbufptr);
673 SAVEPPTR(PL_last_lop);
674 SAVEPPTR(PL_last_uni);
675 SAVEPPTR(PL_linestart);
676 SAVESPTR(PL_linestr);
677 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
681 PL_lex_state = LEX_NORMAL;
683 Newx(parser->lex_brackstack, 120, char);
684 Newx(parser->lex_casestack, 12, char);
685 *parser->lex_casestack = '\0';
691 s = SvPV_const(line, len);
696 PL_linestr = newSVpvs("\n;");
697 } else if (SvREADONLY(line) || s[len-1] != ';') {
698 PL_linestr = newSVsv(line);
700 sv_catpvs(PL_linestr, "\n;");
703 SvREFCNT_inc_simple_void_NN(line);
706 /* PL_linestr needs to survive until end of scope, not just the next
707 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
708 SAVEFREESV(PL_linestr);
709 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
710 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
711 PL_last_lop = PL_last_uni = NULL;
717 * Finalizer for lexing operations. Must be called when the parser is
718 * done with the lexer.
725 PL_doextract = FALSE;
730 * This subroutine has nothing to do with tilting, whether at windmills
731 * or pinball tables. Its name is short for "increment line". It
732 * increments the current line number in CopLINE(PL_curcop) and checks
733 * to see whether the line starts with a comment of the form
734 * # line 500 "foo.pm"
735 * If so, it sets the current line number and file to the values in the comment.
739 S_incline(pTHX_ const char *s)
746 CopLINE_inc(PL_curcop);
749 while (SPACE_OR_TAB(*s))
751 if (strnEQ(s, "line", 4))
755 if (SPACE_OR_TAB(*s))
759 while (SPACE_OR_TAB(*s))
767 while (SPACE_OR_TAB(*s))
769 if (*s == '"' && (t = strchr(s+1, '"'))) {
779 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
781 if (*e != '\n' && *e != '\0')
782 return; /* false alarm */
785 const STRLEN len = t - s;
787 const char * const cf = CopFILE(PL_curcop);
788 STRLEN tmplen = cf ? strlen(cf) : 0;
789 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
790 /* must copy *{"::_<(eval N)[oldfilename:L]"}
791 * to *{"::_<newfilename"} */
792 /* However, the long form of evals is only turned on by the
793 debugger - usually they're "(eval %lu)" */
797 STRLEN tmplen2 = len;
798 if (tmplen + 2 <= sizeof smallbuf)
801 Newx(tmpbuf, tmplen + 2, char);
804 memcpy(tmpbuf + 2, cf, tmplen);
806 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
811 if (tmplen2 + 2 <= sizeof smallbuf)
814 Newx(tmpbuf2, tmplen2 + 2, char);
816 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
817 /* Either they malloc'd it, or we malloc'd it,
818 so no prefix is present in ours. */
823 memcpy(tmpbuf2 + 2, s, tmplen2);
826 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
828 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
829 /* adjust ${"::_<newfilename"} to store the new file name */
830 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
831 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
832 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
835 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
837 if (tmpbuf != smallbuf) Safefree(tmpbuf);
840 CopFILE_free(PL_curcop);
841 CopFILE_setn(PL_curcop, s, len);
843 CopLINE_set(PL_curcop, atoi(n)-1);
847 /* skip space before PL_thistoken */
850 S_skipspace0(pTHX_ register char *s)
857 PL_thiswhite = newSVpvs("");
858 sv_catsv(PL_thiswhite, PL_skipwhite);
859 sv_free(PL_skipwhite);
862 PL_realtokenstart = s - SvPVX(PL_linestr);
866 /* skip space after PL_thistoken */
869 S_skipspace1(pTHX_ register char *s)
871 const char *start = s;
872 I32 startoff = start - SvPVX(PL_linestr);
877 start = SvPVX(PL_linestr) + startoff;
878 if (!PL_thistoken && PL_realtokenstart >= 0) {
879 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
880 PL_thistoken = newSVpvn(tstart, start - tstart);
882 PL_realtokenstart = -1;
885 PL_nextwhite = newSVpvs("");
886 sv_catsv(PL_nextwhite, PL_skipwhite);
887 sv_free(PL_skipwhite);
894 S_skipspace2(pTHX_ register char *s, SV **svp)
897 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
898 const I32 startoff = s - SvPVX(PL_linestr);
901 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
902 if (!PL_madskills || !svp)
904 start = SvPVX(PL_linestr) + startoff;
905 if (!PL_thistoken && PL_realtokenstart >= 0) {
906 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
907 PL_thistoken = newSVpvn(tstart, start - tstart);
908 PL_realtokenstart = -1;
913 sv_setsv(*svp, PL_skipwhite);
914 sv_free(PL_skipwhite);
923 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
925 AV *av = CopFILEAVx(PL_curcop);
927 SV * const sv = newSV_type(SVt_PVMG);
929 sv_setsv(sv, orig_sv);
931 sv_setpvn(sv, buf, len);
934 av_store(av, (I32)CopLINE(PL_curcop), sv);
940 * Called to gobble the appropriate amount and type of whitespace.
941 * Skips comments as well.
945 S_skipspace(pTHX_ register char *s)
950 int startoff = s - SvPVX(PL_linestr);
953 sv_free(PL_skipwhite);
958 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
959 while (s < PL_bufend && SPACE_OR_TAB(*s))
969 SSize_t oldprevlen, oldoldprevlen;
970 SSize_t oldloplen = 0, oldunilen = 0;
971 while (s < PL_bufend && isSPACE(*s)) {
972 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
977 if (s < PL_bufend && *s == '#') {
978 while (s < PL_bufend && *s != '\n')
982 if (PL_in_eval && !PL_rsfp) {
989 /* only continue to recharge the buffer if we're at the end
990 * of the buffer, we're not reading from a source filter, and
991 * we're in normal lexing mode
993 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
994 PL_lex_state == LEX_FORMLINE)
1001 /* try to recharge the buffer */
1003 curoff = s - SvPVX(PL_linestr);
1006 if ((s = filter_gets(PL_linestr, PL_rsfp,
1007 (prevlen = SvCUR(PL_linestr)))) == NULL)
1010 if (PL_madskills && curoff != startoff) {
1012 PL_skipwhite = newSVpvs("");
1013 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1017 /* mustn't throw out old stuff yet if madpropping */
1018 SvCUR(PL_linestr) = curoff;
1019 s = SvPVX(PL_linestr) + curoff;
1021 if (curoff && s[-1] == '\n')
1025 /* end of file. Add on the -p or -n magic */
1026 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1029 sv_catpvs(PL_linestr,
1030 ";}continue{print or die qq(-p destination: $!\\n);}");
1032 sv_setpvs(PL_linestr,
1033 ";}continue{print or die qq(-p destination: $!\\n);}");
1035 PL_minus_n = PL_minus_p = 0;
1037 else if (PL_minus_n) {
1039 sv_catpvn(PL_linestr, ";}", 2);
1041 sv_setpvn(PL_linestr, ";}", 2);
1047 sv_catpvn(PL_linestr,";", 1);
1049 sv_setpvn(PL_linestr,";", 1);
1052 /* reset variables for next time we lex */
1053 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1059 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1060 PL_last_lop = PL_last_uni = NULL;
1062 /* Close the filehandle. Could be from -P preprocessor,
1063 * STDIN, or a regular file. If we were reading code from
1064 * STDIN (because the commandline held no -e or filename)
1065 * then we don't close it, we reset it so the code can
1066 * read from STDIN too.
1069 if (PL_preprocess && !PL_in_eval)
1070 (void)PerlProc_pclose(PL_rsfp);
1071 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1072 PerlIO_clearerr(PL_rsfp);
1074 (void)PerlIO_close(PL_rsfp);
1079 /* not at end of file, so we only read another line */
1080 /* make corresponding updates to old pointers, for yyerror() */
1081 oldprevlen = PL_oldbufptr - PL_bufend;
1082 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1084 oldunilen = PL_last_uni - PL_bufend;
1086 oldloplen = PL_last_lop - PL_bufend;
1087 PL_linestart = PL_bufptr = s + prevlen;
1088 PL_bufend = s + SvCUR(PL_linestr);
1090 PL_oldbufptr = s + oldprevlen;
1091 PL_oldoldbufptr = s + oldoldprevlen;
1093 PL_last_uni = s + oldunilen;
1095 PL_last_lop = s + oldloplen;
1098 /* debugger active and we're not compiling the debugger code,
1099 * so store the line into the debugger's array of lines
1101 if (PERLDB_LINE && PL_curstash != PL_debstash)
1102 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1109 PL_skipwhite = newSVpvs("");
1110 curoff = s - SvPVX(PL_linestr);
1111 if (curoff - startoff)
1112 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1121 * Check the unary operators to ensure there's no ambiguity in how they're
1122 * used. An ambiguous piece of code would be:
1124 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1125 * the +5 is its argument.
1135 if (PL_oldoldbufptr != PL_last_uni)
1137 while (isSPACE(*PL_last_uni))
1140 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1142 if ((t = strchr(s, '(')) && t < PL_bufptr)
1145 if (ckWARN_d(WARN_AMBIGUOUS)){
1146 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1147 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1148 (int)(s - PL_last_uni), PL_last_uni);
1153 * LOP : macro to build a list operator. Its behaviour has been replaced
1154 * with a subroutine, S_lop() for which LOP is just another name.
1157 #define LOP(f,x) return lop(f,x,s)
1161 * Build a list operator (or something that might be one). The rules:
1162 * - if we have a next token, then it's a list operator [why?]
1163 * - if the next thing is an opening paren, then it's a function
1164 * - else it's a list operator
1168 S_lop(pTHX_ I32 f, int x, char *s)
1175 PL_last_lop = PL_oldbufptr;
1176 PL_last_lop_op = (OPCODE)f;
1179 return REPORT(LSTOP);
1182 return REPORT(LSTOP);
1185 return REPORT(FUNC);
1188 return REPORT(FUNC);
1190 return REPORT(LSTOP);
1196 * Sets up for an eventual force_next(). start_force(0) basically does
1197 * an unshift, while start_force(-1) does a push. yylex removes items
1202 S_start_force(pTHX_ int where)
1206 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1207 where = PL_lasttoke;
1208 assert(PL_curforce < 0 || PL_curforce == where);
1209 if (PL_curforce != where) {
1210 for (i = PL_lasttoke; i > where; --i) {
1211 PL_nexttoke[i] = PL_nexttoke[i-1];
1215 if (PL_curforce < 0) /* in case of duplicate start_force() */
1216 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1217 PL_curforce = where;
1220 curmad('^', newSVpvs(""));
1221 CURMAD('_', PL_nextwhite);
1226 S_curmad(pTHX_ char slot, SV *sv)
1232 if (PL_curforce < 0)
1233 where = &PL_thismad;
1235 where = &PL_nexttoke[PL_curforce].next_mad;
1238 sv_setpvn(sv, "", 0);
1241 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1243 else if (PL_encoding) {
1244 sv_recode_to_utf8(sv, PL_encoding);
1249 /* keep a slot open for the head of the list? */
1250 if (slot != '_' && *where && (*where)->mad_key == '^') {
1251 (*where)->mad_key = slot;
1252 sv_free((*where)->mad_val);
1253 (*where)->mad_val = (void*)sv;
1256 addmad(newMADsv(slot, sv), where, 0);
1259 # define start_force(where) NOOP
1260 # define curmad(slot, sv) NOOP
1265 * When the lexer realizes it knows the next token (for instance,
1266 * it is reordering tokens for the parser) then it can call S_force_next
1267 * to know what token to return the next time the lexer is called. Caller
1268 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1269 * and possibly PL_expect to ensure the lexer handles the token correctly.
1273 S_force_next(pTHX_ I32 type)
1277 if (PL_curforce < 0)
1278 start_force(PL_lasttoke);
1279 PL_nexttoke[PL_curforce].next_type = type;
1280 if (PL_lex_state != LEX_KNOWNEXT)
1281 PL_lex_defer = PL_lex_state;
1282 PL_lex_state = LEX_KNOWNEXT;
1283 PL_lex_expect = PL_expect;
1286 PL_nexttype[PL_nexttoke] = type;
1288 if (PL_lex_state != LEX_KNOWNEXT) {
1289 PL_lex_defer = PL_lex_state;
1290 PL_lex_expect = PL_expect;
1291 PL_lex_state = LEX_KNOWNEXT;
1297 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1300 SV * const sv = newSVpvn(start,len);
1301 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1308 * When the lexer knows the next thing is a word (for instance, it has
1309 * just seen -> and it knows that the next char is a word char, then
1310 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1314 * char *start : buffer position (must be within PL_linestr)
1315 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1316 * int check_keyword : if true, Perl checks to make sure the word isn't
1317 * a keyword (do this if the word is a label, e.g. goto FOO)
1318 * int allow_pack : if true, : characters will also be allowed (require,
1319 * use, etc. do this)
1320 * int allow_initial_tick : used by the "sub" lexer only.
1324 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1330 start = SKIPSPACE1(start);
1332 if (isIDFIRST_lazy_if(s,UTF) ||
1333 (allow_pack && *s == ':') ||
1334 (allow_initial_tick && *s == '\'') )
1336 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1337 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1339 start_force(PL_curforce);
1341 curmad('X', newSVpvn(start,s-start));
1342 if (token == METHOD) {
1347 PL_expect = XOPERATOR;
1350 NEXTVAL_NEXTTOKE.opval
1351 = (OP*)newSVOP(OP_CONST,0,
1352 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1353 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1361 * Called when the lexer wants $foo *foo &foo etc, but the program
1362 * text only contains the "foo" portion. The first argument is a pointer
1363 * to the "foo", and the second argument is the type symbol to prefix.
1364 * Forces the next token to be a "WORD".
1365 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1369 S_force_ident(pTHX_ register const char *s, int kind)
1373 const STRLEN len = strlen(s);
1374 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1375 start_force(PL_curforce);
1376 NEXTVAL_NEXTTOKE.opval = o;
1379 o->op_private = OPpCONST_ENTERED;
1380 /* XXX see note in pp_entereval() for why we forgo typo
1381 warnings if the symbol must be introduced in an eval.
1383 gv_fetchpvn_flags(s, len,
1384 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1386 kind == '$' ? SVt_PV :
1387 kind == '@' ? SVt_PVAV :
1388 kind == '%' ? SVt_PVHV :
1396 Perl_str_to_version(pTHX_ SV *sv)
1401 const char *start = SvPV_const(sv,len);
1402 const char * const end = start + len;
1403 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1404 while (start < end) {
1408 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1413 retval += ((NV)n)/nshift;
1422 * Forces the next token to be a version number.
1423 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1424 * and if "guessing" is TRUE, then no new token is created (and the caller
1425 * must use an alternative parsing method).
1429 S_force_version(pTHX_ char *s, int guessing)
1435 I32 startoff = s - SvPVX(PL_linestr);
1444 while (isDIGIT(*d) || *d == '_' || *d == '.')
1448 start_force(PL_curforce);
1449 curmad('X', newSVpvn(s,d-s));
1452 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1454 s = scan_num(s, &yylval);
1455 version = yylval.opval;
1456 ver = cSVOPx(version)->op_sv;
1457 if (SvPOK(ver) && !SvNIOK(ver)) {
1458 SvUPGRADE(ver, SVt_PVNV);
1459 SvNV_set(ver, str_to_version(ver));
1460 SvNOK_on(ver); /* hint that it is a version */
1463 else if (guessing) {
1466 sv_free(PL_nextwhite); /* let next token collect whitespace */
1468 s = SvPVX(PL_linestr) + startoff;
1476 if (PL_madskills && !version) {
1477 sv_free(PL_nextwhite); /* let next token collect whitespace */
1479 s = SvPVX(PL_linestr) + startoff;
1482 /* NOTE: The parser sees the package name and the VERSION swapped */
1483 start_force(PL_curforce);
1484 NEXTVAL_NEXTTOKE.opval = version;
1492 * Tokenize a quoted string passed in as an SV. It finds the next
1493 * chunk, up to end of string or a backslash. It may make a new
1494 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1499 S_tokeq(pTHX_ SV *sv)
1503 register char *send;
1511 s = SvPV_force(sv, len);
1512 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1515 while (s < send && *s != '\\')
1520 if ( PL_hints & HINT_NEW_STRING ) {
1521 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1527 if (s + 1 < send && (s[1] == '\\'))
1528 s++; /* all that, just for this */
1533 SvCUR_set(sv, d - SvPVX_const(sv));
1535 if ( PL_hints & HINT_NEW_STRING )
1536 return new_constant(NULL, 0, "q", sv, pv, "q");
1541 * Now come three functions related to double-quote context,
1542 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1543 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1544 * interact with PL_lex_state, and create fake ( ... ) argument lists
1545 * to handle functions and concatenation.
1546 * They assume that whoever calls them will be setting up a fake
1547 * join call, because each subthing puts a ',' after it. This lets
1550 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1552 * (I'm not sure whether the spurious commas at the end of lcfirst's
1553 * arguments and join's arguments are created or not).
1558 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1560 * Pattern matching will set PL_lex_op to the pattern-matching op to
1561 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1563 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1565 * Everything else becomes a FUNC.
1567 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1568 * had an OP_CONST or OP_READLINE). This just sets us up for a
1569 * call to S_sublex_push().
1573 S_sublex_start(pTHX)
1576 register const I32 op_type = yylval.ival;
1578 if (op_type == OP_NULL) {
1579 yylval.opval = PL_lex_op;
1583 if (op_type == OP_CONST || op_type == OP_READLINE) {
1584 SV *sv = tokeq(PL_lex_stuff);
1586 if (SvTYPE(sv) == SVt_PVIV) {
1587 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1589 const char * const p = SvPV_const(sv, len);
1590 SV * const nsv = newSVpvn(p, len);
1596 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1597 PL_lex_stuff = NULL;
1598 /* Allow <FH> // "foo" */
1599 if (op_type == OP_READLINE)
1600 PL_expect = XTERMORDORDOR;
1603 else if (op_type == OP_BACKTICK && PL_lex_op) {
1604 /* readpipe() vas overriden */
1605 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1606 yylval.opval = PL_lex_op;
1608 PL_lex_stuff = NULL;
1612 PL_sublex_info.super_state = PL_lex_state;
1613 PL_sublex_info.sub_inwhat = op_type;
1614 PL_sublex_info.sub_op = PL_lex_op;
1615 PL_lex_state = LEX_INTERPPUSH;
1619 yylval.opval = PL_lex_op;
1629 * Create a new scope to save the lexing state. The scope will be
1630 * ended in S_sublex_done. Returns a '(', starting the function arguments
1631 * to the uc, lc, etc. found before.
1632 * Sets PL_lex_state to LEX_INTERPCONCAT.
1641 PL_lex_state = PL_sublex_info.super_state;
1642 SAVEI32(PL_lex_dojoin);
1643 SAVEI32(PL_lex_brackets);
1644 SAVEI32(PL_lex_casemods);
1645 SAVEI32(PL_lex_starts);
1646 SAVEI32(PL_lex_state);
1647 SAVEVPTR(PL_lex_inpat);
1648 SAVEI32(PL_lex_inwhat);
1649 SAVECOPLINE(PL_curcop);
1650 SAVEPPTR(PL_bufptr);
1651 SAVEPPTR(PL_bufend);
1652 SAVEPPTR(PL_oldbufptr);
1653 SAVEPPTR(PL_oldoldbufptr);
1654 SAVEPPTR(PL_last_lop);
1655 SAVEPPTR(PL_last_uni);
1656 SAVEPPTR(PL_linestart);
1657 SAVESPTR(PL_linestr);
1658 SAVEGENERICPV(PL_lex_brackstack);
1659 SAVEGENERICPV(PL_lex_casestack);
1661 PL_linestr = PL_lex_stuff;
1662 PL_lex_stuff = NULL;
1664 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1665 = SvPVX(PL_linestr);
1666 PL_bufend += SvCUR(PL_linestr);
1667 PL_last_lop = PL_last_uni = NULL;
1668 SAVEFREESV(PL_linestr);
1670 PL_lex_dojoin = FALSE;
1671 PL_lex_brackets = 0;
1672 Newx(PL_lex_brackstack, 120, char);
1673 Newx(PL_lex_casestack, 12, char);
1674 PL_lex_casemods = 0;
1675 *PL_lex_casestack = '\0';
1677 PL_lex_state = LEX_INTERPCONCAT;
1678 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1680 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1681 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1682 PL_lex_inpat = PL_sublex_info.sub_op;
1684 PL_lex_inpat = NULL;
1691 * Restores lexer state after a S_sublex_push.
1698 if (!PL_lex_starts++) {
1699 SV * const sv = newSVpvs("");
1700 if (SvUTF8(PL_linestr))
1702 PL_expect = XOPERATOR;
1703 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1707 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1708 PL_lex_state = LEX_INTERPCASEMOD;
1712 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1713 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1714 PL_linestr = PL_lex_repl;
1716 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1717 PL_bufend += SvCUR(PL_linestr);
1718 PL_last_lop = PL_last_uni = NULL;
1719 SAVEFREESV(PL_linestr);
1720 PL_lex_dojoin = FALSE;
1721 PL_lex_brackets = 0;
1722 PL_lex_casemods = 0;
1723 *PL_lex_casestack = '\0';
1725 if (SvEVALED(PL_lex_repl)) {
1726 PL_lex_state = LEX_INTERPNORMAL;
1728 /* we don't clear PL_lex_repl here, so that we can check later
1729 whether this is an evalled subst; that means we rely on the
1730 logic to ensure sublex_done() is called again only via the
1731 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1734 PL_lex_state = LEX_INTERPCONCAT;
1744 PL_endwhite = newSVpvs("");
1745 sv_catsv(PL_endwhite, PL_thiswhite);
1749 sv_setpvn(PL_thistoken,"",0);
1751 PL_realtokenstart = -1;
1755 PL_bufend = SvPVX(PL_linestr);
1756 PL_bufend += SvCUR(PL_linestr);
1757 PL_expect = XOPERATOR;
1758 PL_sublex_info.sub_inwhat = 0;
1766 Extracts a pattern, double-quoted string, or transliteration. This
1769 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1770 processing a pattern (PL_lex_inpat is true), a transliteration
1771 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1773 Returns a pointer to the character scanned up to. If this is
1774 advanced from the start pointer supplied (i.e. if anything was
1775 successfully parsed), will leave an OP for the substring scanned
1776 in yylval. Caller must intuit reason for not parsing further
1777 by looking at the next characters herself.
1781 double-quoted style: \r and \n
1782 regexp special ones: \D \s
1785 case and quoting: \U \Q \E
1786 stops on @ and $, but not for $ as tail anchor
1788 In transliterations:
1789 characters are VERY literal, except for - not at the start or end
1790 of the string, which indicates a range. If the range is in bytes,
1791 scan_const expands the range to the full set of intermediate
1792 characters. If the range is in utf8, the hyphen is replaced with
1793 a certain range mark which will be handled by pmtrans() in op.c.
1795 In double-quoted strings:
1797 double-quoted style: \r and \n
1799 deprecated backrefs: \1 (in substitution replacements)
1800 case and quoting: \U \Q \E
1803 scan_const does *not* construct ops to handle interpolated strings.
1804 It stops processing as soon as it finds an embedded $ or @ variable
1805 and leaves it to the caller to work out what's going on.
1807 embedded arrays (whether in pattern or not) could be:
1808 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1810 $ in double-quoted strings must be the symbol of an embedded scalar.
1812 $ in pattern could be $foo or could be tail anchor. Assumption:
1813 it's a tail anchor if $ is the last thing in the string, or if it's
1814 followed by one of "()| \r\n\t"
1816 \1 (backreferences) are turned into $1
1818 The structure of the code is
1819 while (there's a character to process) {
1820 handle transliteration ranges
1821 skip regexp comments /(?#comment)/ and codes /(?{code})/
1822 skip #-initiated comments in //x patterns
1823 check for embedded arrays
1824 check for embedded scalars
1826 leave intact backslashes from leaveit (below)
1827 deprecate \1 in substitution replacements
1828 handle string-changing backslashes \l \U \Q \E, etc.
1829 switch (what was escaped) {
1830 handle \- in a transliteration (becomes a literal -)
1831 handle \132 (octal characters)
1832 handle \x15 and \x{1234} (hex characters)
1833 handle \N{name} (named characters)
1834 handle \cV (control characters)
1835 handle printf-style backslashes (\f, \r, \n, etc)
1837 } (end if backslash)
1838 } (end while character to read)
1843 S_scan_const(pTHX_ char *start)
1846 register char *send = PL_bufend; /* end of the constant */
1847 SV *sv = newSV(send - start); /* sv for the constant */
1848 register char *s = start; /* start of the constant */
1849 register char *d = SvPVX(sv); /* destination for copies */
1850 bool dorange = FALSE; /* are we in a translit range? */
1851 bool didrange = FALSE; /* did we just finish a range? */
1852 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1853 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1856 UV literal_endpoint = 0;
1857 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1860 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1861 /* If we are doing a trans and we know we want UTF8 set expectation */
1862 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1863 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1867 while (s < send || dorange) {
1868 /* get transliterations out of the way (they're most literal) */
1869 if (PL_lex_inwhat == OP_TRANS) {
1870 /* expand a range A-Z to the full set of characters. AIE! */
1872 I32 i; /* current expanded character */
1873 I32 min; /* first character in range */
1874 I32 max; /* last character in range */
1885 char * const c = (char*)utf8_hop((U8*)d, -1);
1889 *c = (char)UTF_TO_NATIVE(0xff);
1890 /* mark the range as done, and continue */
1896 i = d - SvPVX_const(sv); /* remember current offset */
1899 SvLEN(sv) + (has_utf8 ?
1900 (512 - UTF_CONTINUATION_MARK +
1903 /* How many two-byte within 0..255: 128 in UTF-8,
1904 * 96 in UTF-8-mod. */
1906 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1908 d = SvPVX(sv) + i; /* refresh d after realloc */
1912 for (j = 0; j <= 1; j++) {
1913 char * const c = (char*)utf8_hop((U8*)d, -1);
1914 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1920 max = (U8)0xff; /* only to \xff */
1921 uvmax = uv; /* \x{100} to uvmax */
1923 d = c; /* eat endpoint chars */
1928 d -= 2; /* eat the first char and the - */
1929 min = (U8)*d; /* first char in range */
1930 max = (U8)d[1]; /* last char in range */
1937 "Invalid range \"%c-%c\" in transliteration operator",
1938 (char)min, (char)max);
1942 if (literal_endpoint == 2 &&
1943 ((isLOWER(min) && isLOWER(max)) ||
1944 (isUPPER(min) && isUPPER(max)))) {
1946 for (i = min; i <= max; i++)
1948 *d++ = NATIVE_TO_NEED(has_utf8,i);
1950 for (i = min; i <= max; i++)
1952 *d++ = NATIVE_TO_NEED(has_utf8,i);
1957 for (i = min; i <= max; i++)
1960 const U8 ch = (U8)NATIVE_TO_UTF(i);
1961 if (UNI_IS_INVARIANT(ch))
1964 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1965 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1974 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1976 *d++ = (char)UTF_TO_NATIVE(0xff);
1978 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1982 /* mark the range as done, and continue */
1986 literal_endpoint = 0;
1991 /* range begins (ignore - as first or last char) */
1992 else if (*s == '-' && s+1 < send && s != start) {
1994 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2001 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2011 literal_endpoint = 0;
2012 native_range = TRUE;
2017 /* if we get here, we're not doing a transliteration */
2019 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2020 except for the last char, which will be done separately. */
2021 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2023 while (s+1 < send && *s != ')')
2024 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2026 else if (s[2] == '{' /* This should match regcomp.c */
2027 || (s[2] == '?' && s[3] == '{'))
2030 char *regparse = s + (s[2] == '{' ? 3 : 4);
2033 while (count && (c = *regparse)) {
2034 if (c == '\\' && regparse[1])
2042 if (*regparse != ')')
2043 regparse--; /* Leave one char for continuation. */
2044 while (s < regparse)
2045 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2049 /* likewise skip #-initiated comments in //x patterns */
2050 else if (*s == '#' && PL_lex_inpat &&
2051 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2052 while (s+1 < send && *s != '\n')
2053 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2056 /* check for embedded arrays
2057 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2059 else if (*s == '@' && s[1]) {
2060 if (isALNUM_lazy_if(s+1,UTF))
2062 if (strchr(":'{$", s[1]))
2064 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2065 break; /* in regexp, neither @+ nor @- are interpolated */
2068 /* check for embedded scalars. only stop if we're sure it's a
2071 else if (*s == '$') {
2072 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2074 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2075 break; /* in regexp, $ might be tail anchor */
2078 /* End of else if chain - OP_TRANS rejoin rest */
2081 if (*s == '\\' && s+1 < send) {
2084 /* deprecate \1 in strings and substitution replacements */
2085 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2086 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2088 if (ckWARN(WARN_SYNTAX))
2089 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2094 /* string-change backslash escapes */
2095 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2099 /* skip any other backslash escapes in a pattern */
2100 else if (PL_lex_inpat) {
2101 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2102 goto default_action;
2105 /* if we get here, it's either a quoted -, or a digit */
2108 /* quoted - in transliterations */
2110 if (PL_lex_inwhat == OP_TRANS) {
2117 if ((isALPHA(*s) || isDIGIT(*s)) &&
2119 Perl_warner(aTHX_ packWARN(WARN_MISC),
2120 "Unrecognized escape \\%c passed through",
2122 /* default action is to copy the quoted character */
2123 goto default_action;
2126 /* \132 indicates an octal constant */
2127 case '0': case '1': case '2': case '3':
2128 case '4': case '5': case '6': case '7':
2132 uv = grok_oct(s, &len, &flags, NULL);
2135 goto NUM_ESCAPE_INSERT;
2137 /* \x24 indicates a hex constant */
2141 char* const e = strchr(s, '}');
2142 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2143 PERL_SCAN_DISALLOW_PREFIX;
2148 yyerror("Missing right brace on \\x{}");
2152 uv = grok_hex(s, &len, &flags, NULL);
2158 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2159 uv = grok_hex(s, &len, &flags, NULL);
2165 /* Insert oct or hex escaped character.
2166 * There will always enough room in sv since such
2167 * escapes will be longer than any UTF-8 sequence
2168 * they can end up as. */
2170 /* We need to map to chars to ASCII before doing the tests
2173 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2174 if (!has_utf8 && uv > 255) {
2175 /* Might need to recode whatever we have
2176 * accumulated so far if it contains any
2179 * (Can't we keep track of that and avoid
2180 * this rescan? --jhi)
2184 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2185 if (!NATIVE_IS_INVARIANT(*c)) {
2190 const STRLEN offset = d - SvPVX_const(sv);
2192 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2196 while (src >= (const U8 *)SvPVX_const(sv)) {
2197 if (!NATIVE_IS_INVARIANT(*src)) {
2198 const U8 ch = NATIVE_TO_ASCII(*src);
2199 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2200 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2210 if (has_utf8 || uv > 255) {
2211 d = (char*)uvchr_to_utf8((U8*)d, uv);
2213 if (PL_lex_inwhat == OP_TRANS &&
2214 PL_sublex_info.sub_op) {
2215 PL_sublex_info.sub_op->op_private |=
2216 (PL_lex_repl ? OPpTRANS_FROM_UTF
2220 if (uv > 255 && !dorange)
2221 native_range = FALSE;
2233 /* \N{LATIN SMALL LETTER A} is a named character */
2237 char* e = strchr(s, '}');
2244 yyerror("Missing right brace on \\N{}");
2248 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2250 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2251 PERL_SCAN_DISALLOW_PREFIX;
2254 uv = grok_hex(s, &len, &flags, NULL);
2255 if ( e > s && len != (STRLEN)(e - s) ) {
2259 goto NUM_ESCAPE_INSERT;
2261 res = newSVpvn(s + 1, e - s - 1);
2262 type = newSVpvn(s - 2,e - s + 3);
2263 res = new_constant( NULL, 0, "charnames",
2264 res, NULL, SvPVX(type) );
2267 sv_utf8_upgrade(res);
2268 str = SvPV_const(res,len);
2269 #ifdef EBCDIC_NEVER_MIND
2270 /* charnames uses pack U and that has been
2271 * recently changed to do the below uni->native
2272 * mapping, so this would be redundant (and wrong,
2273 * the code point would be doubly converted).
2274 * But leave this in just in case the pack U change
2275 * gets revoked, but the semantics is still
2276 * desireable for charnames. --jhi */
2278 UV uv = utf8_to_uvchr((const U8*)str, 0);
2281 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2283 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2284 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2285 str = SvPV_const(res, len);
2289 if (!has_utf8 && SvUTF8(res)) {
2290 const char * const ostart = SvPVX_const(sv);
2291 SvCUR_set(sv, d - ostart);
2294 sv_utf8_upgrade(sv);
2295 /* this just broke our allocation above... */
2296 SvGROW(sv, (STRLEN)(send - start));
2297 d = SvPVX(sv) + SvCUR(sv);
2300 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2301 const char * const odest = SvPVX_const(sv);
2303 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2304 d = SvPVX(sv) + (d - odest);
2308 native_range = FALSE; /* \N{} is guessed to be Unicode */
2310 Copy(str, d, len, char);
2317 yyerror("Missing braces on \\N{}");
2320 /* \c is a control character */
2329 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2332 yyerror("Missing control char name in \\c");
2336 /* printf-style backslashes, formfeeds, newlines, etc */
2338 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2341 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2344 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2347 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2350 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2353 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2356 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2362 } /* end if (backslash) */
2369 /* If we started with encoded form, or already know we want it
2370 and then encode the next character */
2371 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2373 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2374 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2377 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2378 const STRLEN off = d - SvPVX_const(sv);
2379 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2381 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2384 if (uv > 255 && !dorange)
2385 native_range = FALSE;
2389 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2391 } /* while loop to process each character */
2393 /* terminate the string and set up the sv */
2395 SvCUR_set(sv, d - SvPVX_const(sv));
2396 if (SvCUR(sv) >= SvLEN(sv))
2397 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2400 if (PL_encoding && !has_utf8) {
2401 sv_recode_to_utf8(sv, PL_encoding);
2407 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2408 PL_sublex_info.sub_op->op_private |=
2409 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2413 /* shrink the sv if we allocated more than we used */
2414 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2415 SvPV_shrink_to_cur(sv);
2418 /* return the substring (via yylval) only if we parsed anything */
2419 if (s > PL_bufptr) {
2420 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2421 sv = new_constant(start, s - start,
2422 (const char *)(PL_lex_inpat ? "qr" : "q"),
2425 (( PL_lex_inwhat == OP_TRANS
2427 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2430 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2437 * Returns TRUE if there's more to the expression (e.g., a subscript),
2440 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2442 * ->[ and ->{ return TRUE
2443 * { and [ outside a pattern are always subscripts, so return TRUE
2444 * if we're outside a pattern and it's not { or [, then return FALSE
2445 * if we're in a pattern and the first char is a {
2446 * {4,5} (any digits around the comma) returns FALSE
2447 * if we're in a pattern and the first char is a [
2449 * [SOMETHING] has a funky algorithm to decide whether it's a
2450 * character class or not. It has to deal with things like
2451 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2452 * anything else returns TRUE
2455 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2458 S_intuit_more(pTHX_ register char *s)
2461 if (PL_lex_brackets)
2463 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2465 if (*s != '{' && *s != '[')
2470 /* In a pattern, so maybe we have {n,m}. */
2487 /* On the other hand, maybe we have a character class */
2490 if (*s == ']' || *s == '^')
2493 /* this is terrifying, and it works */
2494 int weight = 2; /* let's weigh the evidence */
2496 unsigned char un_char = 255, last_un_char;
2497 const char * const send = strchr(s,']');
2498 char tmpbuf[sizeof PL_tokenbuf * 4];
2500 if (!send) /* has to be an expression */
2503 Zero(seen,256,char);
2506 else if (isDIGIT(*s)) {
2508 if (isDIGIT(s[1]) && s[2] == ']')
2514 for (; s < send; s++) {
2515 last_un_char = un_char;
2516 un_char = (unsigned char)*s;
2521 weight -= seen[un_char] * 10;
2522 if (isALNUM_lazy_if(s+1,UTF)) {
2524 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2525 len = (int)strlen(tmpbuf);
2526 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2531 else if (*s == '$' && s[1] &&
2532 strchr("[#!%*<>()-=",s[1])) {
2533 if (/*{*/ strchr("])} =",s[2]))
2542 if (strchr("wds]",s[1]))
2544 else if (seen[(U8)'\''] || seen[(U8)'"'])
2546 else if (strchr("rnftbxcav",s[1]))
2548 else if (isDIGIT(s[1])) {
2550 while (s[1] && isDIGIT(s[1]))
2560 if (strchr("aA01! ",last_un_char))
2562 if (strchr("zZ79~",s[1]))
2564 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2565 weight -= 5; /* cope with negative subscript */
2568 if (!isALNUM(last_un_char)
2569 && !(last_un_char == '$' || last_un_char == '@'
2570 || last_un_char == '&')
2571 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2576 if (keyword(tmpbuf, d - tmpbuf, 0))
2579 if (un_char == last_un_char + 1)
2581 weight -= seen[un_char];
2586 if (weight >= 0) /* probably a character class */
2596 * Does all the checking to disambiguate
2598 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2599 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2601 * First argument is the stuff after the first token, e.g. "bar".
2603 * Not a method if bar is a filehandle.
2604 * Not a method if foo is a subroutine prototyped to take a filehandle.
2605 * Not a method if it's really "Foo $bar"
2606 * Method if it's "foo $bar"
2607 * Not a method if it's really "print foo $bar"
2608 * Method if it's really "foo package::" (interpreted as package->foo)
2609 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2610 * Not a method if bar is a filehandle or package, but is quoted with
2615 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2618 char *s = start + (*start == '$');
2619 char tmpbuf[sizeof PL_tokenbuf];
2627 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2631 const char *proto = SvPVX_const(cv);
2642 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2643 /* start is the beginning of the possible filehandle/object,
2644 * and s is the end of it
2645 * tmpbuf is a copy of it
2648 if (*start == '$') {
2649 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2650 isUPPER(*PL_tokenbuf))
2653 len = start - SvPVX(PL_linestr);
2657 start = SvPVX(PL_linestr) + len;
2661 return *s == '(' ? FUNCMETH : METHOD;
2663 if (!keyword(tmpbuf, len, 0)) {
2664 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2668 soff = s - SvPVX(PL_linestr);
2672 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2673 if (indirgv && GvCVu(indirgv))
2675 /* filehandle or package name makes it a method */
2676 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2678 soff = s - SvPVX(PL_linestr);
2681 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2682 return 0; /* no assumptions -- "=>" quotes bearword */
2684 start_force(PL_curforce);
2685 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2686 newSVpvn(tmpbuf,len));
2687 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2689 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2694 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2696 return *s == '(' ? FUNCMETH : METHOD;
2704 * Return a string of Perl code to load the debugger. If PERL5DB
2705 * is set, it will return the contents of that, otherwise a
2706 * compile-time require of perl5db.pl.
2714 const char * const pdb = PerlEnv_getenv("PERL5DB");
2718 SETERRNO(0,SS_NORMAL);
2719 return "BEGIN { require 'perl5db.pl' }";
2725 /* Encoded script support. filter_add() effectively inserts a
2726 * 'pre-processing' function into the current source input stream.
2727 * Note that the filter function only applies to the current source file
2728 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2730 * The datasv parameter (which may be NULL) can be used to pass
2731 * private data to this instance of the filter. The filter function
2732 * can recover the SV using the FILTER_DATA macro and use it to
2733 * store private buffers and state information.
2735 * The supplied datasv parameter is upgraded to a PVIO type
2736 * and the IoDIRP/IoANY field is used to store the function pointer,
2737 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2738 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2739 * private use must be set using malloc'd pointers.
2743 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2749 if (!PL_rsfp_filters)
2750 PL_rsfp_filters = newAV();
2753 SvUPGRADE(datasv, SVt_PVIO);
2754 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2755 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2756 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2757 FPTR2DPTR(void *, IoANY(datasv)),
2758 SvPV_nolen(datasv)));
2759 av_unshift(PL_rsfp_filters, 1);
2760 av_store(PL_rsfp_filters, 0, datasv) ;
2765 /* Delete most recently added instance of this filter function. */
2767 Perl_filter_del(pTHX_ filter_t funcp)
2773 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2774 FPTR2DPTR(void*, funcp)));
2776 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2778 /* if filter is on top of stack (usual case) just pop it off */
2779 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2780 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2781 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2782 IoANY(datasv) = (void *)NULL;
2783 sv_free(av_pop(PL_rsfp_filters));
2787 /* we need to search for the correct entry and clear it */
2788 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2792 /* Invoke the idxth filter function for the current rsfp. */
2793 /* maxlen 0 = read one text line */
2795 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2800 /* This API is bad. It should have been using unsigned int for maxlen.
2801 Not sure if we want to change the API, but if not we should sanity
2802 check the value here. */
2803 const unsigned int correct_length
2812 if (!PL_rsfp_filters)
2814 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2815 /* Provide a default input filter to make life easy. */
2816 /* Note that we append to the line. This is handy. */
2817 DEBUG_P(PerlIO_printf(Perl_debug_log,
2818 "filter_read %d: from rsfp\n", idx));
2819 if (correct_length) {
2822 const int old_len = SvCUR(buf_sv);
2824 /* ensure buf_sv is large enough */
2825 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2826 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2827 correct_length)) <= 0) {
2828 if (PerlIO_error(PL_rsfp))
2829 return -1; /* error */
2831 return 0 ; /* end of file */
2833 SvCUR_set(buf_sv, old_len + len) ;
2836 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2837 if (PerlIO_error(PL_rsfp))
2838 return -1; /* error */
2840 return 0 ; /* end of file */
2843 return SvCUR(buf_sv);
2845 /* Skip this filter slot if filter has been deleted */
2846 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2847 DEBUG_P(PerlIO_printf(Perl_debug_log,
2848 "filter_read %d: skipped (filter deleted)\n",
2850 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2852 /* Get function pointer hidden within datasv */
2853 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2854 DEBUG_P(PerlIO_printf(Perl_debug_log,
2855 "filter_read %d: via function %p (%s)\n",
2856 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2857 /* Call function. The function is expected to */
2858 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2859 /* Return: <0:error, =0:eof, >0:not eof */
2860 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2864 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2867 #ifdef PERL_CR_FILTER
2868 if (!PL_rsfp_filters) {
2869 filter_add(S_cr_textfilter,NULL);
2872 if (PL_rsfp_filters) {
2874 SvCUR_set(sv, 0); /* start with empty line */
2875 if (FILTER_READ(0, sv, 0) > 0)
2876 return ( SvPVX(sv) ) ;
2881 return (sv_gets(sv, fp, append));
2885 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2890 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2894 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2895 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2897 return GvHV(gv); /* Foo:: */
2900 /* use constant CLASS => 'MyClass' */
2901 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2902 if (gv && GvCV(gv)) {
2903 SV * const sv = cv_const_sv(GvCV(gv));
2905 pkgname = SvPV_nolen_const(sv);
2908 return gv_stashpv(pkgname, 0);
2912 * S_readpipe_override
2913 * Check whether readpipe() is overriden, and generates the appropriate
2914 * optree, provided sublex_start() is called afterwards.
2917 S_readpipe_override(pTHX)
2920 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2921 yylval.ival = OP_BACKTICK;
2923 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2925 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2926 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2927 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2929 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2930 append_elem(OP_LIST,
2931 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2932 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2942 * The intent of this yylex wrapper is to minimize the changes to the
2943 * tokener when we aren't interested in collecting madprops. It remains
2944 * to be seen how successful this strategy will be...
2951 char *s = PL_bufptr;
2953 /* make sure PL_thiswhite is initialized */
2957 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2958 if (PL_pending_ident)
2959 return S_pending_ident(aTHX);
2961 /* previous token ate up our whitespace? */
2962 if (!PL_lasttoke && PL_nextwhite) {
2963 PL_thiswhite = PL_nextwhite;
2967 /* isolate the token, and figure out where it is without whitespace */
2968 PL_realtokenstart = -1;
2972 assert(PL_curforce < 0);
2974 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2975 if (!PL_thistoken) {
2976 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2977 PL_thistoken = newSVpvs("");
2979 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2980 PL_thistoken = newSVpvn(tstart, s - tstart);
2983 if (PL_thismad) /* install head */
2984 CURMAD('X', PL_thistoken);
2987 /* last whitespace of a sublex? */
2988 if (optype == ')' && PL_endwhite) {
2989 CURMAD('X', PL_endwhite);
2994 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2995 if (!PL_thiswhite && !PL_endwhite && !optype) {
2996 sv_free(PL_thistoken);
3001 /* put off final whitespace till peg */
3002 if (optype == ';' && !PL_rsfp) {
3003 PL_nextwhite = PL_thiswhite;
3006 else if (PL_thisopen) {
3007 CURMAD('q', PL_thisopen);
3009 sv_free(PL_thistoken);
3013 /* Store actual token text as madprop X */
3014 CURMAD('X', PL_thistoken);
3018 /* add preceding whitespace as madprop _ */
3019 CURMAD('_', PL_thiswhite);
3023 /* add quoted material as madprop = */
3024 CURMAD('=', PL_thisstuff);
3028 /* add terminating quote as madprop Q */
3029 CURMAD('Q', PL_thisclose);
3033 /* special processing based on optype */
3037 /* opval doesn't need a TOKEN since it can already store mp */
3048 append_madprops(PL_thismad, yylval.opval, 0);
3056 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3065 /* remember any fake bracket that lexer is about to discard */
3066 if (PL_lex_brackets == 1 &&
3067 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3070 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3073 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3074 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3077 break; /* don't bother looking for trailing comment */
3086 /* attach a trailing comment to its statement instead of next token */
3090 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3092 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3094 if (*s == '\n' || *s == '#') {
3095 while (s < PL_bufend && *s != '\n')
3099 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3100 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3117 /* Create new token struct. Note: opvals return early above. */
3118 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3125 S_tokenize_use(pTHX_ int is_use, char *s) {
3127 if (PL_expect != XSTATE)
3128 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3129 is_use ? "use" : "no"));
3131 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3132 s = force_version(s, TRUE);
3133 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3134 start_force(PL_curforce);
3135 NEXTVAL_NEXTTOKE.opval = NULL;
3138 else if (*s == 'v') {
3139 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3140 s = force_version(s, FALSE);
3144 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3145 s = force_version(s, FALSE);
3147 yylval.ival = is_use;
3151 static const char* const exp_name[] =
3152 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3153 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3160 Works out what to call the token just pulled out of the input
3161 stream. The yacc parser takes care of taking the ops we return and
3162 stitching them into a tree.
3168 if read an identifier
3169 if we're in a my declaration
3170 croak if they tried to say my($foo::bar)
3171 build the ops for a my() declaration
3172 if it's an access to a my() variable
3173 are we in a sort block?
3174 croak if my($a); $a <=> $b
3175 build ops for access to a my() variable
3176 if in a dq string, and they've said @foo and we can't find @foo
3178 build ops for a bareword
3179 if we already built the token before, use it.
3184 #pragma segment Perl_yylex
3190 register char *s = PL_bufptr;
3195 /* orig_keyword, gvp, and gv are initialized here because
3196 * jump to the label just_a_word_zero can bypass their
3197 * initialization later. */
3198 I32 orig_keyword = 0;
3203 SV* tmp = newSVpvs("");
3204 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3205 (IV)CopLINE(PL_curcop),
3206 lex_state_names[PL_lex_state],
3207 exp_name[PL_expect],
3208 pv_display(tmp, s, strlen(s), 0, 60));
3211 /* check if there's an identifier for us to look at */
3212 if (PL_pending_ident)
3213 return REPORT(S_pending_ident(aTHX));
3215 /* no identifier pending identification */
3217 switch (PL_lex_state) {
3219 case LEX_NORMAL: /* Some compilers will produce faster */
3220 case LEX_INTERPNORMAL: /* code if we comment these out. */
3224 /* when we've already built the next token, just pull it out of the queue */
3228 yylval = PL_nexttoke[PL_lasttoke].next_val;
3230 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3231 PL_nexttoke[PL_lasttoke].next_mad = 0;
3232 if (PL_thismad && PL_thismad->mad_key == '_') {
3233 PL_thiswhite = (SV*)PL_thismad->mad_val;
3234 PL_thismad->mad_val = 0;
3235 mad_free(PL_thismad);
3240 PL_lex_state = PL_lex_defer;
3241 PL_expect = PL_lex_expect;
3242 PL_lex_defer = LEX_NORMAL;
3243 if (!PL_nexttoke[PL_lasttoke].next_type)
3248 yylval = PL_nextval[PL_nexttoke];
3250 PL_lex_state = PL_lex_defer;
3251 PL_expect = PL_lex_expect;
3252 PL_lex_defer = LEX_NORMAL;
3256 /* FIXME - can these be merged? */
3257 return(PL_nexttoke[PL_lasttoke].next_type);
3259 return REPORT(PL_nexttype[PL_nexttoke]);
3262 /* interpolated case modifiers like \L \U, including \Q and \E.
3263 when we get here, PL_bufptr is at the \
3265 case LEX_INTERPCASEMOD:
3267 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3268 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3270 /* handle \E or end of string */
3271 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3273 if (PL_lex_casemods) {
3274 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3275 PL_lex_casestack[PL_lex_casemods] = '\0';
3277 if (PL_bufptr != PL_bufend
3278 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280 PL_lex_state = LEX_INTERPCONCAT;
3283 PL_thistoken = newSVpvs("\\E");
3289 while (PL_bufptr != PL_bufend &&
3290 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3292 PL_thiswhite = newSVpvs("");
3293 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3297 if (PL_bufptr != PL_bufend)
3300 PL_lex_state = LEX_INTERPCONCAT;
3304 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3305 "### Saw case modifier\n"); });
3307 if (s[1] == '\\' && s[2] == 'E') {
3310 PL_thiswhite = newSVpvs("");
3311 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3314 PL_lex_state = LEX_INTERPCONCAT;
3319 if (!PL_madskills) /* when just compiling don't need correct */
3320 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3321 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3322 if ((*s == 'L' || *s == 'U') &&
3323 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3324 PL_lex_casestack[--PL_lex_casemods] = '\0';
3327 if (PL_lex_casemods > 10)
3328 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3329 PL_lex_casestack[PL_lex_casemods++] = *s;
3330 PL_lex_casestack[PL_lex_casemods] = '\0';
3331 PL_lex_state = LEX_INTERPCONCAT;
3332 start_force(PL_curforce);
3333 NEXTVAL_NEXTTOKE.ival = 0;
3335 start_force(PL_curforce);
3337 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3339 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3341 NEXTVAL_NEXTTOKE.ival = OP_LC;
3343 NEXTVAL_NEXTTOKE.ival = OP_UC;
3345 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3347 Perl_croak(aTHX_ "panic: yylex");
3349 SV* const tmpsv = newSVpvs("");
3350 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3356 if (PL_lex_starts) {
3362 sv_free(PL_thistoken);
3363 PL_thistoken = newSVpvs("");
3366 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3367 if (PL_lex_casemods == 1 && PL_lex_inpat)
3376 case LEX_INTERPPUSH:
3377 return REPORT(sublex_push());
3379 case LEX_INTERPSTART:
3380 if (PL_bufptr == PL_bufend)
3381 return REPORT(sublex_done());
3382 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3383 "### Interpolated variable\n"); });
3385 PL_lex_dojoin = (*PL_bufptr == '@');
3386 PL_lex_state = LEX_INTERPNORMAL;
3387 if (PL_lex_dojoin) {
3388 start_force(PL_curforce);
3389 NEXTVAL_NEXTTOKE.ival = 0;
3391 start_force(PL_curforce);
3392 force_ident("\"", '$');
3393 start_force(PL_curforce);
3394 NEXTVAL_NEXTTOKE.ival = 0;
3396 start_force(PL_curforce);
3397 NEXTVAL_NEXTTOKE.ival = 0;
3399 start_force(PL_curforce);
3400 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3403 if (PL_lex_starts++) {
3408 sv_free(PL_thistoken);
3409 PL_thistoken = newSVpvs("");
3412 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3413 if (!PL_lex_casemods && PL_lex_inpat)
3420 case LEX_INTERPENDMAYBE:
3421 if (intuit_more(PL_bufptr)) {
3422 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3428 if (PL_lex_dojoin) {
3429 PL_lex_dojoin = FALSE;
3430 PL_lex_state = LEX_INTERPCONCAT;
3434 sv_free(PL_thistoken);
3435 PL_thistoken = newSVpvs("");
3440 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3441 && SvEVALED(PL_lex_repl))
3443 if (PL_bufptr != PL_bufend)
3444 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3448 case LEX_INTERPCONCAT:
3450 if (PL_lex_brackets)
3451 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3453 if (PL_bufptr == PL_bufend)
3454 return REPORT(sublex_done());
3456 if (SvIVX(PL_linestr) == '\'') {
3457 SV *sv = newSVsv(PL_linestr);
3460 else if ( PL_hints & HINT_NEW_RE )
3461 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3462 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3466 s = scan_const(PL_bufptr);
3468 PL_lex_state = LEX_INTERPCASEMOD;
3470 PL_lex_state = LEX_INTERPSTART;
3473 if (s != PL_bufptr) {
3474 start_force(PL_curforce);
3476 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3478 NEXTVAL_NEXTTOKE = yylval;
3481 if (PL_lex_starts++) {
3485 sv_free(PL_thistoken);
3486 PL_thistoken = newSVpvs("");
3489 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3490 if (!PL_lex_casemods && PL_lex_inpat)
3503 PL_lex_state = LEX_NORMAL;
3504 s = scan_formline(PL_bufptr);
3505 if (!PL_lex_formbrack)
3511 PL_oldoldbufptr = PL_oldbufptr;
3517 sv_free(PL_thistoken);
3520 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3524 if (isIDFIRST_lazy_if(s,UTF))
3526 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3529 goto fake_eof; /* emulate EOF on ^D or ^Z */
3538 if (PL_lex_brackets) {
3539 yyerror((const char *)
3541 ? "Format not terminated"
3542 : "Missing right curly or square bracket"));
3544 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3545 "### Tokener got EOF\n");
3549 if (s++ < PL_bufend)
3550 goto retry; /* ignore stray nulls */
3553 if (!PL_in_eval && !PL_preambled) {
3554 PL_preambled = TRUE;
3559 sv_setpv(PL_linestr,incl_perldb());
3560 if (SvCUR(PL_linestr))
3561 sv_catpvs(PL_linestr,";");
3563 while(AvFILLp(PL_preambleav) >= 0) {
3564 SV *tmpsv = av_shift(PL_preambleav);
3565 sv_catsv(PL_linestr, tmpsv);
3566 sv_catpvs(PL_linestr, ";");
3569 sv_free((SV*)PL_preambleav);
3570 PL_preambleav = NULL;
3572 if (PL_minus_n || PL_minus_p) {
3573 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3575 sv_catpvs(PL_linestr,"chomp;");
3578 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3579 || *PL_splitstr == '"')
3580 && strchr(PL_splitstr + 1, *PL_splitstr))
3581 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3583 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3584 bytes can be used as quoting characters. :-) */
3585 const char *splits = PL_splitstr;
3586 sv_catpvs(PL_linestr, "our @F=split(q\0");
3589 if (*splits == '\\')
3590 sv_catpvn(PL_linestr, splits, 1);
3591 sv_catpvn(PL_linestr, splits, 1);
3592 } while (*splits++);
3593 /* This loop will embed the trailing NUL of
3594 PL_linestr as the last thing it does before
3596 sv_catpvs(PL_linestr, ");");
3600 sv_catpvs(PL_linestr,"our @F=split(' ');");
3604 sv_catpvs(PL_linestr,"use feature ':5.10';");
3605 sv_catpvs(PL_linestr, "\n");
3606 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3607 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3608 PL_last_lop = PL_last_uni = NULL;
3609 if (PERLDB_LINE && PL_curstash != PL_debstash)
3610 update_debugger_info(PL_linestr, NULL, 0);
3614 bof = PL_rsfp ? TRUE : FALSE;
3615 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3618 PL_realtokenstart = -1;
3621 if (PL_preprocess && !PL_in_eval)
3622 (void)PerlProc_pclose(PL_rsfp);
3623 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3624 PerlIO_clearerr(PL_rsfp);
3626 (void)PerlIO_close(PL_rsfp);
3628 PL_doextract = FALSE;
3630 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3635 sv_setpv(PL_linestr,
3638 ? ";}continue{print;}" : ";}"));
3639 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3640 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3641 PL_last_lop = PL_last_uni = NULL;
3642 PL_minus_n = PL_minus_p = 0;
3645 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3646 PL_last_lop = PL_last_uni = NULL;
3647 sv_setpvn(PL_linestr,"",0);
3648 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3650 /* If it looks like the start of a BOM or raw UTF-16,
3651 * check if it in fact is. */
3657 #ifdef PERLIO_IS_STDIO
3658 # ifdef __GNU_LIBRARY__
3659 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3660 # define FTELL_FOR_PIPE_IS_BROKEN
3664 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3665 # define FTELL_FOR_PIPE_IS_BROKEN
3670 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3671 /* This loses the possibility to detect the bof
3672 * situation on perl -P when the libc5 is being used.
3673 * Workaround? Maybe attach some extra state to PL_rsfp?
3676 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3678 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3681 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3682 s = swallow_bom((U8*)s);
3686 /* Incest with pod. */
3689 sv_catsv(PL_thiswhite, PL_linestr);
3691 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3692 sv_setpvn(PL_linestr, "", 0);
3693 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3694 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3695 PL_last_lop = PL_last_uni = NULL;
3696 PL_doextract = FALSE;
3700 } while (PL_doextract);
3701 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3702 if (PERLDB_LINE && PL_curstash != PL_debstash)
3703 update_debugger_info(PL_linestr, NULL, 0);
3704 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3705 PL_last_lop = PL_last_uni = NULL;
3706 if (CopLINE(PL_curcop) == 1) {
3707 while (s < PL_bufend && isSPACE(*s))
3709 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3713 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3717 if (*s == '#' && *(s+1) == '!')
3719 #ifdef ALTERNATE_SHEBANG
3721 static char const as[] = ALTERNATE_SHEBANG;
3722 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3723 d = s + (sizeof(as) - 1);
3725 #endif /* ALTERNATE_SHEBANG */
3734 while (*d && !isSPACE(*d))
3738 #ifdef ARG_ZERO_IS_SCRIPT
3739 if (ipathend > ipath) {
3741 * HP-UX (at least) sets argv[0] to the script name,
3742 * which makes $^X incorrect. And Digital UNIX and Linux,
3743 * at least, set argv[0] to the basename of the Perl
3744 * interpreter. So, having found "#!", we'll set it right.
3746 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3748 assert(SvPOK(x) || SvGMAGICAL(x));
3749 if (sv_eq(x, CopFILESV(PL_curcop))) {
3750 sv_setpvn(x, ipath, ipathend - ipath);
3756 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3757 const char * const lstart = SvPV_const(x,llen);
3759 bstart += blen - llen;
3760 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3761 sv_setpvn(x, ipath, ipathend - ipath);
3766 TAINT_NOT; /* $^X is always tainted, but that's OK */
3768 #endif /* ARG_ZERO_IS_SCRIPT */
3773 d = instr(s,"perl -");
3775 d = instr(s,"perl");
3777 /* avoid getting into infinite loops when shebang
3778 * line contains "Perl" rather than "perl" */
3780 for (d = ipathend-4; d >= ipath; --d) {
3781 if ((*d == 'p' || *d == 'P')
3782 && !ibcmp(d, "perl", 4))
3792 #ifdef ALTERNATE_SHEBANG
3794 * If the ALTERNATE_SHEBANG on this system starts with a
3795 * character that can be part of a Perl expression, then if
3796 * we see it but not "perl", we're probably looking at the
3797 * start of Perl code, not a request to hand off to some
3798 * other interpreter. Similarly, if "perl" is there, but
3799 * not in the first 'word' of the line, we assume the line
3800 * contains the start of the Perl program.
3802 if (d && *s != '#') {
3803 const char *c = ipath;
3804 while (*c && !strchr("; \t\r\n\f\v#", *c))
3807 d = NULL; /* "perl" not in first word; ignore */
3809 *s = '#'; /* Don't try to parse shebang line */
3811 #endif /* ALTERNATE_SHEBANG */
3812 #ifndef MACOS_TRADITIONAL
3817 !instr(s,"indir") &&
3818 instr(PL_origargv[0],"perl"))
3825 while (s < PL_bufend && isSPACE(*s))
3827 if (s < PL_bufend) {
3828 Newxz(newargv,PL_origargc+3,char*);
3830 while (s < PL_bufend && !isSPACE(*s))
3833 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3836 newargv = PL_origargv;
3839 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3841 Perl_croak(aTHX_ "Can't exec %s", ipath);
3845 while (*d && !isSPACE(*d))
3847 while (SPACE_OR_TAB(*d))
3851 const bool switches_done = PL_doswitches;
3852 const U32 oldpdb = PL_perldb;
3853 const bool oldn = PL_minus_n;
3854 const bool oldp = PL_minus_p;
3857 if (*d == 'M' || *d == 'm' || *d == 'C') {
3858 const char * const m = d;
3859 while (*d && !isSPACE(*d))
3861 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3864 d = moreswitches(d);
3866 if (PL_doswitches && !switches_done) {
3867 int argc = PL_origargc;
3868 char **argv = PL_origargv;
3871 } while (argc && argv[0][0] == '-' && argv[0][1]);
3872 init_argv_symbols(argc,argv);
3874 if ((PERLDB_LINE && !oldpdb) ||
3875 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3876 /* if we have already added "LINE: while (<>) {",
3877 we must not do it again */
3879 sv_setpvn(PL_linestr, "", 0);
3880 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3881 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3882 PL_last_lop = PL_last_uni = NULL;
3883 PL_preambled = FALSE;
3885 (void)gv_fetchfile(PL_origfilename);
3892 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3894 PL_lex_state = LEX_FORMLINE;
3899 #ifdef PERL_STRICT_CR
3900 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3902 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3904 case ' ': case '\t': case '\f': case 013:
3905 #ifdef MACOS_TRADITIONAL
3909 PL_realtokenstart = -1;
3911 PL_thiswhite = newSVpvs("");
3912 sv_catpvn(PL_thiswhite, s, 1);
3919 PL_realtokenstart = -1;
3923 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3924 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3925 /* handle eval qq[#line 1 "foo"\n ...] */
3926 CopLINE_dec(PL_curcop);
3929 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3931 if (!PL_in_eval || PL_rsfp)
3936 while (d < PL_bufend && *d != '\n')
3940 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3941 Perl_croak(aTHX_ "panic: input overflow");
3944 PL_thiswhite = newSVpvn(s, d - s);
3949 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3951 PL_lex_state = LEX_FORMLINE;
3957 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3958 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3961 TOKEN(PEG); /* make sure any #! line is accessible */
3966 /* if (PL_madskills && PL_lex_formbrack) { */
3968 while (d < PL_bufend && *d != '\n')
3972 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3973 Perl_croak(aTHX_ "panic: input overflow");
3974 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3976 PL_thiswhite = newSVpvs("");
3977 if (CopLINE(PL_curcop) == 1) {
3978 sv_setpvn(PL_thiswhite, "", 0);
3981 sv_catpvn(PL_thiswhite, s, d - s);
3995 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4003 while (s < PL_bufend && SPACE_OR_TAB(*s))
4006 if (strnEQ(s,"=>",2)) {
4007 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4008 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4009 OPERATOR('-'); /* unary minus */
4011 PL_last_uni = PL_oldbufptr;
4013 case 'r': ftst = OP_FTEREAD; break;
4014 case 'w': ftst = OP_FTEWRITE; break;
4015 case 'x': ftst = OP_FTEEXEC; break;
4016 case 'o': ftst = OP_FTEOWNED; break;
4017 case 'R': ftst = OP_FTRREAD; break;
4018 case 'W': ftst = OP_FTRWRITE; break;
4019 case 'X': ftst = OP_FTREXEC; break;
4020 case 'O': ftst = OP_FTROWNED; break;
4021 case 'e': ftst = OP_FTIS; break;
4022 case 'z': ftst = OP_FTZERO; break;
4023 case 's': ftst = OP_FTSIZE; break;
4024 case 'f': ftst = OP_FTFILE; break;
4025 case 'd': ftst = OP_FTDIR; break;
4026 case 'l': ftst = OP_FTLINK; break;
4027 case 'p': ftst = OP_FTPIPE; break;
4028 case 'S': ftst = OP_FTSOCK; break;
4029 case 'u': ftst = OP_FTSUID; break;
4030 case 'g': ftst = OP_FTSGID; break;
4031 case 'k': ftst = OP_FTSVTX; break;
4032 case 'b': ftst = OP_FTBLK; break;
4033 case 'c': ftst = OP_FTCHR; break;
4034 case 't': ftst = OP_FTTTY; break;
4035 case 'T': ftst = OP_FTTEXT; break;
4036 case 'B': ftst = OP_FTBINARY; break;
4037 case 'M': case 'A': case 'C':
4038 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4040 case 'M': ftst = OP_FTMTIME; break;
4041 case 'A': ftst = OP_FTATIME; break;
4042 case 'C': ftst = OP_FTCTIME; break;
4050 PL_last_lop_op = (OPCODE)ftst;
4051 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4052 "### Saw file test %c\n", (int)tmp);
4057 /* Assume it was a minus followed by a one-letter named
4058 * subroutine call (or a -bareword), then. */
4059 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4060 "### '-%c' looked like a file test but was not\n",
4067 const char tmp = *s++;
4070 if (PL_expect == XOPERATOR)
4075 else if (*s == '>') {
4078 if (isIDFIRST_lazy_if(s,UTF)) {
4079 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4087 if (PL_expect == XOPERATOR)
4090 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4092 OPERATOR('-'); /* unary minus */
4098 const char tmp = *s++;
4101 if (PL_expect == XOPERATOR)
4106 if (PL_expect == XOPERATOR)
4109 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4116 if (PL_expect != XOPERATOR) {
4117 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4118 PL_expect = XOPERATOR;
4119 force_ident(PL_tokenbuf, '*');
4132 if (PL_expect == XOPERATOR) {
4136 PL_tokenbuf[0] = '%';
4137 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4138 sizeof PL_tokenbuf - 1, FALSE);
4139 if (!PL_tokenbuf[1]) {
4142 PL_pending_ident = '%';
4153 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4160 const char tmp = *s++;
4166 goto just_a_word_zero_gv;
4169 switch (PL_expect) {
4175 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4177 PL_bufptr = s; /* update in case we back off */
4183 PL_expect = XTERMBLOCK;
4186 stuffstart = s - SvPVX(PL_linestr) - 1;
4190 while (isIDFIRST_lazy_if(s,UTF)) {
4193 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4194 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4195 if (tmp < 0) tmp = -tmp;
4210 sv = newSVpvn(s, len);
4212 d = scan_str(d,TRUE,TRUE);
4214 /* MUST advance bufptr here to avoid bogus
4215 "at end of line" context messages from yyerror().
4217 PL_bufptr = s + len;
4218 yyerror("Unterminated attribute parameter in attribute list");
4222 return REPORT(0); /* EOF indicator */
4226 sv_catsv(sv, PL_lex_stuff);
4227 attrs = append_elem(OP_LIST, attrs,
4228 newSVOP(OP_CONST, 0, sv));
4229 SvREFCNT_dec(PL_lex_stuff);
4230 PL_lex_stuff = NULL;
4233 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4235 if (PL_in_my == KEY_our) {
4237 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4239 /* skip to avoid loading attributes.pm */
4241 deprecate(":unique");
4244 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4247 /* NOTE: any CV attrs applied here need to be part of
4248 the CVf_BUILTIN_ATTRS define in cv.h! */
4249 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4251 CvLVALUE_on(PL_compcv);
4253 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4255 CvLOCKED_on(PL_compcv);
4257 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4259 CvMETHOD_on(PL_compcv);
4261 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4263 CvASSERTION_on(PL_compcv);
4265 /* After we've set the flags, it could be argued that
4266 we don't need to do the attributes.pm-based setting
4267 process, and shouldn't bother appending recognized
4268 flags. To experiment with that, uncomment the
4269 following "else". (Note that's already been
4270 uncommented. That keeps the above-applied built-in
4271 attributes from being intercepted (and possibly
4272 rejected) by a package's attribute routines, but is
4273 justified by the performance win for the common case
4274 of applying only built-in attributes.) */
4276 attrs = append_elem(OP_LIST, attrs,
4277 newSVOP(OP_CONST, 0,
4281 if (*s == ':' && s[1] != ':')
4284 break; /* require real whitespace or :'s */
4285 /* XXX losing whitespace on sequential attributes here */
4289 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4290 if (*s != ';' && *s != '}' && *s != tmp
4291 && (tmp != '=' || *s != ')')) {
4292 const char q = ((*s == '\'') ? '"' : '\'');
4293 /* If here for an expression, and parsed no attrs, back
4295 if (tmp == '=' && !attrs) {
4299 /* MUST advance bufptr here to avoid bogus "at end of line"
4300 context messages from yyerror().
4303 yyerror( (const char *)
4305 ? Perl_form(aTHX_ "Invalid separator character "
4306 "%c%c%c in attribute list", q, *s, q)
4307 : "Unterminated attribute list" ) );
4315 start_force(PL_curforce);
4316 NEXTVAL_NEXTTOKE.opval = attrs;
4317 CURMAD('_', PL_nextwhite);
4322 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4323 (s - SvPVX(PL_linestr)) - stuffstart);
4331 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4332 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4340 const char tmp = *s++;
4345 const char tmp = *s++;
4353 if (PL_lex_brackets <= 0)
4354 yyerror("Unmatched right square bracket");
4357 if (PL_lex_state == LEX_INTERPNORMAL) {
4358 if (PL_lex_brackets == 0) {
4359 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4360 PL_lex_state = LEX_INTERPEND;
4367 if (PL_lex_brackets > 100) {
4368 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4370 switch (PL_expect) {
4372 if (PL_lex_formbrack) {
4376 if (PL_oldoldbufptr == PL_last_lop)
4377 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4379 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4380 OPERATOR(HASHBRACK);
4382 while (s < PL_bufend && SPACE_OR_TAB(*s))
4385 PL_tokenbuf[0] = '\0';
4386 if (d < PL_bufend && *d == '-') {
4387 PL_tokenbuf[0] = '-';
4389 while (d < PL_bufend && SPACE_OR_TAB(*d))
4392 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4393 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4395 while (d < PL_bufend && SPACE_OR_TAB(*d))
4398 const char minus = (PL_tokenbuf[0] == '-');
4399 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4407 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4412 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4417 if (PL_oldoldbufptr == PL_last_lop)
4418 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4420 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4423 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4425 /* This hack is to get the ${} in the message. */
4427 yyerror("syntax error");
4430 OPERATOR(HASHBRACK);
4432 /* This hack serves to disambiguate a pair of curlies
4433 * as being a block or an anon hash. Normally, expectation
4434 * determines that, but in cases where we're not in a
4435 * position to expect anything in particular (like inside
4436 * eval"") we have to resolve the ambiguity. This code
4437 * covers the case where the first term in the curlies is a
4438 * quoted string. Most other cases need to be explicitly
4439 * disambiguated by prepending a "+" before the opening
4440 * curly in order to force resolution as an anon hash.
4442 * XXX should probably propagate the outer expectation
4443 * into eval"" to rely less on this hack, but that could
4444 * potentially break current behavior of eval"".
4448 if (*s == '\'' || *s == '"' || *s == '`') {
4449 /* common case: get past first string, handling escapes */
4450 for (t++; t < PL_bufend && *t != *s;)
4451 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4455 else if (*s == 'q') {
4458 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4461 /* skip q//-like construct */
4463 char open, close, term;
4466 while (t < PL_bufend && isSPACE(*t))
4468 /* check for q => */
4469 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4470 OPERATOR(HASHBRACK);
4474 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4478 for (t++; t < PL_bufend; t++) {
4479 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4481 else if (*t == open)
4485 for (t++; t < PL_bufend; t++) {
4486 if (*t == '\\' && t+1 < PL_bufend)
4488 else if (*t == close && --brackets <= 0)
4490 else if (*t == open)
4497 /* skip plain q word */
4498 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4501 else if (isALNUM_lazy_if(t,UTF)) {
4503 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4506 while (t < PL_bufend && isSPACE(*t))
4508 /* if comma follows first term, call it an anon hash */
4509 /* XXX it could be a comma expression with loop modifiers */
4510 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4511 || (*t == '=' && t[1] == '>')))
4512 OPERATOR(HASHBRACK);
4513 if (PL_expect == XREF)
4516 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4522 yylval.ival = CopLINE(PL_curcop);
4523 if (isSPACE(*s) || *s == '#')
4524 PL_copline = NOLINE; /* invalidate current command line number */
4529 if (PL_lex_brackets <= 0)
4530 yyerror("Unmatched right curly bracket");
4532 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4533 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4534 PL_lex_formbrack = 0;
4535 if (PL_lex_state == LEX_INTERPNORMAL) {
4536 if (PL_lex_brackets == 0) {
4537 if (PL_expect & XFAKEBRACK) {
4538 PL_expect &= XENUMMASK;
4539 PL_lex_state = LEX_INTERPEND;
4544 PL_thiswhite = newSVpvs("");
4545 sv_catpvn(PL_thiswhite,"}",1);
4548 return yylex(); /* ignore fake brackets */
4550 if (*s == '-' && s[1] == '>')
4551 PL_lex_state = LEX_INTERPENDMAYBE;
4552 else if (*s != '[' && *s != '{')
4553 PL_lex_state = LEX_INTERPEND;
4556 if (PL_expect & XFAKEBRACK) {
4557 PL_expect &= XENUMMASK;
4559 return yylex(); /* ignore fake brackets */
4561 start_force(PL_curforce);
4563 curmad('X', newSVpvn(s-1,1));
4564 CURMAD('_', PL_thiswhite);
4569 PL_thistoken = newSVpvs("");
4577 if (PL_expect == XOPERATOR) {
4578 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4579 && isIDFIRST_lazy_if(s,UTF))
4581 CopLINE_dec(PL_curcop);
4582 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4583 CopLINE_inc(PL_curcop);
4588 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4590 PL_expect = XOPERATOR;
4591 force_ident(PL_tokenbuf, '&');
4595 yylval.ival = (OPpENTERSUB_AMPER<<8);
4607 const char tmp = *s++;
4614 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4615 && strchr("+-*/%.^&|<",tmp))
4616 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4617 "Reversed %c= operator",(int)tmp);
4619 if (PL_expect == XSTATE && isALPHA(tmp) &&
4620 (s == PL_linestart+1 || s[-2] == '\n') )
4622 if (PL_in_eval && !PL_rsfp) {
4627 if (strnEQ(s,"=cut",4)) {
4643 PL_thiswhite = newSVpvs("");
4644 sv_catpvn(PL_thiswhite, PL_linestart,
4645 PL_bufend - PL_linestart);
4649 PL_doextract = TRUE;
4653 if (PL_lex_brackets < PL_lex_formbrack) {
4655 #ifdef PERL_STRICT_CR
4656 while (SPACE_OR_TAB(*t))
4658 while (SPACE_OR_TAB(*t) || *t == '\r')
4661 if (*t == '\n' || *t == '#') {
4672 const char tmp = *s++;
4674 /* was this !=~ where !~ was meant?
4675 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4677 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4678 const char *t = s+1;
4680 while (t < PL_bufend && isSPACE(*t))
4683 if (*t == '/' || *t == '?' ||
4684 ((*t == 'm' || *t == 's' || *t == 'y')
4685 && !isALNUM(t[1])) ||
4686 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4687 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4688 "!=~ should be !~");
4698 if (PL_expect != XOPERATOR) {
4699 if (s[1] != '<' && !strchr(s,'>'))
4702 s = scan_heredoc(s);
4704 s = scan_inputsymbol(s);
4705 TERM(sublex_start());
4711 SHop(OP_LEFT_SHIFT);
4725 const char tmp = *s++;
4727 SHop(OP_RIGHT_SHIFT);
4728 else if (tmp == '=')
4737 if (PL_expect == XOPERATOR) {
4738 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4740 deprecate_old(commaless_variable_list);
4741 return REPORT(','); /* grandfather non-comma-format format */
4745 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4746 PL_tokenbuf[0] = '@';
4747 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4748 sizeof PL_tokenbuf - 1, FALSE);
4749 if (PL_expect == XOPERATOR)
4750 no_op("Array length", s);
4751 if (!PL_tokenbuf[1])
4753 PL_expect = XOPERATOR;
4754 PL_pending_ident = '#';
4758 PL_tokenbuf[0] = '$';
4759 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4760 sizeof PL_tokenbuf - 1, FALSE);
4761 if (PL_expect == XOPERATOR)
4763 if (!PL_tokenbuf[1]) {
4765 yyerror("Final $ should be \\$ or $name");
4769 /* This kludge not intended to be bulletproof. */
4770 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4771 yylval.opval = newSVOP(OP_CONST, 0,
4772 newSViv(CopARYBASE_get(&PL_compiling)));
4773 yylval.opval->op_private = OPpCONST_ARYBASE;
4779 const char tmp = *s;
4780 if (PL_lex_state == LEX_NORMAL)
4783 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4784 && intuit_more(s)) {
4786 PL_tokenbuf[0] = '@';
4787 if (ckWARN(WARN_SYNTAX)) {
4790 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4793 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4794 while (t < PL_bufend && *t != ']')
4796 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4797 "Multidimensional syntax %.*s not supported",
4798 (int)((t - PL_bufptr) + 1), PL_bufptr);
4802 else if (*s == '{') {
4804 PL_tokenbuf[0] = '%';
4805 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4806 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4808 char tmpbuf[sizeof PL_tokenbuf];
4811 } while (isSPACE(*t));
4812 if (isIDFIRST_lazy_if(t,UTF)) {
4814 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4818 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4819 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4820 "You need to quote \"%s\"",
4827 PL_expect = XOPERATOR;
4828 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4829 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4830 if (!islop || PL_last_lop_op == OP_GREPSTART)
4831 PL_expect = XOPERATOR;
4832 else if (strchr("$@\"'`q", *s))
4833 PL_expect = XTERM; /* e.g. print $fh "foo" */
4834 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4835 PL_expect = XTERM; /* e.g. print $fh &sub */
4836 else if (isIDFIRST_lazy_if(s,UTF)) {
4837 char tmpbuf[sizeof PL_tokenbuf];
4839 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4840 if ((t2 = keyword(tmpbuf, len, 0))) {
4841 /* binary operators exclude handle interpretations */
4853 PL_expect = XTERM; /* e.g. print $fh length() */
4858 PL_expect = XTERM; /* e.g. print $fh subr() */
4861 else if (isDIGIT(*s))
4862 PL_expect = XTERM; /* e.g. print $fh 3 */
4863 else if (*s == '.' && isDIGIT(s[1]))
4864 PL_expect = XTERM; /* e.g. print $fh .3 */
4865 else if ((*s == '?' || *s == '-' || *s == '+')
4866 && !isSPACE(s[1]) && s[1] != '=')
4867 PL_expect = XTERM; /* e.g. print $fh -1 */
4868 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4870 PL_expect = XTERM; /* e.g. print $fh /.../
4871 XXX except DORDOR operator
4873 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4875 PL_expect = XTERM; /* print $fh <<"EOF" */
4878 PL_pending_ident = '$';
4882 if (PL_expect == XOPERATOR)
4884 PL_tokenbuf[0] = '@';
4885 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4886 if (!PL_tokenbuf[1]) {
4889 if (PL_lex_state == LEX_NORMAL)
4891 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4893 PL_tokenbuf[0] = '%';
4895 /* Warn about @ where they meant $. */
4896 if (*s == '[' || *s == '{') {
4897 if (ckWARN(WARN_SYNTAX)) {
4898 const char *t = s + 1;
4899 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4901 if (*t == '}' || *t == ']') {
4903 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4904 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4905 "Scalar value %.*s better written as $%.*s",
4906 (int)(t-PL_bufptr), PL_bufptr,
4907 (int)(t-PL_bufptr-1), PL_bufptr+1);
4912 PL_pending_ident = '@';
4915 case '/': /* may be division, defined-or, or pattern */
4916 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4920 case '?': /* may either be conditional or pattern */
4921 if(PL_expect == XOPERATOR) {
4929 /* A // operator. */
4939 /* Disable warning on "study /blah/" */
4940 if (PL_oldoldbufptr == PL_last_uni
4941 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4942 || memNE(PL_last_uni, "study", 5)
4943 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4946 s = scan_pat(s,OP_MATCH);
4947 TERM(sublex_start());
4951 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4952 #ifdef PERL_STRICT_CR
4955 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4957 && (s == PL_linestart || s[-1] == '\n') )
4959 PL_lex_formbrack = 0;
4963 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4969 yylval.ival = OPf_SPECIAL;
4975 if (PL_expect != XOPERATOR)
4980 case '0': case '1': case '2': case '3': case '4':
4981 case '5': case '6': case '7': case '8': case '9':
4982 s = scan_num(s, &yylval);
4983 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
4984 if (PL_expect == XOPERATOR)
4989 s = scan_str(s,!!PL_madskills,FALSE);
4990 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4991 if (PL_expect == XOPERATOR) {
4992 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4994 deprecate_old(commaless_variable_list);
4995 return REPORT(','); /* grandfather non-comma-format format */
5002 yylval.ival = OP_CONST;
5003 TERM(sublex_start());
5006 s = scan_str(s,!!PL_madskills,FALSE);
5007 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5008 if (PL_expect == XOPERATOR) {
5009 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5011 deprecate_old(commaless_variable_list);
5012 return REPORT(','); /* grandfather non-comma-format format */
5019 yylval.ival = OP_CONST;
5020 /* FIXME. I think that this can be const if char *d is replaced by
5021 more localised variables. */
5022 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5023 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5024 yylval.ival = OP_STRINGIFY;
5028 TERM(sublex_start());
5031 s = scan_str(s,!!PL_madskills,FALSE);
5032 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5033 if (PL_expect == XOPERATOR)
5034 no_op("Backticks",s);
5037 readpipe_override();
5038 TERM(sublex_start());
5042 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5043 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5045 if (PL_expect == XOPERATOR)
5046 no_op("Backslash",s);
5050 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5051 char *start = s + 2;
5052 while (isDIGIT(*start) || *start == '_')
5054 if (*start == '.' && isDIGIT(start[1])) {
5055 s = scan_num(s, &yylval);
5058 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5059 else if (!isALPHA(*start) && (PL_expect == XTERM
5060 || PL_expect == XREF || PL_expect == XSTATE
5061 || PL_expect == XTERMORDORDOR)) {
5062 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5063 const char c = *start;
5066 gv = gv_fetchpv(s, 0, SVt_PVCV);
5069 s = scan_num(s, &yylval);
5076 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5118 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5120 /* Some keywords can be followed by any delimiter, including ':' */
5121 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5122 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5123 (PL_tokenbuf[0] == 'q' &&
5124 strchr("qwxr", PL_tokenbuf[1])))));
5126 /* x::* is just a word, unless x is "CORE" */
5127 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5131 while (d < PL_bufend && isSPACE(*d))
5132 d++; /* no comments skipped here, or s### is misparsed */
5134 /* Is this a label? */
5135 if (!tmp && PL_expect == XSTATE
5136 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5138 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5143 /* Check for keywords */
5144 tmp = keyword(PL_tokenbuf, len, 0);
5146 /* Is this a word before a => operator? */
5147 if (*d == '=' && d[1] == '>') {
5150 = (OP*)newSVOP(OP_CONST, 0,
5151 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5152 yylval.opval->op_private = OPpCONST_BARE;
5156 if (tmp < 0) { /* second-class keyword? */
5157 GV *ogv = NULL; /* override (winner) */
5158 GV *hgv = NULL; /* hidden (loser) */
5159 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5161 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5164 if (GvIMPORTED_CV(gv))
5166 else if (! CvMETHOD(cv))
5170 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5171 (gv = *gvp) && isGV_with_GP(gv) &&
5172 GvCVu(gv) && GvIMPORTED_CV(gv))
5179 tmp = 0; /* overridden by import or by GLOBAL */
5182 && -tmp==KEY_lock /* XXX generalizable kludge */
5184 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5186 tmp = 0; /* any sub overrides "weak" keyword */
5188 else { /* no override */
5190 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5191 Perl_warner(aTHX_ packWARN(WARN_MISC),
5192 "dump() better written as CORE::dump()");
5196 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5197 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5198 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5199 "Ambiguous call resolved as CORE::%s(), %s",
5200 GvENAME(hgv), "qualify as such or use &");
5207 default: /* not a keyword */
5208 /* Trade off - by using this evil construction we can pull the
5209 variable gv into the block labelled keylookup. If not, then
5210 we have to give it function scope so that the goto from the
5211 earlier ':' case doesn't bypass the initialisation. */
5213 just_a_word_zero_gv:
5221 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5224 SV *nextPL_nextwhite = 0;
5228 /* Get the rest if it looks like a package qualifier */
5230 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5232 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5235 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5236 *s == '\'' ? "'" : "::");
5241 if (PL_expect == XOPERATOR) {
5242 if (PL_bufptr == PL_linestart) {
5243 CopLINE_dec(PL_curcop);
5244 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5245 CopLINE_inc(PL_curcop);
5248 no_op("Bareword",s);
5251 /* Look for a subroutine with this name in current package,
5252 unless name is "Foo::", in which case Foo is a bearword
5253 (and a package name). */
5255 if (len > 2 && !PL_madskills &&
5256 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5258 if (ckWARN(WARN_BAREWORD)
5259 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5260 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5261 "Bareword \"%s\" refers to nonexistent package",
5264 PL_tokenbuf[len] = '\0';
5270 /* Mustn't actually add anything to a symbol table.
5271 But also don't want to "initialise" any placeholder
5272 constants that might already be there into full
5273 blown PVGVs with attached PVCV. */
5274 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5275 GV_NOADD_NOINIT, SVt_PVCV);
5280 /* if we saw a global override before, get the right name */
5283 sv = newSVpvs("CORE::GLOBAL::");
5284 sv_catpv(sv,PL_tokenbuf);
5287 /* If len is 0, newSVpv does strlen(), which is correct.
5288 If len is non-zero, then it will be the true length,
5289 and so the scalar will be created correctly. */
5290 sv = newSVpv(PL_tokenbuf,len);
5293 if (PL_madskills && !PL_thistoken) {
5294 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5295 PL_thistoken = newSVpv(start,s - start);
5296 PL_realtokenstart = s - SvPVX(PL_linestr);
5300 /* Presume this is going to be a bareword of some sort. */
5303 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5304 yylval.opval->op_private = OPpCONST_BARE;
5305 /* UTF-8 package name? */
5306 if (UTF && !IN_BYTES &&
5307 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5310 /* And if "Foo::", then that's what it certainly is. */
5315 /* Do the explicit type check so that we don't need to force
5316 the initialisation of the symbol table to have a real GV.
5317 Beware - gv may not really be a PVGV, cv may not really be
5318 a PVCV, (because of the space optimisations that gv_init
5319 understands) But they're true if for this symbol there is
5320 respectively a typeglob and a subroutine.
5322 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5323 /* Real typeglob, so get the real subroutine: */
5325 /* A proxy for a subroutine in this package? */
5326 : SvOK(gv) ? (CV *) gv : NULL)
5329 /* See if it's the indirect object for a list operator. */
5331 if (PL_oldoldbufptr &&
5332 PL_oldoldbufptr < PL_bufptr &&
5333 (PL_oldoldbufptr == PL_last_lop
5334 || PL_oldoldbufptr == PL_last_uni) &&
5335 /* NO SKIPSPACE BEFORE HERE! */
5336 (PL_expect == XREF ||
5337 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5339 bool immediate_paren = *s == '(';
5341 /* (Now we can afford to cross potential line boundary.) */
5342 s = SKIPSPACE2(s,nextPL_nextwhite);
5344 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5347 /* Two barewords in a row may indicate method call. */
5349 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5350 (tmp = intuit_method(s, gv, cv)))
5353 /* If not a declared subroutine, it's an indirect object. */
5354 /* (But it's an indir obj regardless for sort.) */
5355 /* Also, if "_" follows a filetest operator, it's a bareword */
5358 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5360 (PL_last_lop_op != OP_MAPSTART &&
5361 PL_last_lop_op != OP_GREPSTART))))
5362 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5363 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5366 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5371 PL_expect = XOPERATOR;
5374 s = SKIPSPACE2(s,nextPL_nextwhite);
5375 PL_nextwhite = nextPL_nextwhite;
5380 /* Is this a word before a => operator? */
5381 if (*s == '=' && s[1] == '>' && !pkgname) {
5383 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5384 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5385 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5389 /* If followed by a paren, it's certainly a subroutine. */
5394 while (SPACE_OR_TAB(*d))
5396 if (*d == ')' && (sv = gv_const_sv(gv))) {
5400 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5401 sv_catpvn(PL_thistoken, par, s - par);
5403 sv_free(PL_nextwhite);
5413 PL_nextwhite = PL_thiswhite;
5416 start_force(PL_curforce);
5418 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5419 PL_expect = XOPERATOR;
5422 PL_nextwhite = nextPL_nextwhite;
5423 curmad('X', PL_thistoken);
5424 PL_thistoken = newSVpvs("");
5432 /* If followed by var or block, call it a method (unless sub) */
5434 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5435 PL_last_lop = PL_oldbufptr;
5436 PL_last_lop_op = OP_METHOD;
5440 /* If followed by a bareword, see if it looks like indir obj. */
5443 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5444 && (tmp = intuit_method(s, gv, cv)))
5447 /* Not a method, so call it a subroutine (if defined) */
5450 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5451 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5452 "Ambiguous use of -%s resolved as -&%s()",
5453 PL_tokenbuf, PL_tokenbuf);
5454 /* Check for a constant sub */
5455 if ((sv = gv_const_sv(gv))) {
5457 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5458 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5459 yylval.opval->op_private = 0;
5463 /* Resolve to GV now. */
5464 if (SvTYPE(gv) != SVt_PVGV) {
5465 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5466 assert (SvTYPE(gv) == SVt_PVGV);
5467 /* cv must have been some sort of placeholder, so
5468 now needs replacing with a real code reference. */
5472 op_free(yylval.opval);
5473 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5474 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5475 PL_last_lop = PL_oldbufptr;
5476 PL_last_lop_op = OP_ENTERSUB;
5477 /* Is there a prototype? */
5485 const char *proto = SvPV_const((SV*)cv, protolen);
5488 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5490 while (*proto == ';')
5492 if (*proto == '&' && *s == '{') {
5493 sv_setpv(PL_subname,
5496 "__ANON__" : "__ANON__::__ANON__"));
5503 PL_nextwhite = PL_thiswhite;
5506 start_force(PL_curforce);
5507 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5510 PL_nextwhite = nextPL_nextwhite;
5511 curmad('X', PL_thistoken);
5512 PL_thistoken = newSVpvs("");
5519 /* Guess harder when madskills require "best effort". */
5520 if (PL_madskills && (!gv || !GvCVu(gv))) {
5521 int probable_sub = 0;
5522 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5524 else if (isALPHA(*s)) {
5528 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5529 if (!keyword(tmpbuf, tmplen, 0))
5532 while (d < PL_bufend && isSPACE(*d))
5534 if (*d == '=' && d[1] == '>')
5539 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5540 op_free(yylval.opval);
5541 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5542 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5543 PL_last_lop = PL_oldbufptr;
5544 PL_last_lop_op = OP_ENTERSUB;
5545 PL_nextwhite = PL_thiswhite;
5547 start_force(PL_curforce);
5548 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5550 PL_nextwhite = nextPL_nextwhite;
5551 curmad('X', PL_thistoken);
5552 PL_thistoken = newSVpvs("");
5557 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5564 /* Call it a bare word */
5566 if (PL_hints & HINT_STRICT_SUBS)
5567 yylval.opval->op_private |= OPpCONST_STRICT;
5570 if (lastchar != '-') {
5571 if (ckWARN(WARN_RESERVED)) {
5575 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5576 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5583 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5584 && ckWARN_d(WARN_AMBIGUOUS)) {
5585 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5586 "Operator or semicolon missing before %c%s",
5587 lastchar, PL_tokenbuf);
5588 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5589 "Ambiguous use of %c resolved as operator %c",
5590 lastchar, lastchar);
5596 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5597 newSVpv(CopFILE(PL_curcop),0));
5601 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5602 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5605 case KEY___PACKAGE__:
5606 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5608 ? newSVhek(HvNAME_HEK(PL_curstash))
5615 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5616 const char *pname = "main";
5617 if (PL_tokenbuf[2] == 'D')
5618 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5619 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5623 GvIOp(gv) = newIO();
5624 IoIFP(GvIOp(gv)) = PL_rsfp;
5625 #if defined(HAS_FCNTL) && defined(F_SETFD)
5627 const int fd = PerlIO_fileno(PL_rsfp);
5628 fcntl(fd,F_SETFD,fd >= 3);
5631 /* Mark this internal pseudo-handle as clean */
5632 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5634 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5635 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5636 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5638 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5639 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5640 /* if the script was opened in binmode, we need to revert
5641 * it to text mode for compatibility; but only iff it has CRs
5642 * XXX this is a questionable hack at best. */
5643 if (PL_bufend-PL_bufptr > 2
5644 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5647 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5648 loc = PerlIO_tell(PL_rsfp);
5649 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5652 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5654 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5655 #endif /* NETWARE */
5656 #ifdef PERLIO_IS_STDIO /* really? */
5657 # if defined(__BORLANDC__)
5658 /* XXX see note in do_binmode() */
5659 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5663 PerlIO_seek(PL_rsfp, loc, 0);
5667 #ifdef PERLIO_LAYERS
5670 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5671 else if (PL_encoding) {
5678 XPUSHs(PL_encoding);
5680 call_method("name", G_SCALAR);
5684 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5685 Perl_form(aTHX_ ":encoding(%"SVf")",
5694 if (PL_realtokenstart >= 0) {
5695 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5697 PL_endwhite = newSVpvs("");
5698 sv_catsv(PL_endwhite, PL_thiswhite);
5700 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5701 PL_realtokenstart = -1;
5703 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5704 SvCUR(PL_endwhite))) != Nullch) ;
5719 if (PL_expect == XSTATE) {
5726 if (*s == ':' && s[1] == ':') {
5729 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5730 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5731 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5734 else if (tmp == KEY_require || tmp == KEY_do)
5735 /* that's a way to remember we saw "CORE::" */
5748 LOP(OP_ACCEPT,XTERM);
5754 LOP(OP_ATAN2,XTERM);
5760 LOP(OP_BINMODE,XTERM);
5763 LOP(OP_BLESS,XTERM);
5772 /* When 'use switch' is in effect, continue has a dual
5773 life as a control operator. */
5775 if (!FEATURE_IS_ENABLED("switch"))
5778 /* We have to disambiguate the two senses of
5779 "continue". If the next token is a '{' then
5780 treat it as the start of a continue block;
5781 otherwise treat it as a control operator.
5793 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5810 if (!PL_cryptseen) {
5811 PL_cryptseen = TRUE;
5815 LOP(OP_CRYPT,XTERM);
5818 LOP(OP_CHMOD,XTERM);
5821 LOP(OP_CHOWN,XTERM);
5824 LOP(OP_CONNECT,XTERM);
5843 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5844 if (orig_keyword == KEY_do) {
5853 PL_hints |= HINT_BLOCK_SCOPE;
5863 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5864 LOP(OP_DBMOPEN,XTERM);
5870 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5877 yylval.ival = CopLINE(PL_curcop);
5893 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5894 UNIBRACK(OP_ENTEREVAL);
5912 case KEY_endhostent:
5918 case KEY_endservent:
5921 case KEY_endprotoent:
5932 yylval.ival = CopLINE(PL_curcop);
5934 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5937 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5940 if ((PL_bufend - p) >= 3 &&
5941 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5943 else if ((PL_bufend - p) >= 4 &&
5944 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5947 if (isIDFIRST_lazy_if(p,UTF)) {
5948 p = scan_ident(p, PL_bufend,
5949 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5953 Perl_croak(aTHX_ "Missing $ on loop variable");
5955 s = SvPVX(PL_linestr) + soff;
5961 LOP(OP_FORMLINE,XTERM);
5967 LOP(OP_FCNTL,XTERM);
5973 LOP(OP_FLOCK,XTERM);
5982 LOP(OP_GREPSTART, XREF);
5985 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6000 case KEY_getpriority:
6001 LOP(OP_GETPRIORITY,XTERM);
6003 case KEY_getprotobyname:
6006 case KEY_getprotobynumber:
6007 LOP(OP_GPBYNUMBER,XTERM);
6009 case KEY_getprotoent:
6021 case KEY_getpeername:
6022 UNI(OP_GETPEERNAME);
6024 case KEY_gethostbyname:
6027 case KEY_gethostbyaddr:
6028 LOP(OP_GHBYADDR,XTERM);
6030 case KEY_gethostent:
6033 case KEY_getnetbyname:
6036 case KEY_getnetbyaddr:
6037 LOP(OP_GNBYADDR,XTERM);
6042 case KEY_getservbyname:
6043 LOP(OP_GSBYNAME,XTERM);
6045 case KEY_getservbyport:
6046 LOP(OP_GSBYPORT,XTERM);
6048 case KEY_getservent:
6051 case KEY_getsockname:
6052 UNI(OP_GETSOCKNAME);
6054 case KEY_getsockopt:
6055 LOP(OP_GSOCKOPT,XTERM);
6070 yylval.ival = CopLINE(PL_curcop);
6081 yylval.ival = CopLINE(PL_curcop);
6085 LOP(OP_INDEX,XTERM);
6091 LOP(OP_IOCTL,XTERM);
6103 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6135 LOP(OP_LISTEN,XTERM);
6144 s = scan_pat(s,OP_MATCH);
6145 TERM(sublex_start());
6148 LOP(OP_MAPSTART, XREF);
6151 LOP(OP_MKDIR,XTERM);
6154 LOP(OP_MSGCTL,XTERM);
6157 LOP(OP_MSGGET,XTERM);
6160 LOP(OP_MSGRCV,XTERM);
6163 LOP(OP_MSGSND,XTERM);
6170 if (isIDFIRST_lazy_if(s,UTF)) {
6174 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6175 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6177 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6178 if (!PL_in_my_stash) {
6181 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6185 if (PL_madskills) { /* just add type to declarator token */
6186 sv_catsv(PL_thistoken, PL_nextwhite);
6188 sv_catpvn(PL_thistoken, start, s - start);
6196 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6203 s = tokenize_use(0, s);
6207 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6214 if (isIDFIRST_lazy_if(s,UTF)) {
6216 for (d = s; isALNUM_lazy_if(d,UTF);)
6218 for (t=d; isSPACE(*t);)
6220 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6222 && !(t[0] == '=' && t[1] == '>')
6224 int parms_len = (int)(d-s);
6225 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6226 "Precedence problem: open %.*s should be open(%.*s)",
6227 parms_len, s, parms_len, s);
6233 yylval.ival = OP_OR;
6243 LOP(OP_OPEN_DIR,XTERM);
6246 checkcomma(s,PL_tokenbuf,"filehandle");
6250 checkcomma(s,PL_tokenbuf,"filehandle");
6269 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6273 LOP(OP_PIPE_OP,XTERM);
6276 s = scan_str(s,!!PL_madskills,FALSE);
6279 yylval.ival = OP_CONST;
6280 TERM(sublex_start());
6286 s = scan_str(s,!!PL_madskills,FALSE);
6289 PL_expect = XOPERATOR;
6291 if (SvCUR(PL_lex_stuff)) {
6294 d = SvPV_force(PL_lex_stuff, len);
6296 for (; isSPACE(*d) && len; --len, ++d)
6301 if (!warned && ckWARN(WARN_QW)) {
6302 for (; !isSPACE(*d) && len; --len, ++d) {
6304 Perl_warner(aTHX_ packWARN(WARN_QW),
6305 "Possible attempt to separate words with commas");
6308 else if (*d == '#') {
6309 Perl_warner(aTHX_ packWARN(WARN_QW),
6310 "Possible attempt to put comments in qw() list");
6316 for (; !isSPACE(*d) && len; --len, ++d)
6319 sv = newSVpvn(b, d-b);
6320 if (DO_UTF8(PL_lex_stuff))
6322 words = append_elem(OP_LIST, words,
6323 newSVOP(OP_CONST, 0, tokeq(sv)));
6327 start_force(PL_curforce);
6328 NEXTVAL_NEXTTOKE.opval = words;
6333 SvREFCNT_dec(PL_lex_stuff);
6334 PL_lex_stuff = NULL;
6340 s = scan_str(s,!!PL_madskills,FALSE);
6343 yylval.ival = OP_STRINGIFY;
6344 if (SvIVX(PL_lex_stuff) == '\'')
6345 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6346 TERM(sublex_start());
6349 s = scan_pat(s,OP_QR);
6350 TERM(sublex_start());
6353 s = scan_str(s,!!PL_madskills,FALSE);
6356 readpipe_override();
6357 TERM(sublex_start());
6365 s = force_version(s, FALSE);
6367 else if (*s != 'v' || !isDIGIT(s[1])
6368 || (s = force_version(s, TRUE), *s == 'v'))
6370 *PL_tokenbuf = '\0';
6371 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6372 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6373 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6375 yyerror("<> should be quotes");
6377 if (orig_keyword == KEY_require) {
6385 PL_last_uni = PL_oldbufptr;
6386 PL_last_lop_op = OP_REQUIRE;
6388 return REPORT( (int)REQUIRE );
6394 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6398 LOP(OP_RENAME,XTERM);
6407 LOP(OP_RINDEX,XTERM);
6417 UNIDOR(OP_READLINE);
6421 UNIDOR(OP_BACKTICK);
6430 LOP(OP_REVERSE,XTERM);
6433 UNIDOR(OP_READLINK);
6441 TERM(sublex_start());
6443 TOKEN(1); /* force error */
6446 checkcomma(s,PL_tokenbuf,"filehandle");
6456 LOP(OP_SELECT,XTERM);
6462 LOP(OP_SEMCTL,XTERM);
6465 LOP(OP_SEMGET,XTERM);
6468 LOP(OP_SEMOP,XTERM);
6474 LOP(OP_SETPGRP,XTERM);
6476 case KEY_setpriority:
6477 LOP(OP_SETPRIORITY,XTERM);
6479 case KEY_sethostent:
6485 case KEY_setservent:
6488 case KEY_setprotoent:
6498 LOP(OP_SEEKDIR,XTERM);
6500 case KEY_setsockopt:
6501 LOP(OP_SSOCKOPT,XTERM);
6507 LOP(OP_SHMCTL,XTERM);
6510 LOP(OP_SHMGET,XTERM);
6513 LOP(OP_SHMREAD,XTERM);
6516 LOP(OP_SHMWRITE,XTERM);
6519 LOP(OP_SHUTDOWN,XTERM);
6528 LOP(OP_SOCKET,XTERM);
6530 case KEY_socketpair:
6531 LOP(OP_SOCKPAIR,XTERM);
6534 checkcomma(s,PL_tokenbuf,"subroutine name");
6536 if (*s == ';' || *s == ')') /* probably a close */
6537 Perl_croak(aTHX_ "sort is now a reserved word");
6539 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6543 LOP(OP_SPLIT,XTERM);
6546 LOP(OP_SPRINTF,XTERM);
6549 LOP(OP_SPLICE,XTERM);
6564 LOP(OP_SUBSTR,XTERM);
6570 char tmpbuf[sizeof PL_tokenbuf];
6571 SSize_t tboffset = 0;
6572 expectation attrful;
6573 bool have_name, have_proto;
6574 const int key = tmp;
6579 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6580 SV *subtoken = newSVpvn(tstart, s - tstart);
6584 s = SKIPSPACE2(s,tmpwhite);
6589 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6590 (*s == ':' && s[1] == ':'))
6597 attrful = XATTRBLOCK;
6598 /* remember buffer pos'n for later force_word */
6599 tboffset = s - PL_oldbufptr;
6600 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6603 nametoke = newSVpvn(s, d - s);
6605 if (memchr(tmpbuf, ':', len))
6606 sv_setpvn(PL_subname, tmpbuf, len);
6608 sv_setsv(PL_subname,PL_curstname);
6609 sv_catpvs(PL_subname,"::");
6610 sv_catpvn(PL_subname,tmpbuf,len);
6617 CURMAD('X', nametoke);
6618 CURMAD('_', tmpwhite);
6619 (void) force_word(PL_oldbufptr + tboffset, WORD,
6622 s = SKIPSPACE2(d,tmpwhite);
6629 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6630 PL_expect = XTERMBLOCK;
6631 attrful = XATTRTERM;
6632 sv_setpvn(PL_subname,"?",1);
6636 if (key == KEY_format) {
6638 PL_lex_formbrack = PL_lex_brackets + 1;
6640 PL_thistoken = subtoken;
6644 (void) force_word(PL_oldbufptr + tboffset, WORD,
6650 /* Look for a prototype */
6653 bool bad_proto = FALSE;
6654 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6656 s = scan_str(s,!!PL_madskills,FALSE);
6658 Perl_croak(aTHX_ "Prototype not terminated");
6659 /* strip spaces and check for bad characters */
6660 d = SvPVX(PL_lex_stuff);
6662 for (p = d; *p; ++p) {
6665 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6671 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6672 "Illegal character in prototype for %"SVf" : %s",
6673 SVfARG(PL_subname), d);
6674 SvCUR_set(PL_lex_stuff, tmp);
6679 CURMAD('q', PL_thisopen);
6680 CURMAD('_', tmpwhite);
6681 CURMAD('=', PL_thisstuff);
6682 CURMAD('Q', PL_thisclose);
6683 NEXTVAL_NEXTTOKE.opval =
6684 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6685 PL_lex_stuff = Nullsv;
6688 s = SKIPSPACE2(s,tmpwhite);
6696 if (*s == ':' && s[1] != ':')
6697 PL_expect = attrful;
6698 else if (*s != '{' && key == KEY_sub) {
6700 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6702 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6709 curmad('^', newSVpvs(""));
6710 CURMAD('_', tmpwhite);
6714 PL_thistoken = subtoken;
6717 NEXTVAL_NEXTTOKE.opval =
6718 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6719 PL_lex_stuff = NULL;
6724 sv_setpv(PL_subname,
6726 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6730 (void) force_word(PL_oldbufptr + tboffset, WORD,
6740 LOP(OP_SYSTEM,XREF);
6743 LOP(OP_SYMLINK,XTERM);
6746 LOP(OP_SYSCALL,XTERM);
6749 LOP(OP_SYSOPEN,XTERM);
6752 LOP(OP_SYSSEEK,XTERM);
6755 LOP(OP_SYSREAD,XTERM);
6758 LOP(OP_SYSWRITE,XTERM);
6762 TERM(sublex_start());
6783 LOP(OP_TRUNCATE,XTERM);
6795 yylval.ival = CopLINE(PL_curcop);
6799 yylval.ival = CopLINE(PL_curcop);
6803 LOP(OP_UNLINK,XTERM);
6809 LOP(OP_UNPACK,XTERM);
6812 LOP(OP_UTIME,XTERM);
6818 LOP(OP_UNSHIFT,XTERM);
6821 s = tokenize_use(1, s);
6831 yylval.ival = CopLINE(PL_curcop);
6835 yylval.ival = CopLINE(PL_curcop);
6839 PL_hints |= HINT_BLOCK_SCOPE;
6846 LOP(OP_WAITPID,XTERM);
6855 ctl_l[0] = toCTRL('L');
6857 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6860 /* Make sure $^L is defined */
6861 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6866 if (PL_expect == XOPERATOR)
6872 yylval.ival = OP_XOR;
6877 TERM(sublex_start());
6882 #pragma segment Main
6886 S_pending_ident(pTHX)
6891 /* pit holds the identifier we read and pending_ident is reset */
6892 char pit = PL_pending_ident;
6893 PL_pending_ident = 0;
6895 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6896 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6897 "### Pending identifier '%s'\n", PL_tokenbuf); });
6899 /* if we're in a my(), we can't allow dynamics here.
6900 $foo'bar has already been turned into $foo::bar, so
6901 just check for colons.
6903 if it's a legal name, the OP is a PADANY.
6906 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6907 if (strchr(PL_tokenbuf,':'))
6908 yyerror(Perl_form(aTHX_ "No package name allowed for "
6909 "variable %s in \"our\"",
6911 tmp = allocmy(PL_tokenbuf);
6914 if (strchr(PL_tokenbuf,':'))
6915 yyerror(Perl_form(aTHX_ PL_no_myglob,
6916 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6918 yylval.opval = newOP(OP_PADANY, 0);
6919 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6925 build the ops for accesses to a my() variable.
6927 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6928 then used in a comparison. This catches most, but not
6929 all cases. For instance, it catches
6930 sort { my($a); $a <=> $b }
6932 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6933 (although why you'd do that is anyone's guess).
6936 if (!strchr(PL_tokenbuf,':')) {
6938 tmp = pad_findmy(PL_tokenbuf);
6939 if (tmp != NOT_IN_PAD) {
6940 /* might be an "our" variable" */
6941 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6942 /* build ops for a bareword */
6943 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6944 HEK * const stashname = HvNAME_HEK(stash);
6945 SV * const sym = newSVhek(stashname);
6946 sv_catpvs(sym, "::");
6947 sv_catpv(sym, PL_tokenbuf+1);
6948 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6949 yylval.opval->op_private = OPpCONST_ENTERED;
6952 ? (GV_ADDMULTI | GV_ADDINEVAL)
6955 ((PL_tokenbuf[0] == '$') ? SVt_PV
6956 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6961 /* if it's a sort block and they're naming $a or $b */
6962 if (PL_last_lop_op == OP_SORT &&
6963 PL_tokenbuf[0] == '$' &&
6964 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6967 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6968 d < PL_bufend && *d != '\n';
6971 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6972 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6978 yylval.opval = newOP(OP_PADANY, 0);
6979 yylval.opval->op_targ = tmp;
6985 Whine if they've said @foo in a doublequoted string,
6986 and @foo isn't a variable we can find in the symbol
6989 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
6990 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
6991 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
6992 && ckWARN(WARN_AMBIGUOUS)
6993 /* DO NOT warn for @- and @+ */
6994 && !( PL_tokenbuf[2] == '\0' &&
6995 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
6998 /* Downgraded from fatal to warning 20000522 mjd */
6999 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7000 "Possible unintended interpolation of %s in string",
7005 /* build ops for a bareword */
7006 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7007 yylval.opval->op_private = OPpCONST_ENTERED;
7010 /* If the identifier refers to a stash, don't autovivify it.
7011 * Change 24660 had the side effect of causing symbol table
7012 * hashes to always be defined, even if they were freshly
7013 * created and the only reference in the entire program was
7014 * the single statement with the defined %foo::bar:: test.
7015 * It appears that all code in the wild doing this actually
7016 * wants to know whether sub-packages have been loaded, so
7017 * by avoiding auto-vivifying symbol tables, we ensure that
7018 * defined %foo::bar:: continues to be false, and the existing
7019 * tests still give the expected answers, even though what
7020 * they're actually testing has now changed subtly.
7022 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7024 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7025 ((PL_tokenbuf[0] == '$') ? SVt_PV
7026 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7032 * The following code was generated by perl_keyword.pl.
7036 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7041 case 1: /* 5 tokens of length 1 */
7073 case 2: /* 18 tokens of length 2 */
7219 case 3: /* 29 tokens of length 3 */
7223 if (name[1] == 'N' &&
7286 if (name[1] == 'i' &&
7308 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7326 if (name[1] == 'o' &&
7335 if (name[1] == 'e' &&
7344 if (name[1] == 'n' &&
7353 if (name[1] == 'o' &&
7362 if (name[1] == 'a' &&
7371 if (name[1] == 'o' &&
7433 if (name[1] == 'e' &&
7447 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7473 if (name[1] == 'i' &&
7482 if (name[1] == 's' &&
7491 if (name[1] == 'e' &&
7500 if (name[1] == 'o' &&
7512 case 4: /* 41 tokens of length 4 */
7516 if (name[1] == 'O' &&
7526 if (name[1] == 'N' &&
7536 if (name[1] == 'i' &&
7546 if (name[1] == 'h' &&
7556 if (name[1] == 'u' &&
7569 if (name[2] == 'c' &&
7578 if (name[2] == 's' &&
7587 if (name[2] == 'a' &&
7623 if (name[1] == 'o' &&
7636 if (name[2] == 't' &&
7645 if (name[2] == 'o' &&
7654 if (name[2] == 't' &&
7663 if (name[2] == 'e' &&
7676 if (name[1] == 'o' &&
7689 if (name[2] == 'y' &&
7698 if (name[2] == 'l' &&
7714 if (name[2] == 's' &&
7723 if (name[2] == 'n' &&
7732 if (name[2] == 'c' &&
7745 if (name[1] == 'e' &&
7755 if (name[1] == 'p' &&
7768 if (name[2] == 'c' &&
7777 if (name[2] == 'p' &&
7786 if (name[2] == 's' &&
7802 if (name[2] == 'n' &&
7872 if (name[2] == 'r' &&
7881 if (name[2] == 'r' &&
7890 if (name[2] == 'a' &&
7906 if (name[2] == 'l' &&
7968 if (name[2] == 'e' &&
7971 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7984 case 5: /* 39 tokens of length 5 */
7988 if (name[1] == 'E' &&
7999 if (name[1] == 'H' &&
8013 if (name[2] == 'a' &&
8023 if (name[2] == 'a' &&
8040 if (name[2] == 'e' &&
8050 if (name[2] == 'e' &&
8054 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8070 if (name[3] == 'i' &&
8079 if (name[3] == 'o' &&
8115 if (name[2] == 'o' &&
8125 if (name[2] == 'y' &&
8139 if (name[1] == 'l' &&
8153 if (name[2] == 'n' &&
8163 if (name[2] == 'o' &&
8177 if (name[1] == 'i' &&
8182 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8191 if (name[2] == 'd' &&
8201 if (name[2] == 'c' &&
8218 if (name[2] == 'c' &&
8228 if (name[2] == 't' &&
8242 if (name[1] == 'k' &&
8253 if (name[1] == 'r' &&
8267 if (name[2] == 's' &&
8277 if (name[2] == 'd' &&
8294 if (name[2] == 'm' &&
8304 if (name[2] == 'i' &&
8314 if (name[2] == 'e' &&
8324 if (name[2] == 'l' &&
8334 if (name[2] == 'a' &&
8347 if (name[3] == 't' &&
8350 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8356 if (name[3] == 'd' &&
8373 if (name[1] == 'i' &&
8387 if (name[2] == 'a' &&
8400 if (name[3] == 'e' &&
8435 if (name[2] == 'i' &&
8452 if (name[2] == 'i' &&
8462 if (name[2] == 'i' &&
8479 case 6: /* 33 tokens of length 6 */
8483 if (name[1] == 'c' &&
8498 if (name[2] == 'l' &&
8509 if (name[2] == 'r' &&
8524 if (name[1] == 'e' &&
8539 if (name[2] == 's' &&
8544 if(ckWARN_d(WARN_SYNTAX))
8545 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8551 if (name[2] == 'i' &&
8569 if (name[2] == 'l' &&
8580 if (name[2] == 'r' &&
8595 if (name[1] == 'm' &&
8610 if (name[2] == 'n' &&
8621 if (name[2] == 's' &&
8636 if (name[1] == 's' &&
8642 if (name[4] == 't' &&
8651 if (name[4] == 'e' &&
8660 if (name[4] == 'c' &&
8669 if (name[4] == 'n' &&
8685 if (name[1] == 'r' &&
8703 if (name[3] == 'a' &&
8713 if (name[3] == 'u' &&
8727 if (name[2] == 'n' &&
8745 if (name[2] == 'a' &&
8759 if (name[3] == 'e' &&
8772 if (name[4] == 't' &&
8781 if (name[4] == 'e' &&
8803 if (name[4] == 't' &&
8812 if (name[4] == 'e' &&
8828 if (name[2] == 'c' &&
8839 if (name[2] == 'l' &&
8850 if (name[2] == 'b' &&
8861 if (name[2] == 's' &&
8884 if (name[4] == 's' &&
8893 if (name[4] == 'n' &&
8906 if (name[3] == 'a' &&
8923 if (name[1] == 'a' &&
8938 case 7: /* 29 tokens of length 7 */
8942 if (name[1] == 'E' &&
8955 if (name[1] == '_' &&
8968 if (name[1] == 'i' &&
8975 return -KEY_binmode;
8981 if (name[1] == 'o' &&
8988 return -KEY_connect;
8997 if (name[2] == 'm' &&
9003 return -KEY_dbmopen;
9014 if (name[4] == 'u' &&
9018 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9024 if (name[4] == 'n' &&
9045 if (name[1] == 'o' &&
9058 if (name[1] == 'e' &&
9065 if (name[5] == 'r' &&
9068 return -KEY_getpgrp;
9074 if (name[5] == 'i' &&
9077 return -KEY_getppid;
9090 if (name[1] == 'c' &&
9097 return -KEY_lcfirst;
9103 if (name[1] == 'p' &&
9110 return -KEY_opendir;
9116 if (name[1] == 'a' &&
9134 if (name[3] == 'd' &&
9139 return -KEY_readdir;
9145 if (name[3] == 'u' &&
9156 if (name[3] == 'e' &&
9161 return -KEY_reverse;
9180 if (name[3] == 'k' &&
9185 return -KEY_seekdir;
9191 if (name[3] == 'p' &&
9196 return -KEY_setpgrp;
9206 if (name[2] == 'm' &&
9212 return -KEY_shmread;
9218 if (name[2] == 'r' &&
9224 return -KEY_sprintf;
9233 if (name[3] == 'l' &&
9238 return -KEY_symlink;
9247 if (name[4] == 'a' &&
9251 return -KEY_syscall;
9257 if (name[4] == 'p' &&
9261 return -KEY_sysopen;
9267 if (name[4] == 'e' &&
9271 return -KEY_sysread;
9277 if (name[4] == 'e' &&
9281 return -KEY_sysseek;
9299 if (name[1] == 'e' &&
9306 return -KEY_telldir;
9315 if (name[2] == 'f' &&
9321 return -KEY_ucfirst;
9327 if (name[2] == 's' &&
9333 return -KEY_unshift;
9343 if (name[1] == 'a' &&
9350 return -KEY_waitpid;
9359 case 8: /* 26 tokens of length 8 */
9363 if (name[1] == 'U' &&
9371 return KEY_AUTOLOAD;
9382 if (name[3] == 'A' &&
9388 return KEY___DATA__;
9394 if (name[3] == 'I' &&
9400 return -KEY___FILE__;
9406 if (name[3] == 'I' &&
9412 return -KEY___LINE__;
9428 if (name[2] == 'o' &&
9435 return -KEY_closedir;
9441 if (name[2] == 'n' &&
9448 return -KEY_continue;
9458 if (name[1] == 'b' &&
9466 return -KEY_dbmclose;
9472 if (name[1] == 'n' &&
9478 if (name[4] == 'r' &&
9483 return -KEY_endgrent;
9489 if (name[4] == 'w' &&
9494 return -KEY_endpwent;
9507 if (name[1] == 'o' &&
9515 return -KEY_formline;
9521 if (name[1] == 'e' &&
9532 if (name[6] == 'n' &&
9535 return -KEY_getgrent;
9541 if (name[6] == 'i' &&
9544 return -KEY_getgrgid;
9550 if (name[6] == 'a' &&
9553 return -KEY_getgrnam;
9566 if (name[4] == 'o' &&
9571 return -KEY_getlogin;
9582 if (name[6] == 'n' &&
9585 return -KEY_getpwent;
9591 if (name[6] == 'a' &&
9594 return -KEY_getpwnam;
9600 if (name[6] == 'i' &&
9603 return -KEY_getpwuid;
9623 if (name[1] == 'e' &&
9630 if (name[5] == 'i' &&
9637 return -KEY_readline;
9642 return -KEY_readlink;
9653 if (name[5] == 'i' &&
9657 return -KEY_readpipe;
9678 if (name[4] == 'r' &&
9683 return -KEY_setgrent;
9689 if (name[4] == 'w' &&
9694 return -KEY_setpwent;
9710 if (name[3] == 'w' &&
9716 return -KEY_shmwrite;
9722 if (name[3] == 't' &&
9728 return -KEY_shutdown;
9738 if (name[2] == 's' &&
9745 return -KEY_syswrite;
9755 if (name[1] == 'r' &&
9763 return -KEY_truncate;
9772 case 9: /* 9 tokens of length 9 */
9776 if (name[1] == 'N' &&
9785 return KEY_UNITCHECK;
9791 if (name[1] == 'n' &&
9800 return -KEY_endnetent;
9806 if (name[1] == 'e' &&
9815 return -KEY_getnetent;
9821 if (name[1] == 'o' &&
9830 return -KEY_localtime;
9836 if (name[1] == 'r' &&
9845 return KEY_prototype;
9851 if (name[1] == 'u' &&
9860 return -KEY_quotemeta;
9866 if (name[1] == 'e' &&
9875 return -KEY_rewinddir;
9881 if (name[1] == 'e' &&
9890 return -KEY_setnetent;
9896 if (name[1] == 'a' &&
9905 return -KEY_wantarray;
9914 case 10: /* 9 tokens of length 10 */
9918 if (name[1] == 'n' &&
9924 if (name[4] == 'o' &&
9931 return -KEY_endhostent;
9937 if (name[4] == 'e' &&
9944 return -KEY_endservent;
9957 if (name[1] == 'e' &&
9963 if (name[4] == 'o' &&
9970 return -KEY_gethostent;
9979 if (name[5] == 'r' &&
9985 return -KEY_getservent;
9991 if (name[5] == 'c' &&
9997 return -KEY_getsockopt;
10017 if (name[2] == 't')
10022 if (name[4] == 'o' &&
10029 return -KEY_sethostent;
10038 if (name[5] == 'r' &&
10044 return -KEY_setservent;
10050 if (name[5] == 'c' &&
10056 return -KEY_setsockopt;
10073 if (name[2] == 'c' &&
10082 return -KEY_socketpair;
10095 case 11: /* 8 tokens of length 11 */
10099 if (name[1] == '_' &&
10109 { /* __PACKAGE__ */
10110 return -KEY___PACKAGE__;
10116 if (name[1] == 'n' &&
10126 { /* endprotoent */
10127 return -KEY_endprotoent;
10133 if (name[1] == 'e' &&
10142 if (name[5] == 'e' &&
10148 { /* getpeername */
10149 return -KEY_getpeername;
10158 if (name[6] == 'o' &&
10163 { /* getpriority */
10164 return -KEY_getpriority;
10170 if (name[6] == 't' &&
10175 { /* getprotoent */
10176 return -KEY_getprotoent;
10190 if (name[4] == 'o' &&
10197 { /* getsockname */
10198 return -KEY_getsockname;
10211 if (name[1] == 'e' &&
10219 if (name[6] == 'o' &&
10224 { /* setpriority */
10225 return -KEY_setpriority;
10231 if (name[6] == 't' &&
10236 { /* setprotoent */
10237 return -KEY_setprotoent;
10253 case 12: /* 2 tokens of length 12 */
10254 if (name[0] == 'g' &&
10266 if (name[9] == 'd' &&
10269 { /* getnetbyaddr */
10270 return -KEY_getnetbyaddr;
10276 if (name[9] == 'a' &&
10279 { /* getnetbyname */
10280 return -KEY_getnetbyname;
10292 case 13: /* 4 tokens of length 13 */
10293 if (name[0] == 'g' &&
10300 if (name[4] == 'o' &&
10309 if (name[10] == 'd' &&
10312 { /* gethostbyaddr */
10313 return -KEY_gethostbyaddr;
10319 if (name[10] == 'a' &&
10322 { /* gethostbyname */
10323 return -KEY_gethostbyname;
10336 if (name[4] == 'e' &&
10345 if (name[10] == 'a' &&
10348 { /* getservbyname */
10349 return -KEY_getservbyname;
10355 if (name[10] == 'o' &&
10358 { /* getservbyport */
10359 return -KEY_getservbyport;
10378 case 14: /* 1 tokens of length 14 */
10379 if (name[0] == 'g' &&
10393 { /* getprotobyname */
10394 return -KEY_getprotobyname;
10399 case 16: /* 1 tokens of length 16 */
10400 if (name[0] == 'g' &&
10416 { /* getprotobynumber */
10417 return -KEY_getprotobynumber;
10431 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10435 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10436 if (ckWARN(WARN_SYNTAX)) {
10439 for (w = s+2; *w && level; w++) {
10442 else if (*w == ')')
10445 while (isSPACE(*w))
10447 /* the list of chars below is for end of statements or
10448 * block / parens, boolean operators (&&, ||, //) and branch
10449 * constructs (or, and, if, until, unless, while, err, for).
10450 * Not a very solid hack... */
10451 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10452 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10453 "%s (...) interpreted as function",name);
10456 while (s < PL_bufend && isSPACE(*s))
10460 while (s < PL_bufend && isSPACE(*s))
10462 if (isIDFIRST_lazy_if(s,UTF)) {
10463 const char * const w = s++;
10464 while (isALNUM_lazy_if(s,UTF))
10466 while (s < PL_bufend && isSPACE(*s))
10470 if (keyword(w, s - w, 0))
10473 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10474 if (gv && GvCVu(gv))
10476 Perl_croak(aTHX_ "No comma allowed after %s", what);
10481 /* Either returns sv, or mortalizes sv and returns a new SV*.
10482 Best used as sv=new_constant(..., sv, ...).
10483 If s, pv are NULL, calls subroutine with one argument,
10484 and type is used with error messages only. */
10487 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10491 HV * const table = GvHV(PL_hintgv); /* ^H */
10495 const char *why1 = "", *why2 = "", *why3 = "";
10497 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10500 why2 = (const char *)
10501 (strEQ(key,"charnames")
10502 ? "(possibly a missing \"use charnames ...\")"
10504 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10505 (type ? type: "undef"), why2);
10507 /* This is convoluted and evil ("goto considered harmful")
10508 * but I do not understand the intricacies of all the different
10509 * failure modes of %^H in here. The goal here is to make
10510 * the most probable error message user-friendly. --jhi */
10515 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10516 (type ? type: "undef"), why1, why2, why3);
10518 yyerror(SvPVX_const(msg));
10522 cvp = hv_fetch(table, key, strlen(key), FALSE);
10523 if (!cvp || !SvOK(*cvp)) {
10526 why3 = "} is not defined";
10529 sv_2mortal(sv); /* Parent created it permanently */
10532 pv = sv_2mortal(newSVpvn(s, len));
10534 typesv = sv_2mortal(newSVpv(type, 0));
10536 typesv = &PL_sv_undef;
10538 PUSHSTACKi(PERLSI_OVERLOAD);
10550 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10554 /* Check the eval first */
10555 if (!PL_in_eval && SvTRUE(ERRSV)) {
10556 sv_catpvs(ERRSV, "Propagated");
10557 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10559 res = SvREFCNT_inc_simple(sv);
10563 SvREFCNT_inc_simple_void(res);
10572 why1 = "Call to &{$^H{";
10574 why3 = "}} did not return a defined value";
10582 /* Returns a NUL terminated string, with the length of the string written to
10586 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10589 register char *d = dest;
10590 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10593 Perl_croak(aTHX_ ident_too_long);
10594 if (isALNUM(*s)) /* UTF handled below */
10596 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10601 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10605 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10606 char *t = s + UTF8SKIP(s);
10608 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10612 Perl_croak(aTHX_ ident_too_long);
10613 Copy(s, d, len, char);
10626 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10629 char *bracket = NULL;
10631 register char *d = dest;
10632 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10637 while (isDIGIT(*s)) {
10639 Perl_croak(aTHX_ ident_too_long);
10646 Perl_croak(aTHX_ ident_too_long);
10647 if (isALNUM(*s)) /* UTF handled below */
10649 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10654 else if (*s == ':' && s[1] == ':') {
10658 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10659 char *t = s + UTF8SKIP(s);
10660 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10662 if (d + (t - s) > e)
10663 Perl_croak(aTHX_ ident_too_long);
10664 Copy(s, d, t - s, char);
10675 if (PL_lex_state != LEX_NORMAL)
10676 PL_lex_state = LEX_INTERPENDMAYBE;
10679 if (*s == '$' && s[1] &&
10680 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10693 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10698 if (isSPACE(s[-1])) {
10700 const char ch = *s++;
10701 if (!SPACE_OR_TAB(ch)) {
10707 if (isIDFIRST_lazy_if(d,UTF)) {
10711 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10712 end += UTF8SKIP(end);
10713 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10714 end += UTF8SKIP(end);
10716 Copy(s, d, end - s, char);
10721 while ((isALNUM(*s) || *s == ':') && d < e)
10724 Perl_croak(aTHX_ ident_too_long);
10727 while (s < send && SPACE_OR_TAB(*s))
10729 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10730 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10731 const char * const brack =
10733 ((*s == '[') ? "[...]" : "{...}");
10734 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10735 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10736 funny, dest, brack, funny, dest, brack);
10739 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10743 /* Handle extended ${^Foo} variables
10744 * 1999-02-27 mjd-perl-patch@plover.com */
10745 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10749 while (isALNUM(*s) && d < e) {
10753 Perl_croak(aTHX_ ident_too_long);
10758 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10759 PL_lex_state = LEX_INTERPEND;
10762 if (PL_lex_state == LEX_NORMAL) {
10763 if (ckWARN(WARN_AMBIGUOUS) &&
10764 (keyword(dest, d - dest, 0)
10765 || get_cvn_flags(dest, d - dest, 0)))
10769 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10770 "Ambiguous use of %c{%s} resolved to %c%s",
10771 funny, dest, funny, dest);
10776 s = bracket; /* let the parser handle it */
10780 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10781 PL_lex_state = LEX_INTERPEND;
10786 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10788 PERL_UNUSED_CONTEXT;
10792 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10793 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10794 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10795 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10796 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10802 S_scan_pat(pTHX_ char *start, I32 type)
10806 char *s = scan_str(start,!!PL_madskills,FALSE);
10807 const char * const valid_flags =
10808 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10815 const char * const delimiter = skipspace(start);
10819 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10820 : "Search pattern not terminated" ));
10823 pm = (PMOP*)newPMOP(type, 0);
10824 if (PL_multi_open == '?')
10825 pm->op_pmflags |= PMf_ONCE;
10829 while (*s && strchr(valid_flags, *s))
10830 pmflag(&pm->op_pmflags,*s++);
10832 if (PL_madskills && modstart != s) {
10833 SV* tmptoken = newSVpvn(modstart, s - modstart);
10834 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10837 /* issue a warning if /c is specified,but /g is not */
10838 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10839 && ckWARN(WARN_REGEXP))
10841 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10842 "Use of /c modifier is meaningless without /g" );
10845 pm->op_pmpermflags = pm->op_pmflags;
10847 PL_lex_op = (OP*)pm;
10848 yylval.ival = OP_MATCH;
10853 S_scan_subst(pTHX_ char *start)
10864 yylval.ival = OP_NULL;
10866 s = scan_str(start,!!PL_madskills,FALSE);
10869 Perl_croak(aTHX_ "Substitution pattern not terminated");
10871 if (s[-1] == PL_multi_open)
10874 if (PL_madskills) {
10875 CURMAD('q', PL_thisopen);
10876 CURMAD('_', PL_thiswhite);
10877 CURMAD('E', PL_thisstuff);
10878 CURMAD('Q', PL_thisclose);
10879 PL_realtokenstart = s - SvPVX(PL_linestr);
10883 first_start = PL_multi_start;
10884 s = scan_str(s,!!PL_madskills,FALSE);
10886 if (PL_lex_stuff) {
10887 SvREFCNT_dec(PL_lex_stuff);
10888 PL_lex_stuff = NULL;
10890 Perl_croak(aTHX_ "Substitution replacement not terminated");
10892 PL_multi_start = first_start; /* so whole substitution is taken together */
10894 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10897 if (PL_madskills) {
10898 CURMAD('z', PL_thisopen);
10899 CURMAD('R', PL_thisstuff);
10900 CURMAD('Z', PL_thisclose);
10906 if (*s == EXEC_PAT_MOD) {
10910 else if (strchr(S_PAT_MODS, *s))
10911 pmflag(&pm->op_pmflags,*s++);
10917 if (PL_madskills) {
10919 curmad('m', newSVpvn(modstart, s - modstart));
10920 append_madprops(PL_thismad, (OP*)pm, 0);
10924 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10925 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10929 SV * const repl = newSVpvs("");
10931 PL_sublex_info.super_bufptr = s;
10932 PL_sublex_info.super_bufend = PL_bufend;
10934 pm->op_pmflags |= PMf_EVAL;
10936 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10937 sv_catpvs(repl, "{");
10938 sv_catsv(repl, PL_lex_repl);
10939 if (strchr(SvPVX(PL_lex_repl), '#'))
10940 sv_catpvs(repl, "\n");
10941 sv_catpvs(repl, "}");
10943 SvREFCNT_dec(PL_lex_repl);
10944 PL_lex_repl = repl;
10947 pm->op_pmpermflags = pm->op_pmflags;
10948 PL_lex_op = (OP*)pm;
10949 yylval.ival = OP_SUBST;
10954 S_scan_trans(pTHX_ char *start)
10967 yylval.ival = OP_NULL;
10969 s = scan_str(start,!!PL_madskills,FALSE);
10971 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10973 if (s[-1] == PL_multi_open)
10976 if (PL_madskills) {
10977 CURMAD('q', PL_thisopen);
10978 CURMAD('_', PL_thiswhite);
10979 CURMAD('E', PL_thisstuff);
10980 CURMAD('Q', PL_thisclose);
10981 PL_realtokenstart = s - SvPVX(PL_linestr);
10985 s = scan_str(s,!!PL_madskills,FALSE);
10987 if (PL_lex_stuff) {
10988 SvREFCNT_dec(PL_lex_stuff);
10989 PL_lex_stuff = NULL;
10991 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10993 if (PL_madskills) {
10994 CURMAD('z', PL_thisopen);
10995 CURMAD('R', PL_thisstuff);
10996 CURMAD('Z', PL_thisclose);
10999 complement = del = squash = 0;
11006 complement = OPpTRANS_COMPLEMENT;
11009 del = OPpTRANS_DELETE;
11012 squash = OPpTRANS_SQUASH;
11021 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11022 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11023 o->op_private &= ~OPpTRANS_ALL;
11024 o->op_private |= del|squash|complement|
11025 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11026 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11029 yylval.ival = OP_TRANS;
11032 if (PL_madskills) {
11034 curmad('m', newSVpvn(modstart, s - modstart));
11035 append_madprops(PL_thismad, o, 0);
11044 S_scan_heredoc(pTHX_ register char *s)
11048 I32 op_type = OP_SCALAR;
11052 const char *found_newline;
11056 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11058 I32 stuffstart = s - SvPVX(PL_linestr);
11061 PL_realtokenstart = -1;
11066 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11070 while (SPACE_OR_TAB(*peek))
11072 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11075 s = delimcpy(d, e, s, PL_bufend, term, &len);
11085 if (!isALNUM_lazy_if(s,UTF))
11086 deprecate_old("bare << to mean <<\"\"");
11087 for (; isALNUM_lazy_if(s,UTF); s++) {
11092 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11093 Perl_croak(aTHX_ "Delimiter for here document is too long");
11096 len = d - PL_tokenbuf;
11099 if (PL_madskills) {
11100 tstart = PL_tokenbuf + !outer;
11101 PL_thisclose = newSVpvn(tstart, len - !outer);
11102 tstart = SvPVX(PL_linestr) + stuffstart;
11103 PL_thisopen = newSVpvn(tstart, s - tstart);
11104 stuffstart = s - SvPVX(PL_linestr);
11107 #ifndef PERL_STRICT_CR
11108 d = strchr(s, '\r');
11110 char * const olds = s;
11112 while (s < PL_bufend) {
11118 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11127 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11134 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11135 herewas = newSVpvn(s,PL_bufend-s);
11139 herewas = newSVpvn(s-1,found_newline-s+1);
11142 herewas = newSVpvn(s,found_newline-s);
11146 if (PL_madskills) {
11147 tstart = SvPVX(PL_linestr) + stuffstart;
11149 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11151 PL_thisstuff = newSVpvn(tstart, s - tstart);
11154 s += SvCUR(herewas);
11157 stuffstart = s - SvPVX(PL_linestr);
11163 tmpstr = newSV_type(SVt_PVIV);
11164 SvGROW(tmpstr, 80);
11165 if (term == '\'') {
11166 op_type = OP_CONST;
11167 SvIV_set(tmpstr, -1);
11169 else if (term == '`') {
11170 op_type = OP_BACKTICK;
11171 SvIV_set(tmpstr, '\\');
11175 PL_multi_start = CopLINE(PL_curcop);
11176 PL_multi_open = PL_multi_close = '<';
11177 term = *PL_tokenbuf;
11178 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11179 char * const bufptr = PL_sublex_info.super_bufptr;
11180 char * const bufend = PL_sublex_info.super_bufend;
11181 char * const olds = s - SvCUR(herewas);
11182 s = strchr(bufptr, '\n');
11186 while (s < bufend &&
11187 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11189 CopLINE_inc(PL_curcop);
11192 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11193 missingterm(PL_tokenbuf);
11195 sv_setpvn(herewas,bufptr,d-bufptr+1);
11196 sv_setpvn(tmpstr,d+1,s-d);
11198 sv_catpvn(herewas,s,bufend-s);
11199 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11206 while (s < PL_bufend &&
11207 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11209 CopLINE_inc(PL_curcop);
11211 if (s >= PL_bufend) {
11212 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11213 missingterm(PL_tokenbuf);
11215 sv_setpvn(tmpstr,d+1,s-d);
11217 if (PL_madskills) {
11219 sv_catpvn(PL_thisstuff, d + 1, s - d);
11221 PL_thisstuff = newSVpvn(d + 1, s - d);
11222 stuffstart = s - SvPVX(PL_linestr);
11226 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11228 sv_catpvn(herewas,s,PL_bufend-s);
11229 sv_setsv(PL_linestr,herewas);
11230 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11231 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11232 PL_last_lop = PL_last_uni = NULL;
11235 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11236 while (s >= PL_bufend) { /* multiple line string? */
11238 if (PL_madskills) {
11239 tstart = SvPVX(PL_linestr) + stuffstart;
11241 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11243 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11247 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11248 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11249 missingterm(PL_tokenbuf);
11252 stuffstart = s - SvPVX(PL_linestr);
11254 CopLINE_inc(PL_curcop);
11255 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11256 PL_last_lop = PL_last_uni = NULL;
11257 #ifndef PERL_STRICT_CR
11258 if (PL_bufend - PL_linestart >= 2) {
11259 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11260 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11262 PL_bufend[-2] = '\n';
11264 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11266 else if (PL_bufend[-1] == '\r')
11267 PL_bufend[-1] = '\n';
11269 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11270 PL_bufend[-1] = '\n';
11272 if (PERLDB_LINE && PL_curstash != PL_debstash)
11273 update_debugger_info(PL_linestr, NULL, 0);
11274 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11275 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11276 *(SvPVX(PL_linestr) + off ) = ' ';
11277 sv_catsv(PL_linestr,herewas);
11278 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11279 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11283 sv_catsv(tmpstr,PL_linestr);
11288 PL_multi_end = CopLINE(PL_curcop);
11289 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11290 SvPV_shrink_to_cur(tmpstr);
11292 SvREFCNT_dec(herewas);
11294 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11296 else if (PL_encoding)
11297 sv_recode_to_utf8(tmpstr, PL_encoding);
11299 PL_lex_stuff = tmpstr;
11300 yylval.ival = op_type;
11304 /* scan_inputsymbol
11305 takes: current position in input buffer
11306 returns: new position in input buffer
11307 side-effects: yylval and lex_op are set.
11312 <FH> read from filehandle
11313 <pkg::FH> read from package qualified filehandle
11314 <pkg'FH> read from package qualified filehandle
11315 <$fh> read from filehandle in $fh
11316 <*.h> filename glob
11321 S_scan_inputsymbol(pTHX_ char *start)
11324 register char *s = start; /* current position in buffer */
11328 char *d = PL_tokenbuf; /* start of temp holding space */
11329 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11331 end = strchr(s, '\n');
11334 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11336 /* die if we didn't have space for the contents of the <>,
11337 or if it didn't end, or if we see a newline
11340 if (len >= (I32)sizeof PL_tokenbuf)
11341 Perl_croak(aTHX_ "Excessively long <> operator");
11343 Perl_croak(aTHX_ "Unterminated <> operator");
11348 Remember, only scalar variables are interpreted as filehandles by
11349 this code. Anything more complex (e.g., <$fh{$num}>) will be
11350 treated as a glob() call.
11351 This code makes use of the fact that except for the $ at the front,
11352 a scalar variable and a filehandle look the same.
11354 if (*d == '$' && d[1]) d++;
11356 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11357 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11360 /* If we've tried to read what we allow filehandles to look like, and
11361 there's still text left, then it must be a glob() and not a getline.
11362 Use scan_str to pull out the stuff between the <> and treat it
11363 as nothing more than a string.
11366 if (d - PL_tokenbuf != len) {
11367 yylval.ival = OP_GLOB;
11369 s = scan_str(start,!!PL_madskills,FALSE);
11371 Perl_croak(aTHX_ "Glob not terminated");
11375 bool readline_overriden = FALSE;
11378 /* we're in a filehandle read situation */
11381 /* turn <> into <ARGV> */
11383 Copy("ARGV",d,5,char);
11385 /* Check whether readline() is overriden */
11386 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11388 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11390 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11391 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11392 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11393 readline_overriden = TRUE;
11395 /* if <$fh>, create the ops to turn the variable into a
11399 /* try to find it in the pad for this block, otherwise find
11400 add symbol table ops
11402 const PADOFFSET tmp = pad_findmy(d);
11403 if (tmp != NOT_IN_PAD) {
11404 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11405 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11406 HEK * const stashname = HvNAME_HEK(stash);
11407 SV * const sym = sv_2mortal(newSVhek(stashname));
11408 sv_catpvs(sym, "::");
11409 sv_catpv(sym, d+1);
11414 OP * const o = newOP(OP_PADSV, 0);
11416 PL_lex_op = readline_overriden
11417 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11418 append_elem(OP_LIST, o,
11419 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11420 : (OP*)newUNOP(OP_READLINE, 0, o);
11429 ? (GV_ADDMULTI | GV_ADDINEVAL)
11432 PL_lex_op = readline_overriden
11433 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11434 append_elem(OP_LIST,
11435 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11436 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11437 : (OP*)newUNOP(OP_READLINE, 0,
11438 newUNOP(OP_RV2SV, 0,
11439 newGVOP(OP_GV, 0, gv)));
11441 if (!readline_overriden)
11442 PL_lex_op->op_flags |= OPf_SPECIAL;
11443 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11444 yylval.ival = OP_NULL;
11447 /* If it's none of the above, it must be a literal filehandle
11448 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11450 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11451 PL_lex_op = readline_overriden
11452 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11453 append_elem(OP_LIST,
11454 newGVOP(OP_GV, 0, gv),
11455 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11456 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11457 yylval.ival = OP_NULL;
11466 takes: start position in buffer
11467 keep_quoted preserve \ on the embedded delimiter(s)
11468 keep_delims preserve the delimiters around the string
11469 returns: position to continue reading from buffer
11470 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11471 updates the read buffer.
11473 This subroutine pulls a string out of the input. It is called for:
11474 q single quotes q(literal text)
11475 ' single quotes 'literal text'
11476 qq double quotes qq(interpolate $here please)
11477 " double quotes "interpolate $here please"
11478 qx backticks qx(/bin/ls -l)
11479 ` backticks `/bin/ls -l`
11480 qw quote words @EXPORT_OK = qw( func() $spam )
11481 m// regexp match m/this/
11482 s/// regexp substitute s/this/that/
11483 tr/// string transliterate tr/this/that/
11484 y/// string transliterate y/this/that/
11485 ($*@) sub prototypes sub foo ($)
11486 (stuff) sub attr parameters sub foo : attr(stuff)
11487 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11489 In most of these cases (all but <>, patterns and transliterate)
11490 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11491 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11492 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11495 It skips whitespace before the string starts, and treats the first
11496 character as the delimiter. If the delimiter is one of ([{< then
11497 the corresponding "close" character )]}> is used as the closing
11498 delimiter. It allows quoting of delimiters, and if the string has
11499 balanced delimiters ([{<>}]) it allows nesting.
11501 On success, the SV with the resulting string is put into lex_stuff or,
11502 if that is already non-NULL, into lex_repl. The second case occurs only
11503 when parsing the RHS of the special constructs s/// and tr/// (y///).
11504 For convenience, the terminating delimiter character is stuffed into
11509 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11512 SV *sv; /* scalar value: string */
11513 const char *tmps; /* temp string, used for delimiter matching */
11514 register char *s = start; /* current position in the buffer */
11515 register char term; /* terminating character */
11516 register char *to; /* current position in the sv's data */
11517 I32 brackets = 1; /* bracket nesting level */
11518 bool has_utf8 = FALSE; /* is there any utf8 content? */
11519 I32 termcode; /* terminating char. code */
11520 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11521 STRLEN termlen; /* length of terminating string */
11522 int last_off = 0; /* last position for nesting bracket */
11528 /* skip space before the delimiter */
11534 if (PL_realtokenstart >= 0) {
11535 stuffstart = PL_realtokenstart;
11536 PL_realtokenstart = -1;
11539 stuffstart = start - SvPVX(PL_linestr);
11541 /* mark where we are, in case we need to report errors */
11544 /* after skipping whitespace, the next character is the terminator */
11547 termcode = termstr[0] = term;
11551 termcode = utf8_to_uvchr((U8*)s, &termlen);
11552 Copy(s, termstr, termlen, U8);
11553 if (!UTF8_IS_INVARIANT(term))
11557 /* mark where we are */
11558 PL_multi_start = CopLINE(PL_curcop);
11559 PL_multi_open = term;
11561 /* find corresponding closing delimiter */
11562 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11563 termcode = termstr[0] = term = tmps[5];
11565 PL_multi_close = term;
11567 /* create a new SV to hold the contents. 79 is the SV's initial length.
11568 What a random number. */
11569 sv = newSV_type(SVt_PVIV);
11571 SvIV_set(sv, termcode);
11572 (void)SvPOK_only(sv); /* validate pointer */
11574 /* move past delimiter and try to read a complete string */
11576 sv_catpvn(sv, s, termlen);
11579 tstart = SvPVX(PL_linestr) + stuffstart;
11580 if (!PL_thisopen && !keep_delims) {
11581 PL_thisopen = newSVpvn(tstart, s - tstart);
11582 stuffstart = s - SvPVX(PL_linestr);
11586 if (PL_encoding && !UTF) {
11590 int offset = s - SvPVX_const(PL_linestr);
11591 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11592 &offset, (char*)termstr, termlen);
11593 const char * const ns = SvPVX_const(PL_linestr) + offset;
11594 char * const svlast = SvEND(sv) - 1;
11596 for (; s < ns; s++) {
11597 if (*s == '\n' && !PL_rsfp)
11598 CopLINE_inc(PL_curcop);
11601 goto read_more_line;
11603 /* handle quoted delimiters */
11604 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11606 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11608 if ((svlast-1 - t) % 2) {
11609 if (!keep_quoted) {
11610 *(svlast-1) = term;
11612 SvCUR_set(sv, SvCUR(sv) - 1);
11617 if (PL_multi_open == PL_multi_close) {
11623 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11624 /* At here, all closes are "was quoted" one,
11625 so we don't check PL_multi_close. */
11627 if (!keep_quoted && *(t+1) == PL_multi_open)
11632 else if (*t == PL_multi_open)
11640 SvCUR_set(sv, w - SvPVX_const(sv));
11642 last_off = w - SvPVX(sv);
11643 if (--brackets <= 0)
11648 if (!keep_delims) {
11649 SvCUR_set(sv, SvCUR(sv) - 1);
11655 /* extend sv if need be */
11656 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11657 /* set 'to' to the next character in the sv's string */
11658 to = SvPVX(sv)+SvCUR(sv);
11660 /* if open delimiter is the close delimiter read unbridle */
11661 if (PL_multi_open == PL_multi_close) {
11662 for (; s < PL_bufend; s++,to++) {
11663 /* embedded newlines increment the current line number */
11664 if (*s == '\n' && !PL_rsfp)
11665 CopLINE_inc(PL_curcop);
11666 /* handle quoted delimiters */
11667 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11668 if (!keep_quoted && s[1] == term)
11670 /* any other quotes are simply copied straight through */
11674 /* terminate when run out of buffer (the for() condition), or
11675 have found the terminator */
11676 else if (*s == term) {
11679 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11682 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11688 /* if the terminator isn't the same as the start character (e.g.,
11689 matched brackets), we have to allow more in the quoting, and
11690 be prepared for nested brackets.
11693 /* read until we run out of string, or we find the terminator */
11694 for (; s < PL_bufend; s++,to++) {
11695 /* embedded newlines increment the line count */
11696 if (*s == '\n' && !PL_rsfp)
11697 CopLINE_inc(PL_curcop);
11698 /* backslashes can escape the open or closing characters */
11699 if (*s == '\\' && s+1 < PL_bufend) {
11700 if (!keep_quoted &&
11701 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11706 /* allow nested opens and closes */
11707 else if (*s == PL_multi_close && --brackets <= 0)
11709 else if (*s == PL_multi_open)
11711 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11716 /* terminate the copied string and update the sv's end-of-string */
11718 SvCUR_set(sv, to - SvPVX_const(sv));
11721 * this next chunk reads more into the buffer if we're not done yet
11725 break; /* handle case where we are done yet :-) */
11727 #ifndef PERL_STRICT_CR
11728 if (to - SvPVX_const(sv) >= 2) {
11729 if ((to[-2] == '\r' && to[-1] == '\n') ||
11730 (to[-2] == '\n' && to[-1] == '\r'))
11734 SvCUR_set(sv, to - SvPVX_const(sv));
11736 else if (to[-1] == '\r')
11739 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11744 /* if we're out of file, or a read fails, bail and reset the current
11745 line marker so we can report where the unterminated string began
11748 if (PL_madskills) {
11749 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11751 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11753 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11757 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11759 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11765 /* we read a line, so increment our line counter */
11766 CopLINE_inc(PL_curcop);
11768 /* update debugger info */
11769 if (PERLDB_LINE && PL_curstash != PL_debstash)
11770 update_debugger_info(PL_linestr, NULL, 0);
11772 /* having changed the buffer, we must update PL_bufend */
11773 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11774 PL_last_lop = PL_last_uni = NULL;
11777 /* at this point, we have successfully read the delimited string */
11779 if (!PL_encoding || UTF) {
11781 if (PL_madskills) {
11782 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11783 const int len = s - tstart;
11785 sv_catpvn(PL_thisstuff, tstart, len);
11787 PL_thisstuff = newSVpvn(tstart, len);
11788 if (!PL_thisclose && !keep_delims)
11789 PL_thisclose = newSVpvn(s,termlen);
11794 sv_catpvn(sv, s, termlen);
11799 if (PL_madskills) {
11800 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11801 const int len = s - tstart - termlen;
11803 sv_catpvn(PL_thisstuff, tstart, len);
11805 PL_thisstuff = newSVpvn(tstart, len);
11806 if (!PL_thisclose && !keep_delims)
11807 PL_thisclose = newSVpvn(s - termlen,termlen);
11811 if (has_utf8 || PL_encoding)
11814 PL_multi_end = CopLINE(PL_curcop);
11816 /* if we allocated too much space, give some back */
11817 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11818 SvLEN_set(sv, SvCUR(sv) + 1);
11819 SvPV_renew(sv, SvLEN(sv));
11822 /* decide whether this is the first or second quoted string we've read
11835 takes: pointer to position in buffer
11836 returns: pointer to new position in buffer
11837 side-effects: builds ops for the constant in yylval.op
11839 Read a number in any of the formats that Perl accepts:
11841 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11842 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11845 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11847 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11850 If it reads a number without a decimal point or an exponent, it will
11851 try converting the number to an integer and see if it can do so
11852 without loss of precision.
11856 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11859 register const char *s = start; /* current position in buffer */
11860 register char *d; /* destination in temp buffer */
11861 register char *e; /* end of temp buffer */
11862 NV nv; /* number read, as a double */
11863 SV *sv = NULL; /* place to put the converted number */
11864 bool floatit; /* boolean: int or float? */
11865 const char *lastub = NULL; /* position of last underbar */
11866 static char const number_too_long[] = "Number too long";
11868 /* We use the first character to decide what type of number this is */
11872 Perl_croak(aTHX_ "panic: scan_num");
11874 /* if it starts with a 0, it could be an octal number, a decimal in
11875 0.13 disguise, or a hexadecimal number, or a binary number. */
11879 u holds the "number so far"
11880 shift the power of 2 of the base
11881 (hex == 4, octal == 3, binary == 1)
11882 overflowed was the number more than we can hold?
11884 Shift is used when we add a digit. It also serves as an "are
11885 we in octal/hex/binary?" indicator to disallow hex characters
11886 when in octal mode.
11891 bool overflowed = FALSE;
11892 bool just_zero = TRUE; /* just plain 0 or binary number? */
11893 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11894 static const char* const bases[5] =
11895 { "", "binary", "", "octal", "hexadecimal" };
11896 static const char* const Bases[5] =
11897 { "", "Binary", "", "Octal", "Hexadecimal" };
11898 static const char* const maxima[5] =
11900 "0b11111111111111111111111111111111",
11904 const char *base, *Base, *max;
11906 /* check for hex */
11911 } else if (s[1] == 'b') {
11916 /* check for a decimal in disguise */
11917 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11919 /* so it must be octal */
11926 if (ckWARN(WARN_SYNTAX))
11927 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11928 "Misplaced _ in number");
11932 base = bases[shift];
11933 Base = Bases[shift];
11934 max = maxima[shift];
11936 /* read the rest of the number */
11938 /* x is used in the overflow test,
11939 b is the digit we're adding on. */
11944 /* if we don't mention it, we're done */
11948 /* _ are ignored -- but warned about if consecutive */
11950 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11951 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11952 "Misplaced _ in number");
11956 /* 8 and 9 are not octal */
11957 case '8': case '9':
11959 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11963 case '2': case '3': case '4':
11964 case '5': case '6': case '7':
11966 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11969 case '0': case '1':
11970 b = *s++ & 15; /* ASCII digit -> value of digit */
11974 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11975 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11976 /* make sure they said 0x */
11979 b = (*s++ & 7) + 9;
11981 /* Prepare to put the digit we have onto the end
11982 of the number so far. We check for overflows.
11988 x = u << shift; /* make room for the digit */
11990 if ((x >> shift) != u
11991 && !(PL_hints & HINT_NEW_BINARY)) {
11994 if (ckWARN_d(WARN_OVERFLOW))
11995 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11996 "Integer overflow in %s number",
11999 u = x | b; /* add the digit to the end */
12002 n *= nvshift[shift];
12003 /* If an NV has not enough bits in its
12004 * mantissa to represent an UV this summing of
12005 * small low-order numbers is a waste of time
12006 * (because the NV cannot preserve the
12007 * low-order bits anyway): we could just
12008 * remember when did we overflow and in the
12009 * end just multiply n by the right
12017 /* if we get here, we had success: make a scalar value from
12022 /* final misplaced underbar check */
12023 if (s[-1] == '_') {
12024 if (ckWARN(WARN_SYNTAX))
12025 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12030 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12031 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12032 "%s number > %s non-portable",
12038 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12039 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12040 "%s number > %s non-portable",
12045 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12046 sv = new_constant(start, s - start, "integer",
12048 else if (PL_hints & HINT_NEW_BINARY)
12049 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12054 handle decimal numbers.
12055 we're also sent here when we read a 0 as the first digit
12057 case '1': case '2': case '3': case '4': case '5':
12058 case '6': case '7': case '8': case '9': case '.':
12061 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12064 /* read next group of digits and _ and copy into d */
12065 while (isDIGIT(*s) || *s == '_') {
12066 /* skip underscores, checking for misplaced ones
12070 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12071 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12072 "Misplaced _ in number");
12076 /* check for end of fixed-length buffer */
12078 Perl_croak(aTHX_ number_too_long);
12079 /* if we're ok, copy the character */
12084 /* final misplaced underbar check */
12085 if (lastub && s == lastub + 1) {
12086 if (ckWARN(WARN_SYNTAX))
12087 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12090 /* read a decimal portion if there is one. avoid
12091 3..5 being interpreted as the number 3. followed
12094 if (*s == '.' && s[1] != '.') {
12099 if (ckWARN(WARN_SYNTAX))
12100 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12101 "Misplaced _ in number");
12105 /* copy, ignoring underbars, until we run out of digits.
12107 for (; isDIGIT(*s) || *s == '_'; s++) {
12108 /* fixed length buffer check */
12110 Perl_croak(aTHX_ number_too_long);
12112 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12113 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12114 "Misplaced _ in number");
12120 /* fractional part ending in underbar? */
12121 if (s[-1] == '_') {
12122 if (ckWARN(WARN_SYNTAX))
12123 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12124 "Misplaced _ in number");
12126 if (*s == '.' && isDIGIT(s[1])) {
12127 /* oops, it's really a v-string, but without the "v" */
12133 /* read exponent part, if present */
12134 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12138 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12139 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12141 /* stray preinitial _ */
12143 if (ckWARN(WARN_SYNTAX))
12144 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12145 "Misplaced _ in number");
12149 /* allow positive or negative exponent */
12150 if (*s == '+' || *s == '-')
12153 /* stray initial _ */
12155 if (ckWARN(WARN_SYNTAX))
12156 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12157 "Misplaced _ in number");
12161 /* read digits of exponent */
12162 while (isDIGIT(*s) || *s == '_') {
12165 Perl_croak(aTHX_ number_too_long);
12169 if (((lastub && s == lastub + 1) ||
12170 (!isDIGIT(s[1]) && s[1] != '_'))
12171 && ckWARN(WARN_SYNTAX))
12172 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12173 "Misplaced _ in number");
12180 /* make an sv from the string */
12184 We try to do an integer conversion first if no characters
12185 indicating "float" have been found.
12190 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12192 if (flags == IS_NUMBER_IN_UV) {
12194 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12197 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12198 if (uv <= (UV) IV_MIN)
12199 sv_setiv(sv, -(IV)uv);
12206 /* terminate the string */
12208 nv = Atof(PL_tokenbuf);
12212 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12213 (PL_hints & HINT_NEW_INTEGER) )
12214 sv = new_constant(PL_tokenbuf,
12217 (floatit ? "float" : "integer"),
12221 /* if it starts with a v, it could be a v-string */
12224 sv = newSV(5); /* preallocate storage space */
12225 s = scan_vstring(s,sv);
12229 /* make the op for the constant and return */
12232 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12234 lvalp->opval = NULL;
12240 S_scan_formline(pTHX_ register char *s)
12243 register char *eol;
12245 SV * const stuff = newSVpvs("");
12246 bool needargs = FALSE;
12247 bool eofmt = FALSE;
12249 char *tokenstart = s;
12252 if (PL_madskills) {
12253 savewhite = PL_thiswhite;
12258 while (!needargs) {
12261 #ifdef PERL_STRICT_CR
12262 while (SPACE_OR_TAB(*t))
12265 while (SPACE_OR_TAB(*t) || *t == '\r')
12268 if (*t == '\n' || t == PL_bufend) {
12273 if (PL_in_eval && !PL_rsfp) {
12274 eol = (char *) memchr(s,'\n',PL_bufend-s);
12279 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12281 for (t = s; t < eol; t++) {
12282 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12284 goto enough; /* ~~ must be first line in formline */
12286 if (*t == '@' || *t == '^')
12290 sv_catpvn(stuff, s, eol-s);
12291 #ifndef PERL_STRICT_CR
12292 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12293 char *end = SvPVX(stuff) + SvCUR(stuff);
12296 SvCUR_set(stuff, SvCUR(stuff) - 1);
12306 if (PL_madskills) {
12308 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12310 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12313 s = filter_gets(PL_linestr, PL_rsfp, 0);
12315 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12317 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12319 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12320 PL_last_lop = PL_last_uni = NULL;
12329 if (SvCUR(stuff)) {
12332 PL_lex_state = LEX_NORMAL;
12333 start_force(PL_curforce);
12334 NEXTVAL_NEXTTOKE.ival = 0;
12338 PL_lex_state = LEX_FORMLINE;
12340 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12342 else if (PL_encoding)
12343 sv_recode_to_utf8(stuff, PL_encoding);
12345 start_force(PL_curforce);
12346 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12348 start_force(PL_curforce);
12349 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12353 SvREFCNT_dec(stuff);
12355 PL_lex_formbrack = 0;
12359 if (PL_madskills) {
12361 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12363 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12364 PL_thiswhite = savewhite;
12376 PL_cshlen = strlen(PL_cshname);
12378 #if defined(USE_ITHREADS)
12379 PERL_UNUSED_CONTEXT;
12385 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12388 const I32 oldsavestack_ix = PL_savestack_ix;
12389 CV* const outsidecv = PL_compcv;
12392 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12394 SAVEI32(PL_subline);
12395 save_item(PL_subname);
12396 SAVESPTR(PL_compcv);
12398 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12399 CvFLAGS(PL_compcv) |= flags;
12401 PL_subline = CopLINE(PL_curcop);
12402 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12403 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12404 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12406 return oldsavestack_ix;
12410 #pragma segment Perl_yylex
12413 Perl_yywarn(pTHX_ const char *s)
12416 PL_in_eval |= EVAL_WARNONLY;
12418 PL_in_eval &= ~EVAL_WARNONLY;
12423 Perl_yyerror(pTHX_ const char *s)
12426 const char *where = NULL;
12427 const char *context = NULL;
12430 int yychar = PL_parser->yychar;
12432 if (!yychar || (yychar == ';' && !PL_rsfp))
12434 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12435 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12436 PL_oldbufptr != PL_bufptr) {
12439 The code below is removed for NetWare because it abends/crashes on NetWare
12440 when the script has error such as not having the closing quotes like:
12441 if ($var eq "value)
12442 Checking of white spaces is anyway done in NetWare code.
12445 while (isSPACE(*PL_oldoldbufptr))
12448 context = PL_oldoldbufptr;
12449 contlen = PL_bufptr - PL_oldoldbufptr;
12451 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12452 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12455 The code below is removed for NetWare because it abends/crashes on NetWare
12456 when the script has error such as not having the closing quotes like:
12457 if ($var eq "value)
12458 Checking of white spaces is anyway done in NetWare code.
12461 while (isSPACE(*PL_oldbufptr))
12464 context = PL_oldbufptr;
12465 contlen = PL_bufptr - PL_oldbufptr;
12467 else if (yychar > 255)
12468 where = "next token ???";
12469 else if (yychar == -2) { /* YYEMPTY */
12470 if (PL_lex_state == LEX_NORMAL ||
12471 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12472 where = "at end of line";
12473 else if (PL_lex_inpat)
12474 where = "within pattern";
12476 where = "within string";
12479 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12481 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12482 else if (isPRINT_LC(yychar))
12483 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12485 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12486 where = SvPVX_const(where_sv);
12488 msg = sv_2mortal(newSVpv(s, 0));
12489 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12490 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12492 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12494 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12495 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12496 Perl_sv_catpvf(aTHX_ msg,
12497 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12498 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12501 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12502 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12505 if (PL_error_count >= 10) {
12506 if (PL_in_eval && SvCUR(ERRSV))
12507 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12508 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12510 Perl_croak(aTHX_ "%s has too many errors.\n",
12511 OutCopFILE(PL_curcop));
12514 PL_in_my_stash = NULL;
12518 #pragma segment Main
12522 S_swallow_bom(pTHX_ U8 *s)
12525 const STRLEN slen = SvCUR(PL_linestr);
12528 if (s[1] == 0xFE) {
12529 /* UTF-16 little-endian? (or UTF32-LE?) */
12530 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12531 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12532 #ifndef PERL_NO_UTF16_FILTER
12533 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12536 if (PL_bufend > (char*)s) {
12540 filter_add(utf16rev_textfilter, NULL);
12541 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12542 utf16_to_utf8_reversed(s, news,
12543 PL_bufend - (char*)s - 1,
12545 sv_setpvn(PL_linestr, (const char*)news, newlen);
12547 s = (U8*)SvPVX(PL_linestr);
12548 Copy(news, s, newlen, U8);
12552 SvUTF8_on(PL_linestr);
12553 s = (U8*)SvPVX(PL_linestr);
12555 /* FIXME - is this a general bug fix? */
12558 PL_bufend = SvPVX(PL_linestr) + newlen;
12561 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12566 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12567 #ifndef PERL_NO_UTF16_FILTER
12568 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12571 if (PL_bufend > (char *)s) {
12575 filter_add(utf16_textfilter, NULL);
12576 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12577 utf16_to_utf8(s, news,
12578 PL_bufend - (char*)s,
12580 sv_setpvn(PL_linestr, (const char*)news, newlen);
12582 SvUTF8_on(PL_linestr);
12583 s = (U8*)SvPVX(PL_linestr);
12584 PL_bufend = SvPVX(PL_linestr) + newlen;
12587 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12592 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12593 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12594 s += 3; /* UTF-8 */
12600 if (s[2] == 0xFE && s[3] == 0xFF) {
12601 /* UTF-32 big-endian */
12602 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12605 else if (s[2] == 0 && s[3] != 0) {
12608 * are a good indicator of UTF-16BE. */
12609 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12615 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12616 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12617 s += 4; /* UTF-8 */
12623 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12626 * are a good indicator of UTF-16LE. */
12627 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12636 * Restore a source filter.
12640 restore_rsfp(pTHX_ void *f)
12643 PerlIO * const fp = (PerlIO*)f;
12645 if (PL_rsfp == PerlIO_stdin())
12646 PerlIO_clearerr(PL_rsfp);
12647 else if (PL_rsfp && (PL_rsfp != fp))
12648 PerlIO_close(PL_rsfp);
12652 #ifndef PERL_NO_UTF16_FILTER
12654 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12657 const STRLEN old = SvCUR(sv);
12658 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12659 DEBUG_P(PerlIO_printf(Perl_debug_log,
12660 "utf16_textfilter(%p): %d %d (%d)\n",
12661 FPTR2DPTR(void *, utf16_textfilter),
12662 idx, maxlen, (int) count));
12666 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12667 Copy(SvPVX_const(sv), tmps, old, char);
12668 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12669 SvCUR(sv) - old, &newlen);
12670 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12672 DEBUG_P({sv_dump(sv);});
12677 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12680 const STRLEN old = SvCUR(sv);
12681 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12682 DEBUG_P(PerlIO_printf(Perl_debug_log,
12683 "utf16rev_textfilter(%p): %d %d (%d)\n",
12684 FPTR2DPTR(void *, utf16rev_textfilter),
12685 idx, maxlen, (int) count));
12689 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12690 Copy(SvPVX_const(sv), tmps, old, char);
12691 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12692 SvCUR(sv) - old, &newlen);
12693 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12695 DEBUG_P({ sv_dump(sv); });
12701 Returns a pointer to the next character after the parsed
12702 vstring, as well as updating the passed in sv.
12704 Function must be called like
12707 s = scan_vstring(s,sv);
12709 The sv should already be large enough to store the vstring
12710 passed in, for performance reasons.
12715 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
12718 const char *pos = s;
12719 const char *start = s;
12720 if (*pos == 'v') pos++; /* get past 'v' */
12721 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12723 if ( *pos != '.') {
12724 /* this may not be a v-string if followed by => */
12725 const char *next = pos;
12726 while (next < PL_bufend && isSPACE(*next))
12728 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
12729 /* return string not v-string */
12730 sv_setpvn(sv,(char *)s,pos-s);
12731 return (char *)pos;
12735 if (!isALPHA(*pos)) {
12736 U8 tmpbuf[UTF8_MAXBYTES+1];
12739 s++; /* get past 'v' */
12741 sv_setpvn(sv, "", 0);
12744 /* this is atoi() that tolerates underscores */
12747 const char *end = pos;
12749 while (--end >= s) {
12751 const UV orev = rev;
12752 rev += (*end - '0') * mult;
12754 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12755 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12756 "Integer overflow in decimal number");
12760 if (rev > 0x7FFFFFFF)
12761 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12763 /* Append native character for the rev point */
12764 tmpend = uvchr_to_utf8(tmpbuf, rev);
12765 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12766 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12768 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
12774 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12778 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12786 * c-indentation-style: bsd
12787 * c-basic-offset: 4
12788 * indent-tabs-mode: t
12791 * ex: set ts=8 sts=4 sw=4 noet: