3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 /* XXX temporary backwards compatibility */
32 #define PL_lex_brackets (PL_parser->lex_brackets)
33 #define PL_lex_brackstack (PL_parser->lex_brackstack)
34 #define PL_lex_casemods (PL_parser->lex_casemods)
35 #define PL_lex_casestack (PL_parser->lex_casestack)
36 #define PL_lex_defer (PL_parser->lex_defer)
37 #define PL_lex_dojoin (PL_parser->lex_dojoin)
38 #define PL_lex_expect (PL_parser->lex_expect)
39 #define PL_lex_formbrack (PL_parser->lex_formbrack)
40 #define PL_lex_inpat (PL_parser->lex_inpat)
41 #define PL_lex_inwhat (PL_parser->lex_inwhat)
42 #define PL_lex_op (PL_parser->lex_op)
43 #define PL_lex_repl (PL_parser->lex_repl)
44 #define PL_lex_starts (PL_parser->lex_starts)
45 #define PL_lex_stuff (PL_parser->lex_stuff)
46 #define PL_multi_start (PL_parser->multi_start)
47 #define PL_multi_open (PL_parser->multi_open)
48 #define PL_multi_close (PL_parser->multi_close)
49 #define PL_pending_ident (PL_parser->pending_ident)
50 #define PL_preambled (PL_parser->preambled)
51 #define PL_sublex_info (PL_parser->sublex_info)
54 # define PL_endwhite (PL_parser->endwhite)
55 # define PL_faketokens (PL_parser->faketokens)
56 # define PL_lasttoke (PL_parser->lasttoke)
57 # define PL_nextwhite (PL_parser->nextwhite)
58 # define PL_realtokenstart (PL_parser->realtokenstart)
59 # define PL_skipwhite (PL_parser->skipwhite)
60 # define PL_thisclose (PL_parser->thisclose)
61 # define PL_thismad (PL_parser->thismad)
62 # define PL_thisopen (PL_parser->thisopen)
63 # define PL_thisstuff (PL_parser->thisstuff)
64 # define PL_thistoken (PL_parser->thistoken)
65 # define PL_thiswhite (PL_parser->thiswhite)
69 S_pending_ident(pTHX);
71 static const char ident_too_long[] = "Identifier too long";
72 static const char commaless_variable_list[] = "comma-less variable list";
74 static void restore_rsfp(pTHX_ void *f);
75 #ifndef PERL_NO_UTF16_FILTER
76 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
77 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
81 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
82 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
84 # define CURMAD(slot,sv)
85 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
88 #define XFAKEBRACK 128
91 #ifdef USE_UTF8_SCRIPTS
92 # define UTF (!IN_BYTES)
94 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
97 /* In variables named $^X, these are the legal values for X.
98 * 1999-02-27 mjd-perl-patch@plover.com */
99 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
101 /* On MacOS, respect nonbreaking spaces */
102 #ifdef MACOS_TRADITIONAL
103 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
105 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
108 /* LEX_* are values for PL_lex_state, the state of the lexer.
109 * They are arranged oddly so that the guard on the switch statement
110 * can get by with a single comparison (if the compiler is smart enough).
113 /* #define LEX_NOTPARSING 11 is done in perl.h. */
115 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
116 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
117 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
118 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
119 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
121 /* at end of code, eg "$x" followed by: */
122 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
123 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
125 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
126 string or after \E, $foo, etc */
127 #define LEX_INTERPCONST 2 /* NOT USED */
128 #define LEX_FORMLINE 1 /* expecting a format line */
129 #define LEX_KNOWNEXT 0 /* next token known; just return it */
133 static const char* const lex_state_names[] = {
152 #include "keywords.h"
154 /* CLINE is a macro that ensures PL_copline has a sane value */
159 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
162 # define SKIPSPACE0(s) skipspace0(s)
163 # define SKIPSPACE1(s) skipspace1(s)
164 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
165 # define PEEKSPACE(s) skipspace2(s,0)
167 # define SKIPSPACE0(s) skipspace(s)
168 # define SKIPSPACE1(s) skipspace(s)
169 # define SKIPSPACE2(s,tsv) skipspace(s)
170 # define PEEKSPACE(s) skipspace(s)
174 * Convenience functions to return different tokens and prime the
175 * lexer for the next token. They all take an argument.
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
187 * FUN1 : not used, except for not, which isn't a UNIOP
188 * BOop : bitwise or or xor
190 * SHop : shift operator
191 * PWop : power operator
192 * PMop : pattern-matching operator
193 * Aop : addition-level operator
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
196 * Rop : relational operator <= != gt
198 * Also see LOP and lop() below.
201 #ifdef DEBUGGING /* Serve -DT. */
202 # define REPORT(retval) tokereport((I32)retval)
204 # define REPORT(retval) (retval)
207 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
210 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
215 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
216 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
217 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
218 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
219 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
220 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
221 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
222 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
223 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
224 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
225 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
226 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
228 /* This bit of chicanery makes a unary function followed by
229 * a parenthesis into a function with one argument, highest precedence.
230 * The UNIDOR macro is for unary functions that can be followed by the //
231 * operator (such as C<shift // 0>).
233 #define UNI2(f,x) { \
237 PL_last_uni = PL_oldbufptr; \
238 PL_last_lop_op = f; \
240 return REPORT( (int)FUNC1 ); \
242 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
244 #define UNI(f) UNI2(f,XTERM)
245 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
247 #define UNIBRACK(f) { \
250 PL_last_uni = PL_oldbufptr; \
252 return REPORT( (int)FUNC1 ); \
254 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
257 /* grandfather return to old style */
258 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
262 /* how to interpret the yylval associated with the token */
266 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
272 static struct debug_tokens {
274 enum token_type type;
276 } const debug_tokens[] =
278 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
279 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
280 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
281 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
282 { ARROW, TOKENTYPE_NONE, "ARROW" },
283 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
284 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
285 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
286 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
287 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
288 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
289 { DO, TOKENTYPE_NONE, "DO" },
290 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
291 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
292 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
293 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
294 { ELSE, TOKENTYPE_NONE, "ELSE" },
295 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
296 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
297 { FOR, TOKENTYPE_IVAL, "FOR" },
298 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
299 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
300 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
301 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
302 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
303 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
304 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
305 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
306 { IF, TOKENTYPE_IVAL, "IF" },
307 { LABEL, TOKENTYPE_PVAL, "LABEL" },
308 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
309 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
310 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
311 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
312 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
313 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
314 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
315 { MY, TOKENTYPE_IVAL, "MY" },
316 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
317 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
318 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
319 { OROP, TOKENTYPE_IVAL, "OROP" },
320 { OROR, TOKENTYPE_NONE, "OROR" },
321 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
322 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
323 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
324 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
325 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
326 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
327 { PREINC, TOKENTYPE_NONE, "PREINC" },
328 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
329 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
330 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
331 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
332 { SUB, TOKENTYPE_NONE, "SUB" },
333 { THING, TOKENTYPE_OPVAL, "THING" },
334 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
335 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
336 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
337 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
338 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
339 { USE, TOKENTYPE_IVAL, "USE" },
340 { WHEN, TOKENTYPE_IVAL, "WHEN" },
341 { WHILE, TOKENTYPE_IVAL, "WHILE" },
342 { WORD, TOKENTYPE_OPVAL, "WORD" },
343 { 0, TOKENTYPE_NONE, NULL }
346 /* dump the returned token in rv, plus any optional arg in yylval */
349 S_tokereport(pTHX_ I32 rv)
353 const char *name = NULL;
354 enum token_type type = TOKENTYPE_NONE;
355 const struct debug_tokens *p;
356 SV* const report = newSVpvs("<== ");
358 for (p = debug_tokens; p->token; p++) {
359 if (p->token == (int)rv) {
366 Perl_sv_catpv(aTHX_ report, name);
367 else if ((char)rv > ' ' && (char)rv < '~')
368 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
370 sv_catpvs(report, "EOF");
372 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
375 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
378 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
380 case TOKENTYPE_OPNUM:
381 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
382 PL_op_name[yylval.ival]);
385 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
387 case TOKENTYPE_OPVAL:
389 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
390 PL_op_name[yylval.opval->op_type]);
391 if (yylval.opval->op_type == OP_CONST) {
392 Perl_sv_catpvf(aTHX_ report, " %s",
393 SvPEEK(cSVOPx_sv(yylval.opval)));
398 sv_catpvs(report, "(opval=null)");
401 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
407 /* print the buffer with suitable escapes */
410 S_printbuf(pTHX_ const char* fmt, const char* s)
412 SV* const tmp = newSVpvs("");
413 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
422 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
423 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
427 S_ao(pTHX_ int toketype)
430 if (*PL_bufptr == '=') {
432 if (toketype == ANDAND)
433 yylval.ival = OP_ANDASSIGN;
434 else if (toketype == OROR)
435 yylval.ival = OP_ORASSIGN;
436 else if (toketype == DORDOR)
437 yylval.ival = OP_DORASSIGN;
445 * When Perl expects an operator and finds something else, no_op
446 * prints the warning. It always prints "<something> found where
447 * operator expected. It prints "Missing semicolon on previous line?"
448 * if the surprise occurs at the start of the line. "do you need to
449 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
450 * where the compiler doesn't know if foo is a method call or a function.
451 * It prints "Missing operator before end of line" if there's nothing
452 * after the missing operator, or "... before <...>" if there is something
453 * after the missing operator.
457 S_no_op(pTHX_ const char *what, char *s)
460 char * const oldbp = PL_bufptr;
461 const bool is_first = (PL_oldbufptr == PL_linestart);
467 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
468 if (ckWARN_d(WARN_SYNTAX)) {
470 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
471 "\t(Missing semicolon on previous line?)\n");
472 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
474 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
476 if (t < PL_bufptr && isSPACE(*t))
477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
478 "\t(Do you need to predeclare %.*s?)\n",
479 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
484 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
492 * Complain about missing quote/regexp/heredoc terminator.
493 * If it's called with NULL then it cauterizes the line buffer.
494 * If we're in a delimited string and the delimiter is a control
495 * character, it's reformatted into a two-char sequence like ^C.
500 S_missingterm(pTHX_ char *s)
506 char * const nl = strrchr(s,'\n');
512 iscntrl(PL_multi_close)
514 PL_multi_close < 32 || PL_multi_close == 127
518 tmpbuf[1] = (char)toCTRL(PL_multi_close);
523 *tmpbuf = (char)PL_multi_close;
527 q = strchr(s,'"') ? '\'' : '"';
528 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
531 #define FEATURE_IS_ENABLED(name) \
532 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
533 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
535 * S_feature_is_enabled
536 * Check whether the named feature is enabled.
539 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
542 HV * const hinthv = GvHV(PL_hintgv);
543 char he_name[32] = "feature_";
544 (void) my_strlcpy(&he_name[8], name, 24);
546 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
554 Perl_deprecate(pTHX_ const char *s)
556 if (ckWARN(WARN_DEPRECATED))
557 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
561 Perl_deprecate_old(pTHX_ const char *s)
563 /* This function should NOT be called for any new deprecated warnings */
564 /* Use Perl_deprecate instead */
566 /* It is here to maintain backward compatibility with the pre-5.8 */
567 /* warnings category hierarchy. The "deprecated" category used to */
568 /* live under the "syntax" category. It is now a top-level category */
569 /* in its own right. */
571 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
572 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
573 "Use of %s is deprecated", s);
577 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
578 * utf16-to-utf8-reversed.
581 #ifdef PERL_CR_FILTER
585 register const char *s = SvPVX_const(sv);
586 register const char * const e = s + SvCUR(sv);
587 /* outer loop optimized to do nothing if there are no CR-LFs */
589 if (*s++ == '\r' && *s == '\n') {
590 /* hit a CR-LF, need to copy the rest */
591 register char *d = s - 1;
594 if (*s == '\r' && s[1] == '\n')
605 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
607 const I32 count = FILTER_READ(idx+1, sv, maxlen);
608 if (count > 0 && !maxlen)
618 * Initialize variables. Uses the Perl save_stack to save its state (for
619 * recursive calls to the parser).
623 Perl_lex_start(pTHX_ SV *line)
626 const char *s = NULL;
630 /* create and initialise a parser */
632 Newxz(parser, 1, yy_parser);
633 parser->old_parser = PL_parser;
636 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
637 parser->ps = parser->stack;
638 parser->stack_size = YYINITDEPTH;
640 parser->stack->state = 0;
641 parser->yyerrstatus = 0;
642 parser->yychar = YYEMPTY; /* Cause a token to be read. */
644 /* initialise lexer state */
646 SAVEI32(PL_lex_state);
648 if (PL_lex_state == LEX_KNOWNEXT) {
649 I32 toke = parser->old_parser->lasttoke;
650 while (--toke >= 0) {
651 SAVEI32(PL_nexttoke[toke].next_type);
652 SAVEVPTR(PL_nexttoke[toke].next_val);
654 SAVEVPTR(PL_nexttoke[toke].next_mad);
657 SAVEI32(PL_curforce);
659 if (PL_lex_state == LEX_KNOWNEXT) {
660 I32 toke = PL_nexttoke;
661 while (--toke >= 0) {
662 SAVEI32(PL_nexttype[toke]);
663 SAVEVPTR(PL_nextval[toke]);
665 SAVEI32(PL_nexttoke);
668 SAVECOPLINE(PL_curcop);
671 SAVEPPTR(PL_oldbufptr);
672 SAVEPPTR(PL_oldoldbufptr);
673 SAVEPPTR(PL_last_lop);
674 SAVEPPTR(PL_last_uni);
675 SAVEPPTR(PL_linestart);
676 SAVESPTR(PL_linestr);
677 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
680 PL_lex_state = LEX_NORMAL;
682 Newx(parser->lex_brackstack, 120, char);
683 Newx(parser->lex_casestack, 12, char);
684 *parser->lex_casestack = '\0';
690 s = SvPV_const(line, len);
695 PL_linestr = newSVpvs("\n;");
696 } else if (SvREADONLY(line) || s[len-1] != ';') {
697 PL_linestr = newSVsv(line);
699 sv_catpvs(PL_linestr, "\n;");
702 SvREFCNT_inc_simple_void_NN(line);
705 /* PL_linestr needs to survive until end of scope, not just the next
706 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
707 SAVEFREESV(PL_linestr);
708 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
709 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
710 PL_last_lop = PL_last_uni = NULL;
716 * Finalizer for lexing operations. Must be called when the parser is
717 * done with the lexer.
724 PL_doextract = FALSE;
729 * This subroutine has nothing to do with tilting, whether at windmills
730 * or pinball tables. Its name is short for "increment line". It
731 * increments the current line number in CopLINE(PL_curcop) and checks
732 * to see whether the line starts with a comment of the form
733 * # line 500 "foo.pm"
734 * If so, it sets the current line number and file to the values in the comment.
738 S_incline(pTHX_ char *s)
746 CopLINE_inc(PL_curcop);
749 while (SPACE_OR_TAB(*s))
751 if (strnEQ(s, "line", 4))
755 if (SPACE_OR_TAB(*s))
759 while (SPACE_OR_TAB(*s))
767 while (SPACE_OR_TAB(*s))
769 if (*s == '"' && (t = strchr(s+1, '"'))) {
779 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
781 if (*e != '\n' && *e != '\0')
782 return; /* false alarm */
788 const char * const cf = CopFILE(PL_curcop);
789 STRLEN tmplen = cf ? strlen(cf) : 0;
790 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
791 /* must copy *{"::_<(eval N)[oldfilename:L]"}
792 * to *{"::_<newfilename"} */
793 char smallbuf[256], smallbuf2[256];
794 char *tmpbuf, *tmpbuf2;
796 STRLEN tmplen2 = strlen(s);
797 if (tmplen + 3 < sizeof smallbuf)
800 Newx(tmpbuf, tmplen + 3, char);
801 if (tmplen2 + 3 < sizeof smallbuf2)
804 Newx(tmpbuf2, tmplen2 + 3, char);
805 tmpbuf[0] = tmpbuf2[0] = '_';
806 tmpbuf[1] = tmpbuf2[1] = '<';
807 memcpy(tmpbuf + 2, cf, ++tmplen);
808 memcpy(tmpbuf2 + 2, s, ++tmplen2);
810 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
812 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
814 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
815 /* adjust ${"::_<newfilename"} to store the new file name */
816 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
817 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
818 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
821 if (tmpbuf != smallbuf) Safefree(tmpbuf);
822 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
825 CopFILE_free(PL_curcop);
826 CopFILE_set(PL_curcop, s);
829 CopLINE_set(PL_curcop, atoi(n)-1);
833 /* skip space before PL_thistoken */
836 S_skipspace0(pTHX_ register char *s)
843 PL_thiswhite = newSVpvs("");
844 sv_catsv(PL_thiswhite, PL_skipwhite);
845 sv_free(PL_skipwhite);
848 PL_realtokenstart = s - SvPVX(PL_linestr);
852 /* skip space after PL_thistoken */
855 S_skipspace1(pTHX_ register char *s)
857 const char *start = s;
858 I32 startoff = start - SvPVX(PL_linestr);
863 start = SvPVX(PL_linestr) + startoff;
864 if (!PL_thistoken && PL_realtokenstart >= 0) {
865 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
866 PL_thistoken = newSVpvn(tstart, start - tstart);
868 PL_realtokenstart = -1;
871 PL_nextwhite = newSVpvs("");
872 sv_catsv(PL_nextwhite, PL_skipwhite);
873 sv_free(PL_skipwhite);
880 S_skipspace2(pTHX_ register char *s, SV **svp)
883 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
884 const I32 startoff = s - SvPVX(PL_linestr);
887 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
888 if (!PL_madskills || !svp)
890 start = SvPVX(PL_linestr) + startoff;
891 if (!PL_thistoken && PL_realtokenstart >= 0) {
892 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
893 PL_thistoken = newSVpvn(tstart, start - tstart);
894 PL_realtokenstart = -1;
899 sv_setsv(*svp, PL_skipwhite);
900 sv_free(PL_skipwhite);
909 S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
911 AV *av = CopFILEAVx(PL_curcop);
913 SV * const sv = newSV(0);
914 sv_upgrade(sv, SVt_PVMG);
915 sv_setpvn(sv, buf, len);
918 av_store(av, (I32)CopLINE(PL_curcop), sv);
923 S_update_debugger_info_sv(pTHX_ SV *orig_sv)
925 AV *av = CopFILEAVx(PL_curcop);
927 SV * const sv = newSV(0);
928 sv_upgrade(sv, SVt_PVMG);
929 sv_setsv(sv, orig_sv);
932 av_store(av, (I32)CopLINE(PL_curcop), sv);
938 * Called to gobble the appropriate amount and type of whitespace.
939 * Skips comments as well.
943 S_skipspace(pTHX_ register char *s)
948 int startoff = s - SvPVX(PL_linestr);
951 sv_free(PL_skipwhite);
956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
957 while (s < PL_bufend && SPACE_OR_TAB(*s))
967 SSize_t oldprevlen, oldoldprevlen;
968 SSize_t oldloplen = 0, oldunilen = 0;
969 while (s < PL_bufend && isSPACE(*s)) {
970 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
975 if (s < PL_bufend && *s == '#') {
976 while (s < PL_bufend && *s != '\n')
980 if (PL_in_eval && !PL_rsfp) {
987 /* only continue to recharge the buffer if we're at the end
988 * of the buffer, we're not reading from a source filter, and
989 * we're in normal lexing mode
991 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
992 PL_lex_state == LEX_FORMLINE)
999 /* try to recharge the buffer */
1001 curoff = s - SvPVX(PL_linestr);
1004 if ((s = filter_gets(PL_linestr, PL_rsfp,
1005 (prevlen = SvCUR(PL_linestr)))) == NULL)
1008 if (PL_madskills && curoff != startoff) {
1010 PL_skipwhite = newSVpvs("");
1011 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1015 /* mustn't throw out old stuff yet if madpropping */
1016 SvCUR(PL_linestr) = curoff;
1017 s = SvPVX(PL_linestr) + curoff;
1019 if (curoff && s[-1] == '\n')
1023 /* end of file. Add on the -p or -n magic */
1024 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1027 sv_catpv(PL_linestr,
1028 ";}continue{print or die qq(-p destination: $!\\n);}");
1030 sv_setpv(PL_linestr,
1031 ";}continue{print or die qq(-p destination: $!\\n);}");
1033 PL_minus_n = PL_minus_p = 0;
1035 else if (PL_minus_n) {
1037 sv_catpvn(PL_linestr, ";}", 2);
1039 sv_setpvn(PL_linestr, ";}", 2);
1045 sv_catpvn(PL_linestr,";", 1);
1047 sv_setpvn(PL_linestr,";", 1);
1050 /* reset variables for next time we lex */
1051 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1057 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1058 PL_last_lop = PL_last_uni = NULL;
1060 /* Close the filehandle. Could be from -P preprocessor,
1061 * STDIN, or a regular file. If we were reading code from
1062 * STDIN (because the commandline held no -e or filename)
1063 * then we don't close it, we reset it so the code can
1064 * read from STDIN too.
1067 if (PL_preprocess && !PL_in_eval)
1068 (void)PerlProc_pclose(PL_rsfp);
1069 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1070 PerlIO_clearerr(PL_rsfp);
1072 (void)PerlIO_close(PL_rsfp);
1077 /* not at end of file, so we only read another line */
1078 /* make corresponding updates to old pointers, for yyerror() */
1079 oldprevlen = PL_oldbufptr - PL_bufend;
1080 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1082 oldunilen = PL_last_uni - PL_bufend;
1084 oldloplen = PL_last_lop - PL_bufend;
1085 PL_linestart = PL_bufptr = s + prevlen;
1086 PL_bufend = s + SvCUR(PL_linestr);
1088 PL_oldbufptr = s + oldprevlen;
1089 PL_oldoldbufptr = s + oldoldprevlen;
1091 PL_last_uni = s + oldunilen;
1093 PL_last_lop = s + oldloplen;
1096 /* debugger active and we're not compiling the debugger code,
1097 * so store the line into the debugger's array of lines
1099 if (PERLDB_LINE && PL_curstash != PL_debstash)
1100 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
1107 PL_skipwhite = newSVpvs("");
1108 curoff = s - SvPVX(PL_linestr);
1109 if (curoff - startoff)
1110 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1119 * Check the unary operators to ensure there's no ambiguity in how they're
1120 * used. An ambiguous piece of code would be:
1122 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1123 * the +5 is its argument.
1133 if (PL_oldoldbufptr != PL_last_uni)
1135 while (isSPACE(*PL_last_uni))
1138 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1140 if ((t = strchr(s, '(')) && t < PL_bufptr)
1143 if (ckWARN_d(WARN_AMBIGUOUS)){
1144 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1145 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1146 (int)(s - PL_last_uni), PL_last_uni);
1151 * LOP : macro to build a list operator. Its behaviour has been replaced
1152 * with a subroutine, S_lop() for which LOP is just another name.
1155 #define LOP(f,x) return lop(f,x,s)
1159 * Build a list operator (or something that might be one). The rules:
1160 * - if we have a next token, then it's a list operator [why?]
1161 * - if the next thing is an opening paren, then it's a function
1162 * - else it's a list operator
1166 S_lop(pTHX_ I32 f, int x, char *s)
1173 PL_last_lop = PL_oldbufptr;
1174 PL_last_lop_op = (OPCODE)f;
1177 return REPORT(LSTOP);
1180 return REPORT(LSTOP);
1183 return REPORT(FUNC);
1186 return REPORT(FUNC);
1188 return REPORT(LSTOP);
1194 * Sets up for an eventual force_next(). start_force(0) basically does
1195 * an unshift, while start_force(-1) does a push. yylex removes items
1200 S_start_force(pTHX_ int where)
1204 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1205 where = PL_lasttoke;
1206 assert(PL_curforce < 0 || PL_curforce == where);
1207 if (PL_curforce != where) {
1208 for (i = PL_lasttoke; i > where; --i) {
1209 PL_nexttoke[i] = PL_nexttoke[i-1];
1213 if (PL_curforce < 0) /* in case of duplicate start_force() */
1214 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1215 PL_curforce = where;
1218 curmad('^', newSVpvs(""));
1219 CURMAD('_', PL_nextwhite);
1224 S_curmad(pTHX_ char slot, SV *sv)
1230 if (PL_curforce < 0)
1231 where = &PL_thismad;
1233 where = &PL_nexttoke[PL_curforce].next_mad;
1236 sv_setpvn(sv, "", 0);
1239 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1241 else if (PL_encoding) {
1242 sv_recode_to_utf8(sv, PL_encoding);
1247 /* keep a slot open for the head of the list? */
1248 if (slot != '_' && *where && (*where)->mad_key == '^') {
1249 (*where)->mad_key = slot;
1250 sv_free((*where)->mad_val);
1251 (*where)->mad_val = (void*)sv;
1254 addmad(newMADsv(slot, sv), where, 0);
1257 # define start_force(where) NOOP
1258 # define curmad(slot, sv) NOOP
1263 * When the lexer realizes it knows the next token (for instance,
1264 * it is reordering tokens for the parser) then it can call S_force_next
1265 * to know what token to return the next time the lexer is called. Caller
1266 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1267 * and possibly PL_expect to ensure the lexer handles the token correctly.
1271 S_force_next(pTHX_ I32 type)
1275 if (PL_curforce < 0)
1276 start_force(PL_lasttoke);
1277 PL_nexttoke[PL_curforce].next_type = type;
1278 if (PL_lex_state != LEX_KNOWNEXT)
1279 PL_lex_defer = PL_lex_state;
1280 PL_lex_state = LEX_KNOWNEXT;
1281 PL_lex_expect = PL_expect;
1284 PL_nexttype[PL_nexttoke] = type;
1286 if (PL_lex_state != LEX_KNOWNEXT) {
1287 PL_lex_defer = PL_lex_state;
1288 PL_lex_expect = PL_expect;
1289 PL_lex_state = LEX_KNOWNEXT;
1295 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1298 SV * const sv = newSVpvn(start,len);
1299 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1306 * When the lexer knows the next thing is a word (for instance, it has
1307 * just seen -> and it knows that the next char is a word char, then
1308 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1312 * char *start : buffer position (must be within PL_linestr)
1313 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1314 * int check_keyword : if true, Perl checks to make sure the word isn't
1315 * a keyword (do this if the word is a label, e.g. goto FOO)
1316 * int allow_pack : if true, : characters will also be allowed (require,
1317 * use, etc. do this)
1318 * int allow_initial_tick : used by the "sub" lexer only.
1322 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1328 start = SKIPSPACE1(start);
1330 if (isIDFIRST_lazy_if(s,UTF) ||
1331 (allow_pack && *s == ':') ||
1332 (allow_initial_tick && *s == '\'') )
1334 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1335 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1337 start_force(PL_curforce);
1339 curmad('X', newSVpvn(start,s-start));
1340 if (token == METHOD) {
1345 PL_expect = XOPERATOR;
1348 NEXTVAL_NEXTTOKE.opval
1349 = (OP*)newSVOP(OP_CONST,0,
1350 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1351 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1359 * Called when the lexer wants $foo *foo &foo etc, but the program
1360 * text only contains the "foo" portion. The first argument is a pointer
1361 * to the "foo", and the second argument is the type symbol to prefix.
1362 * Forces the next token to be a "WORD".
1363 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1367 S_force_ident(pTHX_ register const char *s, int kind)
1371 const STRLEN len = strlen(s);
1372 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1373 start_force(PL_curforce);
1374 NEXTVAL_NEXTTOKE.opval = o;
1377 o->op_private = OPpCONST_ENTERED;
1378 /* XXX see note in pp_entereval() for why we forgo typo
1379 warnings if the symbol must be introduced in an eval.
1381 gv_fetchpvn_flags(s, len,
1382 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1384 kind == '$' ? SVt_PV :
1385 kind == '@' ? SVt_PVAV :
1386 kind == '%' ? SVt_PVHV :
1394 Perl_str_to_version(pTHX_ SV *sv)
1399 const char *start = SvPV_const(sv,len);
1400 const char * const end = start + len;
1401 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1402 while (start < end) {
1406 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1411 retval += ((NV)n)/nshift;
1420 * Forces the next token to be a version number.
1421 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1422 * and if "guessing" is TRUE, then no new token is created (and the caller
1423 * must use an alternative parsing method).
1427 S_force_version(pTHX_ char *s, int guessing)
1433 I32 startoff = s - SvPVX(PL_linestr);
1442 while (isDIGIT(*d) || *d == '_' || *d == '.')
1446 start_force(PL_curforce);
1447 curmad('X', newSVpvn(s,d-s));
1450 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1452 s = scan_num(s, &yylval);
1453 version = yylval.opval;
1454 ver = cSVOPx(version)->op_sv;
1455 if (SvPOK(ver) && !SvNIOK(ver)) {
1456 SvUPGRADE(ver, SVt_PVNV);
1457 SvNV_set(ver, str_to_version(ver));
1458 SvNOK_on(ver); /* hint that it is a version */
1461 else if (guessing) {
1464 sv_free(PL_nextwhite); /* let next token collect whitespace */
1466 s = SvPVX(PL_linestr) + startoff;
1474 if (PL_madskills && !version) {
1475 sv_free(PL_nextwhite); /* let next token collect whitespace */
1477 s = SvPVX(PL_linestr) + startoff;
1480 /* NOTE: The parser sees the package name and the VERSION swapped */
1481 start_force(PL_curforce);
1482 NEXTVAL_NEXTTOKE.opval = version;
1490 * Tokenize a quoted string passed in as an SV. It finds the next
1491 * chunk, up to end of string or a backslash. It may make a new
1492 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1497 S_tokeq(pTHX_ SV *sv)
1501 register char *send;
1509 s = SvPV_force(sv, len);
1510 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1513 while (s < send && *s != '\\')
1518 if ( PL_hints & HINT_NEW_STRING ) {
1519 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1525 if (s + 1 < send && (s[1] == '\\'))
1526 s++; /* all that, just for this */
1531 SvCUR_set(sv, d - SvPVX_const(sv));
1533 if ( PL_hints & HINT_NEW_STRING )
1534 return new_constant(NULL, 0, "q", sv, pv, "q");
1539 * Now come three functions related to double-quote context,
1540 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1541 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1542 * interact with PL_lex_state, and create fake ( ... ) argument lists
1543 * to handle functions and concatenation.
1544 * They assume that whoever calls them will be setting up a fake
1545 * join call, because each subthing puts a ',' after it. This lets
1548 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1550 * (I'm not sure whether the spurious commas at the end of lcfirst's
1551 * arguments and join's arguments are created or not).
1556 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1558 * Pattern matching will set PL_lex_op to the pattern-matching op to
1559 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1561 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1563 * Everything else becomes a FUNC.
1565 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1566 * had an OP_CONST or OP_READLINE). This just sets us up for a
1567 * call to S_sublex_push().
1571 S_sublex_start(pTHX)
1574 register const I32 op_type = yylval.ival;
1576 if (op_type == OP_NULL) {
1577 yylval.opval = PL_lex_op;
1581 if (op_type == OP_CONST || op_type == OP_READLINE) {
1582 SV *sv = tokeq(PL_lex_stuff);
1584 if (SvTYPE(sv) == SVt_PVIV) {
1585 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1587 const char * const p = SvPV_const(sv, len);
1588 SV * const nsv = newSVpvn(p, len);
1594 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1595 PL_lex_stuff = NULL;
1596 /* Allow <FH> // "foo" */
1597 if (op_type == OP_READLINE)
1598 PL_expect = XTERMORDORDOR;
1601 else if (op_type == OP_BACKTICK && PL_lex_op) {
1602 /* readpipe() vas overriden */
1603 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1604 yylval.opval = PL_lex_op;
1606 PL_lex_stuff = NULL;
1610 PL_sublex_info.super_state = PL_lex_state;
1611 PL_sublex_info.sub_inwhat = op_type;
1612 PL_sublex_info.sub_op = PL_lex_op;
1613 PL_lex_state = LEX_INTERPPUSH;
1617 yylval.opval = PL_lex_op;
1627 * Create a new scope to save the lexing state. The scope will be
1628 * ended in S_sublex_done. Returns a '(', starting the function arguments
1629 * to the uc, lc, etc. found before.
1630 * Sets PL_lex_state to LEX_INTERPCONCAT.
1639 PL_lex_state = PL_sublex_info.super_state;
1640 SAVEI32(PL_lex_dojoin);
1641 SAVEI32(PL_lex_brackets);
1642 SAVEI32(PL_lex_casemods);
1643 SAVEI32(PL_lex_starts);
1644 SAVEI32(PL_lex_state);
1645 SAVEVPTR(PL_lex_inpat);
1646 SAVEI32(PL_lex_inwhat);
1647 SAVECOPLINE(PL_curcop);
1648 SAVEPPTR(PL_bufptr);
1649 SAVEPPTR(PL_bufend);
1650 SAVEPPTR(PL_oldbufptr);
1651 SAVEPPTR(PL_oldoldbufptr);
1652 SAVEPPTR(PL_last_lop);
1653 SAVEPPTR(PL_last_uni);
1654 SAVEPPTR(PL_linestart);
1655 SAVESPTR(PL_linestr);
1656 SAVEGENERICPV(PL_lex_brackstack);
1657 SAVEGENERICPV(PL_lex_casestack);
1659 PL_linestr = PL_lex_stuff;
1660 PL_lex_stuff = NULL;
1662 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1663 = SvPVX(PL_linestr);
1664 PL_bufend += SvCUR(PL_linestr);
1665 PL_last_lop = PL_last_uni = NULL;
1666 SAVEFREESV(PL_linestr);
1668 PL_lex_dojoin = FALSE;
1669 PL_lex_brackets = 0;
1670 Newx(PL_lex_brackstack, 120, char);
1671 Newx(PL_lex_casestack, 12, char);
1672 PL_lex_casemods = 0;
1673 *PL_lex_casestack = '\0';
1675 PL_lex_state = LEX_INTERPCONCAT;
1676 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1678 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1679 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1680 PL_lex_inpat = PL_sublex_info.sub_op;
1682 PL_lex_inpat = NULL;
1689 * Restores lexer state after a S_sublex_push.
1696 if (!PL_lex_starts++) {
1697 SV * const sv = newSVpvs("");
1698 if (SvUTF8(PL_linestr))
1700 PL_expect = XOPERATOR;
1701 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1705 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1706 PL_lex_state = LEX_INTERPCASEMOD;
1710 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1711 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1712 PL_linestr = PL_lex_repl;
1714 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1715 PL_bufend += SvCUR(PL_linestr);
1716 PL_last_lop = PL_last_uni = NULL;
1717 SAVEFREESV(PL_linestr);
1718 PL_lex_dojoin = FALSE;
1719 PL_lex_brackets = 0;
1720 PL_lex_casemods = 0;
1721 *PL_lex_casestack = '\0';
1723 if (SvEVALED(PL_lex_repl)) {
1724 PL_lex_state = LEX_INTERPNORMAL;
1726 /* we don't clear PL_lex_repl here, so that we can check later
1727 whether this is an evalled subst; that means we rely on the
1728 logic to ensure sublex_done() is called again only via the
1729 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1732 PL_lex_state = LEX_INTERPCONCAT;
1742 PL_endwhite = newSVpvs("");
1743 sv_catsv(PL_endwhite, PL_thiswhite);
1747 sv_setpvn(PL_thistoken,"",0);
1749 PL_realtokenstart = -1;
1753 PL_bufend = SvPVX(PL_linestr);
1754 PL_bufend += SvCUR(PL_linestr);
1755 PL_expect = XOPERATOR;
1756 PL_sublex_info.sub_inwhat = 0;
1764 Extracts a pattern, double-quoted string, or transliteration. This
1767 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1768 processing a pattern (PL_lex_inpat is true), a transliteration
1769 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1771 Returns a pointer to the character scanned up to. If this is
1772 advanced from the start pointer supplied (i.e. if anything was
1773 successfully parsed), will leave an OP for the substring scanned
1774 in yylval. Caller must intuit reason for not parsing further
1775 by looking at the next characters herself.
1779 double-quoted style: \r and \n
1780 regexp special ones: \D \s
1783 case and quoting: \U \Q \E
1784 stops on @ and $, but not for $ as tail anchor
1786 In transliterations:
1787 characters are VERY literal, except for - not at the start or end
1788 of the string, which indicates a range. If the range is in bytes,
1789 scan_const expands the range to the full set of intermediate
1790 characters. If the range is in utf8, the hyphen is replaced with
1791 a certain range mark which will be handled by pmtrans() in op.c.
1793 In double-quoted strings:
1795 double-quoted style: \r and \n
1797 deprecated backrefs: \1 (in substitution replacements)
1798 case and quoting: \U \Q \E
1801 scan_const does *not* construct ops to handle interpolated strings.
1802 It stops processing as soon as it finds an embedded $ or @ variable
1803 and leaves it to the caller to work out what's going on.
1805 embedded arrays (whether in pattern or not) could be:
1806 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1808 $ in double-quoted strings must be the symbol of an embedded scalar.
1810 $ in pattern could be $foo or could be tail anchor. Assumption:
1811 it's a tail anchor if $ is the last thing in the string, or if it's
1812 followed by one of "()| \r\n\t"
1814 \1 (backreferences) are turned into $1
1816 The structure of the code is
1817 while (there's a character to process) {
1818 handle transliteration ranges
1819 skip regexp comments /(?#comment)/ and codes /(?{code})/
1820 skip #-initiated comments in //x patterns
1821 check for embedded arrays
1822 check for embedded scalars
1824 leave intact backslashes from leaveit (below)
1825 deprecate \1 in substitution replacements
1826 handle string-changing backslashes \l \U \Q \E, etc.
1827 switch (what was escaped) {
1828 handle \- in a transliteration (becomes a literal -)
1829 handle \132 (octal characters)
1830 handle \x15 and \x{1234} (hex characters)
1831 handle \N{name} (named characters)
1832 handle \cV (control characters)
1833 handle printf-style backslashes (\f, \r, \n, etc)
1835 } (end if backslash)
1836 } (end while character to read)
1841 S_scan_const(pTHX_ char *start)
1844 register char *send = PL_bufend; /* end of the constant */
1845 SV *sv = newSV(send - start); /* sv for the constant */
1846 register char *s = start; /* start of the constant */
1847 register char *d = SvPVX(sv); /* destination for copies */
1848 bool dorange = FALSE; /* are we in a translit range? */
1849 bool didrange = FALSE; /* did we just finish a range? */
1850 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1851 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1854 UV literal_endpoint = 0;
1855 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1858 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1859 /* If we are doing a trans and we know we want UTF8 set expectation */
1860 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1861 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1865 while (s < send || dorange) {
1866 /* get transliterations out of the way (they're most literal) */
1867 if (PL_lex_inwhat == OP_TRANS) {
1868 /* expand a range A-Z to the full set of characters. AIE! */
1870 I32 i; /* current expanded character */
1871 I32 min; /* first character in range */
1872 I32 max; /* last character in range */
1883 char * const c = (char*)utf8_hop((U8*)d, -1);
1887 *c = (char)UTF_TO_NATIVE(0xff);
1888 /* mark the range as done, and continue */
1894 i = d - SvPVX_const(sv); /* remember current offset */
1897 SvLEN(sv) + (has_utf8 ?
1898 (512 - UTF_CONTINUATION_MARK +
1901 /* How many two-byte within 0..255: 128 in UTF-8,
1902 * 96 in UTF-8-mod. */
1904 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1906 d = SvPVX(sv) + i; /* refresh d after realloc */
1910 for (j = 0; j <= 1; j++) {
1911 char * const c = (char*)utf8_hop((U8*)d, -1);
1912 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1918 max = (U8)0xff; /* only to \xff */
1919 uvmax = uv; /* \x{100} to uvmax */
1921 d = c; /* eat endpoint chars */
1926 d -= 2; /* eat the first char and the - */
1927 min = (U8)*d; /* first char in range */
1928 max = (U8)d[1]; /* last char in range */
1935 "Invalid range \"%c-%c\" in transliteration operator",
1936 (char)min, (char)max);
1940 if (literal_endpoint == 2 &&
1941 ((isLOWER(min) && isLOWER(max)) ||
1942 (isUPPER(min) && isUPPER(max)))) {
1944 for (i = min; i <= max; i++)
1946 *d++ = NATIVE_TO_NEED(has_utf8,i);
1948 for (i = min; i <= max; i++)
1950 *d++ = NATIVE_TO_NEED(has_utf8,i);
1955 for (i = min; i <= max; i++)
1958 const U8 ch = (U8)NATIVE_TO_UTF(i);
1959 if (UNI_IS_INVARIANT(ch))
1962 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1963 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1972 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1974 *d++ = (char)UTF_TO_NATIVE(0xff);
1976 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1980 /* mark the range as done, and continue */
1984 literal_endpoint = 0;
1989 /* range begins (ignore - as first or last char) */
1990 else if (*s == '-' && s+1 < send && s != start) {
1992 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1999 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2009 literal_endpoint = 0;
2010 native_range = TRUE;
2015 /* if we get here, we're not doing a transliteration */
2017 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2018 except for the last char, which will be done separately. */
2019 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2021 while (s+1 < send && *s != ')')
2022 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2024 else if (s[2] == '{' /* This should match regcomp.c */
2025 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2028 char *regparse = s + (s[2] == '{' ? 3 : 4);
2031 while (count && (c = *regparse)) {
2032 if (c == '\\' && regparse[1])
2040 if (*regparse != ')')
2041 regparse--; /* Leave one char for continuation. */
2042 while (s < regparse)
2043 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2047 /* likewise skip #-initiated comments in //x patterns */
2048 else if (*s == '#' && PL_lex_inpat &&
2049 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2050 while (s+1 < send && *s != '\n')
2051 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2054 /* check for embedded arrays
2055 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2057 else if (*s == '@' && s[1]) {
2058 if (isALNUM_lazy_if(s+1,UTF))
2060 if (strchr(":'{$", s[1]))
2062 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2063 break; /* in regexp, neither @+ nor @- are interpolated */
2066 /* check for embedded scalars. only stop if we're sure it's a
2069 else if (*s == '$') {
2070 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2072 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2073 break; /* in regexp, $ might be tail anchor */
2076 /* End of else if chain - OP_TRANS rejoin rest */
2079 if (*s == '\\' && s+1 < send) {
2082 /* deprecate \1 in strings and substitution replacements */
2083 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2084 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2086 if (ckWARN(WARN_SYNTAX))
2087 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2092 /* string-change backslash escapes */
2093 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2097 /* skip any other backslash escapes in a pattern */
2098 else if (PL_lex_inpat) {
2099 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2100 goto default_action;
2103 /* if we get here, it's either a quoted -, or a digit */
2106 /* quoted - in transliterations */
2108 if (PL_lex_inwhat == OP_TRANS) {
2115 if ((isALPHA(*s) || isDIGIT(*s)) &&
2117 Perl_warner(aTHX_ packWARN(WARN_MISC),
2118 "Unrecognized escape \\%c passed through",
2120 /* default action is to copy the quoted character */
2121 goto default_action;
2124 /* \132 indicates an octal constant */
2125 case '0': case '1': case '2': case '3':
2126 case '4': case '5': case '6': case '7':
2130 uv = grok_oct(s, &len, &flags, NULL);
2133 goto NUM_ESCAPE_INSERT;
2135 /* \x24 indicates a hex constant */
2139 char* const e = strchr(s, '}');
2140 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2141 PERL_SCAN_DISALLOW_PREFIX;
2146 yyerror("Missing right brace on \\x{}");
2150 uv = grok_hex(s, &len, &flags, NULL);
2156 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2157 uv = grok_hex(s, &len, &flags, NULL);
2163 /* Insert oct or hex escaped character.
2164 * There will always enough room in sv since such
2165 * escapes will be longer than any UTF-8 sequence
2166 * they can end up as. */
2168 /* We need to map to chars to ASCII before doing the tests
2171 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2172 if (!has_utf8 && uv > 255) {
2173 /* Might need to recode whatever we have
2174 * accumulated so far if it contains any
2177 * (Can't we keep track of that and avoid
2178 * this rescan? --jhi)
2182 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2183 if (!NATIVE_IS_INVARIANT(*c)) {
2188 const STRLEN offset = d - SvPVX_const(sv);
2190 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2194 while (src >= (const U8 *)SvPVX_const(sv)) {
2195 if (!NATIVE_IS_INVARIANT(*src)) {
2196 const U8 ch = NATIVE_TO_ASCII(*src);
2197 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2198 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2208 if (has_utf8 || uv > 255) {
2209 d = (char*)uvchr_to_utf8((U8*)d, uv);
2211 if (PL_lex_inwhat == OP_TRANS &&
2212 PL_sublex_info.sub_op) {
2213 PL_sublex_info.sub_op->op_private |=
2214 (PL_lex_repl ? OPpTRANS_FROM_UTF
2218 if (uv > 255 && !dorange)
2219 native_range = FALSE;
2231 /* \N{LATIN SMALL LETTER A} is a named character */
2235 char* e = strchr(s, '}');
2242 yyerror("Missing right brace on \\N{}");
2246 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2248 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2249 PERL_SCAN_DISALLOW_PREFIX;
2252 uv = grok_hex(s, &len, &flags, NULL);
2253 if ( e > s && len != (STRLEN)(e - s) ) {
2257 goto NUM_ESCAPE_INSERT;
2259 res = newSVpvn(s + 1, e - s - 1);
2260 type = newSVpvn(s - 2,e - s + 3);
2261 res = new_constant( NULL, 0, "charnames",
2262 res, NULL, SvPVX(type) );
2265 sv_utf8_upgrade(res);
2266 str = SvPV_const(res,len);
2267 #ifdef EBCDIC_NEVER_MIND
2268 /* charnames uses pack U and that has been
2269 * recently changed to do the below uni->native
2270 * mapping, so this would be redundant (and wrong,
2271 * the code point would be doubly converted).
2272 * But leave this in just in case the pack U change
2273 * gets revoked, but the semantics is still
2274 * desireable for charnames. --jhi */
2276 UV uv = utf8_to_uvchr((const U8*)str, 0);
2279 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2281 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2282 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2283 str = SvPV_const(res, len);
2287 if (!has_utf8 && SvUTF8(res)) {
2288 const char * const ostart = SvPVX_const(sv);
2289 SvCUR_set(sv, d - ostart);
2292 sv_utf8_upgrade(sv);
2293 /* this just broke our allocation above... */
2294 SvGROW(sv, (STRLEN)(send - start));
2295 d = SvPVX(sv) + SvCUR(sv);
2298 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2299 const char * const odest = SvPVX_const(sv);
2301 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2302 d = SvPVX(sv) + (d - odest);
2306 native_range = FALSE; /* \N{} is guessed to be Unicode */
2308 Copy(str, d, len, char);
2315 yyerror("Missing braces on \\N{}");
2318 /* \c is a control character */
2327 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2330 yyerror("Missing control char name in \\c");
2334 /* printf-style backslashes, formfeeds, newlines, etc */
2336 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2339 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2342 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2345 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2348 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2351 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2354 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2360 } /* end if (backslash) */
2367 /* If we started with encoded form, or already know we want it
2368 and then encode the next character */
2369 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2371 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2372 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2375 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2376 const STRLEN off = d - SvPVX_const(sv);
2377 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2379 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2382 if (uv > 255 && !dorange)
2383 native_range = FALSE;
2387 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2389 } /* while loop to process each character */
2391 /* terminate the string and set up the sv */
2393 SvCUR_set(sv, d - SvPVX_const(sv));
2394 if (SvCUR(sv) >= SvLEN(sv))
2395 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2398 if (PL_encoding && !has_utf8) {
2399 sv_recode_to_utf8(sv, PL_encoding);
2405 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2406 PL_sublex_info.sub_op->op_private |=
2407 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2411 /* shrink the sv if we allocated more than we used */
2412 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2413 SvPV_shrink_to_cur(sv);
2416 /* return the substring (via yylval) only if we parsed anything */
2417 if (s > PL_bufptr) {
2418 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2419 sv = new_constant(start, s - start,
2420 (const char *)(PL_lex_inpat ? "qr" : "q"),
2423 (( PL_lex_inwhat == OP_TRANS
2425 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2428 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2435 * Returns TRUE if there's more to the expression (e.g., a subscript),
2438 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2440 * ->[ and ->{ return TRUE
2441 * { and [ outside a pattern are always subscripts, so return TRUE
2442 * if we're outside a pattern and it's not { or [, then return FALSE
2443 * if we're in a pattern and the first char is a {
2444 * {4,5} (any digits around the comma) returns FALSE
2445 * if we're in a pattern and the first char is a [
2447 * [SOMETHING] has a funky algorithm to decide whether it's a
2448 * character class or not. It has to deal with things like
2449 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2450 * anything else returns TRUE
2453 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2456 S_intuit_more(pTHX_ register char *s)
2459 if (PL_lex_brackets)
2461 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2463 if (*s != '{' && *s != '[')
2468 /* In a pattern, so maybe we have {n,m}. */
2485 /* On the other hand, maybe we have a character class */
2488 if (*s == ']' || *s == '^')
2491 /* this is terrifying, and it works */
2492 int weight = 2; /* let's weigh the evidence */
2494 unsigned char un_char = 255, last_un_char;
2495 const char * const send = strchr(s,']');
2496 char tmpbuf[sizeof PL_tokenbuf * 4];
2498 if (!send) /* has to be an expression */
2501 Zero(seen,256,char);
2504 else if (isDIGIT(*s)) {
2506 if (isDIGIT(s[1]) && s[2] == ']')
2512 for (; s < send; s++) {
2513 last_un_char = un_char;
2514 un_char = (unsigned char)*s;
2519 weight -= seen[un_char] * 10;
2520 if (isALNUM_lazy_if(s+1,UTF)) {
2522 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2523 len = (int)strlen(tmpbuf);
2524 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2529 else if (*s == '$' && s[1] &&
2530 strchr("[#!%*<>()-=",s[1])) {
2531 if (/*{*/ strchr("])} =",s[2]))
2540 if (strchr("wds]",s[1]))
2542 else if (seen[(U8)'\''] || seen[(U8)'"'])
2544 else if (strchr("rnftbxcav",s[1]))
2546 else if (isDIGIT(s[1])) {
2548 while (s[1] && isDIGIT(s[1]))
2558 if (strchr("aA01! ",last_un_char))
2560 if (strchr("zZ79~",s[1]))
2562 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2563 weight -= 5; /* cope with negative subscript */
2566 if (!isALNUM(last_un_char)
2567 && !(last_un_char == '$' || last_un_char == '@'
2568 || last_un_char == '&')
2569 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2574 if (keyword(tmpbuf, d - tmpbuf, 0))
2577 if (un_char == last_un_char + 1)
2579 weight -= seen[un_char];
2584 if (weight >= 0) /* probably a character class */
2594 * Does all the checking to disambiguate
2596 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2597 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2599 * First argument is the stuff after the first token, e.g. "bar".
2601 * Not a method if bar is a filehandle.
2602 * Not a method if foo is a subroutine prototyped to take a filehandle.
2603 * Not a method if it's really "Foo $bar"
2604 * Method if it's "foo $bar"
2605 * Not a method if it's really "print foo $bar"
2606 * Method if it's really "foo package::" (interpreted as package->foo)
2607 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2608 * Not a method if bar is a filehandle or package, but is quoted with
2613 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2616 char *s = start + (*start == '$');
2617 char tmpbuf[sizeof PL_tokenbuf];
2625 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2629 const char *proto = SvPVX_const(cv);
2640 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2641 /* start is the beginning of the possible filehandle/object,
2642 * and s is the end of it
2643 * tmpbuf is a copy of it
2646 if (*start == '$') {
2647 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2650 len = start - SvPVX(PL_linestr);
2654 start = SvPVX(PL_linestr) + len;
2658 return *s == '(' ? FUNCMETH : METHOD;
2660 if (!keyword(tmpbuf, len, 0)) {
2661 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2665 soff = s - SvPVX(PL_linestr);
2669 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2670 if (indirgv && GvCVu(indirgv))
2672 /* filehandle or package name makes it a method */
2673 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2675 soff = s - SvPVX(PL_linestr);
2678 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2679 return 0; /* no assumptions -- "=>" quotes bearword */
2681 start_force(PL_curforce);
2682 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2683 newSVpvn(tmpbuf,len));
2684 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2686 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2691 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2693 return *s == '(' ? FUNCMETH : METHOD;
2701 * Return a string of Perl code to load the debugger. If PERL5DB
2702 * is set, it will return the contents of that, otherwise a
2703 * compile-time require of perl5db.pl.
2711 const char * const pdb = PerlEnv_getenv("PERL5DB");
2715 SETERRNO(0,SS_NORMAL);
2716 return "BEGIN { require 'perl5db.pl' }";
2722 /* Encoded script support. filter_add() effectively inserts a
2723 * 'pre-processing' function into the current source input stream.
2724 * Note that the filter function only applies to the current source file
2725 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2727 * The datasv parameter (which may be NULL) can be used to pass
2728 * private data to this instance of the filter. The filter function
2729 * can recover the SV using the FILTER_DATA macro and use it to
2730 * store private buffers and state information.
2732 * The supplied datasv parameter is upgraded to a PVIO type
2733 * and the IoDIRP/IoANY field is used to store the function pointer,
2734 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2735 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2736 * private use must be set using malloc'd pointers.
2740 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2746 if (!PL_rsfp_filters)
2747 PL_rsfp_filters = newAV();
2750 SvUPGRADE(datasv, SVt_PVIO);
2751 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2752 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2753 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2754 FPTR2DPTR(void *, IoANY(datasv)),
2755 SvPV_nolen(datasv)));
2756 av_unshift(PL_rsfp_filters, 1);
2757 av_store(PL_rsfp_filters, 0, datasv) ;
2762 /* Delete most recently added instance of this filter function. */
2764 Perl_filter_del(pTHX_ filter_t funcp)
2770 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2771 FPTR2DPTR(void*, funcp)));
2773 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2775 /* if filter is on top of stack (usual case) just pop it off */
2776 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2777 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2778 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2779 IoANY(datasv) = (void *)NULL;
2780 sv_free(av_pop(PL_rsfp_filters));
2784 /* we need to search for the correct entry and clear it */
2785 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2789 /* Invoke the idxth filter function for the current rsfp. */
2790 /* maxlen 0 = read one text line */
2792 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2797 /* This API is bad. It should have been using unsigned int for maxlen.
2798 Not sure if we want to change the API, but if not we should sanity
2799 check the value here. */
2800 const unsigned int correct_length
2809 if (!PL_rsfp_filters)
2811 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2812 /* Provide a default input filter to make life easy. */
2813 /* Note that we append to the line. This is handy. */
2814 DEBUG_P(PerlIO_printf(Perl_debug_log,
2815 "filter_read %d: from rsfp\n", idx));
2816 if (correct_length) {
2819 const int old_len = SvCUR(buf_sv);
2821 /* ensure buf_sv is large enough */
2822 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2823 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2824 correct_length)) <= 0) {
2825 if (PerlIO_error(PL_rsfp))
2826 return -1; /* error */
2828 return 0 ; /* end of file */
2830 SvCUR_set(buf_sv, old_len + len) ;
2833 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2834 if (PerlIO_error(PL_rsfp))
2835 return -1; /* error */
2837 return 0 ; /* end of file */
2840 return SvCUR(buf_sv);
2842 /* Skip this filter slot if filter has been deleted */
2843 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2844 DEBUG_P(PerlIO_printf(Perl_debug_log,
2845 "filter_read %d: skipped (filter deleted)\n",
2847 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2849 /* Get function pointer hidden within datasv */
2850 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2851 DEBUG_P(PerlIO_printf(Perl_debug_log,
2852 "filter_read %d: via function %p (%s)\n",
2853 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2854 /* Call function. The function is expected to */
2855 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2856 /* Return: <0:error, =0:eof, >0:not eof */
2857 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2861 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2864 #ifdef PERL_CR_FILTER
2865 if (!PL_rsfp_filters) {
2866 filter_add(S_cr_textfilter,NULL);
2869 if (PL_rsfp_filters) {
2871 SvCUR_set(sv, 0); /* start with empty line */
2872 if (FILTER_READ(0, sv, 0) > 0)
2873 return ( SvPVX(sv) ) ;
2878 return (sv_gets(sv, fp, append));
2882 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2887 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2891 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2892 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2894 return GvHV(gv); /* Foo:: */
2897 /* use constant CLASS => 'MyClass' */
2898 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2899 if (gv && GvCV(gv)) {
2900 SV * const sv = cv_const_sv(GvCV(gv));
2902 pkgname = SvPV_nolen_const(sv);
2905 return gv_stashpv(pkgname, FALSE);
2909 * S_readpipe_override
2910 * Check whether readpipe() is overriden, and generates the appropriate
2911 * optree, provided sublex_start() is called afterwards.
2914 S_readpipe_override(pTHX)
2917 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2918 yylval.ival = OP_BACKTICK;
2920 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2922 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2923 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2924 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2926 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2927 append_elem(OP_LIST,
2928 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2929 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2939 * The intent of this yylex wrapper is to minimize the changes to the
2940 * tokener when we aren't interested in collecting madprops. It remains
2941 * to be seen how successful this strategy will be...
2948 char *s = PL_bufptr;
2950 /* make sure PL_thiswhite is initialized */
2954 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2955 if (PL_pending_ident)
2956 return S_pending_ident(aTHX);
2958 /* previous token ate up our whitespace? */
2959 if (!PL_lasttoke && PL_nextwhite) {
2960 PL_thiswhite = PL_nextwhite;
2964 /* isolate the token, and figure out where it is without whitespace */
2965 PL_realtokenstart = -1;
2969 assert(PL_curforce < 0);
2971 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2972 if (!PL_thistoken) {
2973 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2974 PL_thistoken = newSVpvs("");
2976 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2977 PL_thistoken = newSVpvn(tstart, s - tstart);
2980 if (PL_thismad) /* install head */
2981 CURMAD('X', PL_thistoken);
2984 /* last whitespace of a sublex? */
2985 if (optype == ')' && PL_endwhite) {
2986 CURMAD('X', PL_endwhite);
2991 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2992 if (!PL_thiswhite && !PL_endwhite && !optype) {
2993 sv_free(PL_thistoken);
2998 /* put off final whitespace till peg */
2999 if (optype == ';' && !PL_rsfp) {
3000 PL_nextwhite = PL_thiswhite;
3003 else if (PL_thisopen) {
3004 CURMAD('q', PL_thisopen);
3006 sv_free(PL_thistoken);
3010 /* Store actual token text as madprop X */
3011 CURMAD('X', PL_thistoken);
3015 /* add preceding whitespace as madprop _ */
3016 CURMAD('_', PL_thiswhite);
3020 /* add quoted material as madprop = */
3021 CURMAD('=', PL_thisstuff);
3025 /* add terminating quote as madprop Q */
3026 CURMAD('Q', PL_thisclose);
3030 /* special processing based on optype */
3034 /* opval doesn't need a TOKEN since it can already store mp */
3045 append_madprops(PL_thismad, yylval.opval, 0);
3053 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3062 /* remember any fake bracket that lexer is about to discard */
3063 if (PL_lex_brackets == 1 &&
3064 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3067 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3070 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3071 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3074 break; /* don't bother looking for trailing comment */
3083 /* attach a trailing comment to its statement instead of next token */
3087 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3089 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3091 if (*s == '\n' || *s == '#') {
3092 while (s < PL_bufend && *s != '\n')
3096 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3097 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3114 /* Create new token struct. Note: opvals return early above. */
3115 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3122 S_tokenize_use(pTHX_ int is_use, char *s) {
3124 if (PL_expect != XSTATE)
3125 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3126 is_use ? "use" : "no"));
3128 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3129 s = force_version(s, TRUE);
3130 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3131 start_force(PL_curforce);
3132 NEXTVAL_NEXTTOKE.opval = NULL;
3135 else if (*s == 'v') {
3136 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3137 s = force_version(s, FALSE);
3141 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3142 s = force_version(s, FALSE);
3144 yylval.ival = is_use;
3148 static const char* const exp_name[] =
3149 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3150 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3157 Works out what to call the token just pulled out of the input
3158 stream. The yacc parser takes care of taking the ops we return and
3159 stitching them into a tree.
3165 if read an identifier
3166 if we're in a my declaration
3167 croak if they tried to say my($foo::bar)
3168 build the ops for a my() declaration
3169 if it's an access to a my() variable
3170 are we in a sort block?
3171 croak if my($a); $a <=> $b
3172 build ops for access to a my() variable
3173 if in a dq string, and they've said @foo and we can't find @foo
3175 build ops for a bareword
3176 if we already built the token before, use it.
3181 #pragma segment Perl_yylex
3187 register char *s = PL_bufptr;
3192 /* orig_keyword, gvp, and gv are initialized here because
3193 * jump to the label just_a_word_zero can bypass their
3194 * initialization later. */
3195 I32 orig_keyword = 0;
3200 SV* tmp = newSVpvs("");
3201 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3202 (IV)CopLINE(PL_curcop),
3203 lex_state_names[PL_lex_state],
3204 exp_name[PL_expect],
3205 pv_display(tmp, s, strlen(s), 0, 60));
3208 /* check if there's an identifier for us to look at */
3209 if (PL_pending_ident)
3210 return REPORT(S_pending_ident(aTHX));
3212 /* no identifier pending identification */
3214 switch (PL_lex_state) {
3216 case LEX_NORMAL: /* Some compilers will produce faster */
3217 case LEX_INTERPNORMAL: /* code if we comment these out. */
3221 /* when we've already built the next token, just pull it out of the queue */
3225 yylval = PL_nexttoke[PL_lasttoke].next_val;
3227 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3228 PL_nexttoke[PL_lasttoke].next_mad = 0;
3229 if (PL_thismad && PL_thismad->mad_key == '_') {
3230 PL_thiswhite = (SV*)PL_thismad->mad_val;
3231 PL_thismad->mad_val = 0;
3232 mad_free(PL_thismad);
3237 PL_lex_state = PL_lex_defer;
3238 PL_expect = PL_lex_expect;
3239 PL_lex_defer = LEX_NORMAL;
3240 if (!PL_nexttoke[PL_lasttoke].next_type)
3245 yylval = PL_nextval[PL_nexttoke];
3247 PL_lex_state = PL_lex_defer;
3248 PL_expect = PL_lex_expect;
3249 PL_lex_defer = LEX_NORMAL;
3253 /* FIXME - can these be merged? */
3254 return(PL_nexttoke[PL_lasttoke].next_type);
3256 return REPORT(PL_nexttype[PL_nexttoke]);
3259 /* interpolated case modifiers like \L \U, including \Q and \E.
3260 when we get here, PL_bufptr is at the \
3262 case LEX_INTERPCASEMOD:
3264 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3265 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3267 /* handle \E or end of string */
3268 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3270 if (PL_lex_casemods) {
3271 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3272 PL_lex_casestack[PL_lex_casemods] = '\0';
3274 if (PL_bufptr != PL_bufend
3275 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3277 PL_lex_state = LEX_INTERPCONCAT;
3280 PL_thistoken = newSVpvs("\\E");
3286 while (PL_bufptr != PL_bufend &&
3287 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3289 PL_thiswhite = newSVpvs("");
3290 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3294 if (PL_bufptr != PL_bufend)
3297 PL_lex_state = LEX_INTERPCONCAT;
3301 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3302 "### Saw case modifier\n"); });
3304 if (s[1] == '\\' && s[2] == 'E') {
3307 PL_thiswhite = newSVpvs("");
3308 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3311 PL_lex_state = LEX_INTERPCONCAT;
3316 if (!PL_madskills) /* when just compiling don't need correct */
3317 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3318 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3319 if ((*s == 'L' || *s == 'U') &&
3320 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3321 PL_lex_casestack[--PL_lex_casemods] = '\0';
3324 if (PL_lex_casemods > 10)
3325 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3326 PL_lex_casestack[PL_lex_casemods++] = *s;
3327 PL_lex_casestack[PL_lex_casemods] = '\0';
3328 PL_lex_state = LEX_INTERPCONCAT;
3329 start_force(PL_curforce);
3330 NEXTVAL_NEXTTOKE.ival = 0;
3332 start_force(PL_curforce);
3334 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3336 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3338 NEXTVAL_NEXTTOKE.ival = OP_LC;
3340 NEXTVAL_NEXTTOKE.ival = OP_UC;
3342 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3344 Perl_croak(aTHX_ "panic: yylex");
3346 SV* const tmpsv = newSVpvs("");
3347 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3353 if (PL_lex_starts) {
3359 sv_free(PL_thistoken);
3360 PL_thistoken = newSVpvs("");
3363 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3364 if (PL_lex_casemods == 1 && PL_lex_inpat)
3373 case LEX_INTERPPUSH:
3374 return REPORT(sublex_push());
3376 case LEX_INTERPSTART:
3377 if (PL_bufptr == PL_bufend)
3378 return REPORT(sublex_done());
3379 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3380 "### Interpolated variable\n"); });
3382 PL_lex_dojoin = (*PL_bufptr == '@');
3383 PL_lex_state = LEX_INTERPNORMAL;
3384 if (PL_lex_dojoin) {
3385 start_force(PL_curforce);
3386 NEXTVAL_NEXTTOKE.ival = 0;
3388 start_force(PL_curforce);
3389 force_ident("\"", '$');
3390 start_force(PL_curforce);
3391 NEXTVAL_NEXTTOKE.ival = 0;
3393 start_force(PL_curforce);
3394 NEXTVAL_NEXTTOKE.ival = 0;
3396 start_force(PL_curforce);
3397 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3400 if (PL_lex_starts++) {
3405 sv_free(PL_thistoken);
3406 PL_thistoken = newSVpvs("");
3409 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3410 if (!PL_lex_casemods && PL_lex_inpat)
3417 case LEX_INTERPENDMAYBE:
3418 if (intuit_more(PL_bufptr)) {
3419 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3425 if (PL_lex_dojoin) {
3426 PL_lex_dojoin = FALSE;
3427 PL_lex_state = LEX_INTERPCONCAT;
3431 sv_free(PL_thistoken);
3432 PL_thistoken = newSVpvs("");
3437 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3438 && SvEVALED(PL_lex_repl))
3440 if (PL_bufptr != PL_bufend)
3441 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3445 case LEX_INTERPCONCAT:
3447 if (PL_lex_brackets)
3448 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3450 if (PL_bufptr == PL_bufend)
3451 return REPORT(sublex_done());
3453 if (SvIVX(PL_linestr) == '\'') {
3454 SV *sv = newSVsv(PL_linestr);
3457 else if ( PL_hints & HINT_NEW_RE )
3458 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3459 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3463 s = scan_const(PL_bufptr);
3465 PL_lex_state = LEX_INTERPCASEMOD;
3467 PL_lex_state = LEX_INTERPSTART;
3470 if (s != PL_bufptr) {
3471 start_force(PL_curforce);
3473 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3475 NEXTVAL_NEXTTOKE = yylval;
3478 if (PL_lex_starts++) {
3482 sv_free(PL_thistoken);
3483 PL_thistoken = newSVpvs("");
3486 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3487 if (!PL_lex_casemods && PL_lex_inpat)
3500 PL_lex_state = LEX_NORMAL;
3501 s = scan_formline(PL_bufptr);
3502 if (!PL_lex_formbrack)
3508 PL_oldoldbufptr = PL_oldbufptr;
3514 sv_free(PL_thistoken);
3517 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3521 if (isIDFIRST_lazy_if(s,UTF))
3523 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3526 goto fake_eof; /* emulate EOF on ^D or ^Z */
3535 if (PL_lex_brackets) {
3536 yyerror((const char *)
3538 ? "Format not terminated"
3539 : "Missing right curly or square bracket"));
3541 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3542 "### Tokener got EOF\n");
3546 if (s++ < PL_bufend)
3547 goto retry; /* ignore stray nulls */
3550 if (!PL_in_eval && !PL_preambled) {
3551 PL_preambled = TRUE;
3556 sv_setpv(PL_linestr,incl_perldb());
3557 if (SvCUR(PL_linestr))
3558 sv_catpvs(PL_linestr,";");
3560 while(AvFILLp(PL_preambleav) >= 0) {
3561 SV *tmpsv = av_shift(PL_preambleav);
3562 sv_catsv(PL_linestr, tmpsv);
3563 sv_catpvs(PL_linestr, ";");
3566 sv_free((SV*)PL_preambleav);
3567 PL_preambleav = NULL;
3569 if (PL_minus_n || PL_minus_p) {
3570 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3572 sv_catpvs(PL_linestr,"chomp;");
3575 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3576 || *PL_splitstr == '"')
3577 && strchr(PL_splitstr + 1, *PL_splitstr))
3578 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3580 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3581 bytes can be used as quoting characters. :-) */
3582 const char *splits = PL_splitstr;
3583 sv_catpvs(PL_linestr, "our @F=split(q\0");
3586 if (*splits == '\\')
3587 sv_catpvn(PL_linestr, splits, 1);
3588 sv_catpvn(PL_linestr, splits, 1);
3589 } while (*splits++);
3590 /* This loop will embed the trailing NUL of
3591 PL_linestr as the last thing it does before
3593 sv_catpvs(PL_linestr, ");");
3597 sv_catpvs(PL_linestr,"our @F=split(' ');");
3601 sv_catpvs(PL_linestr,"use feature ':5.10';");
3602 sv_catpvs(PL_linestr, "\n");
3603 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3604 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3605 PL_last_lop = PL_last_uni = NULL;
3606 if (PERLDB_LINE && PL_curstash != PL_debstash)
3607 update_debugger_info_sv(PL_linestr);
3611 bof = PL_rsfp ? TRUE : FALSE;
3612 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3615 PL_realtokenstart = -1;
3618 if (PL_preprocess && !PL_in_eval)
3619 (void)PerlProc_pclose(PL_rsfp);
3620 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3621 PerlIO_clearerr(PL_rsfp);
3623 (void)PerlIO_close(PL_rsfp);
3625 PL_doextract = FALSE;
3627 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3632 sv_setpv(PL_linestr,
3635 ? ";}continue{print;}" : ";}"));
3636 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3637 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3638 PL_last_lop = PL_last_uni = NULL;
3639 PL_minus_n = PL_minus_p = 0;
3642 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3643 PL_last_lop = PL_last_uni = NULL;
3644 sv_setpvn(PL_linestr,"",0);
3645 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3647 /* If it looks like the start of a BOM or raw UTF-16,
3648 * check if it in fact is. */
3654 #ifdef PERLIO_IS_STDIO
3655 # ifdef __GNU_LIBRARY__
3656 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3657 # define FTELL_FOR_PIPE_IS_BROKEN
3661 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3662 # define FTELL_FOR_PIPE_IS_BROKEN
3667 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3668 /* This loses the possibility to detect the bof
3669 * situation on perl -P when the libc5 is being used.
3670 * Workaround? Maybe attach some extra state to PL_rsfp?
3673 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3675 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3678 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3679 s = swallow_bom((U8*)s);
3683 /* Incest with pod. */
3686 sv_catsv(PL_thiswhite, PL_linestr);
3688 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3689 sv_setpvn(PL_linestr, "", 0);
3690 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3691 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3692 PL_last_lop = PL_last_uni = NULL;
3693 PL_doextract = FALSE;
3697 } while (PL_doextract);
3698 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3699 if (PERLDB_LINE && PL_curstash != PL_debstash)
3700 update_debugger_info_sv(PL_linestr);
3701 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3702 PL_last_lop = PL_last_uni = NULL;
3703 if (CopLINE(PL_curcop) == 1) {
3704 while (s < PL_bufend && isSPACE(*s))
3706 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3710 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3714 if (*s == '#' && *(s+1) == '!')
3716 #ifdef ALTERNATE_SHEBANG
3718 static char const as[] = ALTERNATE_SHEBANG;
3719 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3720 d = s + (sizeof(as) - 1);
3722 #endif /* ALTERNATE_SHEBANG */
3731 while (*d && !isSPACE(*d))
3735 #ifdef ARG_ZERO_IS_SCRIPT
3736 if (ipathend > ipath) {
3738 * HP-UX (at least) sets argv[0] to the script name,
3739 * which makes $^X incorrect. And Digital UNIX and Linux,
3740 * at least, set argv[0] to the basename of the Perl
3741 * interpreter. So, having found "#!", we'll set it right.
3743 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3745 assert(SvPOK(x) || SvGMAGICAL(x));
3746 if (sv_eq(x, CopFILESV(PL_curcop))) {
3747 sv_setpvn(x, ipath, ipathend - ipath);
3753 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3754 const char * const lstart = SvPV_const(x,llen);
3756 bstart += blen - llen;
3757 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3758 sv_setpvn(x, ipath, ipathend - ipath);
3763 TAINT_NOT; /* $^X is always tainted, but that's OK */
3765 #endif /* ARG_ZERO_IS_SCRIPT */
3770 d = instr(s,"perl -");
3772 d = instr(s,"perl");
3774 /* avoid getting into infinite loops when shebang
3775 * line contains "Perl" rather than "perl" */
3777 for (d = ipathend-4; d >= ipath; --d) {
3778 if ((*d == 'p' || *d == 'P')
3779 && !ibcmp(d, "perl", 4))
3789 #ifdef ALTERNATE_SHEBANG
3791 * If the ALTERNATE_SHEBANG on this system starts with a
3792 * character that can be part of a Perl expression, then if
3793 * we see it but not "perl", we're probably looking at the
3794 * start of Perl code, not a request to hand off to some
3795 * other interpreter. Similarly, if "perl" is there, but
3796 * not in the first 'word' of the line, we assume the line
3797 * contains the start of the Perl program.
3799 if (d && *s != '#') {
3800 const char *c = ipath;
3801 while (*c && !strchr("; \t\r\n\f\v#", *c))
3804 d = NULL; /* "perl" not in first word; ignore */
3806 *s = '#'; /* Don't try to parse shebang line */
3808 #endif /* ALTERNATE_SHEBANG */
3809 #ifndef MACOS_TRADITIONAL
3814 !instr(s,"indir") &&
3815 instr(PL_origargv[0],"perl"))
3822 while (s < PL_bufend && isSPACE(*s))
3824 if (s < PL_bufend) {
3825 Newxz(newargv,PL_origargc+3,char*);
3827 while (s < PL_bufend && !isSPACE(*s))
3830 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3833 newargv = PL_origargv;
3836 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3838 Perl_croak(aTHX_ "Can't exec %s", ipath);
3842 while (*d && !isSPACE(*d))
3844 while (SPACE_OR_TAB(*d))
3848 const bool switches_done = PL_doswitches;
3849 const U32 oldpdb = PL_perldb;
3850 const bool oldn = PL_minus_n;
3851 const bool oldp = PL_minus_p;
3854 if (*d == 'M' || *d == 'm' || *d == 'C') {
3855 const char * const m = d;
3856 while (*d && !isSPACE(*d))
3858 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3861 d = moreswitches(d);
3863 if (PL_doswitches && !switches_done) {
3864 int argc = PL_origargc;
3865 char **argv = PL_origargv;
3868 } while (argc && argv[0][0] == '-' && argv[0][1]);
3869 init_argv_symbols(argc,argv);
3871 if ((PERLDB_LINE && !oldpdb) ||
3872 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3873 /* if we have already added "LINE: while (<>) {",
3874 we must not do it again */
3876 sv_setpvn(PL_linestr, "", 0);
3877 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3878 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3879 PL_last_lop = PL_last_uni = NULL;
3880 PL_preambled = FALSE;
3882 (void)gv_fetchfile(PL_origfilename);
3889 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3891 PL_lex_state = LEX_FORMLINE;
3896 #ifdef PERL_STRICT_CR
3897 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3899 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3901 case ' ': case '\t': case '\f': case 013:
3902 #ifdef MACOS_TRADITIONAL
3906 PL_realtokenstart = -1;
3915 PL_realtokenstart = -1;
3919 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3920 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3921 /* handle eval qq[#line 1 "foo"\n ...] */
3922 CopLINE_dec(PL_curcop);
3925 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3927 if (!PL_in_eval || PL_rsfp)
3932 while (d < PL_bufend && *d != '\n')
3936 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3937 Perl_croak(aTHX_ "panic: input overflow");
3940 PL_thiswhite = newSVpvn(s, d - s);
3945 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3947 PL_lex_state = LEX_FORMLINE;
3953 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3954 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3957 TOKEN(PEG); /* make sure any #! line is accessible */
3962 /* if (PL_madskills && PL_lex_formbrack) { */
3964 while (d < PL_bufend && *d != '\n')
3968 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3969 Perl_croak(aTHX_ "panic: input overflow");
3970 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3972 PL_thiswhite = newSVpvs("");
3973 if (CopLINE(PL_curcop) == 1) {
3974 sv_setpvn(PL_thiswhite, "", 0);
3977 sv_catpvn(PL_thiswhite, s, d - s);
3991 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3999 while (s < PL_bufend && SPACE_OR_TAB(*s))
4002 if (strnEQ(s,"=>",2)) {
4003 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4004 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4005 OPERATOR('-'); /* unary minus */
4007 PL_last_uni = PL_oldbufptr;
4009 case 'r': ftst = OP_FTEREAD; break;
4010 case 'w': ftst = OP_FTEWRITE; break;
4011 case 'x': ftst = OP_FTEEXEC; break;
4012 case 'o': ftst = OP_FTEOWNED; break;
4013 case 'R': ftst = OP_FTRREAD; break;
4014 case 'W': ftst = OP_FTRWRITE; break;
4015 case 'X': ftst = OP_FTREXEC; break;
4016 case 'O': ftst = OP_FTROWNED; break;
4017 case 'e': ftst = OP_FTIS; break;
4018 case 'z': ftst = OP_FTZERO; break;
4019 case 's': ftst = OP_FTSIZE; break;
4020 case 'f': ftst = OP_FTFILE; break;
4021 case 'd': ftst = OP_FTDIR; break;
4022 case 'l': ftst = OP_FTLINK; break;
4023 case 'p': ftst = OP_FTPIPE; break;
4024 case 'S': ftst = OP_FTSOCK; break;
4025 case 'u': ftst = OP_FTSUID; break;
4026 case 'g': ftst = OP_FTSGID; break;
4027 case 'k': ftst = OP_FTSVTX; break;
4028 case 'b': ftst = OP_FTBLK; break;
4029 case 'c': ftst = OP_FTCHR; break;
4030 case 't': ftst = OP_FTTTY; break;
4031 case 'T': ftst = OP_FTTEXT; break;
4032 case 'B': ftst = OP_FTBINARY; break;
4033 case 'M': case 'A': case 'C':
4034 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4036 case 'M': ftst = OP_FTMTIME; break;
4037 case 'A': ftst = OP_FTATIME; break;
4038 case 'C': ftst = OP_FTCTIME; break;
4046 PL_last_lop_op = (OPCODE)ftst;
4047 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4048 "### Saw file test %c\n", (int)tmp);
4053 /* Assume it was a minus followed by a one-letter named
4054 * subroutine call (or a -bareword), then. */
4055 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4056 "### '-%c' looked like a file test but was not\n",
4063 const char tmp = *s++;
4066 if (PL_expect == XOPERATOR)
4071 else if (*s == '>') {
4074 if (isIDFIRST_lazy_if(s,UTF)) {
4075 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4083 if (PL_expect == XOPERATOR)
4086 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4088 OPERATOR('-'); /* unary minus */
4094 const char tmp = *s++;
4097 if (PL_expect == XOPERATOR)
4102 if (PL_expect == XOPERATOR)
4105 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4112 if (PL_expect != XOPERATOR) {
4113 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4114 PL_expect = XOPERATOR;
4115 force_ident(PL_tokenbuf, '*');
4128 if (PL_expect == XOPERATOR) {
4132 PL_tokenbuf[0] = '%';
4133 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
4134 if (!PL_tokenbuf[1]) {
4137 PL_pending_ident = '%';
4148 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
4149 && FEATURE_IS_ENABLED("~~"))
4156 const char tmp = *s++;
4162 goto just_a_word_zero_gv;
4165 switch (PL_expect) {
4171 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4173 PL_bufptr = s; /* update in case we back off */
4179 PL_expect = XTERMBLOCK;
4182 stuffstart = s - SvPVX(PL_linestr) - 1;
4186 while (isIDFIRST_lazy_if(s,UTF)) {
4189 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4190 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4191 if (tmp < 0) tmp = -tmp;
4206 sv = newSVpvn(s, len);
4208 d = scan_str(d,TRUE,TRUE);
4210 /* MUST advance bufptr here to avoid bogus
4211 "at end of line" context messages from yyerror().
4213 PL_bufptr = s + len;
4214 yyerror("Unterminated attribute parameter in attribute list");
4218 return REPORT(0); /* EOF indicator */
4222 sv_catsv(sv, PL_lex_stuff);
4223 attrs = append_elem(OP_LIST, attrs,
4224 newSVOP(OP_CONST, 0, sv));
4225 SvREFCNT_dec(PL_lex_stuff);
4226 PL_lex_stuff = NULL;
4229 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4231 if (PL_in_my == KEY_our) {
4233 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4235 /* skip to avoid loading attributes.pm */
4237 deprecate(":unique");
4240 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4243 /* NOTE: any CV attrs applied here need to be part of
4244 the CVf_BUILTIN_ATTRS define in cv.h! */
4245 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4247 CvLVALUE_on(PL_compcv);
4249 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4251 CvLOCKED_on(PL_compcv);
4253 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4255 CvMETHOD_on(PL_compcv);
4257 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4259 CvASSERTION_on(PL_compcv);
4261 /* After we've set the flags, it could be argued that
4262 we don't need to do the attributes.pm-based setting
4263 process, and shouldn't bother appending recognized
4264 flags. To experiment with that, uncomment the
4265 following "else". (Note that's already been
4266 uncommented. That keeps the above-applied built-in
4267 attributes from being intercepted (and possibly
4268 rejected) by a package's attribute routines, but is
4269 justified by the performance win for the common case
4270 of applying only built-in attributes.) */
4272 attrs = append_elem(OP_LIST, attrs,
4273 newSVOP(OP_CONST, 0,
4277 if (*s == ':' && s[1] != ':')
4280 break; /* require real whitespace or :'s */
4281 /* XXX losing whitespace on sequential attributes here */
4285 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4286 if (*s != ';' && *s != '}' && *s != tmp
4287 && (tmp != '=' || *s != ')')) {
4288 const char q = ((*s == '\'') ? '"' : '\'');
4289 /* If here for an expression, and parsed no attrs, back
4291 if (tmp == '=' && !attrs) {
4295 /* MUST advance bufptr here to avoid bogus "at end of line"
4296 context messages from yyerror().
4299 yyerror( (const char *)
4301 ? Perl_form(aTHX_ "Invalid separator character "
4302 "%c%c%c in attribute list", q, *s, q)
4303 : "Unterminated attribute list" ) );
4311 start_force(PL_curforce);
4312 NEXTVAL_NEXTTOKE.opval = attrs;
4313 CURMAD('_', PL_nextwhite);
4318 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4319 (s - SvPVX(PL_linestr)) - stuffstart);
4327 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4328 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4336 const char tmp = *s++;
4341 const char tmp = *s++;
4349 if (PL_lex_brackets <= 0)
4350 yyerror("Unmatched right square bracket");
4353 if (PL_lex_state == LEX_INTERPNORMAL) {
4354 if (PL_lex_brackets == 0) {
4355 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4356 PL_lex_state = LEX_INTERPEND;
4363 if (PL_lex_brackets > 100) {
4364 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4366 switch (PL_expect) {
4368 if (PL_lex_formbrack) {
4372 if (PL_oldoldbufptr == PL_last_lop)
4373 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4375 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4376 OPERATOR(HASHBRACK);
4378 while (s < PL_bufend && SPACE_OR_TAB(*s))
4381 PL_tokenbuf[0] = '\0';
4382 if (d < PL_bufend && *d == '-') {
4383 PL_tokenbuf[0] = '-';
4385 while (d < PL_bufend && SPACE_OR_TAB(*d))
4388 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4389 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4391 while (d < PL_bufend && SPACE_OR_TAB(*d))
4394 const char minus = (PL_tokenbuf[0] == '-');
4395 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4403 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4408 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4413 if (PL_oldoldbufptr == PL_last_lop)
4414 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4416 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4419 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4421 /* This hack is to get the ${} in the message. */
4423 yyerror("syntax error");
4426 OPERATOR(HASHBRACK);
4428 /* This hack serves to disambiguate a pair of curlies
4429 * as being a block or an anon hash. Normally, expectation
4430 * determines that, but in cases where we're not in a
4431 * position to expect anything in particular (like inside
4432 * eval"") we have to resolve the ambiguity. This code
4433 * covers the case where the first term in the curlies is a
4434 * quoted string. Most other cases need to be explicitly
4435 * disambiguated by prepending a "+" before the opening
4436 * curly in order to force resolution as an anon hash.
4438 * XXX should probably propagate the outer expectation
4439 * into eval"" to rely less on this hack, but that could
4440 * potentially break current behavior of eval"".
4444 if (*s == '\'' || *s == '"' || *s == '`') {
4445 /* common case: get past first string, handling escapes */
4446 for (t++; t < PL_bufend && *t != *s;)
4447 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4451 else if (*s == 'q') {
4454 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4457 /* skip q//-like construct */
4459 char open, close, term;
4462 while (t < PL_bufend && isSPACE(*t))
4464 /* check for q => */
4465 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4466 OPERATOR(HASHBRACK);
4470 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4474 for (t++; t < PL_bufend; t++) {
4475 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4477 else if (*t == open)
4481 for (t++; t < PL_bufend; t++) {
4482 if (*t == '\\' && t+1 < PL_bufend)
4484 else if (*t == close && --brackets <= 0)
4486 else if (*t == open)
4493 /* skip plain q word */
4494 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4497 else if (isALNUM_lazy_if(t,UTF)) {
4499 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4502 while (t < PL_bufend && isSPACE(*t))
4504 /* if comma follows first term, call it an anon hash */
4505 /* XXX it could be a comma expression with loop modifiers */
4506 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4507 || (*t == '=' && t[1] == '>')))
4508 OPERATOR(HASHBRACK);
4509 if (PL_expect == XREF)
4512 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4518 yylval.ival = CopLINE(PL_curcop);
4519 if (isSPACE(*s) || *s == '#')
4520 PL_copline = NOLINE; /* invalidate current command line number */
4525 if (PL_lex_brackets <= 0)
4526 yyerror("Unmatched right curly bracket");
4528 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4529 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4530 PL_lex_formbrack = 0;
4531 if (PL_lex_state == LEX_INTERPNORMAL) {
4532 if (PL_lex_brackets == 0) {
4533 if (PL_expect & XFAKEBRACK) {
4534 PL_expect &= XENUMMASK;
4535 PL_lex_state = LEX_INTERPEND;
4540 PL_thiswhite = newSVpvs("");
4541 sv_catpvn(PL_thiswhite,"}",1);
4544 return yylex(); /* ignore fake brackets */
4546 if (*s == '-' && s[1] == '>')
4547 PL_lex_state = LEX_INTERPENDMAYBE;
4548 else if (*s != '[' && *s != '{')
4549 PL_lex_state = LEX_INTERPEND;
4552 if (PL_expect & XFAKEBRACK) {
4553 PL_expect &= XENUMMASK;
4555 return yylex(); /* ignore fake brackets */
4557 start_force(PL_curforce);
4559 curmad('X', newSVpvn(s-1,1));
4560 CURMAD('_', PL_thiswhite);
4565 PL_thistoken = newSVpvs("");
4573 if (PL_expect == XOPERATOR) {
4574 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4575 && isIDFIRST_lazy_if(s,UTF))
4577 CopLINE_dec(PL_curcop);
4578 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4579 CopLINE_inc(PL_curcop);
4584 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4586 PL_expect = XOPERATOR;
4587 force_ident(PL_tokenbuf, '&');
4591 yylval.ival = (OPpENTERSUB_AMPER<<8);
4603 const char tmp = *s++;
4610 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4611 && strchr("+-*/%.^&|<",tmp))
4612 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4613 "Reversed %c= operator",(int)tmp);
4615 if (PL_expect == XSTATE && isALPHA(tmp) &&
4616 (s == PL_linestart+1 || s[-2] == '\n') )
4618 if (PL_in_eval && !PL_rsfp) {
4623 if (strnEQ(s,"=cut",4)) {
4639 PL_thiswhite = newSVpvs("");
4640 sv_catpvn(PL_thiswhite, PL_linestart,
4641 PL_bufend - PL_linestart);
4645 PL_doextract = TRUE;
4649 if (PL_lex_brackets < PL_lex_formbrack) {
4651 #ifdef PERL_STRICT_CR
4652 while (SPACE_OR_TAB(*t))
4654 while (SPACE_OR_TAB(*t) || *t == '\r')
4657 if (*t == '\n' || *t == '#') {
4668 const char tmp = *s++;
4670 /* was this !=~ where !~ was meant?
4671 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4673 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4674 const char *t = s+1;
4676 while (t < PL_bufend && isSPACE(*t))
4679 if (*t == '/' || *t == '?' ||
4680 ((*t == 'm' || *t == 's' || *t == 'y')
4681 && !isALNUM(t[1])) ||
4682 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4683 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4684 "!=~ should be !~");
4694 if (PL_expect != XOPERATOR) {
4695 if (s[1] != '<' && !strchr(s,'>'))
4698 s = scan_heredoc(s);
4700 s = scan_inputsymbol(s);
4701 TERM(sublex_start());
4707 SHop(OP_LEFT_SHIFT);
4721 const char tmp = *s++;
4723 SHop(OP_RIGHT_SHIFT);
4724 else if (tmp == '=')
4733 if (PL_expect == XOPERATOR) {
4734 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4736 deprecate_old(commaless_variable_list);
4737 return REPORT(','); /* grandfather non-comma-format format */
4741 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4742 PL_tokenbuf[0] = '@';
4743 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4744 sizeof PL_tokenbuf - 1, FALSE);
4745 if (PL_expect == XOPERATOR)
4746 no_op("Array length", s);
4747 if (!PL_tokenbuf[1])
4749 PL_expect = XOPERATOR;
4750 PL_pending_ident = '#';
4754 PL_tokenbuf[0] = '$';
4755 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4756 sizeof PL_tokenbuf - 1, FALSE);
4757 if (PL_expect == XOPERATOR)
4759 if (!PL_tokenbuf[1]) {
4761 yyerror("Final $ should be \\$ or $name");
4765 /* This kludge not intended to be bulletproof. */
4766 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4767 yylval.opval = newSVOP(OP_CONST, 0,
4768 newSViv(CopARYBASE_get(&PL_compiling)));
4769 yylval.opval->op_private = OPpCONST_ARYBASE;
4775 const char tmp = *s;
4776 if (PL_lex_state == LEX_NORMAL)
4779 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4780 && intuit_more(s)) {
4782 PL_tokenbuf[0] = '@';
4783 if (ckWARN(WARN_SYNTAX)) {
4786 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4789 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4790 while (t < PL_bufend && *t != ']')
4792 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4793 "Multidimensional syntax %.*s not supported",
4794 (int)((t - PL_bufptr) + 1), PL_bufptr);
4798 else if (*s == '{') {
4800 PL_tokenbuf[0] = '%';
4801 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4802 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4804 char tmpbuf[sizeof PL_tokenbuf];
4807 } while (isSPACE(*t));
4808 if (isIDFIRST_lazy_if(t,UTF)) {
4810 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4814 if (*t == ';' && get_cv(tmpbuf, FALSE))
4815 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4816 "You need to quote \"%s\"",
4823 PL_expect = XOPERATOR;
4824 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4825 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4826 if (!islop || PL_last_lop_op == OP_GREPSTART)
4827 PL_expect = XOPERATOR;
4828 else if (strchr("$@\"'`q", *s))
4829 PL_expect = XTERM; /* e.g. print $fh "foo" */
4830 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4831 PL_expect = XTERM; /* e.g. print $fh &sub */
4832 else if (isIDFIRST_lazy_if(s,UTF)) {
4833 char tmpbuf[sizeof PL_tokenbuf];
4835 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4836 if ((t2 = keyword(tmpbuf, len, 0))) {
4837 /* binary operators exclude handle interpretations */
4849 PL_expect = XTERM; /* e.g. print $fh length() */
4854 PL_expect = XTERM; /* e.g. print $fh subr() */
4857 else if (isDIGIT(*s))
4858 PL_expect = XTERM; /* e.g. print $fh 3 */
4859 else if (*s == '.' && isDIGIT(s[1]))
4860 PL_expect = XTERM; /* e.g. print $fh .3 */
4861 else if ((*s == '?' || *s == '-' || *s == '+')
4862 && !isSPACE(s[1]) && s[1] != '=')
4863 PL_expect = XTERM; /* e.g. print $fh -1 */
4864 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4866 PL_expect = XTERM; /* e.g. print $fh /.../
4867 XXX except DORDOR operator
4869 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4871 PL_expect = XTERM; /* print $fh <<"EOF" */
4874 PL_pending_ident = '$';
4878 if (PL_expect == XOPERATOR)
4880 PL_tokenbuf[0] = '@';
4881 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4882 if (!PL_tokenbuf[1]) {
4885 if (PL_lex_state == LEX_NORMAL)
4887 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4889 PL_tokenbuf[0] = '%';
4891 /* Warn about @ where they meant $. */
4892 if (*s == '[' || *s == '{') {
4893 if (ckWARN(WARN_SYNTAX)) {
4894 const char *t = s + 1;
4895 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4897 if (*t == '}' || *t == ']') {
4899 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4900 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4901 "Scalar value %.*s better written as $%.*s",
4902 (int)(t-PL_bufptr), PL_bufptr,
4903 (int)(t-PL_bufptr-1), PL_bufptr+1);
4908 PL_pending_ident = '@';
4911 case '/': /* may be division, defined-or, or pattern */
4912 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4916 case '?': /* may either be conditional or pattern */
4917 if(PL_expect == XOPERATOR) {
4925 /* A // operator. */
4935 /* Disable warning on "study /blah/" */
4936 if (PL_oldoldbufptr == PL_last_uni
4937 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4938 || memNE(PL_last_uni, "study", 5)
4939 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4942 s = scan_pat(s,OP_MATCH);
4943 TERM(sublex_start());
4947 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4948 #ifdef PERL_STRICT_CR
4951 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4953 && (s == PL_linestart || s[-1] == '\n') )
4955 PL_lex_formbrack = 0;
4959 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4965 yylval.ival = OPf_SPECIAL;
4971 if (PL_expect != XOPERATOR)
4976 case '0': case '1': case '2': case '3': case '4':
4977 case '5': case '6': case '7': case '8': case '9':
4978 s = scan_num(s, &yylval);
4979 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
4980 if (PL_expect == XOPERATOR)
4985 s = scan_str(s,!!PL_madskills,FALSE);
4986 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4987 if (PL_expect == XOPERATOR) {
4988 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4990 deprecate_old(commaless_variable_list);
4991 return REPORT(','); /* grandfather non-comma-format format */
4998 yylval.ival = OP_CONST;
4999 TERM(sublex_start());
5002 s = scan_str(s,!!PL_madskills,FALSE);
5003 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5004 if (PL_expect == XOPERATOR) {
5005 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5007 deprecate_old(commaless_variable_list);
5008 return REPORT(','); /* grandfather non-comma-format format */
5015 yylval.ival = OP_CONST;
5016 /* FIXME. I think that this can be const if char *d is replaced by
5017 more localised variables. */
5018 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5019 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5020 yylval.ival = OP_STRINGIFY;
5024 TERM(sublex_start());
5027 s = scan_str(s,!!PL_madskills,FALSE);
5028 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5029 if (PL_expect == XOPERATOR)
5030 no_op("Backticks",s);
5033 readpipe_override();
5034 TERM(sublex_start());
5038 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5039 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5041 if (PL_expect == XOPERATOR)
5042 no_op("Backslash",s);
5046 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5047 char *start = s + 2;
5048 while (isDIGIT(*start) || *start == '_')
5050 if (*start == '.' && isDIGIT(start[1])) {
5051 s = scan_num(s, &yylval);
5054 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5055 else if (!isALPHA(*start) && (PL_expect == XTERM
5056 || PL_expect == XREF || PL_expect == XSTATE
5057 || PL_expect == XTERMORDORDOR)) {
5058 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5059 const char c = *start;
5062 gv = gv_fetchpv(s, 0, SVt_PVCV);
5065 s = scan_num(s, &yylval);
5072 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5114 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5116 /* Some keywords can be followed by any delimiter, including ':' */
5117 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5118 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5119 (PL_tokenbuf[0] == 'q' &&
5120 strchr("qwxr", PL_tokenbuf[1])))));
5122 /* x::* is just a word, unless x is "CORE" */
5123 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5127 while (d < PL_bufend && isSPACE(*d))
5128 d++; /* no comments skipped here, or s### is misparsed */
5130 /* Is this a label? */
5131 if (!tmp && PL_expect == XSTATE
5132 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5134 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5139 /* Check for keywords */
5140 tmp = keyword(PL_tokenbuf, len, 0);
5142 /* Is this a word before a => operator? */
5143 if (*d == '=' && d[1] == '>') {
5146 = (OP*)newSVOP(OP_CONST, 0,
5147 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5148 yylval.opval->op_private = OPpCONST_BARE;
5152 if (tmp < 0) { /* second-class keyword? */
5153 GV *ogv = NULL; /* override (winner) */
5154 GV *hgv = NULL; /* hidden (loser) */
5155 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5157 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5160 if (GvIMPORTED_CV(gv))
5162 else if (! CvMETHOD(cv))
5166 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5167 (gv = *gvp) != (GV*)&PL_sv_undef &&
5168 GvCVu(gv) && GvIMPORTED_CV(gv))
5175 tmp = 0; /* overridden by import or by GLOBAL */
5178 && -tmp==KEY_lock /* XXX generalizable kludge */
5180 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5182 tmp = 0; /* any sub overrides "weak" keyword */
5184 else { /* no override */
5186 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5187 Perl_warner(aTHX_ packWARN(WARN_MISC),
5188 "dump() better written as CORE::dump()");
5192 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5193 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5194 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5195 "Ambiguous call resolved as CORE::%s(), %s",
5196 GvENAME(hgv), "qualify as such or use &");
5203 default: /* not a keyword */
5204 /* Trade off - by using this evil construction we can pull the
5205 variable gv into the block labelled keylookup. If not, then
5206 we have to give it function scope so that the goto from the
5207 earlier ':' case doesn't bypass the initialisation. */
5209 just_a_word_zero_gv:
5217 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5220 SV *nextPL_nextwhite = 0;
5224 /* Get the rest if it looks like a package qualifier */
5226 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5228 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5231 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5232 *s == '\'' ? "'" : "::");
5237 if (PL_expect == XOPERATOR) {
5238 if (PL_bufptr == PL_linestart) {
5239 CopLINE_dec(PL_curcop);
5240 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5241 CopLINE_inc(PL_curcop);
5244 no_op("Bareword",s);
5247 /* Look for a subroutine with this name in current package,
5248 unless name is "Foo::", in which case Foo is a bearword
5249 (and a package name). */
5251 if (len > 2 && !PL_madskills &&
5252 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5254 if (ckWARN(WARN_BAREWORD)
5255 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5256 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5257 "Bareword \"%s\" refers to nonexistent package",
5260 PL_tokenbuf[len] = '\0';
5266 /* Mustn't actually add anything to a symbol table.
5267 But also don't want to "initialise" any placeholder
5268 constants that might already be there into full
5269 blown PVGVs with attached PVCV. */
5270 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5271 GV_NOADD_NOINIT, SVt_PVCV);
5276 /* if we saw a global override before, get the right name */
5279 sv = newSVpvs("CORE::GLOBAL::");
5280 sv_catpv(sv,PL_tokenbuf);
5283 /* If len is 0, newSVpv does strlen(), which is correct.
5284 If len is non-zero, then it will be the true length,
5285 and so the scalar will be created correctly. */
5286 sv = newSVpv(PL_tokenbuf,len);
5289 if (PL_madskills && !PL_thistoken) {
5290 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5291 PL_thistoken = newSVpv(start,s - start);
5292 PL_realtokenstart = s - SvPVX(PL_linestr);
5296 /* Presume this is going to be a bareword of some sort. */
5299 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5300 yylval.opval->op_private = OPpCONST_BARE;
5301 /* UTF-8 package name? */
5302 if (UTF && !IN_BYTES &&
5303 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5306 /* And if "Foo::", then that's what it certainly is. */
5311 /* Do the explicit type check so that we don't need to force
5312 the initialisation of the symbol table to have a real GV.
5313 Beware - gv may not really be a PVGV, cv may not really be
5314 a PVCV, (because of the space optimisations that gv_init
5315 understands) But they're true if for this symbol there is
5316 respectively a typeglob and a subroutine.
5318 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5319 /* Real typeglob, so get the real subroutine: */
5321 /* A proxy for a subroutine in this package? */
5322 : SvOK(gv) ? (CV *) gv : NULL)
5325 /* See if it's the indirect object for a list operator. */
5327 if (PL_oldoldbufptr &&
5328 PL_oldoldbufptr < PL_bufptr &&
5329 (PL_oldoldbufptr == PL_last_lop
5330 || PL_oldoldbufptr == PL_last_uni) &&
5331 /* NO SKIPSPACE BEFORE HERE! */
5332 (PL_expect == XREF ||
5333 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5335 bool immediate_paren = *s == '(';
5337 /* (Now we can afford to cross potential line boundary.) */
5338 s = SKIPSPACE2(s,nextPL_nextwhite);
5340 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5343 /* Two barewords in a row may indicate method call. */
5345 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5346 (tmp = intuit_method(s, gv, cv)))
5349 /* If not a declared subroutine, it's an indirect object. */
5350 /* (But it's an indir obj regardless for sort.) */
5351 /* Also, if "_" follows a filetest operator, it's a bareword */
5354 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5356 (PL_last_lop_op != OP_MAPSTART &&
5357 PL_last_lop_op != OP_GREPSTART))))
5358 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5359 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5362 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5367 PL_expect = XOPERATOR;
5370 s = SKIPSPACE2(s,nextPL_nextwhite);
5371 PL_nextwhite = nextPL_nextwhite;
5376 /* Is this a word before a => operator? */
5377 if (*s == '=' && s[1] == '>' && !pkgname) {
5379 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5380 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5381 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5385 /* If followed by a paren, it's certainly a subroutine. */
5390 while (SPACE_OR_TAB(*d))
5392 if (*d == ')' && (sv = gv_const_sv(gv))) {
5396 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5397 sv_catpvn(PL_thistoken, par, s - par);
5399 sv_free(PL_nextwhite);
5409 PL_nextwhite = PL_thiswhite;
5412 start_force(PL_curforce);
5414 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5415 PL_expect = XOPERATOR;
5418 PL_nextwhite = nextPL_nextwhite;
5419 curmad('X', PL_thistoken);
5420 PL_thistoken = newSVpvs("");
5428 /* If followed by var or block, call it a method (unless sub) */
5430 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5431 PL_last_lop = PL_oldbufptr;
5432 PL_last_lop_op = OP_METHOD;
5436 /* If followed by a bareword, see if it looks like indir obj. */
5439 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5440 && (tmp = intuit_method(s, gv, cv)))
5443 /* Not a method, so call it a subroutine (if defined) */
5446 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5447 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5448 "Ambiguous use of -%s resolved as -&%s()",
5449 PL_tokenbuf, PL_tokenbuf);
5450 /* Check for a constant sub */
5451 if ((sv = gv_const_sv(gv))) {
5453 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5454 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5455 yylval.opval->op_private = 0;
5459 /* Resolve to GV now. */
5460 if (SvTYPE(gv) != SVt_PVGV) {
5461 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5462 assert (SvTYPE(gv) == SVt_PVGV);
5463 /* cv must have been some sort of placeholder, so
5464 now needs replacing with a real code reference. */
5468 op_free(yylval.opval);
5469 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5470 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5471 PL_last_lop = PL_oldbufptr;
5472 PL_last_lop_op = OP_ENTERSUB;
5473 /* Is there a prototype? */
5481 const char *proto = SvPV_const((SV*)cv, protolen);
5484 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5486 while (*proto == ';')
5488 if (*proto == '&' && *s == '{') {
5489 sv_setpv(PL_subname,
5492 "__ANON__" : "__ANON__::__ANON__"));
5499 PL_nextwhite = PL_thiswhite;
5502 start_force(PL_curforce);
5503 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5506 PL_nextwhite = nextPL_nextwhite;
5507 curmad('X', PL_thistoken);
5508 PL_thistoken = newSVpvs("");
5515 /* Guess harder when madskills require "best effort". */
5516 if (PL_madskills && (!gv || !GvCVu(gv))) {
5517 int probable_sub = 0;
5518 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5520 else if (isALPHA(*s)) {
5524 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5525 if (!keyword(tmpbuf, tmplen, 0))
5528 while (d < PL_bufend && isSPACE(*d))
5530 if (*d == '=' && d[1] == '>')
5535 gv = gv_fetchpv(PL_tokenbuf, TRUE, SVt_PVCV);
5536 op_free(yylval.opval);
5537 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5538 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5539 PL_last_lop = PL_oldbufptr;
5540 PL_last_lop_op = OP_ENTERSUB;
5541 PL_nextwhite = PL_thiswhite;
5543 start_force(PL_curforce);
5544 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5546 PL_nextwhite = nextPL_nextwhite;
5547 curmad('X', PL_thistoken);
5548 PL_thistoken = newSVpvs("");
5553 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5560 /* Call it a bare word */
5562 if (PL_hints & HINT_STRICT_SUBS)
5563 yylval.opval->op_private |= OPpCONST_STRICT;
5566 if (lastchar != '-') {
5567 if (ckWARN(WARN_RESERVED)) {
5571 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
5572 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5579 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5580 && ckWARN_d(WARN_AMBIGUOUS)) {
5581 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5582 "Operator or semicolon missing before %c%s",
5583 lastchar, PL_tokenbuf);
5584 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5585 "Ambiguous use of %c resolved as operator %c",
5586 lastchar, lastchar);
5592 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5593 newSVpv(CopFILE(PL_curcop),0));
5597 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5598 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5601 case KEY___PACKAGE__:
5602 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5604 ? newSVhek(HvNAME_HEK(PL_curstash))
5611 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5612 const char *pname = "main";
5613 if (PL_tokenbuf[2] == 'D')
5614 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5615 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5619 GvIOp(gv) = newIO();
5620 IoIFP(GvIOp(gv)) = PL_rsfp;
5621 #if defined(HAS_FCNTL) && defined(F_SETFD)
5623 const int fd = PerlIO_fileno(PL_rsfp);
5624 fcntl(fd,F_SETFD,fd >= 3);
5627 /* Mark this internal pseudo-handle as clean */
5628 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5630 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5631 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5632 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5634 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5635 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5636 /* if the script was opened in binmode, we need to revert
5637 * it to text mode for compatibility; but only iff it has CRs
5638 * XXX this is a questionable hack at best. */
5639 if (PL_bufend-PL_bufptr > 2
5640 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5643 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5644 loc = PerlIO_tell(PL_rsfp);
5645 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5648 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5650 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5651 #endif /* NETWARE */
5652 #ifdef PERLIO_IS_STDIO /* really? */
5653 # if defined(__BORLANDC__)
5654 /* XXX see note in do_binmode() */
5655 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5659 PerlIO_seek(PL_rsfp, loc, 0);
5663 #ifdef PERLIO_LAYERS
5666 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5667 else if (PL_encoding) {
5674 XPUSHs(PL_encoding);
5676 call_method("name", G_SCALAR);
5680 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5681 Perl_form(aTHX_ ":encoding(%"SVf")",
5690 if (PL_realtokenstart >= 0) {
5691 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5693 PL_endwhite = newSVpvs("");
5694 sv_catsv(PL_endwhite, PL_thiswhite);
5696 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5697 PL_realtokenstart = -1;
5699 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5700 SvCUR(PL_endwhite))) != Nullch) ;
5715 if (PL_expect == XSTATE) {
5722 if (*s == ':' && s[1] == ':') {
5725 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5726 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5727 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5730 else if (tmp == KEY_require || tmp == KEY_do)
5731 /* that's a way to remember we saw "CORE::" */
5744 LOP(OP_ACCEPT,XTERM);
5750 LOP(OP_ATAN2,XTERM);
5756 LOP(OP_BINMODE,XTERM);
5759 LOP(OP_BLESS,XTERM);
5768 /* When 'use switch' is in effect, continue has a dual
5769 life as a control operator. */
5771 if (!FEATURE_IS_ENABLED("switch"))
5774 /* We have to disambiguate the two senses of
5775 "continue". If the next token is a '{' then
5776 treat it as the start of a continue block;
5777 otherwise treat it as a control operator.
5789 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5806 if (!PL_cryptseen) {
5807 PL_cryptseen = TRUE;
5811 LOP(OP_CRYPT,XTERM);
5814 LOP(OP_CHMOD,XTERM);
5817 LOP(OP_CHOWN,XTERM);
5820 LOP(OP_CONNECT,XTERM);
5839 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5840 if (orig_keyword == KEY_do) {
5849 PL_hints |= HINT_BLOCK_SCOPE;
5859 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5860 LOP(OP_DBMOPEN,XTERM);
5866 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5873 yylval.ival = CopLINE(PL_curcop);
5889 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5890 UNIBRACK(OP_ENTEREVAL);
5908 case KEY_endhostent:
5914 case KEY_endservent:
5917 case KEY_endprotoent:
5928 yylval.ival = CopLINE(PL_curcop);
5930 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5933 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5936 if ((PL_bufend - p) >= 3 &&
5937 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5939 else if ((PL_bufend - p) >= 4 &&
5940 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5943 if (isIDFIRST_lazy_if(p,UTF)) {
5944 p = scan_ident(p, PL_bufend,
5945 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5949 Perl_croak(aTHX_ "Missing $ on loop variable");
5951 s = SvPVX(PL_linestr) + soff;
5957 LOP(OP_FORMLINE,XTERM);
5963 LOP(OP_FCNTL,XTERM);
5969 LOP(OP_FLOCK,XTERM);
5978 LOP(OP_GREPSTART, XREF);
5981 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5996 case KEY_getpriority:
5997 LOP(OP_GETPRIORITY,XTERM);
5999 case KEY_getprotobyname:
6002 case KEY_getprotobynumber:
6003 LOP(OP_GPBYNUMBER,XTERM);
6005 case KEY_getprotoent:
6017 case KEY_getpeername:
6018 UNI(OP_GETPEERNAME);
6020 case KEY_gethostbyname:
6023 case KEY_gethostbyaddr:
6024 LOP(OP_GHBYADDR,XTERM);
6026 case KEY_gethostent:
6029 case KEY_getnetbyname:
6032 case KEY_getnetbyaddr:
6033 LOP(OP_GNBYADDR,XTERM);
6038 case KEY_getservbyname:
6039 LOP(OP_GSBYNAME,XTERM);
6041 case KEY_getservbyport:
6042 LOP(OP_GSBYPORT,XTERM);
6044 case KEY_getservent:
6047 case KEY_getsockname:
6048 UNI(OP_GETSOCKNAME);
6050 case KEY_getsockopt:
6051 LOP(OP_GSOCKOPT,XTERM);
6066 yylval.ival = CopLINE(PL_curcop);
6077 yylval.ival = CopLINE(PL_curcop);
6081 LOP(OP_INDEX,XTERM);
6087 LOP(OP_IOCTL,XTERM);
6099 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6131 LOP(OP_LISTEN,XTERM);
6140 s = scan_pat(s,OP_MATCH);
6141 TERM(sublex_start());
6144 LOP(OP_MAPSTART, XREF);
6147 LOP(OP_MKDIR,XTERM);
6150 LOP(OP_MSGCTL,XTERM);
6153 LOP(OP_MSGGET,XTERM);
6156 LOP(OP_MSGRCV,XTERM);
6159 LOP(OP_MSGSND,XTERM);
6166 if (isIDFIRST_lazy_if(s,UTF)) {
6170 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6171 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6173 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6174 if (!PL_in_my_stash) {
6177 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6181 if (PL_madskills) { /* just add type to declarator token */
6182 sv_catsv(PL_thistoken, PL_nextwhite);
6184 sv_catpvn(PL_thistoken, start, s - start);
6192 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6199 s = tokenize_use(0, s);
6203 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6210 if (isIDFIRST_lazy_if(s,UTF)) {
6212 for (d = s; isALNUM_lazy_if(d,UTF);)
6214 for (t=d; isSPACE(*t);)
6216 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6218 && !(t[0] == '=' && t[1] == '>')
6220 int parms_len = (int)(d-s);
6221 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6222 "Precedence problem: open %.*s should be open(%.*s)",
6223 parms_len, s, parms_len, s);
6229 yylval.ival = OP_OR;
6239 LOP(OP_OPEN_DIR,XTERM);
6242 checkcomma(s,PL_tokenbuf,"filehandle");
6246 checkcomma(s,PL_tokenbuf,"filehandle");
6265 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6269 LOP(OP_PIPE_OP,XTERM);
6272 s = scan_str(s,!!PL_madskills,FALSE);
6275 yylval.ival = OP_CONST;
6276 TERM(sublex_start());
6282 s = scan_str(s,!!PL_madskills,FALSE);
6285 PL_expect = XOPERATOR;
6287 if (SvCUR(PL_lex_stuff)) {
6290 d = SvPV_force(PL_lex_stuff, len);
6292 for (; isSPACE(*d) && len; --len, ++d)
6297 if (!warned && ckWARN(WARN_QW)) {
6298 for (; !isSPACE(*d) && len; --len, ++d) {
6300 Perl_warner(aTHX_ packWARN(WARN_QW),
6301 "Possible attempt to separate words with commas");
6304 else if (*d == '#') {
6305 Perl_warner(aTHX_ packWARN(WARN_QW),
6306 "Possible attempt to put comments in qw() list");
6312 for (; !isSPACE(*d) && len; --len, ++d)
6315 sv = newSVpvn(b, d-b);
6316 if (DO_UTF8(PL_lex_stuff))
6318 words = append_elem(OP_LIST, words,
6319 newSVOP(OP_CONST, 0, tokeq(sv)));
6323 start_force(PL_curforce);
6324 NEXTVAL_NEXTTOKE.opval = words;
6329 SvREFCNT_dec(PL_lex_stuff);
6330 PL_lex_stuff = NULL;
6336 s = scan_str(s,!!PL_madskills,FALSE);
6339 yylval.ival = OP_STRINGIFY;
6340 if (SvIVX(PL_lex_stuff) == '\'')
6341 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6342 TERM(sublex_start());
6345 s = scan_pat(s,OP_QR);
6346 TERM(sublex_start());
6349 s = scan_str(s,!!PL_madskills,FALSE);
6352 readpipe_override();
6353 TERM(sublex_start());
6361 s = force_version(s, FALSE);
6363 else if (*s != 'v' || !isDIGIT(s[1])
6364 || (s = force_version(s, TRUE), *s == 'v'))
6366 *PL_tokenbuf = '\0';
6367 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6368 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6369 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
6371 yyerror("<> should be quotes");
6373 if (orig_keyword == KEY_require) {
6381 PL_last_uni = PL_oldbufptr;
6382 PL_last_lop_op = OP_REQUIRE;
6384 return REPORT( (int)REQUIRE );
6390 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6394 LOP(OP_RENAME,XTERM);
6403 LOP(OP_RINDEX,XTERM);
6413 UNIDOR(OP_READLINE);
6426 LOP(OP_REVERSE,XTERM);
6429 UNIDOR(OP_READLINK);
6437 TERM(sublex_start());
6439 TOKEN(1); /* force error */
6442 checkcomma(s,PL_tokenbuf,"filehandle");
6452 LOP(OP_SELECT,XTERM);
6458 LOP(OP_SEMCTL,XTERM);
6461 LOP(OP_SEMGET,XTERM);
6464 LOP(OP_SEMOP,XTERM);
6470 LOP(OP_SETPGRP,XTERM);
6472 case KEY_setpriority:
6473 LOP(OP_SETPRIORITY,XTERM);
6475 case KEY_sethostent:
6481 case KEY_setservent:
6484 case KEY_setprotoent:
6494 LOP(OP_SEEKDIR,XTERM);
6496 case KEY_setsockopt:
6497 LOP(OP_SSOCKOPT,XTERM);
6503 LOP(OP_SHMCTL,XTERM);
6506 LOP(OP_SHMGET,XTERM);
6509 LOP(OP_SHMREAD,XTERM);
6512 LOP(OP_SHMWRITE,XTERM);
6515 LOP(OP_SHUTDOWN,XTERM);
6524 LOP(OP_SOCKET,XTERM);
6526 case KEY_socketpair:
6527 LOP(OP_SOCKPAIR,XTERM);
6530 checkcomma(s,PL_tokenbuf,"subroutine name");
6532 if (*s == ';' || *s == ')') /* probably a close */
6533 Perl_croak(aTHX_ "sort is now a reserved word");
6535 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6539 LOP(OP_SPLIT,XTERM);
6542 LOP(OP_SPRINTF,XTERM);
6545 LOP(OP_SPLICE,XTERM);
6560 LOP(OP_SUBSTR,XTERM);
6566 char tmpbuf[sizeof PL_tokenbuf];
6567 SSize_t tboffset = 0;
6568 expectation attrful;
6569 bool have_name, have_proto;
6570 const int key = tmp;
6575 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6576 SV *subtoken = newSVpvn(tstart, s - tstart);
6580 s = SKIPSPACE2(s,tmpwhite);
6585 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6586 (*s == ':' && s[1] == ':'))
6593 attrful = XATTRBLOCK;
6594 /* remember buffer pos'n for later force_word */
6595 tboffset = s - PL_oldbufptr;
6596 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6599 nametoke = newSVpvn(s, d - s);
6601 if (strchr(tmpbuf, ':'))
6602 sv_setpv(PL_subname, tmpbuf);
6604 sv_setsv(PL_subname,PL_curstname);
6605 sv_catpvs(PL_subname,"::");
6606 sv_catpvn(PL_subname,tmpbuf,len);
6613 CURMAD('X', nametoke);
6614 CURMAD('_', tmpwhite);
6615 (void) force_word(PL_oldbufptr + tboffset, WORD,
6618 s = SKIPSPACE2(d,tmpwhite);
6625 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6626 PL_expect = XTERMBLOCK;
6627 attrful = XATTRTERM;
6628 sv_setpvn(PL_subname,"?",1);
6632 if (key == KEY_format) {
6634 PL_lex_formbrack = PL_lex_brackets + 1;
6636 PL_thistoken = subtoken;
6640 (void) force_word(PL_oldbufptr + tboffset, WORD,
6646 /* Look for a prototype */
6649 bool bad_proto = FALSE;
6650 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6652 s = scan_str(s,!!PL_madskills,FALSE);
6654 Perl_croak(aTHX_ "Prototype not terminated");
6655 /* strip spaces and check for bad characters */
6656 d = SvPVX(PL_lex_stuff);
6658 for (p = d; *p; ++p) {
6661 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6667 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6668 "Illegal character in prototype for %"SVf" : %s",
6669 SVfARG(PL_subname), d);
6670 SvCUR_set(PL_lex_stuff, tmp);
6675 CURMAD('q', PL_thisopen);
6676 CURMAD('_', tmpwhite);
6677 CURMAD('=', PL_thisstuff);
6678 CURMAD('Q', PL_thisclose);
6679 NEXTVAL_NEXTTOKE.opval =
6680 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6681 PL_lex_stuff = Nullsv;
6684 s = SKIPSPACE2(s,tmpwhite);
6692 if (*s == ':' && s[1] != ':')
6693 PL_expect = attrful;
6694 else if (*s != '{' && key == KEY_sub) {
6696 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6698 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6705 curmad('^', newSVpvs(""));
6706 CURMAD('_', tmpwhite);
6710 PL_thistoken = subtoken;
6713 NEXTVAL_NEXTTOKE.opval =
6714 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6715 PL_lex_stuff = NULL;
6720 sv_setpv(PL_subname,
6722 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6726 (void) force_word(PL_oldbufptr + tboffset, WORD,
6736 LOP(OP_SYSTEM,XREF);
6739 LOP(OP_SYMLINK,XTERM);
6742 LOP(OP_SYSCALL,XTERM);
6745 LOP(OP_SYSOPEN,XTERM);
6748 LOP(OP_SYSSEEK,XTERM);
6751 LOP(OP_SYSREAD,XTERM);
6754 LOP(OP_SYSWRITE,XTERM);
6758 TERM(sublex_start());
6779 LOP(OP_TRUNCATE,XTERM);
6791 yylval.ival = CopLINE(PL_curcop);
6795 yylval.ival = CopLINE(PL_curcop);
6799 LOP(OP_UNLINK,XTERM);
6805 LOP(OP_UNPACK,XTERM);
6808 LOP(OP_UTIME,XTERM);
6814 LOP(OP_UNSHIFT,XTERM);
6817 s = tokenize_use(1, s);
6827 yylval.ival = CopLINE(PL_curcop);
6831 yylval.ival = CopLINE(PL_curcop);
6835 PL_hints |= HINT_BLOCK_SCOPE;
6842 LOP(OP_WAITPID,XTERM);
6851 ctl_l[0] = toCTRL('L');
6853 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6856 /* Make sure $^L is defined */
6857 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6862 if (PL_expect == XOPERATOR)
6868 yylval.ival = OP_XOR;
6873 TERM(sublex_start());
6878 #pragma segment Main
6882 S_pending_ident(pTHX)
6887 /* pit holds the identifier we read and pending_ident is reset */
6888 char pit = PL_pending_ident;
6889 PL_pending_ident = 0;
6891 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6892 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6893 "### Pending identifier '%s'\n", PL_tokenbuf); });
6895 /* if we're in a my(), we can't allow dynamics here.
6896 $foo'bar has already been turned into $foo::bar, so
6897 just check for colons.
6899 if it's a legal name, the OP is a PADANY.
6902 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6903 if (strchr(PL_tokenbuf,':'))
6904 yyerror(Perl_form(aTHX_ "No package name allowed for "
6905 "variable %s in \"our\"",
6907 tmp = allocmy(PL_tokenbuf);
6910 if (strchr(PL_tokenbuf,':'))
6911 yyerror(Perl_form(aTHX_ PL_no_myglob,
6912 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6914 yylval.opval = newOP(OP_PADANY, 0);
6915 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6921 build the ops for accesses to a my() variable.
6923 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6924 then used in a comparison. This catches most, but not
6925 all cases. For instance, it catches
6926 sort { my($a); $a <=> $b }
6928 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6929 (although why you'd do that is anyone's guess).
6932 if (!strchr(PL_tokenbuf,':')) {
6934 tmp = pad_findmy(PL_tokenbuf);
6935 if (tmp != NOT_IN_PAD) {
6936 /* might be an "our" variable" */
6937 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6938 /* build ops for a bareword */
6939 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6940 HEK * const stashname = HvNAME_HEK(stash);
6941 SV * const sym = newSVhek(stashname);
6942 sv_catpvs(sym, "::");
6943 sv_catpv(sym, PL_tokenbuf+1);
6944 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6945 yylval.opval->op_private = OPpCONST_ENTERED;
6948 ? (GV_ADDMULTI | GV_ADDINEVAL)
6951 ((PL_tokenbuf[0] == '$') ? SVt_PV
6952 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6957 /* if it's a sort block and they're naming $a or $b */
6958 if (PL_last_lop_op == OP_SORT &&
6959 PL_tokenbuf[0] == '$' &&
6960 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6963 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6964 d < PL_bufend && *d != '\n';
6967 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6968 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6974 yylval.opval = newOP(OP_PADANY, 0);
6975 yylval.opval->op_targ = tmp;
6981 Whine if they've said @foo in a doublequoted string,
6982 and @foo isn't a variable we can find in the symbol
6985 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
6986 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
6987 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
6988 && ckWARN(WARN_AMBIGUOUS))
6990 /* Downgraded from fatal to warning 20000522 mjd */
6991 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6992 "Possible unintended interpolation of %s in string",
6997 /* build ops for a bareword */
6998 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
6999 yylval.opval->op_private = OPpCONST_ENTERED;
7002 /* If the identifier refers to a stash, don't autovivify it.
7003 * Change 24660 had the side effect of causing symbol table
7004 * hashes to always be defined, even if they were freshly
7005 * created and the only reference in the entire program was
7006 * the single statement with the defined %foo::bar:: test.
7007 * It appears that all code in the wild doing this actually
7008 * wants to know whether sub-packages have been loaded, so
7009 * by avoiding auto-vivifying symbol tables, we ensure that
7010 * defined %foo::bar:: continues to be false, and the existing
7011 * tests still give the expected answers, even though what
7012 * they're actually testing has now changed subtly.
7014 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7016 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7017 ((PL_tokenbuf[0] == '$') ? SVt_PV
7018 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7024 * The following code was generated by perl_keyword.pl.
7028 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7033 case 1: /* 5 tokens of length 1 */
7065 case 2: /* 18 tokens of length 2 */
7211 case 3: /* 29 tokens of length 3 */
7215 if (name[1] == 'N' &&
7278 if (name[1] == 'i' &&
7300 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7318 if (name[1] == 'o' &&
7327 if (name[1] == 'e' &&
7336 if (name[1] == 'n' &&
7345 if (name[1] == 'o' &&
7354 if (name[1] == 'a' &&
7363 if (name[1] == 'o' &&
7425 if (name[1] == 'e' &&
7439 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7465 if (name[1] == 'i' &&
7474 if (name[1] == 's' &&
7483 if (name[1] == 'e' &&
7492 if (name[1] == 'o' &&
7504 case 4: /* 41 tokens of length 4 */
7508 if (name[1] == 'O' &&
7518 if (name[1] == 'N' &&
7528 if (name[1] == 'i' &&
7538 if (name[1] == 'h' &&
7548 if (name[1] == 'u' &&
7561 if (name[2] == 'c' &&
7570 if (name[2] == 's' &&
7579 if (name[2] == 'a' &&
7615 if (name[1] == 'o' &&
7628 if (name[2] == 't' &&
7637 if (name[2] == 'o' &&
7646 if (name[2] == 't' &&
7655 if (name[2] == 'e' &&
7668 if (name[1] == 'o' &&
7681 if (name[2] == 'y' &&
7690 if (name[2] == 'l' &&
7706 if (name[2] == 's' &&
7715 if (name[2] == 'n' &&
7724 if (name[2] == 'c' &&
7737 if (name[1] == 'e' &&
7747 if (name[1] == 'p' &&
7760 if (name[2] == 'c' &&
7769 if (name[2] == 'p' &&
7778 if (name[2] == 's' &&
7794 if (name[2] == 'n' &&
7864 if (name[2] == 'r' &&
7873 if (name[2] == 'r' &&
7882 if (name[2] == 'a' &&
7898 if (name[2] == 'l' &&
7960 if (name[2] == 'e' &&
7963 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7976 case 5: /* 39 tokens of length 5 */
7980 if (name[1] == 'E' &&
7991 if (name[1] == 'H' &&
8005 if (name[2] == 'a' &&
8015 if (name[2] == 'a' &&
8032 if (name[2] == 'e' &&
8042 if (name[2] == 'e' &&
8046 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8062 if (name[3] == 'i' &&
8071 if (name[3] == 'o' &&
8107 if (name[2] == 'o' &&
8117 if (name[2] == 'y' &&
8131 if (name[1] == 'l' &&
8145 if (name[2] == 'n' &&
8155 if (name[2] == 'o' &&
8169 if (name[1] == 'i' &&
8174 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8183 if (name[2] == 'd' &&
8193 if (name[2] == 'c' &&
8210 if (name[2] == 'c' &&
8220 if (name[2] == 't' &&
8234 if (name[1] == 'k' &&
8245 if (name[1] == 'r' &&
8259 if (name[2] == 's' &&
8269 if (name[2] == 'd' &&
8286 if (name[2] == 'm' &&
8296 if (name[2] == 'i' &&
8306 if (name[2] == 'e' &&
8316 if (name[2] == 'l' &&
8326 if (name[2] == 'a' &&
8339 if (name[3] == 't' &&
8342 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8348 if (name[3] == 'd' &&
8365 if (name[1] == 'i' &&
8379 if (name[2] == 'a' &&
8392 if (name[3] == 'e' &&
8427 if (name[2] == 'i' &&
8444 if (name[2] == 'i' &&
8454 if (name[2] == 'i' &&
8471 case 6: /* 33 tokens of length 6 */
8475 if (name[1] == 'c' &&
8490 if (name[2] == 'l' &&
8501 if (name[2] == 'r' &&
8516 if (name[1] == 'e' &&
8531 if (name[2] == 's' &&
8536 if(ckWARN_d(WARN_SYNTAX))
8537 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8543 if (name[2] == 'i' &&
8561 if (name[2] == 'l' &&
8572 if (name[2] == 'r' &&
8587 if (name[1] == 'm' &&
8602 if (name[2] == 'n' &&
8613 if (name[2] == 's' &&
8628 if (name[1] == 's' &&
8634 if (name[4] == 't' &&
8643 if (name[4] == 'e' &&
8652 if (name[4] == 'c' &&
8661 if (name[4] == 'n' &&
8677 if (name[1] == 'r' &&
8695 if (name[3] == 'a' &&
8705 if (name[3] == 'u' &&
8719 if (name[2] == 'n' &&
8737 if (name[2] == 'a' &&
8751 if (name[3] == 'e' &&
8764 if (name[4] == 't' &&
8773 if (name[4] == 'e' &&
8795 if (name[4] == 't' &&
8804 if (name[4] == 'e' &&
8820 if (name[2] == 'c' &&
8831 if (name[2] == 'l' &&
8842 if (name[2] == 'b' &&
8853 if (name[2] == 's' &&
8876 if (name[4] == 's' &&
8885 if (name[4] == 'n' &&
8898 if (name[3] == 'a' &&
8915 if (name[1] == 'a' &&
8930 case 7: /* 29 tokens of length 7 */
8934 if (name[1] == 'E' &&
8947 if (name[1] == '_' &&
8960 if (name[1] == 'i' &&
8967 return -KEY_binmode;
8973 if (name[1] == 'o' &&
8980 return -KEY_connect;
8989 if (name[2] == 'm' &&
8995 return -KEY_dbmopen;
9006 if (name[4] == 'u' &&
9010 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9016 if (name[4] == 'n' &&
9037 if (name[1] == 'o' &&
9050 if (name[1] == 'e' &&
9057 if (name[5] == 'r' &&
9060 return -KEY_getpgrp;
9066 if (name[5] == 'i' &&
9069 return -KEY_getppid;
9082 if (name[1] == 'c' &&
9089 return -KEY_lcfirst;
9095 if (name[1] == 'p' &&
9102 return -KEY_opendir;
9108 if (name[1] == 'a' &&
9126 if (name[3] == 'd' &&
9131 return -KEY_readdir;
9137 if (name[3] == 'u' &&
9148 if (name[3] == 'e' &&
9153 return -KEY_reverse;
9172 if (name[3] == 'k' &&
9177 return -KEY_seekdir;
9183 if (name[3] == 'p' &&
9188 return -KEY_setpgrp;
9198 if (name[2] == 'm' &&
9204 return -KEY_shmread;
9210 if (name[2] == 'r' &&
9216 return -KEY_sprintf;
9225 if (name[3] == 'l' &&
9230 return -KEY_symlink;
9239 if (name[4] == 'a' &&
9243 return -KEY_syscall;
9249 if (name[4] == 'p' &&
9253 return -KEY_sysopen;
9259 if (name[4] == 'e' &&
9263 return -KEY_sysread;
9269 if (name[4] == 'e' &&
9273 return -KEY_sysseek;
9291 if (name[1] == 'e' &&
9298 return -KEY_telldir;
9307 if (name[2] == 'f' &&
9313 return -KEY_ucfirst;
9319 if (name[2] == 's' &&
9325 return -KEY_unshift;
9335 if (name[1] == 'a' &&
9342 return -KEY_waitpid;
9351 case 8: /* 26 tokens of length 8 */
9355 if (name[1] == 'U' &&
9363 return KEY_AUTOLOAD;
9374 if (name[3] == 'A' &&
9380 return KEY___DATA__;
9386 if (name[3] == 'I' &&
9392 return -KEY___FILE__;
9398 if (name[3] == 'I' &&
9404 return -KEY___LINE__;
9420 if (name[2] == 'o' &&
9427 return -KEY_closedir;
9433 if (name[2] == 'n' &&
9440 return -KEY_continue;
9450 if (name[1] == 'b' &&
9458 return -KEY_dbmclose;
9464 if (name[1] == 'n' &&
9470 if (name[4] == 'r' &&
9475 return -KEY_endgrent;
9481 if (name[4] == 'w' &&
9486 return -KEY_endpwent;
9499 if (name[1] == 'o' &&
9507 return -KEY_formline;
9513 if (name[1] == 'e' &&
9524 if (name[6] == 'n' &&
9527 return -KEY_getgrent;
9533 if (name[6] == 'i' &&
9536 return -KEY_getgrgid;
9542 if (name[6] == 'a' &&
9545 return -KEY_getgrnam;
9558 if (name[4] == 'o' &&
9563 return -KEY_getlogin;
9574 if (name[6] == 'n' &&
9577 return -KEY_getpwent;
9583 if (name[6] == 'a' &&
9586 return -KEY_getpwnam;
9592 if (name[6] == 'i' &&
9595 return -KEY_getpwuid;
9615 if (name[1] == 'e' &&
9622 if (name[5] == 'i' &&
9629 return -KEY_readline;
9634 return -KEY_readlink;
9645 if (name[5] == 'i' &&
9649 return -KEY_readpipe;
9670 if (name[4] == 'r' &&
9675 return -KEY_setgrent;
9681 if (name[4] == 'w' &&
9686 return -KEY_setpwent;
9702 if (name[3] == 'w' &&
9708 return -KEY_shmwrite;
9714 if (name[3] == 't' &&
9720 return -KEY_shutdown;
9730 if (name[2] == 's' &&
9737 return -KEY_syswrite;
9747 if (name[1] == 'r' &&
9755 return -KEY_truncate;
9764 case 9: /* 9 tokens of length 9 */
9768 if (name[1] == 'N' &&
9777 return KEY_UNITCHECK;
9783 if (name[1] == 'n' &&
9792 return -KEY_endnetent;
9798 if (name[1] == 'e' &&
9807 return -KEY_getnetent;
9813 if (name[1] == 'o' &&
9822 return -KEY_localtime;
9828 if (name[1] == 'r' &&
9837 return KEY_prototype;
9843 if (name[1] == 'u' &&
9852 return -KEY_quotemeta;
9858 if (name[1] == 'e' &&
9867 return -KEY_rewinddir;
9873 if (name[1] == 'e' &&
9882 return -KEY_setnetent;
9888 if (name[1] == 'a' &&
9897 return -KEY_wantarray;
9906 case 10: /* 9 tokens of length 10 */
9910 if (name[1] == 'n' &&
9916 if (name[4] == 'o' &&
9923 return -KEY_endhostent;
9929 if (name[4] == 'e' &&
9936 return -KEY_endservent;
9949 if (name[1] == 'e' &&
9955 if (name[4] == 'o' &&
9962 return -KEY_gethostent;
9971 if (name[5] == 'r' &&
9977 return -KEY_getservent;
9983 if (name[5] == 'c' &&
9989 return -KEY_getsockopt;
10009 if (name[2] == 't')
10014 if (name[4] == 'o' &&
10021 return -KEY_sethostent;
10030 if (name[5] == 'r' &&
10036 return -KEY_setservent;
10042 if (name[5] == 'c' &&
10048 return -KEY_setsockopt;
10065 if (name[2] == 'c' &&
10074 return -KEY_socketpair;
10087 case 11: /* 8 tokens of length 11 */
10091 if (name[1] == '_' &&
10101 { /* __PACKAGE__ */
10102 return -KEY___PACKAGE__;
10108 if (name[1] == 'n' &&
10118 { /* endprotoent */
10119 return -KEY_endprotoent;
10125 if (name[1] == 'e' &&
10134 if (name[5] == 'e' &&
10140 { /* getpeername */
10141 return -KEY_getpeername;
10150 if (name[6] == 'o' &&
10155 { /* getpriority */
10156 return -KEY_getpriority;
10162 if (name[6] == 't' &&
10167 { /* getprotoent */
10168 return -KEY_getprotoent;
10182 if (name[4] == 'o' &&
10189 { /* getsockname */
10190 return -KEY_getsockname;
10203 if (name[1] == 'e' &&
10211 if (name[6] == 'o' &&
10216 { /* setpriority */
10217 return -KEY_setpriority;
10223 if (name[6] == 't' &&
10228 { /* setprotoent */
10229 return -KEY_setprotoent;
10245 case 12: /* 2 tokens of length 12 */
10246 if (name[0] == 'g' &&
10258 if (name[9] == 'd' &&
10261 { /* getnetbyaddr */
10262 return -KEY_getnetbyaddr;
10268 if (name[9] == 'a' &&
10271 { /* getnetbyname */
10272 return -KEY_getnetbyname;
10284 case 13: /* 4 tokens of length 13 */
10285 if (name[0] == 'g' &&
10292 if (name[4] == 'o' &&
10301 if (name[10] == 'd' &&
10304 { /* gethostbyaddr */
10305 return -KEY_gethostbyaddr;
10311 if (name[10] == 'a' &&
10314 { /* gethostbyname */
10315 return -KEY_gethostbyname;
10328 if (name[4] == 'e' &&
10337 if (name[10] == 'a' &&
10340 { /* getservbyname */
10341 return -KEY_getservbyname;
10347 if (name[10] == 'o' &&
10350 { /* getservbyport */
10351 return -KEY_getservbyport;
10370 case 14: /* 1 tokens of length 14 */
10371 if (name[0] == 'g' &&
10385 { /* getprotobyname */
10386 return -KEY_getprotobyname;
10391 case 16: /* 1 tokens of length 16 */
10392 if (name[0] == 'g' &&
10408 { /* getprotobynumber */
10409 return -KEY_getprotobynumber;
10423 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10427 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10428 if (ckWARN(WARN_SYNTAX)) {
10431 for (w = s+2; *w && level; w++) {
10434 else if (*w == ')')
10437 while (isSPACE(*w))
10439 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
10440 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10441 "%s (...) interpreted as function",name);
10444 while (s < PL_bufend && isSPACE(*s))
10448 while (s < PL_bufend && isSPACE(*s))
10450 if (isIDFIRST_lazy_if(s,UTF)) {
10451 const char * const w = s++;
10452 while (isALNUM_lazy_if(s,UTF))
10454 while (s < PL_bufend && isSPACE(*s))
10458 if (keyword(w, s - w, 0))
10461 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10462 if (gv && GvCVu(gv))
10464 Perl_croak(aTHX_ "No comma allowed after %s", what);
10469 /* Either returns sv, or mortalizes sv and returns a new SV*.
10470 Best used as sv=new_constant(..., sv, ...).
10471 If s, pv are NULL, calls subroutine with one argument,
10472 and type is used with error messages only. */
10475 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10479 HV * const table = GvHV(PL_hintgv); /* ^H */
10483 const char *why1 = "", *why2 = "", *why3 = "";
10485 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10488 why2 = (const char *)
10489 (strEQ(key,"charnames")
10490 ? "(possibly a missing \"use charnames ...\")"
10492 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10493 (type ? type: "undef"), why2);
10495 /* This is convoluted and evil ("goto considered harmful")
10496 * but I do not understand the intricacies of all the different
10497 * failure modes of %^H in here. The goal here is to make
10498 * the most probable error message user-friendly. --jhi */
10503 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10504 (type ? type: "undef"), why1, why2, why3);
10506 yyerror(SvPVX_const(msg));
10510 cvp = hv_fetch(table, key, strlen(key), FALSE);
10511 if (!cvp || !SvOK(*cvp)) {
10514 why3 = "} is not defined";
10517 sv_2mortal(sv); /* Parent created it permanently */
10520 pv = sv_2mortal(newSVpvn(s, len));
10522 typesv = sv_2mortal(newSVpv(type, 0));
10524 typesv = &PL_sv_undef;
10526 PUSHSTACKi(PERLSI_OVERLOAD);
10538 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10542 /* Check the eval first */
10543 if (!PL_in_eval && SvTRUE(ERRSV)) {
10544 sv_catpvs(ERRSV, "Propagated");
10545 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10547 res = SvREFCNT_inc_simple(sv);
10551 SvREFCNT_inc_simple_void(res);
10560 why1 = "Call to &{$^H{";
10562 why3 = "}} did not return a defined value";
10570 /* Returns a NUL terminated string, with the length of the string written to
10574 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10577 register char *d = dest;
10578 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10581 Perl_croak(aTHX_ ident_too_long);
10582 if (isALNUM(*s)) /* UTF handled below */
10584 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10589 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10593 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10594 char *t = s + UTF8SKIP(s);
10596 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10600 Perl_croak(aTHX_ ident_too_long);
10601 Copy(s, d, len, char);
10614 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10617 char *bracket = NULL;
10619 register char *d = dest;
10620 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10625 while (isDIGIT(*s)) {
10627 Perl_croak(aTHX_ ident_too_long);
10634 Perl_croak(aTHX_ ident_too_long);
10635 if (isALNUM(*s)) /* UTF handled below */
10637 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10642 else if (*s == ':' && s[1] == ':') {
10646 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10647 char *t = s + UTF8SKIP(s);
10648 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10650 if (d + (t - s) > e)
10651 Perl_croak(aTHX_ ident_too_long);
10652 Copy(s, d, t - s, char);
10663 if (PL_lex_state != LEX_NORMAL)
10664 PL_lex_state = LEX_INTERPENDMAYBE;
10667 if (*s == '$' && s[1] &&
10668 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10681 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10686 if (isSPACE(s[-1])) {
10688 const char ch = *s++;
10689 if (!SPACE_OR_TAB(ch)) {
10695 if (isIDFIRST_lazy_if(d,UTF)) {
10699 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10700 end += UTF8SKIP(end);
10701 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10702 end += UTF8SKIP(end);
10704 Copy(s, d, end - s, char);
10709 while ((isALNUM(*s) || *s == ':') && d < e)
10712 Perl_croak(aTHX_ ident_too_long);
10715 while (s < send && SPACE_OR_TAB(*s))
10717 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10718 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10719 const char * const brack =
10721 ((*s == '[') ? "[...]" : "{...}");
10722 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10723 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10724 funny, dest, brack, funny, dest, brack);
10727 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10731 /* Handle extended ${^Foo} variables
10732 * 1999-02-27 mjd-perl-patch@plover.com */
10733 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10737 while (isALNUM(*s) && d < e) {
10741 Perl_croak(aTHX_ ident_too_long);
10746 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10747 PL_lex_state = LEX_INTERPEND;
10750 if (PL_lex_state == LEX_NORMAL) {
10751 if (ckWARN(WARN_AMBIGUOUS) &&
10752 (keyword(dest, d - dest, 0) || get_cv(dest, FALSE)))
10756 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10757 "Ambiguous use of %c{%s} resolved to %c%s",
10758 funny, dest, funny, dest);
10763 s = bracket; /* let the parser handle it */
10767 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10768 PL_lex_state = LEX_INTERPEND;
10773 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10775 PERL_UNUSED_CONTEXT;
10778 else if (ch == 'g')
10779 *pmfl |= PMf_GLOBAL;
10780 else if (ch == 'c')
10781 *pmfl |= PMf_CONTINUE;
10782 else if (ch == 'o')
10784 else if (ch == 'm')
10785 *pmfl |= PMf_MULTILINE;
10786 else if (ch == 's')
10787 *pmfl |= PMf_SINGLELINE;
10788 else if (ch == 'x')
10789 *pmfl |= PMf_EXTENDED;
10793 S_scan_pat(pTHX_ char *start, I32 type)
10797 char *s = scan_str(start,!!PL_madskills,FALSE);
10798 const char * const valid_flags =
10799 (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
10806 const char * const delimiter = skipspace(start);
10810 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10811 : "Search pattern not terminated" ));
10814 pm = (PMOP*)newPMOP(type, 0);
10815 if (PL_multi_open == '?')
10816 pm->op_pmflags |= PMf_ONCE;
10820 while (*s && strchr(valid_flags, *s))
10821 pmflag(&pm->op_pmflags,*s++);
10823 if (PL_madskills && modstart != s) {
10824 SV* tmptoken = newSVpvn(modstart, s - modstart);
10825 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10828 /* issue a warning if /c is specified,but /g is not */
10829 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10830 && ckWARN(WARN_REGEXP))
10832 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
10835 pm->op_pmpermflags = pm->op_pmflags;
10837 PL_lex_op = (OP*)pm;
10838 yylval.ival = OP_MATCH;
10843 S_scan_subst(pTHX_ char *start)
10854 yylval.ival = OP_NULL;
10856 s = scan_str(start,!!PL_madskills,FALSE);
10859 Perl_croak(aTHX_ "Substitution pattern not terminated");
10861 if (s[-1] == PL_multi_open)
10864 if (PL_madskills) {
10865 CURMAD('q', PL_thisopen);
10866 CURMAD('_', PL_thiswhite);
10867 CURMAD('E', PL_thisstuff);
10868 CURMAD('Q', PL_thisclose);
10869 PL_realtokenstart = s - SvPVX(PL_linestr);
10873 first_start = PL_multi_start;
10874 s = scan_str(s,!!PL_madskills,FALSE);
10876 if (PL_lex_stuff) {
10877 SvREFCNT_dec(PL_lex_stuff);
10878 PL_lex_stuff = NULL;
10880 Perl_croak(aTHX_ "Substitution replacement not terminated");
10882 PL_multi_start = first_start; /* so whole substitution is taken together */
10884 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10887 if (PL_madskills) {
10888 CURMAD('z', PL_thisopen);
10889 CURMAD('R', PL_thisstuff);
10890 CURMAD('Z', PL_thisclose);
10900 else if (strchr("iogcmsx", *s))
10901 pmflag(&pm->op_pmflags,*s++);
10907 if (PL_madskills) {
10909 curmad('m', newSVpvn(modstart, s - modstart));
10910 append_madprops(PL_thismad, (OP*)pm, 0);
10914 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10915 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10919 SV * const repl = newSVpvs("");
10921 PL_sublex_info.super_bufptr = s;
10922 PL_sublex_info.super_bufend = PL_bufend;
10924 pm->op_pmflags |= PMf_EVAL;
10926 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10927 sv_catpvs(repl, "{");
10928 sv_catsv(repl, PL_lex_repl);
10929 if (strchr(SvPVX(PL_lex_repl), '#'))
10930 sv_catpvs(repl, "\n");
10931 sv_catpvs(repl, "}");
10933 SvREFCNT_dec(PL_lex_repl);
10934 PL_lex_repl = repl;
10937 pm->op_pmpermflags = pm->op_pmflags;
10938 PL_lex_op = (OP*)pm;
10939 yylval.ival = OP_SUBST;
10944 S_scan_trans(pTHX_ char *start)
10957 yylval.ival = OP_NULL;
10959 s = scan_str(start,!!PL_madskills,FALSE);
10961 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10963 if (s[-1] == PL_multi_open)
10966 if (PL_madskills) {
10967 CURMAD('q', PL_thisopen);
10968 CURMAD('_', PL_thiswhite);
10969 CURMAD('E', PL_thisstuff);
10970 CURMAD('Q', PL_thisclose);
10971 PL_realtokenstart = s - SvPVX(PL_linestr);
10975 s = scan_str(s,!!PL_madskills,FALSE);
10977 if (PL_lex_stuff) {
10978 SvREFCNT_dec(PL_lex_stuff);
10979 PL_lex_stuff = NULL;
10981 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10983 if (PL_madskills) {
10984 CURMAD('z', PL_thisopen);
10985 CURMAD('R', PL_thisstuff);
10986 CURMAD('Z', PL_thisclose);
10989 complement = del = squash = 0;
10996 complement = OPpTRANS_COMPLEMENT;
10999 del = OPpTRANS_DELETE;
11002 squash = OPpTRANS_SQUASH;
11011 Newx(tbl, complement&&!del?258:256, short);
11012 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11013 o->op_private &= ~OPpTRANS_ALL;
11014 o->op_private |= del|squash|complement|
11015 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11016 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11019 yylval.ival = OP_TRANS;
11022 if (PL_madskills) {
11024 curmad('m', newSVpvn(modstart, s - modstart));
11025 append_madprops(PL_thismad, o, 0);
11034 S_scan_heredoc(pTHX_ register char *s)
11038 I32 op_type = OP_SCALAR;
11042 const char *found_newline;
11046 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11048 I32 stuffstart = s - SvPVX(PL_linestr);
11051 PL_realtokenstart = -1;
11056 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11060 while (SPACE_OR_TAB(*peek))
11062 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11065 s = delimcpy(d, e, s, PL_bufend, term, &len);
11075 if (!isALNUM_lazy_if(s,UTF))
11076 deprecate_old("bare << to mean <<\"\"");
11077 for (; isALNUM_lazy_if(s,UTF); s++) {
11082 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11083 Perl_croak(aTHX_ "Delimiter for here document is too long");
11086 len = d - PL_tokenbuf;
11089 if (PL_madskills) {
11090 tstart = PL_tokenbuf + !outer;
11091 PL_thisclose = newSVpvn(tstart, len - !outer);
11092 tstart = SvPVX(PL_linestr) + stuffstart;
11093 PL_thisopen = newSVpvn(tstart, s - tstart);
11094 stuffstart = s - SvPVX(PL_linestr);
11097 #ifndef PERL_STRICT_CR
11098 d = strchr(s, '\r');
11100 char * const olds = s;
11102 while (s < PL_bufend) {
11108 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11117 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11124 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11125 herewas = newSVpvn(s,PL_bufend-s);
11129 herewas = newSVpvn(s-1,found_newline-s+1);
11132 herewas = newSVpvn(s,found_newline-s);
11136 if (PL_madskills) {
11137 tstart = SvPVX(PL_linestr) + stuffstart;
11139 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11141 PL_thisstuff = newSVpvn(tstart, s - tstart);
11144 s += SvCUR(herewas);
11147 stuffstart = s - SvPVX(PL_linestr);
11153 tmpstr = newSV(79);
11154 sv_upgrade(tmpstr, SVt_PVIV);
11155 if (term == '\'') {
11156 op_type = OP_CONST;
11157 SvIV_set(tmpstr, -1);
11159 else if (term == '`') {
11160 op_type = OP_BACKTICK;
11161 SvIV_set(tmpstr, '\\');
11165 PL_multi_start = CopLINE(PL_curcop);
11166 PL_multi_open = PL_multi_close = '<';
11167 term = *PL_tokenbuf;
11168 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11169 char * const bufptr = PL_sublex_info.super_bufptr;
11170 char * const bufend = PL_sublex_info.super_bufend;
11171 char * const olds = s - SvCUR(herewas);
11172 s = strchr(bufptr, '\n');
11176 while (s < bufend &&
11177 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11179 CopLINE_inc(PL_curcop);
11182 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11183 missingterm(PL_tokenbuf);
11185 sv_setpvn(herewas,bufptr,d-bufptr+1);
11186 sv_setpvn(tmpstr,d+1,s-d);
11188 sv_catpvn(herewas,s,bufend-s);
11189 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11196 while (s < PL_bufend &&
11197 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11199 CopLINE_inc(PL_curcop);
11201 if (s >= PL_bufend) {
11202 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11203 missingterm(PL_tokenbuf);
11205 sv_setpvn(tmpstr,d+1,s-d);
11207 if (PL_madskills) {
11209 sv_catpvn(PL_thisstuff, d + 1, s - d);
11211 PL_thisstuff = newSVpvn(d + 1, s - d);
11212 stuffstart = s - SvPVX(PL_linestr);
11216 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11218 sv_catpvn(herewas,s,PL_bufend-s);
11219 sv_setsv(PL_linestr,herewas);
11220 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11221 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11222 PL_last_lop = PL_last_uni = NULL;
11225 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11226 while (s >= PL_bufend) { /* multiple line string? */
11228 if (PL_madskills) {
11229 tstart = SvPVX(PL_linestr) + stuffstart;
11231 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11233 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11237 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11238 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11239 missingterm(PL_tokenbuf);
11242 stuffstart = s - SvPVX(PL_linestr);
11244 CopLINE_inc(PL_curcop);
11245 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11246 PL_last_lop = PL_last_uni = NULL;
11247 #ifndef PERL_STRICT_CR
11248 if (PL_bufend - PL_linestart >= 2) {
11249 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11250 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11252 PL_bufend[-2] = '\n';
11254 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11256 else if (PL_bufend[-1] == '\r')
11257 PL_bufend[-1] = '\n';
11259 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11260 PL_bufend[-1] = '\n';
11262 if (PERLDB_LINE && PL_curstash != PL_debstash)
11263 update_debugger_info_sv(PL_linestr);
11264 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11265 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11266 *(SvPVX(PL_linestr) + off ) = ' ';
11267 sv_catsv(PL_linestr,herewas);
11268 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11269 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11273 sv_catsv(tmpstr,PL_linestr);
11278 PL_multi_end = CopLINE(PL_curcop);
11279 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11280 SvPV_shrink_to_cur(tmpstr);
11282 SvREFCNT_dec(herewas);
11284 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11286 else if (PL_encoding)
11287 sv_recode_to_utf8(tmpstr, PL_encoding);
11289 PL_lex_stuff = tmpstr;
11290 yylval.ival = op_type;
11294 /* scan_inputsymbol
11295 takes: current position in input buffer
11296 returns: new position in input buffer
11297 side-effects: yylval and lex_op are set.
11302 <FH> read from filehandle
11303 <pkg::FH> read from package qualified filehandle
11304 <pkg'FH> read from package qualified filehandle
11305 <$fh> read from filehandle in $fh
11306 <*.h> filename glob
11311 S_scan_inputsymbol(pTHX_ char *start)
11314 register char *s = start; /* current position in buffer */
11318 char *d = PL_tokenbuf; /* start of temp holding space */
11319 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11321 end = strchr(s, '\n');
11324 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11326 /* die if we didn't have space for the contents of the <>,
11327 or if it didn't end, or if we see a newline
11330 if (len >= (I32)sizeof PL_tokenbuf)
11331 Perl_croak(aTHX_ "Excessively long <> operator");
11333 Perl_croak(aTHX_ "Unterminated <> operator");
11338 Remember, only scalar variables are interpreted as filehandles by
11339 this code. Anything more complex (e.g., <$fh{$num}>) will be
11340 treated as a glob() call.
11341 This code makes use of the fact that except for the $ at the front,
11342 a scalar variable and a filehandle look the same.
11344 if (*d == '$' && d[1]) d++;
11346 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11347 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11350 /* If we've tried to read what we allow filehandles to look like, and
11351 there's still text left, then it must be a glob() and not a getline.
11352 Use scan_str to pull out the stuff between the <> and treat it
11353 as nothing more than a string.
11356 if (d - PL_tokenbuf != len) {
11357 yylval.ival = OP_GLOB;
11359 s = scan_str(start,!!PL_madskills,FALSE);
11361 Perl_croak(aTHX_ "Glob not terminated");
11365 bool readline_overriden = FALSE;
11368 /* we're in a filehandle read situation */
11371 /* turn <> into <ARGV> */
11373 Copy("ARGV",d,5,char);
11375 /* Check whether readline() is overriden */
11376 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11378 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11380 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11381 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
11382 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11383 readline_overriden = TRUE;
11385 /* if <$fh>, create the ops to turn the variable into a
11389 /* try to find it in the pad for this block, otherwise find
11390 add symbol table ops
11392 const PADOFFSET tmp = pad_findmy(d);
11393 if (tmp != NOT_IN_PAD) {
11394 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11395 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11396 HEK * const stashname = HvNAME_HEK(stash);
11397 SV * const sym = sv_2mortal(newSVhek(stashname));
11398 sv_catpvs(sym, "::");
11399 sv_catpv(sym, d+1);
11404 OP * const o = newOP(OP_PADSV, 0);
11406 PL_lex_op = readline_overriden
11407 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11408 append_elem(OP_LIST, o,
11409 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11410 : (OP*)newUNOP(OP_READLINE, 0, o);
11419 ? (GV_ADDMULTI | GV_ADDINEVAL)
11422 PL_lex_op = readline_overriden
11423 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11424 append_elem(OP_LIST,
11425 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11426 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11427 : (OP*)newUNOP(OP_READLINE, 0,
11428 newUNOP(OP_RV2SV, 0,
11429 newGVOP(OP_GV, 0, gv)));
11431 if (!readline_overriden)
11432 PL_lex_op->op_flags |= OPf_SPECIAL;
11433 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11434 yylval.ival = OP_NULL;
11437 /* If it's none of the above, it must be a literal filehandle
11438 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11440 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11441 PL_lex_op = readline_overriden
11442 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11443 append_elem(OP_LIST,
11444 newGVOP(OP_GV, 0, gv),
11445 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11446 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11447 yylval.ival = OP_NULL;
11456 takes: start position in buffer
11457 keep_quoted preserve \ on the embedded delimiter(s)
11458 keep_delims preserve the delimiters around the string
11459 returns: position to continue reading from buffer
11460 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11461 updates the read buffer.
11463 This subroutine pulls a string out of the input. It is called for:
11464 q single quotes q(literal text)
11465 ' single quotes 'literal text'
11466 qq double quotes qq(interpolate $here please)
11467 " double quotes "interpolate $here please"
11468 qx backticks qx(/bin/ls -l)
11469 ` backticks `/bin/ls -l`
11470 qw quote words @EXPORT_OK = qw( func() $spam )
11471 m// regexp match m/this/
11472 s/// regexp substitute s/this/that/
11473 tr/// string transliterate tr/this/that/
11474 y/// string transliterate y/this/that/
11475 ($*@) sub prototypes sub foo ($)
11476 (stuff) sub attr parameters sub foo : attr(stuff)
11477 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11479 In most of these cases (all but <>, patterns and transliterate)
11480 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11481 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11482 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11485 It skips whitespace before the string starts, and treats the first
11486 character as the delimiter. If the delimiter is one of ([{< then
11487 the corresponding "close" character )]}> is used as the closing
11488 delimiter. It allows quoting of delimiters, and if the string has
11489 balanced delimiters ([{<>}]) it allows nesting.
11491 On success, the SV with the resulting string is put into lex_stuff or,
11492 if that is already non-NULL, into lex_repl. The second case occurs only
11493 when parsing the RHS of the special constructs s/// and tr/// (y///).
11494 For convenience, the terminating delimiter character is stuffed into
11499 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11502 SV *sv; /* scalar value: string */
11503 const char *tmps; /* temp string, used for delimiter matching */
11504 register char *s = start; /* current position in the buffer */
11505 register char term; /* terminating character */
11506 register char *to; /* current position in the sv's data */
11507 I32 brackets = 1; /* bracket nesting level */
11508 bool has_utf8 = FALSE; /* is there any utf8 content? */
11509 I32 termcode; /* terminating char. code */
11510 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11511 STRLEN termlen; /* length of terminating string */
11512 int last_off = 0; /* last position for nesting bracket */
11518 /* skip space before the delimiter */
11524 if (PL_realtokenstart >= 0) {
11525 stuffstart = PL_realtokenstart;
11526 PL_realtokenstart = -1;
11529 stuffstart = start - SvPVX(PL_linestr);
11531 /* mark where we are, in case we need to report errors */
11534 /* after skipping whitespace, the next character is the terminator */
11537 termcode = termstr[0] = term;
11541 termcode = utf8_to_uvchr((U8*)s, &termlen);
11542 Copy(s, termstr, termlen, U8);
11543 if (!UTF8_IS_INVARIANT(term))
11547 /* mark where we are */
11548 PL_multi_start = CopLINE(PL_curcop);
11549 PL_multi_open = term;
11551 /* find corresponding closing delimiter */
11552 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11553 termcode = termstr[0] = term = tmps[5];
11555 PL_multi_close = term;
11557 /* create a new SV to hold the contents. 79 is the SV's initial length.
11558 What a random number. */
11560 sv_upgrade(sv, SVt_PVIV);
11561 SvIV_set(sv, termcode);
11562 (void)SvPOK_only(sv); /* validate pointer */
11564 /* move past delimiter and try to read a complete string */
11566 sv_catpvn(sv, s, termlen);
11569 tstart = SvPVX(PL_linestr) + stuffstart;
11570 if (!PL_thisopen && !keep_delims) {
11571 PL_thisopen = newSVpvn(tstart, s - tstart);
11572 stuffstart = s - SvPVX(PL_linestr);
11576 if (PL_encoding && !UTF) {
11580 int offset = s - SvPVX_const(PL_linestr);
11581 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11582 &offset, (char*)termstr, termlen);
11583 const char * const ns = SvPVX_const(PL_linestr) + offset;
11584 char * const svlast = SvEND(sv) - 1;
11586 for (; s < ns; s++) {
11587 if (*s == '\n' && !PL_rsfp)
11588 CopLINE_inc(PL_curcop);
11591 goto read_more_line;
11593 /* handle quoted delimiters */
11594 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11596 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11598 if ((svlast-1 - t) % 2) {
11599 if (!keep_quoted) {
11600 *(svlast-1) = term;
11602 SvCUR_set(sv, SvCUR(sv) - 1);
11607 if (PL_multi_open == PL_multi_close) {
11613 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11614 /* At here, all closes are "was quoted" one,
11615 so we don't check PL_multi_close. */
11617 if (!keep_quoted && *(t+1) == PL_multi_open)
11622 else if (*t == PL_multi_open)
11630 SvCUR_set(sv, w - SvPVX_const(sv));
11632 last_off = w - SvPVX(sv);
11633 if (--brackets <= 0)
11638 if (!keep_delims) {
11639 SvCUR_set(sv, SvCUR(sv) - 1);
11645 /* extend sv if need be */
11646 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11647 /* set 'to' to the next character in the sv's string */
11648 to = SvPVX(sv)+SvCUR(sv);
11650 /* if open delimiter is the close delimiter read unbridle */
11651 if (PL_multi_open == PL_multi_close) {
11652 for (; s < PL_bufend; s++,to++) {
11653 /* embedded newlines increment the current line number */
11654 if (*s == '\n' && !PL_rsfp)
11655 CopLINE_inc(PL_curcop);
11656 /* handle quoted delimiters */
11657 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11658 if (!keep_quoted && s[1] == term)
11660 /* any other quotes are simply copied straight through */
11664 /* terminate when run out of buffer (the for() condition), or
11665 have found the terminator */
11666 else if (*s == term) {
11669 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11672 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11678 /* if the terminator isn't the same as the start character (e.g.,
11679 matched brackets), we have to allow more in the quoting, and
11680 be prepared for nested brackets.
11683 /* read until we run out of string, or we find the terminator */
11684 for (; s < PL_bufend; s++,to++) {
11685 /* embedded newlines increment the line count */
11686 if (*s == '\n' && !PL_rsfp)
11687 CopLINE_inc(PL_curcop);
11688 /* backslashes can escape the open or closing characters */
11689 if (*s == '\\' && s+1 < PL_bufend) {
11690 if (!keep_quoted &&
11691 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11696 /* allow nested opens and closes */
11697 else if (*s == PL_multi_close && --brackets <= 0)
11699 else if (*s == PL_multi_open)
11701 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11706 /* terminate the copied string and update the sv's end-of-string */
11708 SvCUR_set(sv, to - SvPVX_const(sv));
11711 * this next chunk reads more into the buffer if we're not done yet
11715 break; /* handle case where we are done yet :-) */
11717 #ifndef PERL_STRICT_CR
11718 if (to - SvPVX_const(sv) >= 2) {
11719 if ((to[-2] == '\r' && to[-1] == '\n') ||
11720 (to[-2] == '\n' && to[-1] == '\r'))
11724 SvCUR_set(sv, to - SvPVX_const(sv));
11726 else if (to[-1] == '\r')
11729 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11734 /* if we're out of file, or a read fails, bail and reset the current
11735 line marker so we can report where the unterminated string began
11738 if (PL_madskills) {
11739 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11741 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11743 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11747 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11749 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11755 /* we read a line, so increment our line counter */
11756 CopLINE_inc(PL_curcop);
11758 /* update debugger info */
11759 if (PERLDB_LINE && PL_curstash != PL_debstash)
11760 update_debugger_info_sv(PL_linestr);
11762 /* having changed the buffer, we must update PL_bufend */
11763 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11764 PL_last_lop = PL_last_uni = NULL;
11767 /* at this point, we have successfully read the delimited string */
11769 if (!PL_encoding || UTF) {
11771 if (PL_madskills) {
11772 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11773 const int len = s - tstart;
11775 sv_catpvn(PL_thisstuff, tstart, len);
11777 PL_thisstuff = newSVpvn(tstart, len);
11778 if (!PL_thisclose && !keep_delims)
11779 PL_thisclose = newSVpvn(s,termlen);
11784 sv_catpvn(sv, s, termlen);
11789 if (PL_madskills) {
11790 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11791 const int len = s - tstart - termlen;
11793 sv_catpvn(PL_thisstuff, tstart, len);
11795 PL_thisstuff = newSVpvn(tstart, len);
11796 if (!PL_thisclose && !keep_delims)
11797 PL_thisclose = newSVpvn(s - termlen,termlen);
11801 if (has_utf8 || PL_encoding)
11804 PL_multi_end = CopLINE(PL_curcop);
11806 /* if we allocated too much space, give some back */
11807 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11808 SvLEN_set(sv, SvCUR(sv) + 1);
11809 SvPV_renew(sv, SvLEN(sv));
11812 /* decide whether this is the first or second quoted string we've read
11825 takes: pointer to position in buffer
11826 returns: pointer to new position in buffer
11827 side-effects: builds ops for the constant in yylval.op
11829 Read a number in any of the formats that Perl accepts:
11831 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11832 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11835 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11837 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11840 If it reads a number without a decimal point or an exponent, it will
11841 try converting the number to an integer and see if it can do so
11842 without loss of precision.
11846 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11849 register const char *s = start; /* current position in buffer */
11850 register char *d; /* destination in temp buffer */
11851 register char *e; /* end of temp buffer */
11852 NV nv; /* number read, as a double */
11853 SV *sv = NULL; /* place to put the converted number */
11854 bool floatit; /* boolean: int or float? */
11855 const char *lastub = NULL; /* position of last underbar */
11856 static char const number_too_long[] = "Number too long";
11858 /* We use the first character to decide what type of number this is */
11862 Perl_croak(aTHX_ "panic: scan_num");
11864 /* if it starts with a 0, it could be an octal number, a decimal in
11865 0.13 disguise, or a hexadecimal number, or a binary number. */
11869 u holds the "number so far"
11870 shift the power of 2 of the base
11871 (hex == 4, octal == 3, binary == 1)
11872 overflowed was the number more than we can hold?
11874 Shift is used when we add a digit. It also serves as an "are
11875 we in octal/hex/binary?" indicator to disallow hex characters
11876 when in octal mode.
11881 bool overflowed = FALSE;
11882 bool just_zero = TRUE; /* just plain 0 or binary number? */
11883 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11884 static const char* const bases[5] =
11885 { "", "binary", "", "octal", "hexadecimal" };
11886 static const char* const Bases[5] =
11887 { "", "Binary", "", "Octal", "Hexadecimal" };
11888 static const char* const maxima[5] =
11890 "0b11111111111111111111111111111111",
11894 const char *base, *Base, *max;
11896 /* check for hex */
11901 } else if (s[1] == 'b') {
11906 /* check for a decimal in disguise */
11907 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11909 /* so it must be octal */
11916 if (ckWARN(WARN_SYNTAX))
11917 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11918 "Misplaced _ in number");
11922 base = bases[shift];
11923 Base = Bases[shift];
11924 max = maxima[shift];
11926 /* read the rest of the number */
11928 /* x is used in the overflow test,
11929 b is the digit we're adding on. */
11934 /* if we don't mention it, we're done */
11938 /* _ are ignored -- but warned about if consecutive */
11940 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11941 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11942 "Misplaced _ in number");
11946 /* 8 and 9 are not octal */
11947 case '8': case '9':
11949 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11953 case '2': case '3': case '4':
11954 case '5': case '6': case '7':
11956 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11959 case '0': case '1':
11960 b = *s++ & 15; /* ASCII digit -> value of digit */
11964 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11965 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11966 /* make sure they said 0x */
11969 b = (*s++ & 7) + 9;
11971 /* Prepare to put the digit we have onto the end
11972 of the number so far. We check for overflows.
11978 x = u << shift; /* make room for the digit */
11980 if ((x >> shift) != u
11981 && !(PL_hints & HINT_NEW_BINARY)) {
11984 if (ckWARN_d(WARN_OVERFLOW))
11985 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11986 "Integer overflow in %s number",
11989 u = x | b; /* add the digit to the end */
11992 n *= nvshift[shift];
11993 /* If an NV has not enough bits in its
11994 * mantissa to represent an UV this summing of
11995 * small low-order numbers is a waste of time
11996 * (because the NV cannot preserve the
11997 * low-order bits anyway): we could just
11998 * remember when did we overflow and in the
11999 * end just multiply n by the right
12007 /* if we get here, we had success: make a scalar value from
12012 /* final misplaced underbar check */
12013 if (s[-1] == '_') {
12014 if (ckWARN(WARN_SYNTAX))
12015 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12020 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12021 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12022 "%s number > %s non-portable",
12028 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12029 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12030 "%s number > %s non-portable",
12035 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12036 sv = new_constant(start, s - start, "integer",
12038 else if (PL_hints & HINT_NEW_BINARY)
12039 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12044 handle decimal numbers.
12045 we're also sent here when we read a 0 as the first digit
12047 case '1': case '2': case '3': case '4': case '5':
12048 case '6': case '7': case '8': case '9': case '.':
12051 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12054 /* read next group of digits and _ and copy into d */
12055 while (isDIGIT(*s) || *s == '_') {
12056 /* skip underscores, checking for misplaced ones
12060 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12061 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12062 "Misplaced _ in number");
12066 /* check for end of fixed-length buffer */
12068 Perl_croak(aTHX_ number_too_long);
12069 /* if we're ok, copy the character */
12074 /* final misplaced underbar check */
12075 if (lastub && s == lastub + 1) {
12076 if (ckWARN(WARN_SYNTAX))
12077 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12080 /* read a decimal portion if there is one. avoid
12081 3..5 being interpreted as the number 3. followed
12084 if (*s == '.' && s[1] != '.') {
12089 if (ckWARN(WARN_SYNTAX))
12090 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12091 "Misplaced _ in number");
12095 /* copy, ignoring underbars, until we run out of digits.
12097 for (; isDIGIT(*s) || *s == '_'; s++) {
12098 /* fixed length buffer check */
12100 Perl_croak(aTHX_ number_too_long);
12102 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12103 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12104 "Misplaced _ in number");
12110 /* fractional part ending in underbar? */
12111 if (s[-1] == '_') {
12112 if (ckWARN(WARN_SYNTAX))
12113 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12114 "Misplaced _ in number");
12116 if (*s == '.' && isDIGIT(s[1])) {
12117 /* oops, it's really a v-string, but without the "v" */
12123 /* read exponent part, if present */
12124 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12128 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12129 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12131 /* stray preinitial _ */
12133 if (ckWARN(WARN_SYNTAX))
12134 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12135 "Misplaced _ in number");
12139 /* allow positive or negative exponent */
12140 if (*s == '+' || *s == '-')
12143 /* stray initial _ */
12145 if (ckWARN(WARN_SYNTAX))
12146 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12147 "Misplaced _ in number");
12151 /* read digits of exponent */
12152 while (isDIGIT(*s) || *s == '_') {
12155 Perl_croak(aTHX_ number_too_long);
12159 if (((lastub && s == lastub + 1) ||
12160 (!isDIGIT(s[1]) && s[1] != '_'))
12161 && ckWARN(WARN_SYNTAX))
12162 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12163 "Misplaced _ in number");
12170 /* make an sv from the string */
12174 We try to do an integer conversion first if no characters
12175 indicating "float" have been found.
12180 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12182 if (flags == IS_NUMBER_IN_UV) {
12184 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12187 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12188 if (uv <= (UV) IV_MIN)
12189 sv_setiv(sv, -(IV)uv);
12196 /* terminate the string */
12198 nv = Atof(PL_tokenbuf);
12202 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12203 (PL_hints & HINT_NEW_INTEGER) )
12204 sv = new_constant(PL_tokenbuf,
12207 (floatit ? "float" : "integer"),
12211 /* if it starts with a v, it could be a v-string */
12214 sv = newSV(5); /* preallocate storage space */
12215 s = scan_vstring(s,sv);
12219 /* make the op for the constant and return */
12222 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12224 lvalp->opval = NULL;
12230 S_scan_formline(pTHX_ register char *s)
12233 register char *eol;
12235 SV * const stuff = newSVpvs("");
12236 bool needargs = FALSE;
12237 bool eofmt = FALSE;
12239 char *tokenstart = s;
12242 if (PL_madskills) {
12243 savewhite = PL_thiswhite;
12248 while (!needargs) {
12251 #ifdef PERL_STRICT_CR
12252 while (SPACE_OR_TAB(*t))
12255 while (SPACE_OR_TAB(*t) || *t == '\r')
12258 if (*t == '\n' || t == PL_bufend) {
12263 if (PL_in_eval && !PL_rsfp) {
12264 eol = (char *) memchr(s,'\n',PL_bufend-s);
12269 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12271 for (t = s; t < eol; t++) {
12272 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12274 goto enough; /* ~~ must be first line in formline */
12276 if (*t == '@' || *t == '^')
12280 sv_catpvn(stuff, s, eol-s);
12281 #ifndef PERL_STRICT_CR
12282 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12283 char *end = SvPVX(stuff) + SvCUR(stuff);
12286 SvCUR_set(stuff, SvCUR(stuff) - 1);
12296 if (PL_madskills) {
12298 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12300 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12303 s = filter_gets(PL_linestr, PL_rsfp, 0);
12305 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12307 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12309 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12310 PL_last_lop = PL_last_uni = NULL;
12319 if (SvCUR(stuff)) {
12322 PL_lex_state = LEX_NORMAL;
12323 start_force(PL_curforce);
12324 NEXTVAL_NEXTTOKE.ival = 0;
12328 PL_lex_state = LEX_FORMLINE;
12330 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12332 else if (PL_encoding)
12333 sv_recode_to_utf8(stuff, PL_encoding);
12335 start_force(PL_curforce);
12336 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12338 start_force(PL_curforce);
12339 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12343 SvREFCNT_dec(stuff);
12345 PL_lex_formbrack = 0;
12349 if (PL_madskills) {
12351 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12353 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12354 PL_thiswhite = savewhite;
12366 PL_cshlen = strlen(PL_cshname);
12368 #if defined(USE_ITHREADS)
12369 PERL_UNUSED_CONTEXT;
12375 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12378 const I32 oldsavestack_ix = PL_savestack_ix;
12379 CV* const outsidecv = PL_compcv;
12382 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12384 SAVEI32(PL_subline);
12385 save_item(PL_subname);
12386 SAVESPTR(PL_compcv);
12388 PL_compcv = (CV*)newSV(0);
12389 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
12390 CvFLAGS(PL_compcv) |= flags;
12392 PL_subline = CopLINE(PL_curcop);
12393 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12394 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12395 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12397 return oldsavestack_ix;
12401 #pragma segment Perl_yylex
12404 Perl_yywarn(pTHX_ const char *s)
12407 PL_in_eval |= EVAL_WARNONLY;
12409 PL_in_eval &= ~EVAL_WARNONLY;
12414 Perl_yyerror(pTHX_ const char *s)
12417 const char *where = NULL;
12418 const char *context = NULL;
12421 int yychar = PL_parser->yychar;
12423 if (!yychar || (yychar == ';' && !PL_rsfp))
12425 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12426 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12427 PL_oldbufptr != PL_bufptr) {
12430 The code below is removed for NetWare because it abends/crashes on NetWare
12431 when the script has error such as not having the closing quotes like:
12432 if ($var eq "value)
12433 Checking of white spaces is anyway done in NetWare code.
12436 while (isSPACE(*PL_oldoldbufptr))
12439 context = PL_oldoldbufptr;
12440 contlen = PL_bufptr - PL_oldoldbufptr;
12442 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12443 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12446 The code below is removed for NetWare because it abends/crashes on NetWare
12447 when the script has error such as not having the closing quotes like:
12448 if ($var eq "value)
12449 Checking of white spaces is anyway done in NetWare code.
12452 while (isSPACE(*PL_oldbufptr))
12455 context = PL_oldbufptr;
12456 contlen = PL_bufptr - PL_oldbufptr;
12458 else if (yychar > 255)
12459 where = "next token ???";
12460 else if (yychar == -2) { /* YYEMPTY */
12461 if (PL_lex_state == LEX_NORMAL ||
12462 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12463 where = "at end of line";
12464 else if (PL_lex_inpat)
12465 where = "within pattern";
12467 where = "within string";
12470 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12472 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12473 else if (isPRINT_LC(yychar))
12474 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12476 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12477 where = SvPVX_const(where_sv);
12479 msg = sv_2mortal(newSVpv(s, 0));
12480 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12481 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12483 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12485 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12486 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12487 Perl_sv_catpvf(aTHX_ msg,
12488 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12489 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12492 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12493 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12496 if (PL_error_count >= 10) {
12497 if (PL_in_eval && SvCUR(ERRSV))
12498 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12499 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12501 Perl_croak(aTHX_ "%s has too many errors.\n",
12502 OutCopFILE(PL_curcop));
12505 PL_in_my_stash = NULL;
12509 #pragma segment Main
12513 S_swallow_bom(pTHX_ U8 *s)
12516 const STRLEN slen = SvCUR(PL_linestr);
12519 if (s[1] == 0xFE) {
12520 /* UTF-16 little-endian? (or UTF32-LE?) */
12521 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12522 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12523 #ifndef PERL_NO_UTF16_FILTER
12524 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12527 if (PL_bufend > (char*)s) {
12531 filter_add(utf16rev_textfilter, NULL);
12532 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12533 utf16_to_utf8_reversed(s, news,
12534 PL_bufend - (char*)s - 1,
12536 sv_setpvn(PL_linestr, (const char*)news, newlen);
12538 s = (U8*)SvPVX(PL_linestr);
12539 Copy(news, s, newlen, U8);
12543 SvUTF8_on(PL_linestr);
12544 s = (U8*)SvPVX(PL_linestr);
12546 /* FIXME - is this a general bug fix? */
12549 PL_bufend = SvPVX(PL_linestr) + newlen;
12552 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12557 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12558 #ifndef PERL_NO_UTF16_FILTER
12559 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12562 if (PL_bufend > (char *)s) {
12566 filter_add(utf16_textfilter, NULL);
12567 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12568 utf16_to_utf8(s, news,
12569 PL_bufend - (char*)s,
12571 sv_setpvn(PL_linestr, (const char*)news, newlen);
12573 SvUTF8_on(PL_linestr);
12574 s = (U8*)SvPVX(PL_linestr);
12575 PL_bufend = SvPVX(PL_linestr) + newlen;
12578 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12583 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12584 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12585 s += 3; /* UTF-8 */
12591 if (s[2] == 0xFE && s[3] == 0xFF) {
12592 /* UTF-32 big-endian */
12593 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12596 else if (s[2] == 0 && s[3] != 0) {
12599 * are a good indicator of UTF-16BE. */
12600 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12606 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12607 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12608 s += 4; /* UTF-8 */
12614 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12617 * are a good indicator of UTF-16LE. */
12618 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12627 * Restore a source filter.
12631 restore_rsfp(pTHX_ void *f)
12634 PerlIO * const fp = (PerlIO*)f;
12636 if (PL_rsfp == PerlIO_stdin())
12637 PerlIO_clearerr(PL_rsfp);
12638 else if (PL_rsfp && (PL_rsfp != fp))
12639 PerlIO_close(PL_rsfp);
12643 #ifndef PERL_NO_UTF16_FILTER
12645 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12648 const STRLEN old = SvCUR(sv);
12649 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12650 DEBUG_P(PerlIO_printf(Perl_debug_log,
12651 "utf16_textfilter(%p): %d %d (%d)\n",
12652 FPTR2DPTR(void *, utf16_textfilter),
12653 idx, maxlen, (int) count));
12657 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12658 Copy(SvPVX_const(sv), tmps, old, char);
12659 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12660 SvCUR(sv) - old, &newlen);
12661 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12663 DEBUG_P({sv_dump(sv);});
12668 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12671 const STRLEN old = SvCUR(sv);
12672 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12673 DEBUG_P(PerlIO_printf(Perl_debug_log,
12674 "utf16rev_textfilter(%p): %d %d (%d)\n",
12675 FPTR2DPTR(void *, utf16rev_textfilter),
12676 idx, maxlen, (int) count));
12680 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12681 Copy(SvPVX_const(sv), tmps, old, char);
12682 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12683 SvCUR(sv) - old, &newlen);
12684 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12686 DEBUG_P({ sv_dump(sv); });
12692 Returns a pointer to the next character after the parsed
12693 vstring, as well as updating the passed in sv.
12695 Function must be called like
12698 s = scan_vstring(s,sv);
12700 The sv should already be large enough to store the vstring
12701 passed in, for performance reasons.
12706 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
12709 const char *pos = s;
12710 const char *start = s;
12711 if (*pos == 'v') pos++; /* get past 'v' */
12712 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12714 if ( *pos != '.') {
12715 /* this may not be a v-string if followed by => */
12716 const char *next = pos;
12717 while (next < PL_bufend && isSPACE(*next))
12719 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
12720 /* return string not v-string */
12721 sv_setpvn(sv,(char *)s,pos-s);
12722 return (char *)pos;
12726 if (!isALPHA(*pos)) {
12727 U8 tmpbuf[UTF8_MAXBYTES+1];
12730 s++; /* get past 'v' */
12732 sv_setpvn(sv, "", 0);
12735 /* this is atoi() that tolerates underscores */
12738 const char *end = pos;
12740 while (--end >= s) {
12742 const UV orev = rev;
12743 rev += (*end - '0') * mult;
12745 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12746 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12747 "Integer overflow in decimal number");
12751 if (rev > 0x7FFFFFFF)
12752 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12754 /* Append native character for the rev point */
12755 tmpend = uvchr_to_utf8(tmpbuf, rev);
12756 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12757 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12759 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
12765 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12769 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12777 * c-indentation-style: bsd
12778 * c-basic-offset: 4
12779 * indent-tabs-mode: t
12782 * ex: set ts=8 sts=4 sw=4 noet: