3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 /* XXX temporary backwards compatibility */
32 #define PL_lex_brackets (PL_parser->lex_brackets)
33 #define PL_lex_brackstack (PL_parser->lex_brackstack)
34 #define PL_lex_casemods (PL_parser->lex_casemods)
35 #define PL_lex_casestack (PL_parser->lex_casestack)
36 #define PL_lex_defer (PL_parser->lex_defer)
37 #define PL_lex_dojoin (PL_parser->lex_dojoin)
38 #define PL_lex_expect (PL_parser->lex_expect)
39 #define PL_lex_formbrack (PL_parser->lex_formbrack)
40 #define PL_lex_inpat (PL_parser->lex_inpat)
41 #define PL_lex_inwhat (PL_parser->lex_inwhat)
42 #define PL_lex_op (PL_parser->lex_op)
43 #define PL_lex_repl (PL_parser->lex_repl)
44 #define PL_lex_starts (PL_parser->lex_starts)
45 #define PL_lex_stuff (PL_parser->lex_stuff)
46 #define PL_multi_start (PL_parser->multi_start)
47 #define PL_multi_open (PL_parser->multi_open)
48 #define PL_multi_close (PL_parser->multi_close)
49 #define PL_pending_ident (PL_parser->pending_ident)
50 #define PL_preambled (PL_parser->preambled)
51 #define PL_sublex_info (PL_parser->sublex_info)
54 # define PL_endwhite (PL_parser->endwhite)
55 # define PL_faketokens (PL_parser->faketokens)
56 # define PL_lasttoke (PL_parser->lasttoke)
57 # define PL_nextwhite (PL_parser->nextwhite)
58 # define PL_realtokenstart (PL_parser->realtokenstart)
59 # define PL_skipwhite (PL_parser->skipwhite)
60 # define PL_thisclose (PL_parser->thisclose)
61 # define PL_thismad (PL_parser->thismad)
62 # define PL_thisopen (PL_parser->thisopen)
63 # define PL_thisstuff (PL_parser->thisstuff)
64 # define PL_thistoken (PL_parser->thistoken)
65 # define PL_thiswhite (PL_parser->thiswhite)
69 S_pending_ident(pTHX);
71 static const char ident_too_long[] = "Identifier too long";
72 static const char commaless_variable_list[] = "comma-less variable list";
74 static void restore_rsfp(pTHX_ void *f);
75 #ifndef PERL_NO_UTF16_FILTER
76 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
77 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
81 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
82 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
84 # define CURMAD(slot,sv)
85 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
88 #define XFAKEBRACK 128
91 #ifdef USE_UTF8_SCRIPTS
92 # define UTF (!IN_BYTES)
94 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
97 /* In variables named $^X, these are the legal values for X.
98 * 1999-02-27 mjd-perl-patch@plover.com */
99 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
101 /* On MacOS, respect nonbreaking spaces */
102 #ifdef MACOS_TRADITIONAL
103 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
105 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
108 /* LEX_* are values for PL_lex_state, the state of the lexer.
109 * They are arranged oddly so that the guard on the switch statement
110 * can get by with a single comparison (if the compiler is smart enough).
113 /* #define LEX_NOTPARSING 11 is done in perl.h. */
115 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
116 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
117 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
118 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
119 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
121 /* at end of code, eg "$x" followed by: */
122 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
123 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
125 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
126 string or after \E, $foo, etc */
127 #define LEX_INTERPCONST 2 /* NOT USED */
128 #define LEX_FORMLINE 1 /* expecting a format line */
129 #define LEX_KNOWNEXT 0 /* next token known; just return it */
133 static const char* const lex_state_names[] = {
152 #include "keywords.h"
154 /* CLINE is a macro that ensures PL_copline has a sane value */
159 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
162 # define SKIPSPACE0(s) skipspace0(s)
163 # define SKIPSPACE1(s) skipspace1(s)
164 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
165 # define PEEKSPACE(s) skipspace2(s,0)
167 # define SKIPSPACE0(s) skipspace(s)
168 # define SKIPSPACE1(s) skipspace(s)
169 # define SKIPSPACE2(s,tsv) skipspace(s)
170 # define PEEKSPACE(s) skipspace(s)
174 * Convenience functions to return different tokens and prime the
175 * lexer for the next token. They all take an argument.
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
187 * FUN1 : not used, except for not, which isn't a UNIOP
188 * BOop : bitwise or or xor
190 * SHop : shift operator
191 * PWop : power operator
192 * PMop : pattern-matching operator
193 * Aop : addition-level operator
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
196 * Rop : relational operator <= != gt
198 * Also see LOP and lop() below.
201 #ifdef DEBUGGING /* Serve -DT. */
202 # define REPORT(retval) tokereport((I32)retval)
204 # define REPORT(retval) (retval)
207 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
210 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
215 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
216 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
217 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
218 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
219 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
220 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
221 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
222 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
223 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
224 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
225 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
226 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
228 /* This bit of chicanery makes a unary function followed by
229 * a parenthesis into a function with one argument, highest precedence.
230 * The UNIDOR macro is for unary functions that can be followed by the //
231 * operator (such as C<shift // 0>).
233 #define UNI2(f,x) { \
237 PL_last_uni = PL_oldbufptr; \
238 PL_last_lop_op = f; \
240 return REPORT( (int)FUNC1 ); \
242 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
244 #define UNI(f) UNI2(f,XTERM)
245 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
247 #define UNIBRACK(f) { \
250 PL_last_uni = PL_oldbufptr; \
252 return REPORT( (int)FUNC1 ); \
254 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
257 /* grandfather return to old style */
258 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
262 /* how to interpret the yylval associated with the token */
266 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
272 static struct debug_tokens {
274 enum token_type type;
276 } const debug_tokens[] =
278 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
279 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
280 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
281 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
282 { ARROW, TOKENTYPE_NONE, "ARROW" },
283 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
284 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
285 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
286 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
287 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
288 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
289 { DO, TOKENTYPE_NONE, "DO" },
290 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
291 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
292 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
293 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
294 { ELSE, TOKENTYPE_NONE, "ELSE" },
295 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
296 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
297 { FOR, TOKENTYPE_IVAL, "FOR" },
298 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
299 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
300 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
301 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
302 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
303 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
304 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
305 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
306 { IF, TOKENTYPE_IVAL, "IF" },
307 { LABEL, TOKENTYPE_PVAL, "LABEL" },
308 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
309 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
310 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
311 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
312 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
313 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
314 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
315 { MY, TOKENTYPE_IVAL, "MY" },
316 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
317 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
318 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
319 { OROP, TOKENTYPE_IVAL, "OROP" },
320 { OROR, TOKENTYPE_NONE, "OROR" },
321 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
322 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
323 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
324 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
325 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
326 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
327 { PREINC, TOKENTYPE_NONE, "PREINC" },
328 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
329 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
330 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
331 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
332 { SUB, TOKENTYPE_NONE, "SUB" },
333 { THING, TOKENTYPE_OPVAL, "THING" },
334 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
335 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
336 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
337 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
338 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
339 { USE, TOKENTYPE_IVAL, "USE" },
340 { WHEN, TOKENTYPE_IVAL, "WHEN" },
341 { WHILE, TOKENTYPE_IVAL, "WHILE" },
342 { WORD, TOKENTYPE_OPVAL, "WORD" },
343 { 0, TOKENTYPE_NONE, NULL }
346 /* dump the returned token in rv, plus any optional arg in yylval */
349 S_tokereport(pTHX_ I32 rv)
353 const char *name = NULL;
354 enum token_type type = TOKENTYPE_NONE;
355 const struct debug_tokens *p;
356 SV* const report = newSVpvs("<== ");
358 for (p = debug_tokens; p->token; p++) {
359 if (p->token == (int)rv) {
366 Perl_sv_catpv(aTHX_ report, name);
367 else if ((char)rv > ' ' && (char)rv < '~')
368 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
370 sv_catpvs(report, "EOF");
372 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
375 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
378 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
380 case TOKENTYPE_OPNUM:
381 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
382 PL_op_name[yylval.ival]);
385 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
387 case TOKENTYPE_OPVAL:
389 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
390 PL_op_name[yylval.opval->op_type]);
391 if (yylval.opval->op_type == OP_CONST) {
392 Perl_sv_catpvf(aTHX_ report, " %s",
393 SvPEEK(cSVOPx_sv(yylval.opval)));
398 sv_catpvs(report, "(opval=null)");
401 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
407 /* print the buffer with suitable escapes */
410 S_printbuf(pTHX_ const char* fmt, const char* s)
412 SV* const tmp = newSVpvs("");
413 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
422 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
423 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
427 S_ao(pTHX_ int toketype)
430 if (*PL_bufptr == '=') {
432 if (toketype == ANDAND)
433 yylval.ival = OP_ANDASSIGN;
434 else if (toketype == OROR)
435 yylval.ival = OP_ORASSIGN;
436 else if (toketype == DORDOR)
437 yylval.ival = OP_DORASSIGN;
445 * When Perl expects an operator and finds something else, no_op
446 * prints the warning. It always prints "<something> found where
447 * operator expected. It prints "Missing semicolon on previous line?"
448 * if the surprise occurs at the start of the line. "do you need to
449 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
450 * where the compiler doesn't know if foo is a method call or a function.
451 * It prints "Missing operator before end of line" if there's nothing
452 * after the missing operator, or "... before <...>" if there is something
453 * after the missing operator.
457 S_no_op(pTHX_ const char *what, char *s)
460 char * const oldbp = PL_bufptr;
461 const bool is_first = (PL_oldbufptr == PL_linestart);
467 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
468 if (ckWARN_d(WARN_SYNTAX)) {
470 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
471 "\t(Missing semicolon on previous line?)\n");
472 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
474 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
476 if (t < PL_bufptr && isSPACE(*t))
477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
478 "\t(Do you need to predeclare %.*s?)\n",
479 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
483 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
484 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
492 * Complain about missing quote/regexp/heredoc terminator.
493 * If it's called with NULL then it cauterizes the line buffer.
494 * If we're in a delimited string and the delimiter is a control
495 * character, it's reformatted into a two-char sequence like ^C.
500 S_missingterm(pTHX_ char *s)
506 char * const nl = strrchr(s,'\n');
512 iscntrl(PL_multi_close)
514 PL_multi_close < 32 || PL_multi_close == 127
518 tmpbuf[1] = (char)toCTRL(PL_multi_close);
523 *tmpbuf = (char)PL_multi_close;
527 q = strchr(s,'"') ? '\'' : '"';
528 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
531 #define FEATURE_IS_ENABLED(name) \
532 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
533 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
535 * S_feature_is_enabled
536 * Check whether the named feature is enabled.
539 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
542 HV * const hinthv = GvHV(PL_hintgv);
543 char he_name[32] = "feature_";
544 (void) my_strlcpy(&he_name[8], name, 24);
546 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
554 Perl_deprecate(pTHX_ const char *s)
556 if (ckWARN(WARN_DEPRECATED))
557 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
561 Perl_deprecate_old(pTHX_ const char *s)
563 /* This function should NOT be called for any new deprecated warnings */
564 /* Use Perl_deprecate instead */
566 /* It is here to maintain backward compatibility with the pre-5.8 */
567 /* warnings category hierarchy. The "deprecated" category used to */
568 /* live under the "syntax" category. It is now a top-level category */
569 /* in its own right. */
571 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
572 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
573 "Use of %s is deprecated", s);
577 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
578 * utf16-to-utf8-reversed.
581 #ifdef PERL_CR_FILTER
585 register const char *s = SvPVX_const(sv);
586 register const char * const e = s + SvCUR(sv);
587 /* outer loop optimized to do nothing if there are no CR-LFs */
589 if (*s++ == '\r' && *s == '\n') {
590 /* hit a CR-LF, need to copy the rest */
591 register char *d = s - 1;
594 if (*s == '\r' && s[1] == '\n')
605 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
607 const I32 count = FILTER_READ(idx+1, sv, maxlen);
608 if (count > 0 && !maxlen)
618 * Create a parser object and initialise its parser and lexer fields
622 Perl_lex_start(pTHX_ SV *line)
625 const char *s = NULL;
629 /* create and initialise a parser */
631 Newxz(parser, 1, yy_parser);
632 parser->old_parser = PL_parser;
635 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
636 parser->ps = parser->stack;
637 parser->stack_size = YYINITDEPTH;
639 parser->stack->state = 0;
640 parser->yyerrstatus = 0;
641 parser->yychar = YYEMPTY; /* Cause a token to be read. */
643 /* on scope exit, free this parser and restore any outer one */
646 /* initialise lexer state */
648 SAVEI32(PL_lex_state);
650 if (PL_lex_state == LEX_KNOWNEXT) {
651 I32 toke = parser->old_parser->lasttoke;
652 while (--toke >= 0) {
653 SAVEI32(PL_nexttoke[toke].next_type);
654 SAVEVPTR(PL_nexttoke[toke].next_val);
656 SAVEVPTR(PL_nexttoke[toke].next_mad);
659 SAVEI32(PL_curforce);
662 if (PL_lex_state == LEX_KNOWNEXT) {
663 I32 toke = PL_nexttoke;
664 while (--toke >= 0) {
665 SAVEI32(PL_nexttype[toke]);
666 SAVEVPTR(PL_nextval[toke]);
668 SAVEI32(PL_nexttoke);
671 SAVECOPLINE(PL_curcop);
674 SAVEPPTR(PL_oldbufptr);
675 SAVEPPTR(PL_oldoldbufptr);
676 SAVEPPTR(PL_last_lop);
677 SAVEPPTR(PL_last_uni);
678 SAVEPPTR(PL_linestart);
679 SAVESPTR(PL_linestr);
680 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
684 PL_lex_state = LEX_NORMAL;
686 Newx(parser->lex_brackstack, 120, char);
687 Newx(parser->lex_casestack, 12, char);
688 *parser->lex_casestack = '\0';
694 s = SvPV_const(line, len);
699 PL_linestr = newSVpvs("\n;");
700 } else if (SvREADONLY(line) || s[len-1] != ';') {
701 PL_linestr = newSVsv(line);
703 sv_catpvs(PL_linestr, "\n;");
706 SvREFCNT_inc_simple_void_NN(line);
709 /* PL_linestr needs to survive until end of scope, not just the next
710 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
711 SAVEFREESV(PL_linestr);
712 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
713 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
714 PL_last_lop = PL_last_uni = NULL;
719 /* delete a parser object */
722 Perl_parser_free(pTHX_ const yy_parser *parser)
724 Safefree(parser->stack);
725 Safefree(parser->lex_brackstack);
726 Safefree(parser->lex_casestack);
727 PL_parser = parser->old_parser;
734 * Finalizer for lexing operations. Must be called when the parser is
735 * done with the lexer.
742 PL_doextract = FALSE;
747 * This subroutine has nothing to do with tilting, whether at windmills
748 * or pinball tables. Its name is short for "increment line". It
749 * increments the current line number in CopLINE(PL_curcop) and checks
750 * to see whether the line starts with a comment of the form
751 * # line 500 "foo.pm"
752 * If so, it sets the current line number and file to the values in the comment.
756 S_incline(pTHX_ const char *s)
763 CopLINE_inc(PL_curcop);
766 while (SPACE_OR_TAB(*s))
768 if (strnEQ(s, "line", 4))
772 if (SPACE_OR_TAB(*s))
776 while (SPACE_OR_TAB(*s))
784 while (SPACE_OR_TAB(*s))
786 if (*s == '"' && (t = strchr(s+1, '"'))) {
796 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
798 if (*e != '\n' && *e != '\0')
799 return; /* false alarm */
802 const STRLEN len = t - s;
804 const char * const cf = CopFILE(PL_curcop);
805 STRLEN tmplen = cf ? strlen(cf) : 0;
806 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
807 /* must copy *{"::_<(eval N)[oldfilename:L]"}
808 * to *{"::_<newfilename"} */
809 /* However, the long form of evals is only turned on by the
810 debugger - usually they're "(eval %lu)" */
814 STRLEN tmplen2 = len;
815 if (tmplen + 2 <= sizeof smallbuf)
818 Newx(tmpbuf, tmplen + 2, char);
821 memcpy(tmpbuf + 2, cf, tmplen);
823 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
828 if (tmplen2 + 2 <= sizeof smallbuf)
831 Newx(tmpbuf2, tmplen2 + 2, char);
833 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
834 /* Either they malloc'd it, or we malloc'd it,
835 so no prefix is present in ours. */
840 memcpy(tmpbuf2 + 2, s, tmplen2);
843 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
845 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
846 /* adjust ${"::_<newfilename"} to store the new file name */
847 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
848 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
849 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
852 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
854 if (tmpbuf != smallbuf) Safefree(tmpbuf);
857 CopFILE_free(PL_curcop);
858 CopFILE_setn(PL_curcop, s, len);
860 CopLINE_set(PL_curcop, atoi(n)-1);
864 /* skip space before PL_thistoken */
867 S_skipspace0(pTHX_ register char *s)
874 PL_thiswhite = newSVpvs("");
875 sv_catsv(PL_thiswhite, PL_skipwhite);
876 sv_free(PL_skipwhite);
879 PL_realtokenstart = s - SvPVX(PL_linestr);
883 /* skip space after PL_thistoken */
886 S_skipspace1(pTHX_ register char *s)
888 const char *start = s;
889 I32 startoff = start - SvPVX(PL_linestr);
894 start = SvPVX(PL_linestr) + startoff;
895 if (!PL_thistoken && PL_realtokenstart >= 0) {
896 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
897 PL_thistoken = newSVpvn(tstart, start - tstart);
899 PL_realtokenstart = -1;
902 PL_nextwhite = newSVpvs("");
903 sv_catsv(PL_nextwhite, PL_skipwhite);
904 sv_free(PL_skipwhite);
911 S_skipspace2(pTHX_ register char *s, SV **svp)
914 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
915 const I32 startoff = s - SvPVX(PL_linestr);
918 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
919 if (!PL_madskills || !svp)
921 start = SvPVX(PL_linestr) + startoff;
922 if (!PL_thistoken && PL_realtokenstart >= 0) {
923 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
924 PL_thistoken = newSVpvn(tstart, start - tstart);
925 PL_realtokenstart = -1;
930 sv_setsv(*svp, PL_skipwhite);
931 sv_free(PL_skipwhite);
940 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
942 AV *av = CopFILEAVx(PL_curcop);
944 SV * const sv = newSV_type(SVt_PVMG);
946 sv_setsv(sv, orig_sv);
948 sv_setpvn(sv, buf, len);
951 av_store(av, (I32)CopLINE(PL_curcop), sv);
957 * Called to gobble the appropriate amount and type of whitespace.
958 * Skips comments as well.
962 S_skipspace(pTHX_ register char *s)
967 int startoff = s - SvPVX(PL_linestr);
970 sv_free(PL_skipwhite);
975 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
976 while (s < PL_bufend && SPACE_OR_TAB(*s))
986 SSize_t oldprevlen, oldoldprevlen;
987 SSize_t oldloplen = 0, oldunilen = 0;
988 while (s < PL_bufend && isSPACE(*s)) {
989 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
994 if (s < PL_bufend && *s == '#') {
995 while (s < PL_bufend && *s != '\n')
999 if (PL_in_eval && !PL_rsfp) {
1006 /* only continue to recharge the buffer if we're at the end
1007 * of the buffer, we're not reading from a source filter, and
1008 * we're in normal lexing mode
1010 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1011 PL_lex_state == LEX_FORMLINE)
1018 /* try to recharge the buffer */
1020 curoff = s - SvPVX(PL_linestr);
1023 if ((s = filter_gets(PL_linestr, PL_rsfp,
1024 (prevlen = SvCUR(PL_linestr)))) == NULL)
1027 if (PL_madskills && curoff != startoff) {
1029 PL_skipwhite = newSVpvs("");
1030 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1034 /* mustn't throw out old stuff yet if madpropping */
1035 SvCUR(PL_linestr) = curoff;
1036 s = SvPVX(PL_linestr) + curoff;
1038 if (curoff && s[-1] == '\n')
1042 /* end of file. Add on the -p or -n magic */
1043 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1046 sv_catpvs(PL_linestr,
1047 ";}continue{print or die qq(-p destination: $!\\n);}");
1049 sv_setpvs(PL_linestr,
1050 ";}continue{print or die qq(-p destination: $!\\n);}");
1052 PL_minus_n = PL_minus_p = 0;
1054 else if (PL_minus_n) {
1056 sv_catpvn(PL_linestr, ";}", 2);
1058 sv_setpvn(PL_linestr, ";}", 2);
1064 sv_catpvn(PL_linestr,";", 1);
1066 sv_setpvn(PL_linestr,";", 1);
1069 /* reset variables for next time we lex */
1070 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1076 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1077 PL_last_lop = PL_last_uni = NULL;
1079 /* Close the filehandle. Could be from -P preprocessor,
1080 * STDIN, or a regular file. If we were reading code from
1081 * STDIN (because the commandline held no -e or filename)
1082 * then we don't close it, we reset it so the code can
1083 * read from STDIN too.
1086 if (PL_preprocess && !PL_in_eval)
1087 (void)PerlProc_pclose(PL_rsfp);
1088 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1089 PerlIO_clearerr(PL_rsfp);
1091 (void)PerlIO_close(PL_rsfp);
1096 /* not at end of file, so we only read another line */
1097 /* make corresponding updates to old pointers, for yyerror() */
1098 oldprevlen = PL_oldbufptr - PL_bufend;
1099 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1101 oldunilen = PL_last_uni - PL_bufend;
1103 oldloplen = PL_last_lop - PL_bufend;
1104 PL_linestart = PL_bufptr = s + prevlen;
1105 PL_bufend = s + SvCUR(PL_linestr);
1107 PL_oldbufptr = s + oldprevlen;
1108 PL_oldoldbufptr = s + oldoldprevlen;
1110 PL_last_uni = s + oldunilen;
1112 PL_last_lop = s + oldloplen;
1115 /* debugger active and we're not compiling the debugger code,
1116 * so store the line into the debugger's array of lines
1118 if (PERLDB_LINE && PL_curstash != PL_debstash)
1119 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1126 PL_skipwhite = newSVpvs("");
1127 curoff = s - SvPVX(PL_linestr);
1128 if (curoff - startoff)
1129 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1138 * Check the unary operators to ensure there's no ambiguity in how they're
1139 * used. An ambiguous piece of code would be:
1141 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1142 * the +5 is its argument.
1152 if (PL_oldoldbufptr != PL_last_uni)
1154 while (isSPACE(*PL_last_uni))
1157 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1159 if ((t = strchr(s, '(')) && t < PL_bufptr)
1162 if (ckWARN_d(WARN_AMBIGUOUS)){
1163 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1164 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1165 (int)(s - PL_last_uni), PL_last_uni);
1170 * LOP : macro to build a list operator. Its behaviour has been replaced
1171 * with a subroutine, S_lop() for which LOP is just another name.
1174 #define LOP(f,x) return lop(f,x,s)
1178 * Build a list operator (or something that might be one). The rules:
1179 * - if we have a next token, then it's a list operator [why?]
1180 * - if the next thing is an opening paren, then it's a function
1181 * - else it's a list operator
1185 S_lop(pTHX_ I32 f, int x, char *s)
1192 PL_last_lop = PL_oldbufptr;
1193 PL_last_lop_op = (OPCODE)f;
1196 return REPORT(LSTOP);
1199 return REPORT(LSTOP);
1202 return REPORT(FUNC);
1205 return REPORT(FUNC);
1207 return REPORT(LSTOP);
1213 * Sets up for an eventual force_next(). start_force(0) basically does
1214 * an unshift, while start_force(-1) does a push. yylex removes items
1219 S_start_force(pTHX_ int where)
1223 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1224 where = PL_lasttoke;
1225 assert(PL_curforce < 0 || PL_curforce == where);
1226 if (PL_curforce != where) {
1227 for (i = PL_lasttoke; i > where; --i) {
1228 PL_nexttoke[i] = PL_nexttoke[i-1];
1232 if (PL_curforce < 0) /* in case of duplicate start_force() */
1233 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1234 PL_curforce = where;
1237 curmad('^', newSVpvs(""));
1238 CURMAD('_', PL_nextwhite);
1243 S_curmad(pTHX_ char slot, SV *sv)
1249 if (PL_curforce < 0)
1250 where = &PL_thismad;
1252 where = &PL_nexttoke[PL_curforce].next_mad;
1255 sv_setpvn(sv, "", 0);
1258 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1260 else if (PL_encoding) {
1261 sv_recode_to_utf8(sv, PL_encoding);
1266 /* keep a slot open for the head of the list? */
1267 if (slot != '_' && *where && (*where)->mad_key == '^') {
1268 (*where)->mad_key = slot;
1269 sv_free((*where)->mad_val);
1270 (*where)->mad_val = (void*)sv;
1273 addmad(newMADsv(slot, sv), where, 0);
1276 # define start_force(where) NOOP
1277 # define curmad(slot, sv) NOOP
1282 * When the lexer realizes it knows the next token (for instance,
1283 * it is reordering tokens for the parser) then it can call S_force_next
1284 * to know what token to return the next time the lexer is called. Caller
1285 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1286 * and possibly PL_expect to ensure the lexer handles the token correctly.
1290 S_force_next(pTHX_ I32 type)
1294 if (PL_curforce < 0)
1295 start_force(PL_lasttoke);
1296 PL_nexttoke[PL_curforce].next_type = type;
1297 if (PL_lex_state != LEX_KNOWNEXT)
1298 PL_lex_defer = PL_lex_state;
1299 PL_lex_state = LEX_KNOWNEXT;
1300 PL_lex_expect = PL_expect;
1303 PL_nexttype[PL_nexttoke] = type;
1305 if (PL_lex_state != LEX_KNOWNEXT) {
1306 PL_lex_defer = PL_lex_state;
1307 PL_lex_expect = PL_expect;
1308 PL_lex_state = LEX_KNOWNEXT;
1314 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1317 SV * const sv = newSVpvn(start,len);
1318 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1325 * When the lexer knows the next thing is a word (for instance, it has
1326 * just seen -> and it knows that the next char is a word char, then
1327 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1331 * char *start : buffer position (must be within PL_linestr)
1332 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1333 * int check_keyword : if true, Perl checks to make sure the word isn't
1334 * a keyword (do this if the word is a label, e.g. goto FOO)
1335 * int allow_pack : if true, : characters will also be allowed (require,
1336 * use, etc. do this)
1337 * int allow_initial_tick : used by the "sub" lexer only.
1341 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1347 start = SKIPSPACE1(start);
1349 if (isIDFIRST_lazy_if(s,UTF) ||
1350 (allow_pack && *s == ':') ||
1351 (allow_initial_tick && *s == '\'') )
1353 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1354 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1356 start_force(PL_curforce);
1358 curmad('X', newSVpvn(start,s-start));
1359 if (token == METHOD) {
1364 PL_expect = XOPERATOR;
1368 curmad('g', newSVpvs( "forced" ));
1369 NEXTVAL_NEXTTOKE.opval
1370 = (OP*)newSVOP(OP_CONST,0,
1371 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1372 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1380 * Called when the lexer wants $foo *foo &foo etc, but the program
1381 * text only contains the "foo" portion. The first argument is a pointer
1382 * to the "foo", and the second argument is the type symbol to prefix.
1383 * Forces the next token to be a "WORD".
1384 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1388 S_force_ident(pTHX_ register const char *s, int kind)
1392 const STRLEN len = strlen(s);
1393 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1394 start_force(PL_curforce);
1395 NEXTVAL_NEXTTOKE.opval = o;
1398 o->op_private = OPpCONST_ENTERED;
1399 /* XXX see note in pp_entereval() for why we forgo typo
1400 warnings if the symbol must be introduced in an eval.
1402 gv_fetchpvn_flags(s, len,
1403 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1405 kind == '$' ? SVt_PV :
1406 kind == '@' ? SVt_PVAV :
1407 kind == '%' ? SVt_PVHV :
1415 Perl_str_to_version(pTHX_ SV *sv)
1420 const char *start = SvPV_const(sv,len);
1421 const char * const end = start + len;
1422 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1423 while (start < end) {
1427 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1432 retval += ((NV)n)/nshift;
1441 * Forces the next token to be a version number.
1442 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1443 * and if "guessing" is TRUE, then no new token is created (and the caller
1444 * must use an alternative parsing method).
1448 S_force_version(pTHX_ char *s, int guessing)
1454 I32 startoff = s - SvPVX(PL_linestr);
1463 while (isDIGIT(*d) || *d == '_' || *d == '.')
1467 start_force(PL_curforce);
1468 curmad('X', newSVpvn(s,d-s));
1471 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1473 s = scan_num(s, &yylval);
1474 version = yylval.opval;
1475 ver = cSVOPx(version)->op_sv;
1476 if (SvPOK(ver) && !SvNIOK(ver)) {
1477 SvUPGRADE(ver, SVt_PVNV);
1478 SvNV_set(ver, str_to_version(ver));
1479 SvNOK_on(ver); /* hint that it is a version */
1482 else if (guessing) {
1485 sv_free(PL_nextwhite); /* let next token collect whitespace */
1487 s = SvPVX(PL_linestr) + startoff;
1495 if (PL_madskills && !version) {
1496 sv_free(PL_nextwhite); /* let next token collect whitespace */
1498 s = SvPVX(PL_linestr) + startoff;
1501 /* NOTE: The parser sees the package name and the VERSION swapped */
1502 start_force(PL_curforce);
1503 NEXTVAL_NEXTTOKE.opval = version;
1511 * Tokenize a quoted string passed in as an SV. It finds the next
1512 * chunk, up to end of string or a backslash. It may make a new
1513 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1518 S_tokeq(pTHX_ SV *sv)
1522 register char *send;
1530 s = SvPV_force(sv, len);
1531 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1534 while (s < send && *s != '\\')
1539 if ( PL_hints & HINT_NEW_STRING ) {
1540 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1546 if (s + 1 < send && (s[1] == '\\'))
1547 s++; /* all that, just for this */
1552 SvCUR_set(sv, d - SvPVX_const(sv));
1554 if ( PL_hints & HINT_NEW_STRING )
1555 return new_constant(NULL, 0, "q", sv, pv, "q");
1560 * Now come three functions related to double-quote context,
1561 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1562 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1563 * interact with PL_lex_state, and create fake ( ... ) argument lists
1564 * to handle functions and concatenation.
1565 * They assume that whoever calls them will be setting up a fake
1566 * join call, because each subthing puts a ',' after it. This lets
1569 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1571 * (I'm not sure whether the spurious commas at the end of lcfirst's
1572 * arguments and join's arguments are created or not).
1577 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1579 * Pattern matching will set PL_lex_op to the pattern-matching op to
1580 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1582 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1584 * Everything else becomes a FUNC.
1586 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1587 * had an OP_CONST or OP_READLINE). This just sets us up for a
1588 * call to S_sublex_push().
1592 S_sublex_start(pTHX)
1595 register const I32 op_type = yylval.ival;
1597 if (op_type == OP_NULL) {
1598 yylval.opval = PL_lex_op;
1602 if (op_type == OP_CONST || op_type == OP_READLINE) {
1603 SV *sv = tokeq(PL_lex_stuff);
1605 if (SvTYPE(sv) == SVt_PVIV) {
1606 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1608 const char * const p = SvPV_const(sv, len);
1609 SV * const nsv = newSVpvn(p, len);
1615 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1616 PL_lex_stuff = NULL;
1617 /* Allow <FH> // "foo" */
1618 if (op_type == OP_READLINE)
1619 PL_expect = XTERMORDORDOR;
1622 else if (op_type == OP_BACKTICK && PL_lex_op) {
1623 /* readpipe() vas overriden */
1624 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1625 yylval.opval = PL_lex_op;
1627 PL_lex_stuff = NULL;
1631 PL_sublex_info.super_state = PL_lex_state;
1632 PL_sublex_info.sub_inwhat = op_type;
1633 PL_sublex_info.sub_op = PL_lex_op;
1634 PL_lex_state = LEX_INTERPPUSH;
1638 yylval.opval = PL_lex_op;
1648 * Create a new scope to save the lexing state. The scope will be
1649 * ended in S_sublex_done. Returns a '(', starting the function arguments
1650 * to the uc, lc, etc. found before.
1651 * Sets PL_lex_state to LEX_INTERPCONCAT.
1660 PL_lex_state = PL_sublex_info.super_state;
1661 SAVEI32(PL_lex_dojoin);
1662 SAVEI32(PL_lex_brackets);
1663 SAVEI32(PL_lex_casemods);
1664 SAVEI32(PL_lex_starts);
1665 SAVEI32(PL_lex_state);
1666 SAVEVPTR(PL_lex_inpat);
1667 SAVEI32(PL_lex_inwhat);
1668 SAVECOPLINE(PL_curcop);
1669 SAVEPPTR(PL_bufptr);
1670 SAVEPPTR(PL_bufend);
1671 SAVEPPTR(PL_oldbufptr);
1672 SAVEPPTR(PL_oldoldbufptr);
1673 SAVEPPTR(PL_last_lop);
1674 SAVEPPTR(PL_last_uni);
1675 SAVEPPTR(PL_linestart);
1676 SAVESPTR(PL_linestr);
1677 SAVEGENERICPV(PL_lex_brackstack);
1678 SAVEGENERICPV(PL_lex_casestack);
1680 PL_linestr = PL_lex_stuff;
1681 PL_lex_stuff = NULL;
1683 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1684 = SvPVX(PL_linestr);
1685 PL_bufend += SvCUR(PL_linestr);
1686 PL_last_lop = PL_last_uni = NULL;
1687 SAVEFREESV(PL_linestr);
1689 PL_lex_dojoin = FALSE;
1690 PL_lex_brackets = 0;
1691 Newx(PL_lex_brackstack, 120, char);
1692 Newx(PL_lex_casestack, 12, char);
1693 PL_lex_casemods = 0;
1694 *PL_lex_casestack = '\0';
1696 PL_lex_state = LEX_INTERPCONCAT;
1697 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1699 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1700 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1701 PL_lex_inpat = PL_sublex_info.sub_op;
1703 PL_lex_inpat = NULL;
1710 * Restores lexer state after a S_sublex_push.
1717 if (!PL_lex_starts++) {
1718 SV * const sv = newSVpvs("");
1719 if (SvUTF8(PL_linestr))
1721 PL_expect = XOPERATOR;
1722 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1726 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1727 PL_lex_state = LEX_INTERPCASEMOD;
1731 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1732 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1733 PL_linestr = PL_lex_repl;
1735 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1736 PL_bufend += SvCUR(PL_linestr);
1737 PL_last_lop = PL_last_uni = NULL;
1738 SAVEFREESV(PL_linestr);
1739 PL_lex_dojoin = FALSE;
1740 PL_lex_brackets = 0;
1741 PL_lex_casemods = 0;
1742 *PL_lex_casestack = '\0';
1744 if (SvEVALED(PL_lex_repl)) {
1745 PL_lex_state = LEX_INTERPNORMAL;
1747 /* we don't clear PL_lex_repl here, so that we can check later
1748 whether this is an evalled subst; that means we rely on the
1749 logic to ensure sublex_done() is called again only via the
1750 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1753 PL_lex_state = LEX_INTERPCONCAT;
1763 PL_endwhite = newSVpvs("");
1764 sv_catsv(PL_endwhite, PL_thiswhite);
1768 sv_setpvn(PL_thistoken,"",0);
1770 PL_realtokenstart = -1;
1774 PL_bufend = SvPVX(PL_linestr);
1775 PL_bufend += SvCUR(PL_linestr);
1776 PL_expect = XOPERATOR;
1777 PL_sublex_info.sub_inwhat = 0;
1785 Extracts a pattern, double-quoted string, or transliteration. This
1788 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1789 processing a pattern (PL_lex_inpat is true), a transliteration
1790 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1792 Returns a pointer to the character scanned up to. If this is
1793 advanced from the start pointer supplied (i.e. if anything was
1794 successfully parsed), will leave an OP for the substring scanned
1795 in yylval. Caller must intuit reason for not parsing further
1796 by looking at the next characters herself.
1800 double-quoted style: \r and \n
1801 regexp special ones: \D \s
1804 case and quoting: \U \Q \E
1805 stops on @ and $, but not for $ as tail anchor
1807 In transliterations:
1808 characters are VERY literal, except for - not at the start or end
1809 of the string, which indicates a range. If the range is in bytes,
1810 scan_const expands the range to the full set of intermediate
1811 characters. If the range is in utf8, the hyphen is replaced with
1812 a certain range mark which will be handled by pmtrans() in op.c.
1814 In double-quoted strings:
1816 double-quoted style: \r and \n
1818 deprecated backrefs: \1 (in substitution replacements)
1819 case and quoting: \U \Q \E
1822 scan_const does *not* construct ops to handle interpolated strings.
1823 It stops processing as soon as it finds an embedded $ or @ variable
1824 and leaves it to the caller to work out what's going on.
1826 embedded arrays (whether in pattern or not) could be:
1827 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1829 $ in double-quoted strings must be the symbol of an embedded scalar.
1831 $ in pattern could be $foo or could be tail anchor. Assumption:
1832 it's a tail anchor if $ is the last thing in the string, or if it's
1833 followed by one of "()| \r\n\t"
1835 \1 (backreferences) are turned into $1
1837 The structure of the code is
1838 while (there's a character to process) {
1839 handle transliteration ranges
1840 skip regexp comments /(?#comment)/ and codes /(?{code})/
1841 skip #-initiated comments in //x patterns
1842 check for embedded arrays
1843 check for embedded scalars
1845 leave intact backslashes from leaveit (below)
1846 deprecate \1 in substitution replacements
1847 handle string-changing backslashes \l \U \Q \E, etc.
1848 switch (what was escaped) {
1849 handle \- in a transliteration (becomes a literal -)
1850 handle \132 (octal characters)
1851 handle \x15 and \x{1234} (hex characters)
1852 handle \N{name} (named characters)
1853 handle \cV (control characters)
1854 handle printf-style backslashes (\f, \r, \n, etc)
1856 } (end if backslash)
1857 } (end while character to read)
1862 S_scan_const(pTHX_ char *start)
1865 register char *send = PL_bufend; /* end of the constant */
1866 SV *sv = newSV(send - start); /* sv for the constant */
1867 register char *s = start; /* start of the constant */
1868 register char *d = SvPVX(sv); /* destination for copies */
1869 bool dorange = FALSE; /* are we in a translit range? */
1870 bool didrange = FALSE; /* did we just finish a range? */
1871 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1872 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1875 UV literal_endpoint = 0;
1876 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1879 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1880 /* If we are doing a trans and we know we want UTF8 set expectation */
1881 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1882 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1886 while (s < send || dorange) {
1887 /* get transliterations out of the way (they're most literal) */
1888 if (PL_lex_inwhat == OP_TRANS) {
1889 /* expand a range A-Z to the full set of characters. AIE! */
1891 I32 i; /* current expanded character */
1892 I32 min; /* first character in range */
1893 I32 max; /* last character in range */
1904 char * const c = (char*)utf8_hop((U8*)d, -1);
1908 *c = (char)UTF_TO_NATIVE(0xff);
1909 /* mark the range as done, and continue */
1915 i = d - SvPVX_const(sv); /* remember current offset */
1918 SvLEN(sv) + (has_utf8 ?
1919 (512 - UTF_CONTINUATION_MARK +
1922 /* How many two-byte within 0..255: 128 in UTF-8,
1923 * 96 in UTF-8-mod. */
1925 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1927 d = SvPVX(sv) + i; /* refresh d after realloc */
1931 for (j = 0; j <= 1; j++) {
1932 char * const c = (char*)utf8_hop((U8*)d, -1);
1933 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1939 max = (U8)0xff; /* only to \xff */
1940 uvmax = uv; /* \x{100} to uvmax */
1942 d = c; /* eat endpoint chars */
1947 d -= 2; /* eat the first char and the - */
1948 min = (U8)*d; /* first char in range */
1949 max = (U8)d[1]; /* last char in range */
1956 "Invalid range \"%c-%c\" in transliteration operator",
1957 (char)min, (char)max);
1961 if (literal_endpoint == 2 &&
1962 ((isLOWER(min) && isLOWER(max)) ||
1963 (isUPPER(min) && isUPPER(max)))) {
1965 for (i = min; i <= max; i++)
1967 *d++ = NATIVE_TO_NEED(has_utf8,i);
1969 for (i = min; i <= max; i++)
1971 *d++ = NATIVE_TO_NEED(has_utf8,i);
1976 for (i = min; i <= max; i++)
1979 const U8 ch = (U8)NATIVE_TO_UTF(i);
1980 if (UNI_IS_INVARIANT(ch))
1983 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1984 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1993 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1995 *d++ = (char)UTF_TO_NATIVE(0xff);
1997 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2001 /* mark the range as done, and continue */
2005 literal_endpoint = 0;
2010 /* range begins (ignore - as first or last char) */
2011 else if (*s == '-' && s+1 < send && s != start) {
2013 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2020 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2030 literal_endpoint = 0;
2031 native_range = TRUE;
2036 /* if we get here, we're not doing a transliteration */
2038 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2039 except for the last char, which will be done separately. */
2040 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2042 while (s+1 < send && *s != ')')
2043 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2045 else if (s[2] == '{' /* This should match regcomp.c */
2046 || (s[2] == '?' && s[3] == '{'))
2049 char *regparse = s + (s[2] == '{' ? 3 : 4);
2052 while (count && (c = *regparse)) {
2053 if (c == '\\' && regparse[1])
2061 if (*regparse != ')')
2062 regparse--; /* Leave one char for continuation. */
2063 while (s < regparse)
2064 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2068 /* likewise skip #-initiated comments in //x patterns */
2069 else if (*s == '#' && PL_lex_inpat &&
2070 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2071 while (s+1 < send && *s != '\n')
2072 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2075 /* check for embedded arrays
2076 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2078 else if (*s == '@' && s[1]) {
2079 if (isALNUM_lazy_if(s+1,UTF))
2081 if (strchr(":'{$", s[1]))
2083 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2084 break; /* in regexp, neither @+ nor @- are interpolated */
2087 /* check for embedded scalars. only stop if we're sure it's a
2090 else if (*s == '$') {
2091 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2093 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2094 break; /* in regexp, $ might be tail anchor */
2097 /* End of else if chain - OP_TRANS rejoin rest */
2100 if (*s == '\\' && s+1 < send) {
2103 /* deprecate \1 in strings and substitution replacements */
2104 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2105 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2107 if (ckWARN(WARN_SYNTAX))
2108 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2113 /* string-change backslash escapes */
2114 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2118 /* skip any other backslash escapes in a pattern */
2119 else if (PL_lex_inpat) {
2120 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2121 goto default_action;
2124 /* if we get here, it's either a quoted -, or a digit */
2127 /* quoted - in transliterations */
2129 if (PL_lex_inwhat == OP_TRANS) {
2136 if ((isALPHA(*s) || isDIGIT(*s)) &&
2138 Perl_warner(aTHX_ packWARN(WARN_MISC),
2139 "Unrecognized escape \\%c passed through",
2141 /* default action is to copy the quoted character */
2142 goto default_action;
2145 /* \132 indicates an octal constant */
2146 case '0': case '1': case '2': case '3':
2147 case '4': case '5': case '6': case '7':
2151 uv = grok_oct(s, &len, &flags, NULL);
2154 goto NUM_ESCAPE_INSERT;
2156 /* \x24 indicates a hex constant */
2160 char* const e = strchr(s, '}');
2161 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2162 PERL_SCAN_DISALLOW_PREFIX;
2167 yyerror("Missing right brace on \\x{}");
2171 uv = grok_hex(s, &len, &flags, NULL);
2177 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2178 uv = grok_hex(s, &len, &flags, NULL);
2184 /* Insert oct or hex escaped character.
2185 * There will always enough room in sv since such
2186 * escapes will be longer than any UTF-8 sequence
2187 * they can end up as. */
2189 /* We need to map to chars to ASCII before doing the tests
2192 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2193 if (!has_utf8 && uv > 255) {
2194 /* Might need to recode whatever we have
2195 * accumulated so far if it contains any
2198 * (Can't we keep track of that and avoid
2199 * this rescan? --jhi)
2203 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2204 if (!NATIVE_IS_INVARIANT(*c)) {
2209 const STRLEN offset = d - SvPVX_const(sv);
2211 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2215 while (src >= (const U8 *)SvPVX_const(sv)) {
2216 if (!NATIVE_IS_INVARIANT(*src)) {
2217 const U8 ch = NATIVE_TO_ASCII(*src);
2218 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2219 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2229 if (has_utf8 || uv > 255) {
2230 d = (char*)uvchr_to_utf8((U8*)d, uv);
2232 if (PL_lex_inwhat == OP_TRANS &&
2233 PL_sublex_info.sub_op) {
2234 PL_sublex_info.sub_op->op_private |=
2235 (PL_lex_repl ? OPpTRANS_FROM_UTF
2239 if (uv > 255 && !dorange)
2240 native_range = FALSE;
2252 /* \N{LATIN SMALL LETTER A} is a named character */
2256 char* e = strchr(s, '}');
2263 yyerror("Missing right brace on \\N{}");
2267 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2269 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2270 PERL_SCAN_DISALLOW_PREFIX;
2273 uv = grok_hex(s, &len, &flags, NULL);
2274 if ( e > s && len != (STRLEN)(e - s) ) {
2278 goto NUM_ESCAPE_INSERT;
2280 res = newSVpvn(s + 1, e - s - 1);
2281 type = newSVpvn(s - 2,e - s + 3);
2282 res = new_constant( NULL, 0, "charnames",
2283 res, NULL, SvPVX(type) );
2286 sv_utf8_upgrade(res);
2287 str = SvPV_const(res,len);
2288 #ifdef EBCDIC_NEVER_MIND
2289 /* charnames uses pack U and that has been
2290 * recently changed to do the below uni->native
2291 * mapping, so this would be redundant (and wrong,
2292 * the code point would be doubly converted).
2293 * But leave this in just in case the pack U change
2294 * gets revoked, but the semantics is still
2295 * desireable for charnames. --jhi */
2297 UV uv = utf8_to_uvchr((const U8*)str, 0);
2300 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2302 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2303 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2304 str = SvPV_const(res, len);
2308 if (!has_utf8 && SvUTF8(res)) {
2309 const char * const ostart = SvPVX_const(sv);
2310 SvCUR_set(sv, d - ostart);
2313 sv_utf8_upgrade(sv);
2314 /* this just broke our allocation above... */
2315 SvGROW(sv, (STRLEN)(send - start));
2316 d = SvPVX(sv) + SvCUR(sv);
2319 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2320 const char * const odest = SvPVX_const(sv);
2322 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2323 d = SvPVX(sv) + (d - odest);
2327 native_range = FALSE; /* \N{} is guessed to be Unicode */
2329 Copy(str, d, len, char);
2336 yyerror("Missing braces on \\N{}");
2339 /* \c is a control character */
2348 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2351 yyerror("Missing control char name in \\c");
2355 /* printf-style backslashes, formfeeds, newlines, etc */
2357 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2360 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2363 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2366 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2369 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2372 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2375 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2381 } /* end if (backslash) */
2388 /* If we started with encoded form, or already know we want it
2389 and then encode the next character */
2390 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2392 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2393 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2396 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2397 const STRLEN off = d - SvPVX_const(sv);
2398 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2400 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2403 if (uv > 255 && !dorange)
2404 native_range = FALSE;
2408 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2410 } /* while loop to process each character */
2412 /* terminate the string and set up the sv */
2414 SvCUR_set(sv, d - SvPVX_const(sv));
2415 if (SvCUR(sv) >= SvLEN(sv))
2416 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2419 if (PL_encoding && !has_utf8) {
2420 sv_recode_to_utf8(sv, PL_encoding);
2426 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2427 PL_sublex_info.sub_op->op_private |=
2428 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2432 /* shrink the sv if we allocated more than we used */
2433 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2434 SvPV_shrink_to_cur(sv);
2437 /* return the substring (via yylval) only if we parsed anything */
2438 if (s > PL_bufptr) {
2439 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2440 sv = new_constant(start, s - start,
2441 (const char *)(PL_lex_inpat ? "qr" : "q"),
2444 (( PL_lex_inwhat == OP_TRANS
2446 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2449 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2456 * Returns TRUE if there's more to the expression (e.g., a subscript),
2459 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2461 * ->[ and ->{ return TRUE
2462 * { and [ outside a pattern are always subscripts, so return TRUE
2463 * if we're outside a pattern and it's not { or [, then return FALSE
2464 * if we're in a pattern and the first char is a {
2465 * {4,5} (any digits around the comma) returns FALSE
2466 * if we're in a pattern and the first char is a [
2468 * [SOMETHING] has a funky algorithm to decide whether it's a
2469 * character class or not. It has to deal with things like
2470 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2471 * anything else returns TRUE
2474 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2477 S_intuit_more(pTHX_ register char *s)
2480 if (PL_lex_brackets)
2482 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2484 if (*s != '{' && *s != '[')
2489 /* In a pattern, so maybe we have {n,m}. */
2506 /* On the other hand, maybe we have a character class */
2509 if (*s == ']' || *s == '^')
2512 /* this is terrifying, and it works */
2513 int weight = 2; /* let's weigh the evidence */
2515 unsigned char un_char = 255, last_un_char;
2516 const char * const send = strchr(s,']');
2517 char tmpbuf[sizeof PL_tokenbuf * 4];
2519 if (!send) /* has to be an expression */
2522 Zero(seen,256,char);
2525 else if (isDIGIT(*s)) {
2527 if (isDIGIT(s[1]) && s[2] == ']')
2533 for (; s < send; s++) {
2534 last_un_char = un_char;
2535 un_char = (unsigned char)*s;
2540 weight -= seen[un_char] * 10;
2541 if (isALNUM_lazy_if(s+1,UTF)) {
2543 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2544 len = (int)strlen(tmpbuf);
2545 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2550 else if (*s == '$' && s[1] &&
2551 strchr("[#!%*<>()-=",s[1])) {
2552 if (/*{*/ strchr("])} =",s[2]))
2561 if (strchr("wds]",s[1]))
2563 else if (seen[(U8)'\''] || seen[(U8)'"'])
2565 else if (strchr("rnftbxcav",s[1]))
2567 else if (isDIGIT(s[1])) {
2569 while (s[1] && isDIGIT(s[1]))
2579 if (strchr("aA01! ",last_un_char))
2581 if (strchr("zZ79~",s[1]))
2583 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2584 weight -= 5; /* cope with negative subscript */
2587 if (!isALNUM(last_un_char)
2588 && !(last_un_char == '$' || last_un_char == '@'
2589 || last_un_char == '&')
2590 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2595 if (keyword(tmpbuf, d - tmpbuf, 0))
2598 if (un_char == last_un_char + 1)
2600 weight -= seen[un_char];
2605 if (weight >= 0) /* probably a character class */
2615 * Does all the checking to disambiguate
2617 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2618 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2620 * First argument is the stuff after the first token, e.g. "bar".
2622 * Not a method if bar is a filehandle.
2623 * Not a method if foo is a subroutine prototyped to take a filehandle.
2624 * Not a method if it's really "Foo $bar"
2625 * Method if it's "foo $bar"
2626 * Not a method if it's really "print foo $bar"
2627 * Method if it's really "foo package::" (interpreted as package->foo)
2628 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2629 * Not a method if bar is a filehandle or package, but is quoted with
2634 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2637 char *s = start + (*start == '$');
2638 char tmpbuf[sizeof PL_tokenbuf];
2646 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2650 const char *proto = SvPVX_const(cv);
2661 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2662 /* start is the beginning of the possible filehandle/object,
2663 * and s is the end of it
2664 * tmpbuf is a copy of it
2667 if (*start == '$') {
2668 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2669 isUPPER(*PL_tokenbuf))
2672 len = start - SvPVX(PL_linestr);
2676 start = SvPVX(PL_linestr) + len;
2680 return *s == '(' ? FUNCMETH : METHOD;
2682 if (!keyword(tmpbuf, len, 0)) {
2683 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2687 soff = s - SvPVX(PL_linestr);
2691 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2692 if (indirgv && GvCVu(indirgv))
2694 /* filehandle or package name makes it a method */
2695 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2697 soff = s - SvPVX(PL_linestr);
2700 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2701 return 0; /* no assumptions -- "=>" quotes bearword */
2703 start_force(PL_curforce);
2704 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2705 newSVpvn(tmpbuf,len));
2706 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2708 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2713 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2715 return *s == '(' ? FUNCMETH : METHOD;
2723 * Return a string of Perl code to load the debugger. If PERL5DB
2724 * is set, it will return the contents of that, otherwise a
2725 * compile-time require of perl5db.pl.
2733 const char * const pdb = PerlEnv_getenv("PERL5DB");
2737 SETERRNO(0,SS_NORMAL);
2738 return "BEGIN { require 'perl5db.pl' }";
2744 /* Encoded script support. filter_add() effectively inserts a
2745 * 'pre-processing' function into the current source input stream.
2746 * Note that the filter function only applies to the current source file
2747 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2749 * The datasv parameter (which may be NULL) can be used to pass
2750 * private data to this instance of the filter. The filter function
2751 * can recover the SV using the FILTER_DATA macro and use it to
2752 * store private buffers and state information.
2754 * The supplied datasv parameter is upgraded to a PVIO type
2755 * and the IoDIRP/IoANY field is used to store the function pointer,
2756 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2757 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2758 * private use must be set using malloc'd pointers.
2762 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2768 if (!PL_rsfp_filters)
2769 PL_rsfp_filters = newAV();
2772 SvUPGRADE(datasv, SVt_PVIO);
2773 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2774 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2775 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2776 FPTR2DPTR(void *, IoANY(datasv)),
2777 SvPV_nolen(datasv)));
2778 av_unshift(PL_rsfp_filters, 1);
2779 av_store(PL_rsfp_filters, 0, datasv) ;
2784 /* Delete most recently added instance of this filter function. */
2786 Perl_filter_del(pTHX_ filter_t funcp)
2792 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2793 FPTR2DPTR(void*, funcp)));
2795 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2797 /* if filter is on top of stack (usual case) just pop it off */
2798 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2799 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2800 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2801 IoANY(datasv) = (void *)NULL;
2802 sv_free(av_pop(PL_rsfp_filters));
2806 /* we need to search for the correct entry and clear it */
2807 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2811 /* Invoke the idxth filter function for the current rsfp. */
2812 /* maxlen 0 = read one text line */
2814 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2819 /* This API is bad. It should have been using unsigned int for maxlen.
2820 Not sure if we want to change the API, but if not we should sanity
2821 check the value here. */
2822 const unsigned int correct_length
2831 if (!PL_rsfp_filters)
2833 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2834 /* Provide a default input filter to make life easy. */
2835 /* Note that we append to the line. This is handy. */
2836 DEBUG_P(PerlIO_printf(Perl_debug_log,
2837 "filter_read %d: from rsfp\n", idx));
2838 if (correct_length) {
2841 const int old_len = SvCUR(buf_sv);
2843 /* ensure buf_sv is large enough */
2844 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2845 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2846 correct_length)) <= 0) {
2847 if (PerlIO_error(PL_rsfp))
2848 return -1; /* error */
2850 return 0 ; /* end of file */
2852 SvCUR_set(buf_sv, old_len + len) ;
2855 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2856 if (PerlIO_error(PL_rsfp))
2857 return -1; /* error */
2859 return 0 ; /* end of file */
2862 return SvCUR(buf_sv);
2864 /* Skip this filter slot if filter has been deleted */
2865 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2866 DEBUG_P(PerlIO_printf(Perl_debug_log,
2867 "filter_read %d: skipped (filter deleted)\n",
2869 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2871 /* Get function pointer hidden within datasv */
2872 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2873 DEBUG_P(PerlIO_printf(Perl_debug_log,
2874 "filter_read %d: via function %p (%s)\n",
2875 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2876 /* Call function. The function is expected to */
2877 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2878 /* Return: <0:error, =0:eof, >0:not eof */
2879 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2883 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2886 #ifdef PERL_CR_FILTER
2887 if (!PL_rsfp_filters) {
2888 filter_add(S_cr_textfilter,NULL);
2891 if (PL_rsfp_filters) {
2893 SvCUR_set(sv, 0); /* start with empty line */
2894 if (FILTER_READ(0, sv, 0) > 0)
2895 return ( SvPVX(sv) ) ;
2900 return (sv_gets(sv, fp, append));
2904 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2909 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2913 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2914 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2916 return GvHV(gv); /* Foo:: */
2919 /* use constant CLASS => 'MyClass' */
2920 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2921 if (gv && GvCV(gv)) {
2922 SV * const sv = cv_const_sv(GvCV(gv));
2924 pkgname = SvPV_nolen_const(sv);
2927 return gv_stashpv(pkgname, 0);
2931 * S_readpipe_override
2932 * Check whether readpipe() is overriden, and generates the appropriate
2933 * optree, provided sublex_start() is called afterwards.
2936 S_readpipe_override(pTHX)
2939 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2940 yylval.ival = OP_BACKTICK;
2942 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2944 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2945 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2946 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2948 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2949 append_elem(OP_LIST,
2950 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2951 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2961 * The intent of this yylex wrapper is to minimize the changes to the
2962 * tokener when we aren't interested in collecting madprops. It remains
2963 * to be seen how successful this strategy will be...
2970 char *s = PL_bufptr;
2972 /* make sure PL_thiswhite is initialized */
2976 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2977 if (PL_pending_ident)
2978 return S_pending_ident(aTHX);
2980 /* previous token ate up our whitespace? */
2981 if (!PL_lasttoke && PL_nextwhite) {
2982 PL_thiswhite = PL_nextwhite;
2986 /* isolate the token, and figure out where it is without whitespace */
2987 PL_realtokenstart = -1;
2991 assert(PL_curforce < 0);
2993 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2994 if (!PL_thistoken) {
2995 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2996 PL_thistoken = newSVpvs("");
2998 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2999 PL_thistoken = newSVpvn(tstart, s - tstart);
3002 if (PL_thismad) /* install head */
3003 CURMAD('X', PL_thistoken);
3006 /* last whitespace of a sublex? */
3007 if (optype == ')' && PL_endwhite) {
3008 CURMAD('X', PL_endwhite);
3013 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3014 if (!PL_thiswhite && !PL_endwhite && !optype) {
3015 sv_free(PL_thistoken);
3020 /* put off final whitespace till peg */
3021 if (optype == ';' && !PL_rsfp) {
3022 PL_nextwhite = PL_thiswhite;
3025 else if (PL_thisopen) {
3026 CURMAD('q', PL_thisopen);
3028 sv_free(PL_thistoken);
3032 /* Store actual token text as madprop X */
3033 CURMAD('X', PL_thistoken);
3037 /* add preceding whitespace as madprop _ */
3038 CURMAD('_', PL_thiswhite);
3042 /* add quoted material as madprop = */
3043 CURMAD('=', PL_thisstuff);
3047 /* add terminating quote as madprop Q */
3048 CURMAD('Q', PL_thisclose);
3052 /* special processing based on optype */
3056 /* opval doesn't need a TOKEN since it can already store mp */
3067 append_madprops(PL_thismad, yylval.opval, 0);
3075 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3084 /* remember any fake bracket that lexer is about to discard */
3085 if (PL_lex_brackets == 1 &&
3086 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3089 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3092 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3093 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3096 break; /* don't bother looking for trailing comment */
3105 /* attach a trailing comment to its statement instead of next token */
3109 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3111 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3113 if (*s == '\n' || *s == '#') {
3114 while (s < PL_bufend && *s != '\n')
3118 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3119 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3136 /* Create new token struct. Note: opvals return early above. */
3137 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3144 S_tokenize_use(pTHX_ int is_use, char *s) {
3146 if (PL_expect != XSTATE)
3147 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3148 is_use ? "use" : "no"));
3150 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3151 s = force_version(s, TRUE);
3152 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3153 start_force(PL_curforce);
3154 NEXTVAL_NEXTTOKE.opval = NULL;
3157 else if (*s == 'v') {
3158 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3159 s = force_version(s, FALSE);
3163 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3164 s = force_version(s, FALSE);
3166 yylval.ival = is_use;
3170 static const char* const exp_name[] =
3171 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3172 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3179 Works out what to call the token just pulled out of the input
3180 stream. The yacc parser takes care of taking the ops we return and
3181 stitching them into a tree.
3187 if read an identifier
3188 if we're in a my declaration
3189 croak if they tried to say my($foo::bar)
3190 build the ops for a my() declaration
3191 if it's an access to a my() variable
3192 are we in a sort block?
3193 croak if my($a); $a <=> $b
3194 build ops for access to a my() variable
3195 if in a dq string, and they've said @foo and we can't find @foo
3197 build ops for a bareword
3198 if we already built the token before, use it.
3203 #pragma segment Perl_yylex
3209 register char *s = PL_bufptr;
3214 /* orig_keyword, gvp, and gv are initialized here because
3215 * jump to the label just_a_word_zero can bypass their
3216 * initialization later. */
3217 I32 orig_keyword = 0;
3222 SV* tmp = newSVpvs("");
3223 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3224 (IV)CopLINE(PL_curcop),
3225 lex_state_names[PL_lex_state],
3226 exp_name[PL_expect],
3227 pv_display(tmp, s, strlen(s), 0, 60));
3230 /* check if there's an identifier for us to look at */
3231 if (PL_pending_ident)
3232 return REPORT(S_pending_ident(aTHX));
3234 /* no identifier pending identification */
3236 switch (PL_lex_state) {
3238 case LEX_NORMAL: /* Some compilers will produce faster */
3239 case LEX_INTERPNORMAL: /* code if we comment these out. */
3243 /* when we've already built the next token, just pull it out of the queue */
3247 yylval = PL_nexttoke[PL_lasttoke].next_val;
3249 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3250 PL_nexttoke[PL_lasttoke].next_mad = 0;
3251 if (PL_thismad && PL_thismad->mad_key == '_') {
3252 PL_thiswhite = (SV*)PL_thismad->mad_val;
3253 PL_thismad->mad_val = 0;
3254 mad_free(PL_thismad);
3259 PL_lex_state = PL_lex_defer;
3260 PL_expect = PL_lex_expect;
3261 PL_lex_defer = LEX_NORMAL;
3262 if (!PL_nexttoke[PL_lasttoke].next_type)
3267 yylval = PL_nextval[PL_nexttoke];
3269 PL_lex_state = PL_lex_defer;
3270 PL_expect = PL_lex_expect;
3271 PL_lex_defer = LEX_NORMAL;
3275 /* FIXME - can these be merged? */
3276 return(PL_nexttoke[PL_lasttoke].next_type);
3278 return REPORT(PL_nexttype[PL_nexttoke]);
3281 /* interpolated case modifiers like \L \U, including \Q and \E.
3282 when we get here, PL_bufptr is at the \
3284 case LEX_INTERPCASEMOD:
3286 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3287 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3289 /* handle \E or end of string */
3290 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3292 if (PL_lex_casemods) {
3293 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3294 PL_lex_casestack[PL_lex_casemods] = '\0';
3296 if (PL_bufptr != PL_bufend
3297 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3299 PL_lex_state = LEX_INTERPCONCAT;
3302 PL_thistoken = newSVpvs("\\E");
3308 while (PL_bufptr != PL_bufend &&
3309 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3311 PL_thiswhite = newSVpvs("");
3312 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3316 if (PL_bufptr != PL_bufend)
3319 PL_lex_state = LEX_INTERPCONCAT;
3323 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3324 "### Saw case modifier\n"); });
3326 if (s[1] == '\\' && s[2] == 'E') {
3329 PL_thiswhite = newSVpvs("");
3330 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3333 PL_lex_state = LEX_INTERPCONCAT;
3338 if (!PL_madskills) /* when just compiling don't need correct */
3339 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3340 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3341 if ((*s == 'L' || *s == 'U') &&
3342 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3343 PL_lex_casestack[--PL_lex_casemods] = '\0';
3346 if (PL_lex_casemods > 10)
3347 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3348 PL_lex_casestack[PL_lex_casemods++] = *s;
3349 PL_lex_casestack[PL_lex_casemods] = '\0';
3350 PL_lex_state = LEX_INTERPCONCAT;
3351 start_force(PL_curforce);
3352 NEXTVAL_NEXTTOKE.ival = 0;
3354 start_force(PL_curforce);
3356 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3358 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3360 NEXTVAL_NEXTTOKE.ival = OP_LC;
3362 NEXTVAL_NEXTTOKE.ival = OP_UC;
3364 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3366 Perl_croak(aTHX_ "panic: yylex");
3368 SV* const tmpsv = newSVpvs("");
3369 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3375 if (PL_lex_starts) {
3381 sv_free(PL_thistoken);
3382 PL_thistoken = newSVpvs("");
3385 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3386 if (PL_lex_casemods == 1 && PL_lex_inpat)
3395 case LEX_INTERPPUSH:
3396 return REPORT(sublex_push());
3398 case LEX_INTERPSTART:
3399 if (PL_bufptr == PL_bufend)
3400 return REPORT(sublex_done());
3401 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3402 "### Interpolated variable\n"); });
3404 PL_lex_dojoin = (*PL_bufptr == '@');
3405 PL_lex_state = LEX_INTERPNORMAL;
3406 if (PL_lex_dojoin) {
3407 start_force(PL_curforce);
3408 NEXTVAL_NEXTTOKE.ival = 0;
3410 start_force(PL_curforce);
3411 force_ident("\"", '$');
3412 start_force(PL_curforce);
3413 NEXTVAL_NEXTTOKE.ival = 0;
3415 start_force(PL_curforce);
3416 NEXTVAL_NEXTTOKE.ival = 0;
3418 start_force(PL_curforce);
3419 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3422 if (PL_lex_starts++) {
3427 sv_free(PL_thistoken);
3428 PL_thistoken = newSVpvs("");
3431 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3432 if (!PL_lex_casemods && PL_lex_inpat)
3439 case LEX_INTERPENDMAYBE:
3440 if (intuit_more(PL_bufptr)) {
3441 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3447 if (PL_lex_dojoin) {
3448 PL_lex_dojoin = FALSE;
3449 PL_lex_state = LEX_INTERPCONCAT;
3453 sv_free(PL_thistoken);
3454 PL_thistoken = newSVpvs("");
3459 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3460 && SvEVALED(PL_lex_repl))
3462 if (PL_bufptr != PL_bufend)
3463 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3467 case LEX_INTERPCONCAT:
3469 if (PL_lex_brackets)
3470 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3472 if (PL_bufptr == PL_bufend)
3473 return REPORT(sublex_done());
3475 if (SvIVX(PL_linestr) == '\'') {
3476 SV *sv = newSVsv(PL_linestr);
3479 else if ( PL_hints & HINT_NEW_RE )
3480 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3481 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3485 s = scan_const(PL_bufptr);
3487 PL_lex_state = LEX_INTERPCASEMOD;
3489 PL_lex_state = LEX_INTERPSTART;
3492 if (s != PL_bufptr) {
3493 start_force(PL_curforce);
3495 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3497 NEXTVAL_NEXTTOKE = yylval;
3500 if (PL_lex_starts++) {
3504 sv_free(PL_thistoken);
3505 PL_thistoken = newSVpvs("");
3508 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3509 if (!PL_lex_casemods && PL_lex_inpat)
3522 PL_lex_state = LEX_NORMAL;
3523 s = scan_formline(PL_bufptr);
3524 if (!PL_lex_formbrack)
3530 PL_oldoldbufptr = PL_oldbufptr;
3536 sv_free(PL_thistoken);
3539 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3543 if (isIDFIRST_lazy_if(s,UTF))
3545 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3548 goto fake_eof; /* emulate EOF on ^D or ^Z */
3557 if (PL_lex_brackets) {
3558 yyerror((const char *)
3560 ? "Format not terminated"
3561 : "Missing right curly or square bracket"));
3563 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3564 "### Tokener got EOF\n");
3568 if (s++ < PL_bufend)
3569 goto retry; /* ignore stray nulls */
3572 if (!PL_in_eval && !PL_preambled) {
3573 PL_preambled = TRUE;
3578 sv_setpv(PL_linestr,incl_perldb());
3579 if (SvCUR(PL_linestr))
3580 sv_catpvs(PL_linestr,";");
3582 while(AvFILLp(PL_preambleav) >= 0) {
3583 SV *tmpsv = av_shift(PL_preambleav);
3584 sv_catsv(PL_linestr, tmpsv);
3585 sv_catpvs(PL_linestr, ";");
3588 sv_free((SV*)PL_preambleav);
3589 PL_preambleav = NULL;
3591 if (PL_minus_n || PL_minus_p) {
3592 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3594 sv_catpvs(PL_linestr,"chomp;");
3597 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3598 || *PL_splitstr == '"')
3599 && strchr(PL_splitstr + 1, *PL_splitstr))
3600 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3602 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3603 bytes can be used as quoting characters. :-) */
3604 const char *splits = PL_splitstr;
3605 sv_catpvs(PL_linestr, "our @F=split(q\0");
3608 if (*splits == '\\')
3609 sv_catpvn(PL_linestr, splits, 1);
3610 sv_catpvn(PL_linestr, splits, 1);
3611 } while (*splits++);
3612 /* This loop will embed the trailing NUL of
3613 PL_linestr as the last thing it does before
3615 sv_catpvs(PL_linestr, ");");
3619 sv_catpvs(PL_linestr,"our @F=split(' ');");
3623 sv_catpvs(PL_linestr,"use feature ':5.10';");
3624 sv_catpvs(PL_linestr, "\n");
3625 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3626 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3627 PL_last_lop = PL_last_uni = NULL;
3628 if (PERLDB_LINE && PL_curstash != PL_debstash)
3629 update_debugger_info(PL_linestr, NULL, 0);
3633 bof = PL_rsfp ? TRUE : FALSE;
3634 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3637 PL_realtokenstart = -1;
3640 if (PL_preprocess && !PL_in_eval)
3641 (void)PerlProc_pclose(PL_rsfp);
3642 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3643 PerlIO_clearerr(PL_rsfp);
3645 (void)PerlIO_close(PL_rsfp);
3647 PL_doextract = FALSE;
3649 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3654 sv_setpv(PL_linestr,
3657 ? ";}continue{print;}" : ";}"));
3658 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3659 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3660 PL_last_lop = PL_last_uni = NULL;
3661 PL_minus_n = PL_minus_p = 0;
3664 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3665 PL_last_lop = PL_last_uni = NULL;
3666 sv_setpvn(PL_linestr,"",0);
3667 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3669 /* If it looks like the start of a BOM or raw UTF-16,
3670 * check if it in fact is. */
3676 #ifdef PERLIO_IS_STDIO
3677 # ifdef __GNU_LIBRARY__
3678 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3679 # define FTELL_FOR_PIPE_IS_BROKEN
3683 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3684 # define FTELL_FOR_PIPE_IS_BROKEN
3689 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3690 /* This loses the possibility to detect the bof
3691 * situation on perl -P when the libc5 is being used.
3692 * Workaround? Maybe attach some extra state to PL_rsfp?
3695 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3697 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3700 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3701 s = swallow_bom((U8*)s);
3705 /* Incest with pod. */
3708 sv_catsv(PL_thiswhite, PL_linestr);
3710 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3711 sv_setpvn(PL_linestr, "", 0);
3712 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3713 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3714 PL_last_lop = PL_last_uni = NULL;
3715 PL_doextract = FALSE;
3719 } while (PL_doextract);
3720 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3721 if (PERLDB_LINE && PL_curstash != PL_debstash)
3722 update_debugger_info(PL_linestr, NULL, 0);
3723 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3724 PL_last_lop = PL_last_uni = NULL;
3725 if (CopLINE(PL_curcop) == 1) {
3726 while (s < PL_bufend && isSPACE(*s))
3728 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3732 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3736 if (*s == '#' && *(s+1) == '!')
3738 #ifdef ALTERNATE_SHEBANG
3740 static char const as[] = ALTERNATE_SHEBANG;
3741 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3742 d = s + (sizeof(as) - 1);
3744 #endif /* ALTERNATE_SHEBANG */
3753 while (*d && !isSPACE(*d))
3757 #ifdef ARG_ZERO_IS_SCRIPT
3758 if (ipathend > ipath) {
3760 * HP-UX (at least) sets argv[0] to the script name,
3761 * which makes $^X incorrect. And Digital UNIX and Linux,
3762 * at least, set argv[0] to the basename of the Perl
3763 * interpreter. So, having found "#!", we'll set it right.
3765 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3767 assert(SvPOK(x) || SvGMAGICAL(x));
3768 if (sv_eq(x, CopFILESV(PL_curcop))) {
3769 sv_setpvn(x, ipath, ipathend - ipath);
3775 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3776 const char * const lstart = SvPV_const(x,llen);
3778 bstart += blen - llen;
3779 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3780 sv_setpvn(x, ipath, ipathend - ipath);
3785 TAINT_NOT; /* $^X is always tainted, but that's OK */
3787 #endif /* ARG_ZERO_IS_SCRIPT */
3792 d = instr(s,"perl -");
3794 d = instr(s,"perl");
3796 /* avoid getting into infinite loops when shebang
3797 * line contains "Perl" rather than "perl" */
3799 for (d = ipathend-4; d >= ipath; --d) {
3800 if ((*d == 'p' || *d == 'P')
3801 && !ibcmp(d, "perl", 4))
3811 #ifdef ALTERNATE_SHEBANG
3813 * If the ALTERNATE_SHEBANG on this system starts with a
3814 * character that can be part of a Perl expression, then if
3815 * we see it but not "perl", we're probably looking at the
3816 * start of Perl code, not a request to hand off to some
3817 * other interpreter. Similarly, if "perl" is there, but
3818 * not in the first 'word' of the line, we assume the line
3819 * contains the start of the Perl program.
3821 if (d && *s != '#') {
3822 const char *c = ipath;
3823 while (*c && !strchr("; \t\r\n\f\v#", *c))
3826 d = NULL; /* "perl" not in first word; ignore */
3828 *s = '#'; /* Don't try to parse shebang line */
3830 #endif /* ALTERNATE_SHEBANG */
3831 #ifndef MACOS_TRADITIONAL
3836 !instr(s,"indir") &&
3837 instr(PL_origargv[0],"perl"))
3844 while (s < PL_bufend && isSPACE(*s))
3846 if (s < PL_bufend) {
3847 Newxz(newargv,PL_origargc+3,char*);
3849 while (s < PL_bufend && !isSPACE(*s))
3852 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3855 newargv = PL_origargv;
3858 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3860 Perl_croak(aTHX_ "Can't exec %s", ipath);
3864 while (*d && !isSPACE(*d))
3866 while (SPACE_OR_TAB(*d))
3870 const bool switches_done = PL_doswitches;
3871 const U32 oldpdb = PL_perldb;
3872 const bool oldn = PL_minus_n;
3873 const bool oldp = PL_minus_p;
3876 if (*d == 'M' || *d == 'm' || *d == 'C') {
3877 const char * const m = d;
3878 while (*d && !isSPACE(*d))
3880 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3883 d = moreswitches(d);
3885 if (PL_doswitches && !switches_done) {
3886 int argc = PL_origargc;
3887 char **argv = PL_origargv;
3890 } while (argc && argv[0][0] == '-' && argv[0][1]);
3891 init_argv_symbols(argc,argv);
3893 if ((PERLDB_LINE && !oldpdb) ||
3894 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3895 /* if we have already added "LINE: while (<>) {",
3896 we must not do it again */
3898 sv_setpvn(PL_linestr, "", 0);
3899 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3900 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3901 PL_last_lop = PL_last_uni = NULL;
3902 PL_preambled = FALSE;
3904 (void)gv_fetchfile(PL_origfilename);
3911 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3913 PL_lex_state = LEX_FORMLINE;
3918 #ifdef PERL_STRICT_CR
3919 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3921 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3923 case ' ': case '\t': case '\f': case 013:
3924 #ifdef MACOS_TRADITIONAL
3928 PL_realtokenstart = -1;
3930 PL_thiswhite = newSVpvs("");
3931 sv_catpvn(PL_thiswhite, s, 1);
3938 PL_realtokenstart = -1;
3942 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3943 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3944 /* handle eval qq[#line 1 "foo"\n ...] */
3945 CopLINE_dec(PL_curcop);
3948 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3950 if (!PL_in_eval || PL_rsfp)
3955 while (d < PL_bufend && *d != '\n')
3959 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3960 Perl_croak(aTHX_ "panic: input overflow");
3963 PL_thiswhite = newSVpvn(s, d - s);
3968 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3970 PL_lex_state = LEX_FORMLINE;
3976 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3977 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3980 TOKEN(PEG); /* make sure any #! line is accessible */
3985 /* if (PL_madskills && PL_lex_formbrack) { */
3987 while (d < PL_bufend && *d != '\n')
3991 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3992 Perl_croak(aTHX_ "panic: input overflow");
3993 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3995 PL_thiswhite = newSVpvs("");
3996 if (CopLINE(PL_curcop) == 1) {
3997 sv_setpvn(PL_thiswhite, "", 0);
4000 sv_catpvn(PL_thiswhite, s, d - s);
4014 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4022 while (s < PL_bufend && SPACE_OR_TAB(*s))
4025 if (strnEQ(s,"=>",2)) {
4026 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4027 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4028 OPERATOR('-'); /* unary minus */
4030 PL_last_uni = PL_oldbufptr;
4032 case 'r': ftst = OP_FTEREAD; break;
4033 case 'w': ftst = OP_FTEWRITE; break;
4034 case 'x': ftst = OP_FTEEXEC; break;
4035 case 'o': ftst = OP_FTEOWNED; break;
4036 case 'R': ftst = OP_FTRREAD; break;
4037 case 'W': ftst = OP_FTRWRITE; break;
4038 case 'X': ftst = OP_FTREXEC; break;
4039 case 'O': ftst = OP_FTROWNED; break;
4040 case 'e': ftst = OP_FTIS; break;
4041 case 'z': ftst = OP_FTZERO; break;
4042 case 's': ftst = OP_FTSIZE; break;
4043 case 'f': ftst = OP_FTFILE; break;
4044 case 'd': ftst = OP_FTDIR; break;
4045 case 'l': ftst = OP_FTLINK; break;
4046 case 'p': ftst = OP_FTPIPE; break;
4047 case 'S': ftst = OP_FTSOCK; break;
4048 case 'u': ftst = OP_FTSUID; break;
4049 case 'g': ftst = OP_FTSGID; break;
4050 case 'k': ftst = OP_FTSVTX; break;
4051 case 'b': ftst = OP_FTBLK; break;
4052 case 'c': ftst = OP_FTCHR; break;
4053 case 't': ftst = OP_FTTTY; break;
4054 case 'T': ftst = OP_FTTEXT; break;
4055 case 'B': ftst = OP_FTBINARY; break;
4056 case 'M': case 'A': case 'C':
4057 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4059 case 'M': ftst = OP_FTMTIME; break;
4060 case 'A': ftst = OP_FTATIME; break;
4061 case 'C': ftst = OP_FTCTIME; break;
4069 PL_last_lop_op = (OPCODE)ftst;
4070 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4071 "### Saw file test %c\n", (int)tmp);
4076 /* Assume it was a minus followed by a one-letter named
4077 * subroutine call (or a -bareword), then. */
4078 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4079 "### '-%c' looked like a file test but was not\n",
4086 const char tmp = *s++;
4089 if (PL_expect == XOPERATOR)
4094 else if (*s == '>') {
4097 if (isIDFIRST_lazy_if(s,UTF)) {
4098 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4106 if (PL_expect == XOPERATOR)
4109 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4111 OPERATOR('-'); /* unary minus */
4117 const char tmp = *s++;
4120 if (PL_expect == XOPERATOR)
4125 if (PL_expect == XOPERATOR)
4128 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4135 if (PL_expect != XOPERATOR) {
4136 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4137 PL_expect = XOPERATOR;
4138 force_ident(PL_tokenbuf, '*');
4151 if (PL_expect == XOPERATOR) {
4155 PL_tokenbuf[0] = '%';
4156 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4157 sizeof PL_tokenbuf - 1, FALSE);
4158 if (!PL_tokenbuf[1]) {
4161 PL_pending_ident = '%';
4172 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4179 const char tmp = *s++;
4185 goto just_a_word_zero_gv;
4188 switch (PL_expect) {
4194 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4196 PL_bufptr = s; /* update in case we back off */
4202 PL_expect = XTERMBLOCK;
4205 stuffstart = s - SvPVX(PL_linestr) - 1;
4209 while (isIDFIRST_lazy_if(s,UTF)) {
4212 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4213 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4214 if (tmp < 0) tmp = -tmp;
4229 sv = newSVpvn(s, len);
4231 d = scan_str(d,TRUE,TRUE);
4233 /* MUST advance bufptr here to avoid bogus
4234 "at end of line" context messages from yyerror().
4236 PL_bufptr = s + len;
4237 yyerror("Unterminated attribute parameter in attribute list");
4241 return REPORT(0); /* EOF indicator */
4245 sv_catsv(sv, PL_lex_stuff);
4246 attrs = append_elem(OP_LIST, attrs,
4247 newSVOP(OP_CONST, 0, sv));
4248 SvREFCNT_dec(PL_lex_stuff);
4249 PL_lex_stuff = NULL;
4252 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4254 if (PL_in_my == KEY_our) {
4256 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4258 /* skip to avoid loading attributes.pm */
4260 deprecate(":unique");
4263 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4266 /* NOTE: any CV attrs applied here need to be part of
4267 the CVf_BUILTIN_ATTRS define in cv.h! */
4268 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4270 CvLVALUE_on(PL_compcv);
4272 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4274 CvLOCKED_on(PL_compcv);
4276 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4278 CvMETHOD_on(PL_compcv);
4280 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4282 CvASSERTION_on(PL_compcv);
4284 /* After we've set the flags, it could be argued that
4285 we don't need to do the attributes.pm-based setting
4286 process, and shouldn't bother appending recognized
4287 flags. To experiment with that, uncomment the
4288 following "else". (Note that's already been
4289 uncommented. That keeps the above-applied built-in
4290 attributes from being intercepted (and possibly
4291 rejected) by a package's attribute routines, but is
4292 justified by the performance win for the common case
4293 of applying only built-in attributes.) */
4295 attrs = append_elem(OP_LIST, attrs,
4296 newSVOP(OP_CONST, 0,
4300 if (*s == ':' && s[1] != ':')
4303 break; /* require real whitespace or :'s */
4304 /* XXX losing whitespace on sequential attributes here */
4308 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4309 if (*s != ';' && *s != '}' && *s != tmp
4310 && (tmp != '=' || *s != ')')) {
4311 const char q = ((*s == '\'') ? '"' : '\'');
4312 /* If here for an expression, and parsed no attrs, back
4314 if (tmp == '=' && !attrs) {
4318 /* MUST advance bufptr here to avoid bogus "at end of line"
4319 context messages from yyerror().
4322 yyerror( (const char *)
4324 ? Perl_form(aTHX_ "Invalid separator character "
4325 "%c%c%c in attribute list", q, *s, q)
4326 : "Unterminated attribute list" ) );
4334 start_force(PL_curforce);
4335 NEXTVAL_NEXTTOKE.opval = attrs;
4336 CURMAD('_', PL_nextwhite);
4341 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4342 (s - SvPVX(PL_linestr)) - stuffstart);
4350 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4351 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4359 const char tmp = *s++;
4364 const char tmp = *s++;
4372 if (PL_lex_brackets <= 0)
4373 yyerror("Unmatched right square bracket");
4376 if (PL_lex_state == LEX_INTERPNORMAL) {
4377 if (PL_lex_brackets == 0) {
4378 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4379 PL_lex_state = LEX_INTERPEND;
4386 if (PL_lex_brackets > 100) {
4387 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4389 switch (PL_expect) {
4391 if (PL_lex_formbrack) {
4395 if (PL_oldoldbufptr == PL_last_lop)
4396 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4398 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4399 OPERATOR(HASHBRACK);
4401 while (s < PL_bufend && SPACE_OR_TAB(*s))
4404 PL_tokenbuf[0] = '\0';
4405 if (d < PL_bufend && *d == '-') {
4406 PL_tokenbuf[0] = '-';
4408 while (d < PL_bufend && SPACE_OR_TAB(*d))
4411 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4412 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4414 while (d < PL_bufend && SPACE_OR_TAB(*d))
4417 const char minus = (PL_tokenbuf[0] == '-');
4418 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4426 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4431 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4436 if (PL_oldoldbufptr == PL_last_lop)
4437 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4439 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4442 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4444 /* This hack is to get the ${} in the message. */
4446 yyerror("syntax error");
4449 OPERATOR(HASHBRACK);
4451 /* This hack serves to disambiguate a pair of curlies
4452 * as being a block or an anon hash. Normally, expectation
4453 * determines that, but in cases where we're not in a
4454 * position to expect anything in particular (like inside
4455 * eval"") we have to resolve the ambiguity. This code
4456 * covers the case where the first term in the curlies is a
4457 * quoted string. Most other cases need to be explicitly
4458 * disambiguated by prepending a "+" before the opening
4459 * curly in order to force resolution as an anon hash.
4461 * XXX should probably propagate the outer expectation
4462 * into eval"" to rely less on this hack, but that could
4463 * potentially break current behavior of eval"".
4467 if (*s == '\'' || *s == '"' || *s == '`') {
4468 /* common case: get past first string, handling escapes */
4469 for (t++; t < PL_bufend && *t != *s;)
4470 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4474 else if (*s == 'q') {
4477 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4480 /* skip q//-like construct */
4482 char open, close, term;
4485 while (t < PL_bufend && isSPACE(*t))
4487 /* check for q => */
4488 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4489 OPERATOR(HASHBRACK);
4493 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4497 for (t++; t < PL_bufend; t++) {
4498 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4500 else if (*t == open)
4504 for (t++; t < PL_bufend; t++) {
4505 if (*t == '\\' && t+1 < PL_bufend)
4507 else if (*t == close && --brackets <= 0)
4509 else if (*t == open)
4516 /* skip plain q word */
4517 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4520 else if (isALNUM_lazy_if(t,UTF)) {
4522 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4525 while (t < PL_bufend && isSPACE(*t))
4527 /* if comma follows first term, call it an anon hash */
4528 /* XXX it could be a comma expression with loop modifiers */
4529 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4530 || (*t == '=' && t[1] == '>')))
4531 OPERATOR(HASHBRACK);
4532 if (PL_expect == XREF)
4535 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4541 yylval.ival = CopLINE(PL_curcop);
4542 if (isSPACE(*s) || *s == '#')
4543 PL_copline = NOLINE; /* invalidate current command line number */
4548 if (PL_lex_brackets <= 0)
4549 yyerror("Unmatched right curly bracket");
4551 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4552 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4553 PL_lex_formbrack = 0;
4554 if (PL_lex_state == LEX_INTERPNORMAL) {
4555 if (PL_lex_brackets == 0) {
4556 if (PL_expect & XFAKEBRACK) {
4557 PL_expect &= XENUMMASK;
4558 PL_lex_state = LEX_INTERPEND;
4563 PL_thiswhite = newSVpvs("");
4564 sv_catpvn(PL_thiswhite,"}",1);
4567 return yylex(); /* ignore fake brackets */
4569 if (*s == '-' && s[1] == '>')
4570 PL_lex_state = LEX_INTERPENDMAYBE;
4571 else if (*s != '[' && *s != '{')
4572 PL_lex_state = LEX_INTERPEND;
4575 if (PL_expect & XFAKEBRACK) {
4576 PL_expect &= XENUMMASK;
4578 return yylex(); /* ignore fake brackets */
4580 start_force(PL_curforce);
4582 curmad('X', newSVpvn(s-1,1));
4583 CURMAD('_', PL_thiswhite);
4588 PL_thistoken = newSVpvs("");
4596 if (PL_expect == XOPERATOR) {
4597 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4598 && isIDFIRST_lazy_if(s,UTF))
4600 CopLINE_dec(PL_curcop);
4601 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4602 CopLINE_inc(PL_curcop);
4607 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4609 PL_expect = XOPERATOR;
4610 force_ident(PL_tokenbuf, '&');
4614 yylval.ival = (OPpENTERSUB_AMPER<<8);
4626 const char tmp = *s++;
4633 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4634 && strchr("+-*/%.^&|<",tmp))
4635 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4636 "Reversed %c= operator",(int)tmp);
4638 if (PL_expect == XSTATE && isALPHA(tmp) &&
4639 (s == PL_linestart+1 || s[-2] == '\n') )
4641 if (PL_in_eval && !PL_rsfp) {
4646 if (strnEQ(s,"=cut",4)) {
4662 PL_thiswhite = newSVpvs("");
4663 sv_catpvn(PL_thiswhite, PL_linestart,
4664 PL_bufend - PL_linestart);
4668 PL_doextract = TRUE;
4672 if (PL_lex_brackets < PL_lex_formbrack) {
4674 #ifdef PERL_STRICT_CR
4675 while (SPACE_OR_TAB(*t))
4677 while (SPACE_OR_TAB(*t) || *t == '\r')
4680 if (*t == '\n' || *t == '#') {
4691 const char tmp = *s++;
4693 /* was this !=~ where !~ was meant?
4694 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4696 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4697 const char *t = s+1;
4699 while (t < PL_bufend && isSPACE(*t))
4702 if (*t == '/' || *t == '?' ||
4703 ((*t == 'm' || *t == 's' || *t == 'y')
4704 && !isALNUM(t[1])) ||
4705 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4706 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4707 "!=~ should be !~");
4717 if (PL_expect != XOPERATOR) {
4718 if (s[1] != '<' && !strchr(s,'>'))
4721 s = scan_heredoc(s);
4723 s = scan_inputsymbol(s);
4724 TERM(sublex_start());
4730 SHop(OP_LEFT_SHIFT);
4744 const char tmp = *s++;
4746 SHop(OP_RIGHT_SHIFT);
4747 else if (tmp == '=')
4756 if (PL_expect == XOPERATOR) {
4757 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4759 deprecate_old(commaless_variable_list);
4760 return REPORT(','); /* grandfather non-comma-format format */
4764 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4765 PL_tokenbuf[0] = '@';
4766 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4767 sizeof PL_tokenbuf - 1, FALSE);
4768 if (PL_expect == XOPERATOR)
4769 no_op("Array length", s);
4770 if (!PL_tokenbuf[1])
4772 PL_expect = XOPERATOR;
4773 PL_pending_ident = '#';
4777 PL_tokenbuf[0] = '$';
4778 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4779 sizeof PL_tokenbuf - 1, FALSE);
4780 if (PL_expect == XOPERATOR)
4782 if (!PL_tokenbuf[1]) {
4784 yyerror("Final $ should be \\$ or $name");
4788 /* This kludge not intended to be bulletproof. */
4789 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4790 yylval.opval = newSVOP(OP_CONST, 0,
4791 newSViv(CopARYBASE_get(&PL_compiling)));
4792 yylval.opval->op_private = OPpCONST_ARYBASE;
4798 const char tmp = *s;
4799 if (PL_lex_state == LEX_NORMAL)
4802 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4803 && intuit_more(s)) {
4805 PL_tokenbuf[0] = '@';
4806 if (ckWARN(WARN_SYNTAX)) {
4809 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4812 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4813 while (t < PL_bufend && *t != ']')
4815 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4816 "Multidimensional syntax %.*s not supported",
4817 (int)((t - PL_bufptr) + 1), PL_bufptr);
4821 else if (*s == '{') {
4823 PL_tokenbuf[0] = '%';
4824 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4825 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4827 char tmpbuf[sizeof PL_tokenbuf];
4830 } while (isSPACE(*t));
4831 if (isIDFIRST_lazy_if(t,UTF)) {
4833 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4837 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4838 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4839 "You need to quote \"%s\"",
4846 PL_expect = XOPERATOR;
4847 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4848 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4849 if (!islop || PL_last_lop_op == OP_GREPSTART)
4850 PL_expect = XOPERATOR;
4851 else if (strchr("$@\"'`q", *s))
4852 PL_expect = XTERM; /* e.g. print $fh "foo" */
4853 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4854 PL_expect = XTERM; /* e.g. print $fh &sub */
4855 else if (isIDFIRST_lazy_if(s,UTF)) {
4856 char tmpbuf[sizeof PL_tokenbuf];
4858 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4859 if ((t2 = keyword(tmpbuf, len, 0))) {
4860 /* binary operators exclude handle interpretations */
4872 PL_expect = XTERM; /* e.g. print $fh length() */
4877 PL_expect = XTERM; /* e.g. print $fh subr() */
4880 else if (isDIGIT(*s))
4881 PL_expect = XTERM; /* e.g. print $fh 3 */
4882 else if (*s == '.' && isDIGIT(s[1]))
4883 PL_expect = XTERM; /* e.g. print $fh .3 */
4884 else if ((*s == '?' || *s == '-' || *s == '+')
4885 && !isSPACE(s[1]) && s[1] != '=')
4886 PL_expect = XTERM; /* e.g. print $fh -1 */
4887 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4889 PL_expect = XTERM; /* e.g. print $fh /.../
4890 XXX except DORDOR operator
4892 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4894 PL_expect = XTERM; /* print $fh <<"EOF" */
4897 PL_pending_ident = '$';
4901 if (PL_expect == XOPERATOR)
4903 PL_tokenbuf[0] = '@';
4904 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4905 if (!PL_tokenbuf[1]) {
4908 if (PL_lex_state == LEX_NORMAL)
4910 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4912 PL_tokenbuf[0] = '%';
4914 /* Warn about @ where they meant $. */
4915 if (*s == '[' || *s == '{') {
4916 if (ckWARN(WARN_SYNTAX)) {
4917 const char *t = s + 1;
4918 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4920 if (*t == '}' || *t == ']') {
4922 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4923 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4924 "Scalar value %.*s better written as $%.*s",
4925 (int)(t-PL_bufptr), PL_bufptr,
4926 (int)(t-PL_bufptr-1), PL_bufptr+1);
4931 PL_pending_ident = '@';
4934 case '/': /* may be division, defined-or, or pattern */
4935 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4939 case '?': /* may either be conditional or pattern */
4940 if(PL_expect == XOPERATOR) {
4948 /* A // operator. */
4958 /* Disable warning on "study /blah/" */
4959 if (PL_oldoldbufptr == PL_last_uni
4960 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4961 || memNE(PL_last_uni, "study", 5)
4962 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4965 s = scan_pat(s,OP_MATCH);
4966 TERM(sublex_start());
4970 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4971 #ifdef PERL_STRICT_CR
4974 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4976 && (s == PL_linestart || s[-1] == '\n') )
4978 PL_lex_formbrack = 0;
4982 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4988 yylval.ival = OPf_SPECIAL;
4994 if (PL_expect != XOPERATOR)
4999 case '0': case '1': case '2': case '3': case '4':
5000 case '5': case '6': case '7': case '8': case '9':
5001 s = scan_num(s, &yylval);
5002 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5003 if (PL_expect == XOPERATOR)
5008 s = scan_str(s,!!PL_madskills,FALSE);
5009 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5010 if (PL_expect == XOPERATOR) {
5011 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5013 deprecate_old(commaless_variable_list);
5014 return REPORT(','); /* grandfather non-comma-format format */
5021 yylval.ival = OP_CONST;
5022 TERM(sublex_start());
5025 s = scan_str(s,!!PL_madskills,FALSE);
5026 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5027 if (PL_expect == XOPERATOR) {
5028 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5030 deprecate_old(commaless_variable_list);
5031 return REPORT(','); /* grandfather non-comma-format format */
5038 yylval.ival = OP_CONST;
5039 /* FIXME. I think that this can be const if char *d is replaced by
5040 more localised variables. */
5041 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5042 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5043 yylval.ival = OP_STRINGIFY;
5047 TERM(sublex_start());
5050 s = scan_str(s,!!PL_madskills,FALSE);
5051 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5052 if (PL_expect == XOPERATOR)
5053 no_op("Backticks",s);
5056 readpipe_override();
5057 TERM(sublex_start());
5061 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5062 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5064 if (PL_expect == XOPERATOR)
5065 no_op("Backslash",s);
5069 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5070 char *start = s + 2;
5071 while (isDIGIT(*start) || *start == '_')
5073 if (*start == '.' && isDIGIT(start[1])) {
5074 s = scan_num(s, &yylval);
5077 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5078 else if (!isALPHA(*start) && (PL_expect == XTERM
5079 || PL_expect == XREF || PL_expect == XSTATE
5080 || PL_expect == XTERMORDORDOR)) {
5081 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5082 const char c = *start;
5085 gv = gv_fetchpv(s, 0, SVt_PVCV);
5088 s = scan_num(s, &yylval);
5095 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5137 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5139 /* Some keywords can be followed by any delimiter, including ':' */
5140 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5141 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5142 (PL_tokenbuf[0] == 'q' &&
5143 strchr("qwxr", PL_tokenbuf[1])))));
5145 /* x::* is just a word, unless x is "CORE" */
5146 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5150 while (d < PL_bufend && isSPACE(*d))
5151 d++; /* no comments skipped here, or s### is misparsed */
5153 /* Is this a label? */
5154 if (!tmp && PL_expect == XSTATE
5155 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5157 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5162 /* Check for keywords */
5163 tmp = keyword(PL_tokenbuf, len, 0);
5165 /* Is this a word before a => operator? */
5166 if (*d == '=' && d[1] == '>') {
5169 = (OP*)newSVOP(OP_CONST, 0,
5170 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5171 yylval.opval->op_private = OPpCONST_BARE;
5175 if (tmp < 0) { /* second-class keyword? */
5176 GV *ogv = NULL; /* override (winner) */
5177 GV *hgv = NULL; /* hidden (loser) */
5178 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5180 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5183 if (GvIMPORTED_CV(gv))
5185 else if (! CvMETHOD(cv))
5189 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5190 (gv = *gvp) && isGV_with_GP(gv) &&
5191 GvCVu(gv) && GvIMPORTED_CV(gv))
5198 tmp = 0; /* overridden by import or by GLOBAL */
5201 && -tmp==KEY_lock /* XXX generalizable kludge */
5203 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5205 tmp = 0; /* any sub overrides "weak" keyword */
5207 else { /* no override */
5209 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5210 Perl_warner(aTHX_ packWARN(WARN_MISC),
5211 "dump() better written as CORE::dump()");
5215 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5216 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5217 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5218 "Ambiguous call resolved as CORE::%s(), %s",
5219 GvENAME(hgv), "qualify as such or use &");
5226 default: /* not a keyword */
5227 /* Trade off - by using this evil construction we can pull the
5228 variable gv into the block labelled keylookup. If not, then
5229 we have to give it function scope so that the goto from the
5230 earlier ':' case doesn't bypass the initialisation. */
5232 just_a_word_zero_gv:
5240 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5243 SV *nextPL_nextwhite = 0;
5247 /* Get the rest if it looks like a package qualifier */
5249 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5251 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5254 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5255 *s == '\'' ? "'" : "::");
5260 if (PL_expect == XOPERATOR) {
5261 if (PL_bufptr == PL_linestart) {
5262 CopLINE_dec(PL_curcop);
5263 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5264 CopLINE_inc(PL_curcop);
5267 no_op("Bareword",s);
5270 /* Look for a subroutine with this name in current package,
5271 unless name is "Foo::", in which case Foo is a bearword
5272 (and a package name). */
5274 if (len > 2 && !PL_madskills &&
5275 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5277 if (ckWARN(WARN_BAREWORD)
5278 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5279 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5280 "Bareword \"%s\" refers to nonexistent package",
5283 PL_tokenbuf[len] = '\0';
5289 /* Mustn't actually add anything to a symbol table.
5290 But also don't want to "initialise" any placeholder
5291 constants that might already be there into full
5292 blown PVGVs with attached PVCV. */
5293 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5294 GV_NOADD_NOINIT, SVt_PVCV);
5299 /* if we saw a global override before, get the right name */
5302 sv = newSVpvs("CORE::GLOBAL::");
5303 sv_catpv(sv,PL_tokenbuf);
5306 /* If len is 0, newSVpv does strlen(), which is correct.
5307 If len is non-zero, then it will be the true length,
5308 and so the scalar will be created correctly. */
5309 sv = newSVpv(PL_tokenbuf,len);
5312 if (PL_madskills && !PL_thistoken) {
5313 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5314 PL_thistoken = newSVpv(start,s - start);
5315 PL_realtokenstart = s - SvPVX(PL_linestr);
5319 /* Presume this is going to be a bareword of some sort. */
5322 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5323 yylval.opval->op_private = OPpCONST_BARE;
5324 /* UTF-8 package name? */
5325 if (UTF && !IN_BYTES &&
5326 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5329 /* And if "Foo::", then that's what it certainly is. */
5334 /* Do the explicit type check so that we don't need to force
5335 the initialisation of the symbol table to have a real GV.
5336 Beware - gv may not really be a PVGV, cv may not really be
5337 a PVCV, (because of the space optimisations that gv_init
5338 understands) But they're true if for this symbol there is
5339 respectively a typeglob and a subroutine.
5341 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5342 /* Real typeglob, so get the real subroutine: */
5344 /* A proxy for a subroutine in this package? */
5345 : SvOK(gv) ? (CV *) gv : NULL)
5348 /* See if it's the indirect object for a list operator. */
5350 if (PL_oldoldbufptr &&
5351 PL_oldoldbufptr < PL_bufptr &&
5352 (PL_oldoldbufptr == PL_last_lop
5353 || PL_oldoldbufptr == PL_last_uni) &&
5354 /* NO SKIPSPACE BEFORE HERE! */
5355 (PL_expect == XREF ||
5356 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5358 bool immediate_paren = *s == '(';
5360 /* (Now we can afford to cross potential line boundary.) */
5361 s = SKIPSPACE2(s,nextPL_nextwhite);
5363 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5366 /* Two barewords in a row may indicate method call. */
5368 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5369 (tmp = intuit_method(s, gv, cv)))
5372 /* If not a declared subroutine, it's an indirect object. */
5373 /* (But it's an indir obj regardless for sort.) */
5374 /* Also, if "_" follows a filetest operator, it's a bareword */
5377 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5379 (PL_last_lop_op != OP_MAPSTART &&
5380 PL_last_lop_op != OP_GREPSTART))))
5381 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5382 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5385 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5390 PL_expect = XOPERATOR;
5393 s = SKIPSPACE2(s,nextPL_nextwhite);
5394 PL_nextwhite = nextPL_nextwhite;
5399 /* Is this a word before a => operator? */
5400 if (*s == '=' && s[1] == '>' && !pkgname) {
5402 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5403 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5404 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5408 /* If followed by a paren, it's certainly a subroutine. */
5413 while (SPACE_OR_TAB(*d))
5415 if (*d == ')' && (sv = gv_const_sv(gv))) {
5419 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5420 sv_catpvn(PL_thistoken, par, s - par);
5422 sv_free(PL_nextwhite);
5433 PL_nextwhite = PL_thiswhite;
5436 start_force(PL_curforce);
5438 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5439 PL_expect = XOPERATOR;
5442 PL_nextwhite = nextPL_nextwhite;
5443 curmad('X', PL_thistoken);
5444 PL_thistoken = newSVpvs("");
5452 /* If followed by var or block, call it a method (unless sub) */
5454 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5455 PL_last_lop = PL_oldbufptr;
5456 PL_last_lop_op = OP_METHOD;
5460 /* If followed by a bareword, see if it looks like indir obj. */
5463 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5464 && (tmp = intuit_method(s, gv, cv)))
5467 /* Not a method, so call it a subroutine (if defined) */
5470 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5471 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5472 "Ambiguous use of -%s resolved as -&%s()",
5473 PL_tokenbuf, PL_tokenbuf);
5474 /* Check for a constant sub */
5475 if ((sv = gv_const_sv(gv)) && !PL_madskills) {
5477 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5478 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5479 yylval.opval->op_private = 0;
5483 /* Resolve to GV now. */
5484 if (SvTYPE(gv) != SVt_PVGV) {
5485 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5486 assert (SvTYPE(gv) == SVt_PVGV);
5487 /* cv must have been some sort of placeholder, so
5488 now needs replacing with a real code reference. */
5492 op_free(yylval.opval);
5493 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5494 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5495 PL_last_lop = PL_oldbufptr;
5496 PL_last_lop_op = OP_ENTERSUB;
5497 /* Is there a prototype? */
5505 const char *proto = SvPV_const((SV*)cv, protolen);
5508 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5510 while (*proto == ';')
5512 if (*proto == '&' && *s == '{') {
5513 sv_setpv(PL_subname,
5516 "__ANON__" : "__ANON__::__ANON__"));
5523 PL_nextwhite = PL_thiswhite;
5526 start_force(PL_curforce);
5527 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5530 PL_nextwhite = nextPL_nextwhite;
5531 curmad('X', PL_thistoken);
5532 PL_thistoken = newSVpvs("");
5539 /* Guess harder when madskills require "best effort". */
5540 if (PL_madskills && (!gv || !GvCVu(gv))) {
5541 int probable_sub = 0;
5542 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5544 else if (isALPHA(*s)) {
5548 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5549 if (!keyword(tmpbuf, tmplen, 0))
5552 while (d < PL_bufend && isSPACE(*d))
5554 if (*d == '=' && d[1] == '>')
5559 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5560 op_free(yylval.opval);
5561 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5562 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5563 PL_last_lop = PL_oldbufptr;
5564 PL_last_lop_op = OP_ENTERSUB;
5565 PL_nextwhite = PL_thiswhite;
5567 start_force(PL_curforce);
5568 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5570 PL_nextwhite = nextPL_nextwhite;
5571 curmad('X', PL_thistoken);
5572 PL_thistoken = newSVpvs("");
5577 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5584 /* Call it a bare word */
5586 if (PL_hints & HINT_STRICT_SUBS)
5587 yylval.opval->op_private |= OPpCONST_STRICT;
5590 if (lastchar != '-') {
5591 if (ckWARN(WARN_RESERVED)) {
5595 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5596 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5604 && ckWARN_d(WARN_AMBIGUOUS)) {
5605 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5606 "Operator or semicolon missing before %c%s",
5607 lastchar, PL_tokenbuf);
5608 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5609 "Ambiguous use of %c resolved as operator %c",
5610 lastchar, lastchar);
5616 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5617 newSVpv(CopFILE(PL_curcop),0));
5621 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5622 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5625 case KEY___PACKAGE__:
5626 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5628 ? newSVhek(HvNAME_HEK(PL_curstash))
5635 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5636 const char *pname = "main";
5637 if (PL_tokenbuf[2] == 'D')
5638 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5639 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5643 GvIOp(gv) = newIO();
5644 IoIFP(GvIOp(gv)) = PL_rsfp;
5645 #if defined(HAS_FCNTL) && defined(F_SETFD)
5647 const int fd = PerlIO_fileno(PL_rsfp);
5648 fcntl(fd,F_SETFD,fd >= 3);
5651 /* Mark this internal pseudo-handle as clean */
5652 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5654 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5655 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5656 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5658 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5659 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5660 /* if the script was opened in binmode, we need to revert
5661 * it to text mode for compatibility; but only iff it has CRs
5662 * XXX this is a questionable hack at best. */
5663 if (PL_bufend-PL_bufptr > 2
5664 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5667 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5668 loc = PerlIO_tell(PL_rsfp);
5669 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5672 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5674 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5675 #endif /* NETWARE */
5676 #ifdef PERLIO_IS_STDIO /* really? */
5677 # if defined(__BORLANDC__)
5678 /* XXX see note in do_binmode() */
5679 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5683 PerlIO_seek(PL_rsfp, loc, 0);
5687 #ifdef PERLIO_LAYERS
5690 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5691 else if (PL_encoding) {
5698 XPUSHs(PL_encoding);
5700 call_method("name", G_SCALAR);
5704 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5705 Perl_form(aTHX_ ":encoding(%"SVf")",
5714 if (PL_realtokenstart >= 0) {
5715 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5717 PL_endwhite = newSVpvs("");
5718 sv_catsv(PL_endwhite, PL_thiswhite);
5720 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5721 PL_realtokenstart = -1;
5723 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5724 SvCUR(PL_endwhite))) != Nullch) ;
5739 if (PL_expect == XSTATE) {
5746 if (*s == ':' && s[1] == ':') {
5749 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5750 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5751 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5754 else if (tmp == KEY_require || tmp == KEY_do)
5755 /* that's a way to remember we saw "CORE::" */
5768 LOP(OP_ACCEPT,XTERM);
5774 LOP(OP_ATAN2,XTERM);
5780 LOP(OP_BINMODE,XTERM);
5783 LOP(OP_BLESS,XTERM);
5792 /* When 'use switch' is in effect, continue has a dual
5793 life as a control operator. */
5795 if (!FEATURE_IS_ENABLED("switch"))
5798 /* We have to disambiguate the two senses of
5799 "continue". If the next token is a '{' then
5800 treat it as the start of a continue block;
5801 otherwise treat it as a control operator.
5813 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5830 if (!PL_cryptseen) {
5831 PL_cryptseen = TRUE;
5835 LOP(OP_CRYPT,XTERM);
5838 LOP(OP_CHMOD,XTERM);
5841 LOP(OP_CHOWN,XTERM);
5844 LOP(OP_CONNECT,XTERM);
5863 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5864 if (orig_keyword == KEY_do) {
5873 PL_hints |= HINT_BLOCK_SCOPE;
5883 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5884 LOP(OP_DBMOPEN,XTERM);
5890 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5897 yylval.ival = CopLINE(PL_curcop);
5913 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5914 UNIBRACK(OP_ENTEREVAL);
5932 case KEY_endhostent:
5938 case KEY_endservent:
5941 case KEY_endprotoent:
5952 yylval.ival = CopLINE(PL_curcop);
5954 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5957 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5960 if ((PL_bufend - p) >= 3 &&
5961 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5963 else if ((PL_bufend - p) >= 4 &&
5964 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5967 if (isIDFIRST_lazy_if(p,UTF)) {
5968 p = scan_ident(p, PL_bufend,
5969 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5973 Perl_croak(aTHX_ "Missing $ on loop variable");
5975 s = SvPVX(PL_linestr) + soff;
5981 LOP(OP_FORMLINE,XTERM);
5987 LOP(OP_FCNTL,XTERM);
5993 LOP(OP_FLOCK,XTERM);
6002 LOP(OP_GREPSTART, XREF);
6005 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6020 case KEY_getpriority:
6021 LOP(OP_GETPRIORITY,XTERM);
6023 case KEY_getprotobyname:
6026 case KEY_getprotobynumber:
6027 LOP(OP_GPBYNUMBER,XTERM);
6029 case KEY_getprotoent:
6041 case KEY_getpeername:
6042 UNI(OP_GETPEERNAME);
6044 case KEY_gethostbyname:
6047 case KEY_gethostbyaddr:
6048 LOP(OP_GHBYADDR,XTERM);
6050 case KEY_gethostent:
6053 case KEY_getnetbyname:
6056 case KEY_getnetbyaddr:
6057 LOP(OP_GNBYADDR,XTERM);
6062 case KEY_getservbyname:
6063 LOP(OP_GSBYNAME,XTERM);
6065 case KEY_getservbyport:
6066 LOP(OP_GSBYPORT,XTERM);
6068 case KEY_getservent:
6071 case KEY_getsockname:
6072 UNI(OP_GETSOCKNAME);
6074 case KEY_getsockopt:
6075 LOP(OP_GSOCKOPT,XTERM);
6090 yylval.ival = CopLINE(PL_curcop);
6101 yylval.ival = CopLINE(PL_curcop);
6105 LOP(OP_INDEX,XTERM);
6111 LOP(OP_IOCTL,XTERM);
6123 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6155 LOP(OP_LISTEN,XTERM);
6164 s = scan_pat(s,OP_MATCH);
6165 TERM(sublex_start());
6168 LOP(OP_MAPSTART, XREF);
6171 LOP(OP_MKDIR,XTERM);
6174 LOP(OP_MSGCTL,XTERM);
6177 LOP(OP_MSGGET,XTERM);
6180 LOP(OP_MSGRCV,XTERM);
6183 LOP(OP_MSGSND,XTERM);
6190 if (isIDFIRST_lazy_if(s,UTF)) {
6194 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6195 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6197 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6198 if (!PL_in_my_stash) {
6201 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6205 if (PL_madskills) { /* just add type to declarator token */
6206 sv_catsv(PL_thistoken, PL_nextwhite);
6208 sv_catpvn(PL_thistoken, start, s - start);
6216 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6223 s = tokenize_use(0, s);
6227 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6234 if (isIDFIRST_lazy_if(s,UTF)) {
6236 for (d = s; isALNUM_lazy_if(d,UTF);)
6238 for (t=d; isSPACE(*t);)
6240 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6242 && !(t[0] == '=' && t[1] == '>')
6244 int parms_len = (int)(d-s);
6245 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6246 "Precedence problem: open %.*s should be open(%.*s)",
6247 parms_len, s, parms_len, s);
6253 yylval.ival = OP_OR;
6263 LOP(OP_OPEN_DIR,XTERM);
6266 checkcomma(s,PL_tokenbuf,"filehandle");
6270 checkcomma(s,PL_tokenbuf,"filehandle");
6289 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6293 LOP(OP_PIPE_OP,XTERM);
6296 s = scan_str(s,!!PL_madskills,FALSE);
6299 yylval.ival = OP_CONST;
6300 TERM(sublex_start());
6306 s = scan_str(s,!!PL_madskills,FALSE);
6309 PL_expect = XOPERATOR;
6311 if (SvCUR(PL_lex_stuff)) {
6314 d = SvPV_force(PL_lex_stuff, len);
6316 for (; isSPACE(*d) && len; --len, ++d)
6321 if (!warned && ckWARN(WARN_QW)) {
6322 for (; !isSPACE(*d) && len; --len, ++d) {
6324 Perl_warner(aTHX_ packWARN(WARN_QW),
6325 "Possible attempt to separate words with commas");
6328 else if (*d == '#') {
6329 Perl_warner(aTHX_ packWARN(WARN_QW),
6330 "Possible attempt to put comments in qw() list");
6336 for (; !isSPACE(*d) && len; --len, ++d)
6339 sv = newSVpvn(b, d-b);
6340 if (DO_UTF8(PL_lex_stuff))
6342 words = append_elem(OP_LIST, words,
6343 newSVOP(OP_CONST, 0, tokeq(sv)));
6347 start_force(PL_curforce);
6348 NEXTVAL_NEXTTOKE.opval = words;
6353 SvREFCNT_dec(PL_lex_stuff);
6354 PL_lex_stuff = NULL;
6360 s = scan_str(s,!!PL_madskills,FALSE);
6363 yylval.ival = OP_STRINGIFY;
6364 if (SvIVX(PL_lex_stuff) == '\'')
6365 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6366 TERM(sublex_start());
6369 s = scan_pat(s,OP_QR);
6370 TERM(sublex_start());
6373 s = scan_str(s,!!PL_madskills,FALSE);
6376 readpipe_override();
6377 TERM(sublex_start());
6385 s = force_version(s, FALSE);
6387 else if (*s != 'v' || !isDIGIT(s[1])
6388 || (s = force_version(s, TRUE), *s == 'v'))
6390 *PL_tokenbuf = '\0';
6391 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6392 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6393 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6395 yyerror("<> should be quotes");
6397 if (orig_keyword == KEY_require) {
6405 PL_last_uni = PL_oldbufptr;
6406 PL_last_lop_op = OP_REQUIRE;
6408 return REPORT( (int)REQUIRE );
6414 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6418 LOP(OP_RENAME,XTERM);
6427 LOP(OP_RINDEX,XTERM);
6437 UNIDOR(OP_READLINE);
6441 UNIDOR(OP_BACKTICK);
6450 LOP(OP_REVERSE,XTERM);
6453 UNIDOR(OP_READLINK);
6461 TERM(sublex_start());
6463 TOKEN(1); /* force error */
6466 checkcomma(s,PL_tokenbuf,"filehandle");
6476 LOP(OP_SELECT,XTERM);
6482 LOP(OP_SEMCTL,XTERM);
6485 LOP(OP_SEMGET,XTERM);
6488 LOP(OP_SEMOP,XTERM);
6494 LOP(OP_SETPGRP,XTERM);
6496 case KEY_setpriority:
6497 LOP(OP_SETPRIORITY,XTERM);
6499 case KEY_sethostent:
6505 case KEY_setservent:
6508 case KEY_setprotoent:
6518 LOP(OP_SEEKDIR,XTERM);
6520 case KEY_setsockopt:
6521 LOP(OP_SSOCKOPT,XTERM);
6527 LOP(OP_SHMCTL,XTERM);
6530 LOP(OP_SHMGET,XTERM);
6533 LOP(OP_SHMREAD,XTERM);
6536 LOP(OP_SHMWRITE,XTERM);
6539 LOP(OP_SHUTDOWN,XTERM);
6548 LOP(OP_SOCKET,XTERM);
6550 case KEY_socketpair:
6551 LOP(OP_SOCKPAIR,XTERM);
6554 checkcomma(s,PL_tokenbuf,"subroutine name");
6556 if (*s == ';' || *s == ')') /* probably a close */
6557 Perl_croak(aTHX_ "sort is now a reserved word");
6559 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6563 LOP(OP_SPLIT,XTERM);
6566 LOP(OP_SPRINTF,XTERM);
6569 LOP(OP_SPLICE,XTERM);
6584 LOP(OP_SUBSTR,XTERM);
6590 char tmpbuf[sizeof PL_tokenbuf];
6591 SSize_t tboffset = 0;
6592 expectation attrful;
6593 bool have_name, have_proto;
6594 const int key = tmp;
6599 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6600 SV *subtoken = newSVpvn(tstart, s - tstart);
6604 s = SKIPSPACE2(s,tmpwhite);
6609 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6610 (*s == ':' && s[1] == ':'))
6617 attrful = XATTRBLOCK;
6618 /* remember buffer pos'n for later force_word */
6619 tboffset = s - PL_oldbufptr;
6620 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6623 nametoke = newSVpvn(s, d - s);
6625 if (memchr(tmpbuf, ':', len))
6626 sv_setpvn(PL_subname, tmpbuf, len);
6628 sv_setsv(PL_subname,PL_curstname);
6629 sv_catpvs(PL_subname,"::");
6630 sv_catpvn(PL_subname,tmpbuf,len);
6637 CURMAD('X', nametoke);
6638 CURMAD('_', tmpwhite);
6639 (void) force_word(PL_oldbufptr + tboffset, WORD,
6642 s = SKIPSPACE2(d,tmpwhite);
6649 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6650 PL_expect = XTERMBLOCK;
6651 attrful = XATTRTERM;
6652 sv_setpvn(PL_subname,"?",1);
6656 if (key == KEY_format) {
6658 PL_lex_formbrack = PL_lex_brackets + 1;
6660 PL_thistoken = subtoken;
6664 (void) force_word(PL_oldbufptr + tboffset, WORD,
6670 /* Look for a prototype */
6673 bool bad_proto = FALSE;
6674 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6676 s = scan_str(s,!!PL_madskills,FALSE);
6678 Perl_croak(aTHX_ "Prototype not terminated");
6679 /* strip spaces and check for bad characters */
6680 d = SvPVX(PL_lex_stuff);
6682 for (p = d; *p; ++p) {
6685 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6691 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6692 "Illegal character in prototype for %"SVf" : %s",
6693 SVfARG(PL_subname), d);
6694 SvCUR_set(PL_lex_stuff, tmp);
6699 CURMAD('q', PL_thisopen);
6700 CURMAD('_', tmpwhite);
6701 CURMAD('=', PL_thisstuff);
6702 CURMAD('Q', PL_thisclose);
6703 NEXTVAL_NEXTTOKE.opval =
6704 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6705 PL_lex_stuff = Nullsv;
6708 s = SKIPSPACE2(s,tmpwhite);
6716 if (*s == ':' && s[1] != ':')
6717 PL_expect = attrful;
6718 else if (*s != '{' && key == KEY_sub) {
6720 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6722 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6729 curmad('^', newSVpvs(""));
6730 CURMAD('_', tmpwhite);
6734 PL_thistoken = subtoken;
6737 NEXTVAL_NEXTTOKE.opval =
6738 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6739 PL_lex_stuff = NULL;
6744 sv_setpv(PL_subname,
6746 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6750 (void) force_word(PL_oldbufptr + tboffset, WORD,
6760 LOP(OP_SYSTEM,XREF);
6763 LOP(OP_SYMLINK,XTERM);
6766 LOP(OP_SYSCALL,XTERM);
6769 LOP(OP_SYSOPEN,XTERM);
6772 LOP(OP_SYSSEEK,XTERM);
6775 LOP(OP_SYSREAD,XTERM);
6778 LOP(OP_SYSWRITE,XTERM);
6782 TERM(sublex_start());
6803 LOP(OP_TRUNCATE,XTERM);
6815 yylval.ival = CopLINE(PL_curcop);
6819 yylval.ival = CopLINE(PL_curcop);
6823 LOP(OP_UNLINK,XTERM);
6829 LOP(OP_UNPACK,XTERM);
6832 LOP(OP_UTIME,XTERM);
6838 LOP(OP_UNSHIFT,XTERM);
6841 s = tokenize_use(1, s);
6851 yylval.ival = CopLINE(PL_curcop);
6855 yylval.ival = CopLINE(PL_curcop);
6859 PL_hints |= HINT_BLOCK_SCOPE;
6866 LOP(OP_WAITPID,XTERM);
6875 ctl_l[0] = toCTRL('L');
6877 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6880 /* Make sure $^L is defined */
6881 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6886 if (PL_expect == XOPERATOR)
6892 yylval.ival = OP_XOR;
6897 TERM(sublex_start());
6902 #pragma segment Main
6906 S_pending_ident(pTHX)
6911 /* pit holds the identifier we read and pending_ident is reset */
6912 char pit = PL_pending_ident;
6913 PL_pending_ident = 0;
6915 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6916 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6917 "### Pending identifier '%s'\n", PL_tokenbuf); });
6919 /* if we're in a my(), we can't allow dynamics here.
6920 $foo'bar has already been turned into $foo::bar, so
6921 just check for colons.
6923 if it's a legal name, the OP is a PADANY.
6926 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6927 if (strchr(PL_tokenbuf,':'))
6928 yyerror(Perl_form(aTHX_ "No package name allowed for "
6929 "variable %s in \"our\"",
6931 tmp = allocmy(PL_tokenbuf);
6934 if (strchr(PL_tokenbuf,':'))
6935 yyerror(Perl_form(aTHX_ PL_no_myglob,
6936 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6938 yylval.opval = newOP(OP_PADANY, 0);
6939 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6945 build the ops for accesses to a my() variable.
6947 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6948 then used in a comparison. This catches most, but not
6949 all cases. For instance, it catches
6950 sort { my($a); $a <=> $b }
6952 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6953 (although why you'd do that is anyone's guess).
6956 if (!strchr(PL_tokenbuf,':')) {
6958 tmp = pad_findmy(PL_tokenbuf);
6959 if (tmp != NOT_IN_PAD) {
6960 /* might be an "our" variable" */
6961 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6962 /* build ops for a bareword */
6963 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6964 HEK * const stashname = HvNAME_HEK(stash);
6965 SV * const sym = newSVhek(stashname);
6966 sv_catpvs(sym, "::");
6967 sv_catpv(sym, PL_tokenbuf+1);
6968 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6969 yylval.opval->op_private = OPpCONST_ENTERED;
6972 ? (GV_ADDMULTI | GV_ADDINEVAL)
6975 ((PL_tokenbuf[0] == '$') ? SVt_PV
6976 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6981 /* if it's a sort block and they're naming $a or $b */
6982 if (PL_last_lop_op == OP_SORT &&
6983 PL_tokenbuf[0] == '$' &&
6984 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6987 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6988 d < PL_bufend && *d != '\n';
6991 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6992 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6998 yylval.opval = newOP(OP_PADANY, 0);
6999 yylval.opval->op_targ = tmp;
7005 Whine if they've said @foo in a doublequoted string,
7006 and @foo isn't a variable we can find in the symbol
7009 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7010 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
7011 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7012 && ckWARN(WARN_AMBIGUOUS)
7013 /* DO NOT warn for @- and @+ */
7014 && !( PL_tokenbuf[2] == '\0' &&
7015 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7018 /* Downgraded from fatal to warning 20000522 mjd */
7019 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7020 "Possible unintended interpolation of %s in string",
7025 /* build ops for a bareword */
7026 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7027 yylval.opval->op_private = OPpCONST_ENTERED;
7030 /* If the identifier refers to a stash, don't autovivify it.
7031 * Change 24660 had the side effect of causing symbol table
7032 * hashes to always be defined, even if they were freshly
7033 * created and the only reference in the entire program was
7034 * the single statement with the defined %foo::bar:: test.
7035 * It appears that all code in the wild doing this actually
7036 * wants to know whether sub-packages have been loaded, so
7037 * by avoiding auto-vivifying symbol tables, we ensure that
7038 * defined %foo::bar:: continues to be false, and the existing
7039 * tests still give the expected answers, even though what
7040 * they're actually testing has now changed subtly.
7042 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7044 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7045 ((PL_tokenbuf[0] == '$') ? SVt_PV
7046 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7052 * The following code was generated by perl_keyword.pl.
7056 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7061 case 1: /* 5 tokens of length 1 */
7093 case 2: /* 18 tokens of length 2 */
7239 case 3: /* 29 tokens of length 3 */
7243 if (name[1] == 'N' &&
7306 if (name[1] == 'i' &&
7328 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7346 if (name[1] == 'o' &&
7355 if (name[1] == 'e' &&
7364 if (name[1] == 'n' &&
7373 if (name[1] == 'o' &&
7382 if (name[1] == 'a' &&
7391 if (name[1] == 'o' &&
7453 if (name[1] == 'e' &&
7467 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7493 if (name[1] == 'i' &&
7502 if (name[1] == 's' &&
7511 if (name[1] == 'e' &&
7520 if (name[1] == 'o' &&
7532 case 4: /* 41 tokens of length 4 */
7536 if (name[1] == 'O' &&
7546 if (name[1] == 'N' &&
7556 if (name[1] == 'i' &&
7566 if (name[1] == 'h' &&
7576 if (name[1] == 'u' &&
7589 if (name[2] == 'c' &&
7598 if (name[2] == 's' &&
7607 if (name[2] == 'a' &&
7643 if (name[1] == 'o' &&
7656 if (name[2] == 't' &&
7665 if (name[2] == 'o' &&
7674 if (name[2] == 't' &&
7683 if (name[2] == 'e' &&
7696 if (name[1] == 'o' &&
7709 if (name[2] == 'y' &&
7718 if (name[2] == 'l' &&
7734 if (name[2] == 's' &&
7743 if (name[2] == 'n' &&
7752 if (name[2] == 'c' &&
7765 if (name[1] == 'e' &&
7775 if (name[1] == 'p' &&
7788 if (name[2] == 'c' &&
7797 if (name[2] == 'p' &&
7806 if (name[2] == 's' &&
7822 if (name[2] == 'n' &&
7892 if (name[2] == 'r' &&
7901 if (name[2] == 'r' &&
7910 if (name[2] == 'a' &&
7926 if (name[2] == 'l' &&
7988 if (name[2] == 'e' &&
7991 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8004 case 5: /* 39 tokens of length 5 */
8008 if (name[1] == 'E' &&
8019 if (name[1] == 'H' &&
8033 if (name[2] == 'a' &&
8043 if (name[2] == 'a' &&
8060 if (name[2] == 'e' &&
8070 if (name[2] == 'e' &&
8074 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8090 if (name[3] == 'i' &&
8099 if (name[3] == 'o' &&
8135 if (name[2] == 'o' &&
8145 if (name[2] == 'y' &&
8159 if (name[1] == 'l' &&
8173 if (name[2] == 'n' &&
8183 if (name[2] == 'o' &&
8197 if (name[1] == 'i' &&
8202 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8211 if (name[2] == 'd' &&
8221 if (name[2] == 'c' &&
8238 if (name[2] == 'c' &&
8248 if (name[2] == 't' &&
8262 if (name[1] == 'k' &&
8273 if (name[1] == 'r' &&
8287 if (name[2] == 's' &&
8297 if (name[2] == 'd' &&
8314 if (name[2] == 'm' &&
8324 if (name[2] == 'i' &&
8334 if (name[2] == 'e' &&
8344 if (name[2] == 'l' &&
8354 if (name[2] == 'a' &&
8367 if (name[3] == 't' &&
8370 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8376 if (name[3] == 'd' &&
8393 if (name[1] == 'i' &&
8407 if (name[2] == 'a' &&
8420 if (name[3] == 'e' &&
8455 if (name[2] == 'i' &&
8472 if (name[2] == 'i' &&
8482 if (name[2] == 'i' &&
8499 case 6: /* 33 tokens of length 6 */
8503 if (name[1] == 'c' &&
8518 if (name[2] == 'l' &&
8529 if (name[2] == 'r' &&
8544 if (name[1] == 'e' &&
8559 if (name[2] == 's' &&
8564 if(ckWARN_d(WARN_SYNTAX))
8565 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8571 if (name[2] == 'i' &&
8589 if (name[2] == 'l' &&
8600 if (name[2] == 'r' &&
8615 if (name[1] == 'm' &&
8630 if (name[2] == 'n' &&
8641 if (name[2] == 's' &&
8656 if (name[1] == 's' &&
8662 if (name[4] == 't' &&
8671 if (name[4] == 'e' &&
8680 if (name[4] == 'c' &&
8689 if (name[4] == 'n' &&
8705 if (name[1] == 'r' &&
8723 if (name[3] == 'a' &&
8733 if (name[3] == 'u' &&
8747 if (name[2] == 'n' &&
8765 if (name[2] == 'a' &&
8779 if (name[3] == 'e' &&
8792 if (name[4] == 't' &&
8801 if (name[4] == 'e' &&
8823 if (name[4] == 't' &&
8832 if (name[4] == 'e' &&
8848 if (name[2] == 'c' &&
8859 if (name[2] == 'l' &&
8870 if (name[2] == 'b' &&
8881 if (name[2] == 's' &&
8904 if (name[4] == 's' &&
8913 if (name[4] == 'n' &&
8926 if (name[3] == 'a' &&
8943 if (name[1] == 'a' &&
8958 case 7: /* 29 tokens of length 7 */
8962 if (name[1] == 'E' &&
8975 if (name[1] == '_' &&
8988 if (name[1] == 'i' &&
8995 return -KEY_binmode;
9001 if (name[1] == 'o' &&
9008 return -KEY_connect;
9017 if (name[2] == 'm' &&
9023 return -KEY_dbmopen;
9034 if (name[4] == 'u' &&
9038 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9044 if (name[4] == 'n' &&
9065 if (name[1] == 'o' &&
9078 if (name[1] == 'e' &&
9085 if (name[5] == 'r' &&
9088 return -KEY_getpgrp;
9094 if (name[5] == 'i' &&
9097 return -KEY_getppid;
9110 if (name[1] == 'c' &&
9117 return -KEY_lcfirst;
9123 if (name[1] == 'p' &&
9130 return -KEY_opendir;
9136 if (name[1] == 'a' &&
9154 if (name[3] == 'd' &&
9159 return -KEY_readdir;
9165 if (name[3] == 'u' &&
9176 if (name[3] == 'e' &&
9181 return -KEY_reverse;
9200 if (name[3] == 'k' &&
9205 return -KEY_seekdir;
9211 if (name[3] == 'p' &&
9216 return -KEY_setpgrp;
9226 if (name[2] == 'm' &&
9232 return -KEY_shmread;
9238 if (name[2] == 'r' &&
9244 return -KEY_sprintf;
9253 if (name[3] == 'l' &&
9258 return -KEY_symlink;
9267 if (name[4] == 'a' &&
9271 return -KEY_syscall;
9277 if (name[4] == 'p' &&
9281 return -KEY_sysopen;
9287 if (name[4] == 'e' &&
9291 return -KEY_sysread;
9297 if (name[4] == 'e' &&
9301 return -KEY_sysseek;
9319 if (name[1] == 'e' &&
9326 return -KEY_telldir;
9335 if (name[2] == 'f' &&
9341 return -KEY_ucfirst;
9347 if (name[2] == 's' &&
9353 return -KEY_unshift;
9363 if (name[1] == 'a' &&
9370 return -KEY_waitpid;
9379 case 8: /* 26 tokens of length 8 */
9383 if (name[1] == 'U' &&
9391 return KEY_AUTOLOAD;
9402 if (name[3] == 'A' &&
9408 return KEY___DATA__;
9414 if (name[3] == 'I' &&
9420 return -KEY___FILE__;
9426 if (name[3] == 'I' &&
9432 return -KEY___LINE__;
9448 if (name[2] == 'o' &&
9455 return -KEY_closedir;
9461 if (name[2] == 'n' &&
9468 return -KEY_continue;
9478 if (name[1] == 'b' &&
9486 return -KEY_dbmclose;
9492 if (name[1] == 'n' &&
9498 if (name[4] == 'r' &&
9503 return -KEY_endgrent;
9509 if (name[4] == 'w' &&
9514 return -KEY_endpwent;
9527 if (name[1] == 'o' &&
9535 return -KEY_formline;
9541 if (name[1] == 'e' &&
9552 if (name[6] == 'n' &&
9555 return -KEY_getgrent;
9561 if (name[6] == 'i' &&
9564 return -KEY_getgrgid;
9570 if (name[6] == 'a' &&
9573 return -KEY_getgrnam;
9586 if (name[4] == 'o' &&
9591 return -KEY_getlogin;
9602 if (name[6] == 'n' &&
9605 return -KEY_getpwent;
9611 if (name[6] == 'a' &&
9614 return -KEY_getpwnam;
9620 if (name[6] == 'i' &&
9623 return -KEY_getpwuid;
9643 if (name[1] == 'e' &&
9650 if (name[5] == 'i' &&
9657 return -KEY_readline;
9662 return -KEY_readlink;
9673 if (name[5] == 'i' &&
9677 return -KEY_readpipe;
9698 if (name[4] == 'r' &&
9703 return -KEY_setgrent;
9709 if (name[4] == 'w' &&
9714 return -KEY_setpwent;
9730 if (name[3] == 'w' &&
9736 return -KEY_shmwrite;
9742 if (name[3] == 't' &&
9748 return -KEY_shutdown;
9758 if (name[2] == 's' &&
9765 return -KEY_syswrite;
9775 if (name[1] == 'r' &&
9783 return -KEY_truncate;
9792 case 9: /* 9 tokens of length 9 */
9796 if (name[1] == 'N' &&
9805 return KEY_UNITCHECK;
9811 if (name[1] == 'n' &&
9820 return -KEY_endnetent;
9826 if (name[1] == 'e' &&
9835 return -KEY_getnetent;
9841 if (name[1] == 'o' &&
9850 return -KEY_localtime;
9856 if (name[1] == 'r' &&
9865 return KEY_prototype;
9871 if (name[1] == 'u' &&
9880 return -KEY_quotemeta;
9886 if (name[1] == 'e' &&
9895 return -KEY_rewinddir;
9901 if (name[1] == 'e' &&
9910 return -KEY_setnetent;
9916 if (name[1] == 'a' &&
9925 return -KEY_wantarray;
9934 case 10: /* 9 tokens of length 10 */
9938 if (name[1] == 'n' &&
9944 if (name[4] == 'o' &&
9951 return -KEY_endhostent;
9957 if (name[4] == 'e' &&
9964 return -KEY_endservent;
9977 if (name[1] == 'e' &&
9983 if (name[4] == 'o' &&
9990 return -KEY_gethostent;
9999 if (name[5] == 'r' &&
10005 return -KEY_getservent;
10011 if (name[5] == 'c' &&
10017 return -KEY_getsockopt;
10037 if (name[2] == 't')
10042 if (name[4] == 'o' &&
10049 return -KEY_sethostent;
10058 if (name[5] == 'r' &&
10064 return -KEY_setservent;
10070 if (name[5] == 'c' &&
10076 return -KEY_setsockopt;
10093 if (name[2] == 'c' &&
10102 return -KEY_socketpair;
10115 case 11: /* 8 tokens of length 11 */
10119 if (name[1] == '_' &&
10129 { /* __PACKAGE__ */
10130 return -KEY___PACKAGE__;
10136 if (name[1] == 'n' &&
10146 { /* endprotoent */
10147 return -KEY_endprotoent;
10153 if (name[1] == 'e' &&
10162 if (name[5] == 'e' &&
10168 { /* getpeername */
10169 return -KEY_getpeername;
10178 if (name[6] == 'o' &&
10183 { /* getpriority */
10184 return -KEY_getpriority;
10190 if (name[6] == 't' &&
10195 { /* getprotoent */
10196 return -KEY_getprotoent;
10210 if (name[4] == 'o' &&
10217 { /* getsockname */
10218 return -KEY_getsockname;
10231 if (name[1] == 'e' &&
10239 if (name[6] == 'o' &&
10244 { /* setpriority */
10245 return -KEY_setpriority;
10251 if (name[6] == 't' &&
10256 { /* setprotoent */
10257 return -KEY_setprotoent;
10273 case 12: /* 2 tokens of length 12 */
10274 if (name[0] == 'g' &&
10286 if (name[9] == 'd' &&
10289 { /* getnetbyaddr */
10290 return -KEY_getnetbyaddr;
10296 if (name[9] == 'a' &&
10299 { /* getnetbyname */
10300 return -KEY_getnetbyname;
10312 case 13: /* 4 tokens of length 13 */
10313 if (name[0] == 'g' &&
10320 if (name[4] == 'o' &&
10329 if (name[10] == 'd' &&
10332 { /* gethostbyaddr */
10333 return -KEY_gethostbyaddr;
10339 if (name[10] == 'a' &&
10342 { /* gethostbyname */
10343 return -KEY_gethostbyname;
10356 if (name[4] == 'e' &&
10365 if (name[10] == 'a' &&
10368 { /* getservbyname */
10369 return -KEY_getservbyname;
10375 if (name[10] == 'o' &&
10378 { /* getservbyport */
10379 return -KEY_getservbyport;
10398 case 14: /* 1 tokens of length 14 */
10399 if (name[0] == 'g' &&
10413 { /* getprotobyname */
10414 return -KEY_getprotobyname;
10419 case 16: /* 1 tokens of length 16 */
10420 if (name[0] == 'g' &&
10436 { /* getprotobynumber */
10437 return -KEY_getprotobynumber;
10451 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10455 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10456 if (ckWARN(WARN_SYNTAX)) {
10459 for (w = s+2; *w && level; w++) {
10462 else if (*w == ')')
10465 while (isSPACE(*w))
10467 /* the list of chars below is for end of statements or
10468 * block / parens, boolean operators (&&, ||, //) and branch
10469 * constructs (or, and, if, until, unless, while, err, for).
10470 * Not a very solid hack... */
10471 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10472 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10473 "%s (...) interpreted as function",name);
10476 while (s < PL_bufend && isSPACE(*s))
10480 while (s < PL_bufend && isSPACE(*s))
10482 if (isIDFIRST_lazy_if(s,UTF)) {
10483 const char * const w = s++;
10484 while (isALNUM_lazy_if(s,UTF))
10486 while (s < PL_bufend && isSPACE(*s))
10490 if (keyword(w, s - w, 0))
10493 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10494 if (gv && GvCVu(gv))
10496 Perl_croak(aTHX_ "No comma allowed after %s", what);
10501 /* Either returns sv, or mortalizes sv and returns a new SV*.
10502 Best used as sv=new_constant(..., sv, ...).
10503 If s, pv are NULL, calls subroutine with one argument,
10504 and type is used with error messages only. */
10507 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10511 HV * const table = GvHV(PL_hintgv); /* ^H */
10515 const char *why1 = "", *why2 = "", *why3 = "";
10517 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10520 why2 = (const char *)
10521 (strEQ(key,"charnames")
10522 ? "(possibly a missing \"use charnames ...\")"
10524 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10525 (type ? type: "undef"), why2);
10527 /* This is convoluted and evil ("goto considered harmful")
10528 * but I do not understand the intricacies of all the different
10529 * failure modes of %^H in here. The goal here is to make
10530 * the most probable error message user-friendly. --jhi */
10535 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10536 (type ? type: "undef"), why1, why2, why3);
10538 yyerror(SvPVX_const(msg));
10542 cvp = hv_fetch(table, key, strlen(key), FALSE);
10543 if (!cvp || !SvOK(*cvp)) {
10546 why3 = "} is not defined";
10549 sv_2mortal(sv); /* Parent created it permanently */
10552 pv = sv_2mortal(newSVpvn(s, len));
10554 typesv = sv_2mortal(newSVpv(type, 0));
10556 typesv = &PL_sv_undef;
10558 PUSHSTACKi(PERLSI_OVERLOAD);
10570 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10574 /* Check the eval first */
10575 if (!PL_in_eval && SvTRUE(ERRSV)) {
10576 sv_catpvs(ERRSV, "Propagated");
10577 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10579 res = SvREFCNT_inc_simple(sv);
10583 SvREFCNT_inc_simple_void(res);
10592 why1 = "Call to &{$^H{";
10594 why3 = "}} did not return a defined value";
10602 /* Returns a NUL terminated string, with the length of the string written to
10606 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10609 register char *d = dest;
10610 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10613 Perl_croak(aTHX_ ident_too_long);
10614 if (isALNUM(*s)) /* UTF handled below */
10616 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10621 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10625 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10626 char *t = s + UTF8SKIP(s);
10628 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10632 Perl_croak(aTHX_ ident_too_long);
10633 Copy(s, d, len, char);
10646 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10649 char *bracket = NULL;
10651 register char *d = dest;
10652 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10657 while (isDIGIT(*s)) {
10659 Perl_croak(aTHX_ ident_too_long);
10666 Perl_croak(aTHX_ ident_too_long);
10667 if (isALNUM(*s)) /* UTF handled below */
10669 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10674 else if (*s == ':' && s[1] == ':') {
10678 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10679 char *t = s + UTF8SKIP(s);
10680 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10682 if (d + (t - s) > e)
10683 Perl_croak(aTHX_ ident_too_long);
10684 Copy(s, d, t - s, char);
10695 if (PL_lex_state != LEX_NORMAL)
10696 PL_lex_state = LEX_INTERPENDMAYBE;
10699 if (*s == '$' && s[1] &&
10700 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10713 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10718 if (isSPACE(s[-1])) {
10720 const char ch = *s++;
10721 if (!SPACE_OR_TAB(ch)) {
10727 if (isIDFIRST_lazy_if(d,UTF)) {
10731 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10732 end += UTF8SKIP(end);
10733 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10734 end += UTF8SKIP(end);
10736 Copy(s, d, end - s, char);
10741 while ((isALNUM(*s) || *s == ':') && d < e)
10744 Perl_croak(aTHX_ ident_too_long);
10747 while (s < send && SPACE_OR_TAB(*s))
10749 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10750 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10751 const char * const brack =
10753 ((*s == '[') ? "[...]" : "{...}");
10754 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10755 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10756 funny, dest, brack, funny, dest, brack);
10759 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10763 /* Handle extended ${^Foo} variables
10764 * 1999-02-27 mjd-perl-patch@plover.com */
10765 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10769 while (isALNUM(*s) && d < e) {
10773 Perl_croak(aTHX_ ident_too_long);
10778 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10779 PL_lex_state = LEX_INTERPEND;
10782 if (PL_lex_state == LEX_NORMAL) {
10783 if (ckWARN(WARN_AMBIGUOUS) &&
10784 (keyword(dest, d - dest, 0)
10785 || get_cvn_flags(dest, d - dest, 0)))
10789 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10790 "Ambiguous use of %c{%s} resolved to %c%s",
10791 funny, dest, funny, dest);
10796 s = bracket; /* let the parser handle it */
10800 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10801 PL_lex_state = LEX_INTERPEND;
10806 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10808 PERL_UNUSED_CONTEXT;
10812 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10813 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10814 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10815 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10816 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10822 S_scan_pat(pTHX_ char *start, I32 type)
10826 char *s = scan_str(start,!!PL_madskills,FALSE);
10827 const char * const valid_flags =
10828 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10835 const char * const delimiter = skipspace(start);
10839 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10840 : "Search pattern not terminated" ));
10843 pm = (PMOP*)newPMOP(type, 0);
10844 if (PL_multi_open == '?') {
10845 /* This is the only point in the code that sets PMf_ONCE: */
10846 pm->op_pmflags |= PMf_ONCE;
10848 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10849 allows us to restrict the list needed by reset to just the ??
10851 assert(type != OP_TRANS);
10853 MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10856 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10859 elements = mg->mg_len / sizeof(PMOP**);
10860 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10861 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10862 mg->mg_len = elements * sizeof(PMOP**);
10863 PmopSTASH_set(pm,PL_curstash);
10869 while (*s && strchr(valid_flags, *s))
10870 pmflag(&pm->op_pmflags,*s++);
10872 if (PL_madskills && modstart != s) {
10873 SV* tmptoken = newSVpvn(modstart, s - modstart);
10874 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10877 /* issue a warning if /c is specified,but /g is not */
10878 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10879 && ckWARN(WARN_REGEXP))
10881 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10882 "Use of /c modifier is meaningless without /g" );
10885 PL_lex_op = (OP*)pm;
10886 yylval.ival = OP_MATCH;
10891 S_scan_subst(pTHX_ char *start)
10902 yylval.ival = OP_NULL;
10904 s = scan_str(start,!!PL_madskills,FALSE);
10907 Perl_croak(aTHX_ "Substitution pattern not terminated");
10909 if (s[-1] == PL_multi_open)
10912 if (PL_madskills) {
10913 CURMAD('q', PL_thisopen);
10914 CURMAD('_', PL_thiswhite);
10915 CURMAD('E', PL_thisstuff);
10916 CURMAD('Q', PL_thisclose);
10917 PL_realtokenstart = s - SvPVX(PL_linestr);
10921 first_start = PL_multi_start;
10922 s = scan_str(s,!!PL_madskills,FALSE);
10924 if (PL_lex_stuff) {
10925 SvREFCNT_dec(PL_lex_stuff);
10926 PL_lex_stuff = NULL;
10928 Perl_croak(aTHX_ "Substitution replacement not terminated");
10930 PL_multi_start = first_start; /* so whole substitution is taken together */
10932 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10935 if (PL_madskills) {
10936 CURMAD('z', PL_thisopen);
10937 CURMAD('R', PL_thisstuff);
10938 CURMAD('Z', PL_thisclose);
10944 if (*s == EXEC_PAT_MOD) {
10948 else if (strchr(S_PAT_MODS, *s))
10949 pmflag(&pm->op_pmflags,*s++);
10955 if (PL_madskills) {
10957 curmad('m', newSVpvn(modstart, s - modstart));
10958 append_madprops(PL_thismad, (OP*)pm, 0);
10962 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10963 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10967 SV * const repl = newSVpvs("");
10969 PL_sublex_info.super_bufptr = s;
10970 PL_sublex_info.super_bufend = PL_bufend;
10972 pm->op_pmflags |= PMf_EVAL;
10974 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10975 sv_catpvs(repl, "{");
10976 sv_catsv(repl, PL_lex_repl);
10977 if (strchr(SvPVX(PL_lex_repl), '#'))
10978 sv_catpvs(repl, "\n");
10979 sv_catpvs(repl, "}");
10981 SvREFCNT_dec(PL_lex_repl);
10982 PL_lex_repl = repl;
10985 PL_lex_op = (OP*)pm;
10986 yylval.ival = OP_SUBST;
10991 S_scan_trans(pTHX_ char *start)
11004 yylval.ival = OP_NULL;
11006 s = scan_str(start,!!PL_madskills,FALSE);
11008 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11010 if (s[-1] == PL_multi_open)
11013 if (PL_madskills) {
11014 CURMAD('q', PL_thisopen);
11015 CURMAD('_', PL_thiswhite);
11016 CURMAD('E', PL_thisstuff);
11017 CURMAD('Q', PL_thisclose);
11018 PL_realtokenstart = s - SvPVX(PL_linestr);
11022 s = scan_str(s,!!PL_madskills,FALSE);
11024 if (PL_lex_stuff) {
11025 SvREFCNT_dec(PL_lex_stuff);
11026 PL_lex_stuff = NULL;
11028 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11030 if (PL_madskills) {
11031 CURMAD('z', PL_thisopen);
11032 CURMAD('R', PL_thisstuff);
11033 CURMAD('Z', PL_thisclose);
11036 complement = del = squash = 0;
11043 complement = OPpTRANS_COMPLEMENT;
11046 del = OPpTRANS_DELETE;
11049 squash = OPpTRANS_SQUASH;
11058 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11059 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11060 o->op_private &= ~OPpTRANS_ALL;
11061 o->op_private |= del|squash|complement|
11062 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11063 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11066 yylval.ival = OP_TRANS;
11069 if (PL_madskills) {
11071 curmad('m', newSVpvn(modstart, s - modstart));
11072 append_madprops(PL_thismad, o, 0);
11081 S_scan_heredoc(pTHX_ register char *s)
11085 I32 op_type = OP_SCALAR;
11089 const char *found_newline;
11093 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11095 I32 stuffstart = s - SvPVX(PL_linestr);
11098 PL_realtokenstart = -1;
11103 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11107 while (SPACE_OR_TAB(*peek))
11109 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11112 s = delimcpy(d, e, s, PL_bufend, term, &len);
11122 if (!isALNUM_lazy_if(s,UTF))
11123 deprecate_old("bare << to mean <<\"\"");
11124 for (; isALNUM_lazy_if(s,UTF); s++) {
11129 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11130 Perl_croak(aTHX_ "Delimiter for here document is too long");
11133 len = d - PL_tokenbuf;
11136 if (PL_madskills) {
11137 tstart = PL_tokenbuf + !outer;
11138 PL_thisclose = newSVpvn(tstart, len - !outer);
11139 tstart = SvPVX(PL_linestr) + stuffstart;
11140 PL_thisopen = newSVpvn(tstart, s - tstart);
11141 stuffstart = s - SvPVX(PL_linestr);
11144 #ifndef PERL_STRICT_CR
11145 d = strchr(s, '\r');
11147 char * const olds = s;
11149 while (s < PL_bufend) {
11155 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11164 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11171 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11172 herewas = newSVpvn(s,PL_bufend-s);
11176 herewas = newSVpvn(s-1,found_newline-s+1);
11179 herewas = newSVpvn(s,found_newline-s);
11183 if (PL_madskills) {
11184 tstart = SvPVX(PL_linestr) + stuffstart;
11186 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11188 PL_thisstuff = newSVpvn(tstart, s - tstart);
11191 s += SvCUR(herewas);
11194 stuffstart = s - SvPVX(PL_linestr);
11200 tmpstr = newSV_type(SVt_PVIV);
11201 SvGROW(tmpstr, 80);
11202 if (term == '\'') {
11203 op_type = OP_CONST;
11204 SvIV_set(tmpstr, -1);
11206 else if (term == '`') {
11207 op_type = OP_BACKTICK;
11208 SvIV_set(tmpstr, '\\');
11212 PL_multi_start = CopLINE(PL_curcop);
11213 PL_multi_open = PL_multi_close = '<';
11214 term = *PL_tokenbuf;
11215 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11216 char * const bufptr = PL_sublex_info.super_bufptr;
11217 char * const bufend = PL_sublex_info.super_bufend;
11218 char * const olds = s - SvCUR(herewas);
11219 s = strchr(bufptr, '\n');
11223 while (s < bufend &&
11224 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11226 CopLINE_inc(PL_curcop);
11229 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11230 missingterm(PL_tokenbuf);
11232 sv_setpvn(herewas,bufptr,d-bufptr+1);
11233 sv_setpvn(tmpstr,d+1,s-d);
11235 sv_catpvn(herewas,s,bufend-s);
11236 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11243 while (s < PL_bufend &&
11244 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11246 CopLINE_inc(PL_curcop);
11248 if (s >= PL_bufend) {
11249 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11250 missingterm(PL_tokenbuf);
11252 sv_setpvn(tmpstr,d+1,s-d);
11254 if (PL_madskills) {
11256 sv_catpvn(PL_thisstuff, d + 1, s - d);
11258 PL_thisstuff = newSVpvn(d + 1, s - d);
11259 stuffstart = s - SvPVX(PL_linestr);
11263 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11265 sv_catpvn(herewas,s,PL_bufend-s);
11266 sv_setsv(PL_linestr,herewas);
11267 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11268 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11269 PL_last_lop = PL_last_uni = NULL;
11272 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11273 while (s >= PL_bufend) { /* multiple line string? */
11275 if (PL_madskills) {
11276 tstart = SvPVX(PL_linestr) + stuffstart;
11278 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11280 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11284 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11285 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11286 missingterm(PL_tokenbuf);
11289 stuffstart = s - SvPVX(PL_linestr);
11291 CopLINE_inc(PL_curcop);
11292 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11293 PL_last_lop = PL_last_uni = NULL;
11294 #ifndef PERL_STRICT_CR
11295 if (PL_bufend - PL_linestart >= 2) {
11296 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11297 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11299 PL_bufend[-2] = '\n';
11301 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11303 else if (PL_bufend[-1] == '\r')
11304 PL_bufend[-1] = '\n';
11306 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11307 PL_bufend[-1] = '\n';
11309 if (PERLDB_LINE && PL_curstash != PL_debstash)
11310 update_debugger_info(PL_linestr, NULL, 0);
11311 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11312 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11313 *(SvPVX(PL_linestr) + off ) = ' ';
11314 sv_catsv(PL_linestr,herewas);
11315 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11316 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11320 sv_catsv(tmpstr,PL_linestr);
11325 PL_multi_end = CopLINE(PL_curcop);
11326 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11327 SvPV_shrink_to_cur(tmpstr);
11329 SvREFCNT_dec(herewas);
11331 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11333 else if (PL_encoding)
11334 sv_recode_to_utf8(tmpstr, PL_encoding);
11336 PL_lex_stuff = tmpstr;
11337 yylval.ival = op_type;
11341 /* scan_inputsymbol
11342 takes: current position in input buffer
11343 returns: new position in input buffer
11344 side-effects: yylval and lex_op are set.
11349 <FH> read from filehandle
11350 <pkg::FH> read from package qualified filehandle
11351 <pkg'FH> read from package qualified filehandle
11352 <$fh> read from filehandle in $fh
11353 <*.h> filename glob
11358 S_scan_inputsymbol(pTHX_ char *start)
11361 register char *s = start; /* current position in buffer */
11365 char *d = PL_tokenbuf; /* start of temp holding space */
11366 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11368 end = strchr(s, '\n');
11371 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11373 /* die if we didn't have space for the contents of the <>,
11374 or if it didn't end, or if we see a newline
11377 if (len >= (I32)sizeof PL_tokenbuf)
11378 Perl_croak(aTHX_ "Excessively long <> operator");
11380 Perl_croak(aTHX_ "Unterminated <> operator");
11385 Remember, only scalar variables are interpreted as filehandles by
11386 this code. Anything more complex (e.g., <$fh{$num}>) will be
11387 treated as a glob() call.
11388 This code makes use of the fact that except for the $ at the front,
11389 a scalar variable and a filehandle look the same.
11391 if (*d == '$' && d[1]) d++;
11393 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11394 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11397 /* If we've tried to read what we allow filehandles to look like, and
11398 there's still text left, then it must be a glob() and not a getline.
11399 Use scan_str to pull out the stuff between the <> and treat it
11400 as nothing more than a string.
11403 if (d - PL_tokenbuf != len) {
11404 yylval.ival = OP_GLOB;
11406 s = scan_str(start,!!PL_madskills,FALSE);
11408 Perl_croak(aTHX_ "Glob not terminated");
11412 bool readline_overriden = FALSE;
11415 /* we're in a filehandle read situation */
11418 /* turn <> into <ARGV> */
11420 Copy("ARGV",d,5,char);
11422 /* Check whether readline() is overriden */
11423 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11425 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11427 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11428 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11429 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11430 readline_overriden = TRUE;
11432 /* if <$fh>, create the ops to turn the variable into a
11436 /* try to find it in the pad for this block, otherwise find
11437 add symbol table ops
11439 const PADOFFSET tmp = pad_findmy(d);
11440 if (tmp != NOT_IN_PAD) {
11441 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11442 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11443 HEK * const stashname = HvNAME_HEK(stash);
11444 SV * const sym = sv_2mortal(newSVhek(stashname));
11445 sv_catpvs(sym, "::");
11446 sv_catpv(sym, d+1);
11451 OP * const o = newOP(OP_PADSV, 0);
11453 PL_lex_op = readline_overriden
11454 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11455 append_elem(OP_LIST, o,
11456 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11457 : (OP*)newUNOP(OP_READLINE, 0, o);
11466 ? (GV_ADDMULTI | GV_ADDINEVAL)
11469 PL_lex_op = readline_overriden
11470 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11471 append_elem(OP_LIST,
11472 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11473 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11474 : (OP*)newUNOP(OP_READLINE, 0,
11475 newUNOP(OP_RV2SV, 0,
11476 newGVOP(OP_GV, 0, gv)));
11478 if (!readline_overriden)
11479 PL_lex_op->op_flags |= OPf_SPECIAL;
11480 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11481 yylval.ival = OP_NULL;
11484 /* If it's none of the above, it must be a literal filehandle
11485 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11487 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11488 PL_lex_op = readline_overriden
11489 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11490 append_elem(OP_LIST,
11491 newGVOP(OP_GV, 0, gv),
11492 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11493 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11494 yylval.ival = OP_NULL;
11503 takes: start position in buffer
11504 keep_quoted preserve \ on the embedded delimiter(s)
11505 keep_delims preserve the delimiters around the string
11506 returns: position to continue reading from buffer
11507 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11508 updates the read buffer.
11510 This subroutine pulls a string out of the input. It is called for:
11511 q single quotes q(literal text)
11512 ' single quotes 'literal text'
11513 qq double quotes qq(interpolate $here please)
11514 " double quotes "interpolate $here please"
11515 qx backticks qx(/bin/ls -l)
11516 ` backticks `/bin/ls -l`
11517 qw quote words @EXPORT_OK = qw( func() $spam )
11518 m// regexp match m/this/
11519 s/// regexp substitute s/this/that/
11520 tr/// string transliterate tr/this/that/
11521 y/// string transliterate y/this/that/
11522 ($*@) sub prototypes sub foo ($)
11523 (stuff) sub attr parameters sub foo : attr(stuff)
11524 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11526 In most of these cases (all but <>, patterns and transliterate)
11527 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11528 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11529 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11532 It skips whitespace before the string starts, and treats the first
11533 character as the delimiter. If the delimiter is one of ([{< then
11534 the corresponding "close" character )]}> is used as the closing
11535 delimiter. It allows quoting of delimiters, and if the string has
11536 balanced delimiters ([{<>}]) it allows nesting.
11538 On success, the SV with the resulting string is put into lex_stuff or,
11539 if that is already non-NULL, into lex_repl. The second case occurs only
11540 when parsing the RHS of the special constructs s/// and tr/// (y///).
11541 For convenience, the terminating delimiter character is stuffed into
11546 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11549 SV *sv; /* scalar value: string */
11550 const char *tmps; /* temp string, used for delimiter matching */
11551 register char *s = start; /* current position in the buffer */
11552 register char term; /* terminating character */
11553 register char *to; /* current position in the sv's data */
11554 I32 brackets = 1; /* bracket nesting level */
11555 bool has_utf8 = FALSE; /* is there any utf8 content? */
11556 I32 termcode; /* terminating char. code */
11557 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11558 STRLEN termlen; /* length of terminating string */
11559 int last_off = 0; /* last position for nesting bracket */
11565 /* skip space before the delimiter */
11571 if (PL_realtokenstart >= 0) {
11572 stuffstart = PL_realtokenstart;
11573 PL_realtokenstart = -1;
11576 stuffstart = start - SvPVX(PL_linestr);
11578 /* mark where we are, in case we need to report errors */
11581 /* after skipping whitespace, the next character is the terminator */
11584 termcode = termstr[0] = term;
11588 termcode = utf8_to_uvchr((U8*)s, &termlen);
11589 Copy(s, termstr, termlen, U8);
11590 if (!UTF8_IS_INVARIANT(term))
11594 /* mark where we are */
11595 PL_multi_start = CopLINE(PL_curcop);
11596 PL_multi_open = term;
11598 /* find corresponding closing delimiter */
11599 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11600 termcode = termstr[0] = term = tmps[5];
11602 PL_multi_close = term;
11604 /* create a new SV to hold the contents. 79 is the SV's initial length.
11605 What a random number. */
11606 sv = newSV_type(SVt_PVIV);
11608 SvIV_set(sv, termcode);
11609 (void)SvPOK_only(sv); /* validate pointer */
11611 /* move past delimiter and try to read a complete string */
11613 sv_catpvn(sv, s, termlen);
11616 tstart = SvPVX(PL_linestr) + stuffstart;
11617 if (!PL_thisopen && !keep_delims) {
11618 PL_thisopen = newSVpvn(tstart, s - tstart);
11619 stuffstart = s - SvPVX(PL_linestr);
11623 if (PL_encoding && !UTF) {
11627 int offset = s - SvPVX_const(PL_linestr);
11628 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11629 &offset, (char*)termstr, termlen);
11630 const char * const ns = SvPVX_const(PL_linestr) + offset;
11631 char * const svlast = SvEND(sv) - 1;
11633 for (; s < ns; s++) {
11634 if (*s == '\n' && !PL_rsfp)
11635 CopLINE_inc(PL_curcop);
11638 goto read_more_line;
11640 /* handle quoted delimiters */
11641 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11643 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11645 if ((svlast-1 - t) % 2) {
11646 if (!keep_quoted) {
11647 *(svlast-1) = term;
11649 SvCUR_set(sv, SvCUR(sv) - 1);
11654 if (PL_multi_open == PL_multi_close) {
11660 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11661 /* At here, all closes are "was quoted" one,
11662 so we don't check PL_multi_close. */
11664 if (!keep_quoted && *(t+1) == PL_multi_open)
11669 else if (*t == PL_multi_open)
11677 SvCUR_set(sv, w - SvPVX_const(sv));
11679 last_off = w - SvPVX(sv);
11680 if (--brackets <= 0)
11685 if (!keep_delims) {
11686 SvCUR_set(sv, SvCUR(sv) - 1);
11692 /* extend sv if need be */
11693 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11694 /* set 'to' to the next character in the sv's string */
11695 to = SvPVX(sv)+SvCUR(sv);
11697 /* if open delimiter is the close delimiter read unbridle */
11698 if (PL_multi_open == PL_multi_close) {
11699 for (; s < PL_bufend; s++,to++) {
11700 /* embedded newlines increment the current line number */
11701 if (*s == '\n' && !PL_rsfp)
11702 CopLINE_inc(PL_curcop);
11703 /* handle quoted delimiters */
11704 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11705 if (!keep_quoted && s[1] == term)
11707 /* any other quotes are simply copied straight through */
11711 /* terminate when run out of buffer (the for() condition), or
11712 have found the terminator */
11713 else if (*s == term) {
11716 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11719 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11725 /* if the terminator isn't the same as the start character (e.g.,
11726 matched brackets), we have to allow more in the quoting, and
11727 be prepared for nested brackets.
11730 /* read until we run out of string, or we find the terminator */
11731 for (; s < PL_bufend; s++,to++) {
11732 /* embedded newlines increment the line count */
11733 if (*s == '\n' && !PL_rsfp)
11734 CopLINE_inc(PL_curcop);
11735 /* backslashes can escape the open or closing characters */
11736 if (*s == '\\' && s+1 < PL_bufend) {
11737 if (!keep_quoted &&
11738 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11743 /* allow nested opens and closes */
11744 else if (*s == PL_multi_close && --brackets <= 0)
11746 else if (*s == PL_multi_open)
11748 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11753 /* terminate the copied string and update the sv's end-of-string */
11755 SvCUR_set(sv, to - SvPVX_const(sv));
11758 * this next chunk reads more into the buffer if we're not done yet
11762 break; /* handle case where we are done yet :-) */
11764 #ifndef PERL_STRICT_CR
11765 if (to - SvPVX_const(sv) >= 2) {
11766 if ((to[-2] == '\r' && to[-1] == '\n') ||
11767 (to[-2] == '\n' && to[-1] == '\r'))
11771 SvCUR_set(sv, to - SvPVX_const(sv));
11773 else if (to[-1] == '\r')
11776 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11781 /* if we're out of file, or a read fails, bail and reset the current
11782 line marker so we can report where the unterminated string began
11785 if (PL_madskills) {
11786 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11788 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11790 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11794 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11796 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11802 /* we read a line, so increment our line counter */
11803 CopLINE_inc(PL_curcop);
11805 /* update debugger info */
11806 if (PERLDB_LINE && PL_curstash != PL_debstash)
11807 update_debugger_info(PL_linestr, NULL, 0);
11809 /* having changed the buffer, we must update PL_bufend */
11810 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11811 PL_last_lop = PL_last_uni = NULL;
11814 /* at this point, we have successfully read the delimited string */
11816 if (!PL_encoding || UTF) {
11818 if (PL_madskills) {
11819 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11820 const int len = s - tstart;
11822 sv_catpvn(PL_thisstuff, tstart, len);
11824 PL_thisstuff = newSVpvn(tstart, len);
11825 if (!PL_thisclose && !keep_delims)
11826 PL_thisclose = newSVpvn(s,termlen);
11831 sv_catpvn(sv, s, termlen);
11836 if (PL_madskills) {
11837 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11838 const int len = s - tstart - termlen;
11840 sv_catpvn(PL_thisstuff, tstart, len);
11842 PL_thisstuff = newSVpvn(tstart, len);
11843 if (!PL_thisclose && !keep_delims)
11844 PL_thisclose = newSVpvn(s - termlen,termlen);
11848 if (has_utf8 || PL_encoding)
11851 PL_multi_end = CopLINE(PL_curcop);
11853 /* if we allocated too much space, give some back */
11854 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11855 SvLEN_set(sv, SvCUR(sv) + 1);
11856 SvPV_renew(sv, SvLEN(sv));
11859 /* decide whether this is the first or second quoted string we've read
11872 takes: pointer to position in buffer
11873 returns: pointer to new position in buffer
11874 side-effects: builds ops for the constant in yylval.op
11876 Read a number in any of the formats that Perl accepts:
11878 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11879 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11882 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11884 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11887 If it reads a number without a decimal point or an exponent, it will
11888 try converting the number to an integer and see if it can do so
11889 without loss of precision.
11893 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11896 register const char *s = start; /* current position in buffer */
11897 register char *d; /* destination in temp buffer */
11898 register char *e; /* end of temp buffer */
11899 NV nv; /* number read, as a double */
11900 SV *sv = NULL; /* place to put the converted number */
11901 bool floatit; /* boolean: int or float? */
11902 const char *lastub = NULL; /* position of last underbar */
11903 static char const number_too_long[] = "Number too long";
11905 /* We use the first character to decide what type of number this is */
11909 Perl_croak(aTHX_ "panic: scan_num");
11911 /* if it starts with a 0, it could be an octal number, a decimal in
11912 0.13 disguise, or a hexadecimal number, or a binary number. */
11916 u holds the "number so far"
11917 shift the power of 2 of the base
11918 (hex == 4, octal == 3, binary == 1)
11919 overflowed was the number more than we can hold?
11921 Shift is used when we add a digit. It also serves as an "are
11922 we in octal/hex/binary?" indicator to disallow hex characters
11923 when in octal mode.
11928 bool overflowed = FALSE;
11929 bool just_zero = TRUE; /* just plain 0 or binary number? */
11930 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11931 static const char* const bases[5] =
11932 { "", "binary", "", "octal", "hexadecimal" };
11933 static const char* const Bases[5] =
11934 { "", "Binary", "", "Octal", "Hexadecimal" };
11935 static const char* const maxima[5] =
11937 "0b11111111111111111111111111111111",
11941 const char *base, *Base, *max;
11943 /* check for hex */
11948 } else if (s[1] == 'b') {
11953 /* check for a decimal in disguise */
11954 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11956 /* so it must be octal */
11963 if (ckWARN(WARN_SYNTAX))
11964 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11965 "Misplaced _ in number");
11969 base = bases[shift];
11970 Base = Bases[shift];
11971 max = maxima[shift];
11973 /* read the rest of the number */
11975 /* x is used in the overflow test,
11976 b is the digit we're adding on. */
11981 /* if we don't mention it, we're done */
11985 /* _ are ignored -- but warned about if consecutive */
11987 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11988 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11989 "Misplaced _ in number");
11993 /* 8 and 9 are not octal */
11994 case '8': case '9':
11996 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12000 case '2': case '3': case '4':
12001 case '5': case '6': case '7':
12003 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12006 case '0': case '1':
12007 b = *s++ & 15; /* ASCII digit -> value of digit */
12011 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12012 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12013 /* make sure they said 0x */
12016 b = (*s++ & 7) + 9;
12018 /* Prepare to put the digit we have onto the end
12019 of the number so far. We check for overflows.
12025 x = u << shift; /* make room for the digit */
12027 if ((x >> shift) != u
12028 && !(PL_hints & HINT_NEW_BINARY)) {
12031 if (ckWARN_d(WARN_OVERFLOW))
12032 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12033 "Integer overflow in %s number",
12036 u = x | b; /* add the digit to the end */
12039 n *= nvshift[shift];
12040 /* If an NV has not enough bits in its
12041 * mantissa to represent an UV this summing of
12042 * small low-order numbers is a waste of time
12043 * (because the NV cannot preserve the
12044 * low-order bits anyway): we could just
12045 * remember when did we overflow and in the
12046 * end just multiply n by the right
12054 /* if we get here, we had success: make a scalar value from
12059 /* final misplaced underbar check */
12060 if (s[-1] == '_') {
12061 if (ckWARN(WARN_SYNTAX))
12062 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12067 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12068 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12069 "%s number > %s non-portable",
12075 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12076 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12077 "%s number > %s non-portable",
12082 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12083 sv = new_constant(start, s - start, "integer",
12085 else if (PL_hints & HINT_NEW_BINARY)
12086 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12091 handle decimal numbers.
12092 we're also sent here when we read a 0 as the first digit
12094 case '1': case '2': case '3': case '4': case '5':
12095 case '6': case '7': case '8': case '9': case '.':
12098 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12101 /* read next group of digits and _ and copy into d */
12102 while (isDIGIT(*s) || *s == '_') {
12103 /* skip underscores, checking for misplaced ones
12107 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12108 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12109 "Misplaced _ in number");
12113 /* check for end of fixed-length buffer */
12115 Perl_croak(aTHX_ number_too_long);
12116 /* if we're ok, copy the character */
12121 /* final misplaced underbar check */
12122 if (lastub && s == lastub + 1) {
12123 if (ckWARN(WARN_SYNTAX))
12124 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12127 /* read a decimal portion if there is one. avoid
12128 3..5 being interpreted as the number 3. followed
12131 if (*s == '.' && s[1] != '.') {
12136 if (ckWARN(WARN_SYNTAX))
12137 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12138 "Misplaced _ in number");
12142 /* copy, ignoring underbars, until we run out of digits.
12144 for (; isDIGIT(*s) || *s == '_'; s++) {
12145 /* fixed length buffer check */
12147 Perl_croak(aTHX_ number_too_long);
12149 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12150 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12151 "Misplaced _ in number");
12157 /* fractional part ending in underbar? */
12158 if (s[-1] == '_') {
12159 if (ckWARN(WARN_SYNTAX))
12160 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12161 "Misplaced _ in number");
12163 if (*s == '.' && isDIGIT(s[1])) {
12164 /* oops, it's really a v-string, but without the "v" */
12170 /* read exponent part, if present */
12171 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12175 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12176 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12178 /* stray preinitial _ */
12180 if (ckWARN(WARN_SYNTAX))
12181 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12182 "Misplaced _ in number");
12186 /* allow positive or negative exponent */
12187 if (*s == '+' || *s == '-')
12190 /* stray initial _ */
12192 if (ckWARN(WARN_SYNTAX))
12193 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12194 "Misplaced _ in number");
12198 /* read digits of exponent */
12199 while (isDIGIT(*s) || *s == '_') {
12202 Perl_croak(aTHX_ number_too_long);
12206 if (((lastub && s == lastub + 1) ||
12207 (!isDIGIT(s[1]) && s[1] != '_'))
12208 && ckWARN(WARN_SYNTAX))
12209 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12210 "Misplaced _ in number");
12217 /* make an sv from the string */
12221 We try to do an integer conversion first if no characters
12222 indicating "float" have been found.
12227 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12229 if (flags == IS_NUMBER_IN_UV) {
12231 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12234 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12235 if (uv <= (UV) IV_MIN)
12236 sv_setiv(sv, -(IV)uv);
12243 /* terminate the string */
12245 nv = Atof(PL_tokenbuf);
12249 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12250 (PL_hints & HINT_NEW_INTEGER) )
12251 sv = new_constant(PL_tokenbuf,
12254 (floatit ? "float" : "integer"),
12258 /* if it starts with a v, it could be a v-string */
12261 sv = newSV(5); /* preallocate storage space */
12262 s = scan_vstring(s, PL_bufend, sv);
12266 /* make the op for the constant and return */
12269 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12271 lvalp->opval = NULL;
12277 S_scan_formline(pTHX_ register char *s)
12280 register char *eol;
12282 SV * const stuff = newSVpvs("");
12283 bool needargs = FALSE;
12284 bool eofmt = FALSE;
12286 char *tokenstart = s;
12289 if (PL_madskills) {
12290 savewhite = PL_thiswhite;
12295 while (!needargs) {
12298 #ifdef PERL_STRICT_CR
12299 while (SPACE_OR_TAB(*t))
12302 while (SPACE_OR_TAB(*t) || *t == '\r')
12305 if (*t == '\n' || t == PL_bufend) {
12310 if (PL_in_eval && !PL_rsfp) {
12311 eol = (char *) memchr(s,'\n',PL_bufend-s);
12316 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12318 for (t = s; t < eol; t++) {
12319 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12321 goto enough; /* ~~ must be first line in formline */
12323 if (*t == '@' || *t == '^')
12327 sv_catpvn(stuff, s, eol-s);
12328 #ifndef PERL_STRICT_CR
12329 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12330 char *end = SvPVX(stuff) + SvCUR(stuff);
12333 SvCUR_set(stuff, SvCUR(stuff) - 1);
12343 if (PL_madskills) {
12345 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12347 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12350 s = filter_gets(PL_linestr, PL_rsfp, 0);
12352 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12354 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12356 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12357 PL_last_lop = PL_last_uni = NULL;
12366 if (SvCUR(stuff)) {
12369 PL_lex_state = LEX_NORMAL;
12370 start_force(PL_curforce);
12371 NEXTVAL_NEXTTOKE.ival = 0;
12375 PL_lex_state = LEX_FORMLINE;
12377 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12379 else if (PL_encoding)
12380 sv_recode_to_utf8(stuff, PL_encoding);
12382 start_force(PL_curforce);
12383 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12385 start_force(PL_curforce);
12386 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12390 SvREFCNT_dec(stuff);
12392 PL_lex_formbrack = 0;
12396 if (PL_madskills) {
12398 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12400 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12401 PL_thiswhite = savewhite;
12413 PL_cshlen = strlen(PL_cshname);
12415 #if defined(USE_ITHREADS)
12416 PERL_UNUSED_CONTEXT;
12422 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12425 const I32 oldsavestack_ix = PL_savestack_ix;
12426 CV* const outsidecv = PL_compcv;
12429 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12431 SAVEI32(PL_subline);
12432 save_item(PL_subname);
12433 SAVESPTR(PL_compcv);
12435 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12436 CvFLAGS(PL_compcv) |= flags;
12438 PL_subline = CopLINE(PL_curcop);
12439 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12440 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12441 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12443 return oldsavestack_ix;
12447 #pragma segment Perl_yylex
12450 Perl_yywarn(pTHX_ const char *s)
12453 PL_in_eval |= EVAL_WARNONLY;
12455 PL_in_eval &= ~EVAL_WARNONLY;
12460 Perl_yyerror(pTHX_ const char *s)
12463 const char *where = NULL;
12464 const char *context = NULL;
12467 int yychar = PL_parser->yychar;
12469 if (!yychar || (yychar == ';' && !PL_rsfp))
12471 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12472 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12473 PL_oldbufptr != PL_bufptr) {
12476 The code below is removed for NetWare because it abends/crashes on NetWare
12477 when the script has error such as not having the closing quotes like:
12478 if ($var eq "value)
12479 Checking of white spaces is anyway done in NetWare code.
12482 while (isSPACE(*PL_oldoldbufptr))
12485 context = PL_oldoldbufptr;
12486 contlen = PL_bufptr - PL_oldoldbufptr;
12488 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12489 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12492 The code below is removed for NetWare because it abends/crashes on NetWare
12493 when the script has error such as not having the closing quotes like:
12494 if ($var eq "value)
12495 Checking of white spaces is anyway done in NetWare code.
12498 while (isSPACE(*PL_oldbufptr))
12501 context = PL_oldbufptr;
12502 contlen = PL_bufptr - PL_oldbufptr;
12504 else if (yychar > 255)
12505 where = "next token ???";
12506 else if (yychar == -2) { /* YYEMPTY */
12507 if (PL_lex_state == LEX_NORMAL ||
12508 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12509 where = "at end of line";
12510 else if (PL_lex_inpat)
12511 where = "within pattern";
12513 where = "within string";
12516 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12518 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12519 else if (isPRINT_LC(yychar))
12520 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12522 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12523 where = SvPVX_const(where_sv);
12525 msg = sv_2mortal(newSVpv(s, 0));
12526 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12527 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12529 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12531 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12532 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12533 Perl_sv_catpvf(aTHX_ msg,
12534 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12535 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12538 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12542 if (PL_error_count >= 10) {
12543 if (PL_in_eval && SvCUR(ERRSV))
12544 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12545 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12547 Perl_croak(aTHX_ "%s has too many errors.\n",
12548 OutCopFILE(PL_curcop));
12551 PL_in_my_stash = NULL;
12555 #pragma segment Main
12559 S_swallow_bom(pTHX_ U8 *s)
12562 const STRLEN slen = SvCUR(PL_linestr);
12565 if (s[1] == 0xFE) {
12566 /* UTF-16 little-endian? (or UTF32-LE?) */
12567 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12568 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12569 #ifndef PERL_NO_UTF16_FILTER
12570 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12573 if (PL_bufend > (char*)s) {
12577 filter_add(utf16rev_textfilter, NULL);
12578 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12579 utf16_to_utf8_reversed(s, news,
12580 PL_bufend - (char*)s - 1,
12582 sv_setpvn(PL_linestr, (const char*)news, newlen);
12584 s = (U8*)SvPVX(PL_linestr);
12585 Copy(news, s, newlen, U8);
12589 SvUTF8_on(PL_linestr);
12590 s = (U8*)SvPVX(PL_linestr);
12592 /* FIXME - is this a general bug fix? */
12595 PL_bufend = SvPVX(PL_linestr) + newlen;
12598 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12603 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12604 #ifndef PERL_NO_UTF16_FILTER
12605 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12608 if (PL_bufend > (char *)s) {
12612 filter_add(utf16_textfilter, NULL);
12613 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12614 utf16_to_utf8(s, news,
12615 PL_bufend - (char*)s,
12617 sv_setpvn(PL_linestr, (const char*)news, newlen);
12619 SvUTF8_on(PL_linestr);
12620 s = (U8*)SvPVX(PL_linestr);
12621 PL_bufend = SvPVX(PL_linestr) + newlen;
12624 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12629 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12630 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12631 s += 3; /* UTF-8 */
12637 if (s[2] == 0xFE && s[3] == 0xFF) {
12638 /* UTF-32 big-endian */
12639 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12642 else if (s[2] == 0 && s[3] != 0) {
12645 * are a good indicator of UTF-16BE. */
12646 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12652 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12653 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12654 s += 4; /* UTF-8 */
12660 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12663 * are a good indicator of UTF-16LE. */
12664 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12673 * Restore a source filter.
12677 restore_rsfp(pTHX_ void *f)
12680 PerlIO * const fp = (PerlIO*)f;
12682 if (PL_rsfp == PerlIO_stdin())
12683 PerlIO_clearerr(PL_rsfp);
12684 else if (PL_rsfp && (PL_rsfp != fp))
12685 PerlIO_close(PL_rsfp);
12689 #ifndef PERL_NO_UTF16_FILTER
12691 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12694 const STRLEN old = SvCUR(sv);
12695 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12696 DEBUG_P(PerlIO_printf(Perl_debug_log,
12697 "utf16_textfilter(%p): %d %d (%d)\n",
12698 FPTR2DPTR(void *, utf16_textfilter),
12699 idx, maxlen, (int) count));
12703 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12704 Copy(SvPVX_const(sv), tmps, old, char);
12705 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12706 SvCUR(sv) - old, &newlen);
12707 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12709 DEBUG_P({sv_dump(sv);});
12714 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12717 const STRLEN old = SvCUR(sv);
12718 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12719 DEBUG_P(PerlIO_printf(Perl_debug_log,
12720 "utf16rev_textfilter(%p): %d %d (%d)\n",
12721 FPTR2DPTR(void *, utf16rev_textfilter),
12722 idx, maxlen, (int) count));
12726 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12727 Copy(SvPVX_const(sv), tmps, old, char);
12728 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12729 SvCUR(sv) - old, &newlen);
12730 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12732 DEBUG_P({ sv_dump(sv); });
12738 Returns a pointer to the next character after the parsed
12739 vstring, as well as updating the passed in sv.
12741 Function must be called like
12744 s = scan_vstring(s,e,sv);
12746 where s and e are the start and end of the string.
12747 The sv should already be large enough to store the vstring
12748 passed in, for performance reasons.
12753 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12756 const char *pos = s;
12757 const char *start = s;
12758 if (*pos == 'v') pos++; /* get past 'v' */
12759 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12761 if ( *pos != '.') {
12762 /* this may not be a v-string if followed by => */
12763 const char *next = pos;
12764 while (next < e && isSPACE(*next))
12766 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12767 /* return string not v-string */
12768 sv_setpvn(sv,(char *)s,pos-s);
12769 return (char *)pos;
12773 if (!isALPHA(*pos)) {
12774 U8 tmpbuf[UTF8_MAXBYTES+1];
12777 s++; /* get past 'v' */
12779 sv_setpvn(sv, "", 0);
12782 /* this is atoi() that tolerates underscores */
12785 const char *end = pos;
12787 while (--end >= s) {
12789 const UV orev = rev;
12790 rev += (*end - '0') * mult;
12792 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12793 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12794 "Integer overflow in decimal number");
12798 if (rev > 0x7FFFFFFF)
12799 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12801 /* Append native character for the rev point */
12802 tmpend = uvchr_to_utf8(tmpbuf, rev);
12803 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12804 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12806 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12812 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12816 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12824 * c-indentation-style: bsd
12825 * c-basic-offset: 4
12826 * indent-tabs-mode: t
12829 * ex: set ts=8 sts=4 sw=4 noet: