3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 /* XXX temporary backwards compatibility */
32 #define PL_lex_brackets (PL_parser->lex_brackets)
33 #define PL_lex_brackstack (PL_parser->lex_brackstack)
34 #define PL_lex_casemods (PL_parser->lex_casemods)
35 #define PL_lex_casestack (PL_parser->lex_casestack)
36 #define PL_lex_defer (PL_parser->lex_defer)
37 #define PL_lex_dojoin (PL_parser->lex_dojoin)
38 #define PL_lex_expect (PL_parser->lex_expect)
39 #define PL_lex_formbrack (PL_parser->lex_formbrack)
40 #define PL_lex_inpat (PL_parser->lex_inpat)
41 #define PL_lex_inwhat (PL_parser->lex_inwhat)
42 #define PL_lex_op (PL_parser->lex_op)
43 #define PL_lex_repl (PL_parser->lex_repl)
44 #define PL_lex_starts (PL_parser->lex_starts)
45 #define PL_lex_stuff (PL_parser->lex_stuff)
46 #define PL_multi_start (PL_parser->multi_start)
47 #define PL_multi_open (PL_parser->multi_open)
48 #define PL_multi_close (PL_parser->multi_close)
49 #define PL_pending_ident (PL_parser->pending_ident)
50 #define PL_preambled (PL_parser->preambled)
51 #define PL_sublex_info (PL_parser->sublex_info)
52 #define PL_linestr (PL_parser->linestr)
53 #define PL_expect (PL_parser->expect)
54 #define PL_copline (PL_parser->copline)
58 # define PL_endwhite (PL_parser->endwhite)
59 # define PL_faketokens (PL_parser->faketokens)
60 # define PL_lasttoke (PL_parser->lasttoke)
61 # define PL_nextwhite (PL_parser->nextwhite)
62 # define PL_realtokenstart (PL_parser->realtokenstart)
63 # define PL_skipwhite (PL_parser->skipwhite)
64 # define PL_thisclose (PL_parser->thisclose)
65 # define PL_thismad (PL_parser->thismad)
66 # define PL_thisopen (PL_parser->thisopen)
67 # define PL_thisstuff (PL_parser->thisstuff)
68 # define PL_thistoken (PL_parser->thistoken)
69 # define PL_thiswhite (PL_parser->thiswhite)
73 S_pending_ident(pTHX);
75 static const char ident_too_long[] = "Identifier too long";
76 static const char commaless_variable_list[] = "comma-less variable list";
78 static void restore_rsfp(pTHX_ void *f);
79 #ifndef PERL_NO_UTF16_FILTER
80 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
81 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
85 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
86 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
88 # define CURMAD(slot,sv)
89 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
92 #define XFAKEBRACK 128
95 #ifdef USE_UTF8_SCRIPTS
96 # define UTF (!IN_BYTES)
98 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
101 /* In variables named $^X, these are the legal values for X.
102 * 1999-02-27 mjd-perl-patch@plover.com */
103 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
105 /* On MacOS, respect nonbreaking spaces */
106 #ifdef MACOS_TRADITIONAL
107 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
109 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
112 /* LEX_* are values for PL_lex_state, the state of the lexer.
113 * They are arranged oddly so that the guard on the switch statement
114 * can get by with a single comparison (if the compiler is smart enough).
117 /* #define LEX_NOTPARSING 11 is done in perl.h. */
119 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
120 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
121 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
122 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
123 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
125 /* at end of code, eg "$x" followed by: */
126 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
127 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
129 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
130 string or after \E, $foo, etc */
131 #define LEX_INTERPCONST 2 /* NOT USED */
132 #define LEX_FORMLINE 1 /* expecting a format line */
133 #define LEX_KNOWNEXT 0 /* next token known; just return it */
137 static const char* const lex_state_names[] = {
156 #include "keywords.h"
158 /* CLINE is a macro that ensures PL_copline has a sane value */
163 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
166 # define SKIPSPACE0(s) skipspace0(s)
167 # define SKIPSPACE1(s) skipspace1(s)
168 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
169 # define PEEKSPACE(s) skipspace2(s,0)
171 # define SKIPSPACE0(s) skipspace(s)
172 # define SKIPSPACE1(s) skipspace(s)
173 # define SKIPSPACE2(s,tsv) skipspace(s)
174 # define PEEKSPACE(s) skipspace(s)
178 * Convenience functions to return different tokens and prime the
179 * lexer for the next token. They all take an argument.
181 * TOKEN : generic token (used for '(', DOLSHARP, etc)
182 * OPERATOR : generic operator
183 * AOPERATOR : assignment operator
184 * PREBLOCK : beginning the block after an if, while, foreach, ...
185 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
186 * PREREF : *EXPR where EXPR is not a simple identifier
187 * TERM : expression term
188 * LOOPX : loop exiting command (goto, last, dump, etc)
189 * FTST : file test operator
190 * FUN0 : zero-argument function
191 * FUN1 : not used, except for not, which isn't a UNIOP
192 * BOop : bitwise or or xor
194 * SHop : shift operator
195 * PWop : power operator
196 * PMop : pattern-matching operator
197 * Aop : addition-level operator
198 * Mop : multiplication-level operator
199 * Eop : equality-testing operator
200 * Rop : relational operator <= != gt
202 * Also see LOP and lop() below.
205 #ifdef DEBUGGING /* Serve -DT. */
206 # define REPORT(retval) tokereport((I32)retval)
208 # define REPORT(retval) (retval)
211 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
212 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
213 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
214 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
215 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
216 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
217 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
218 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
219 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
220 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
221 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
222 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
223 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
224 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
225 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
226 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
227 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
228 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
229 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
230 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
232 /* This bit of chicanery makes a unary function followed by
233 * a parenthesis into a function with one argument, highest precedence.
234 * The UNIDOR macro is for unary functions that can be followed by the //
235 * operator (such as C<shift // 0>).
237 #define UNI2(f,x) { \
241 PL_last_uni = PL_oldbufptr; \
242 PL_last_lop_op = f; \
244 return REPORT( (int)FUNC1 ); \
246 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
248 #define UNI(f) UNI2(f,XTERM)
249 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
251 #define UNIBRACK(f) { \
254 PL_last_uni = PL_oldbufptr; \
256 return REPORT( (int)FUNC1 ); \
258 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
261 /* grandfather return to old style */
262 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
266 /* how to interpret the yylval associated with the token */
270 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
276 static struct debug_tokens {
278 enum token_type type;
280 } const debug_tokens[] =
282 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
283 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
284 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
285 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
286 { ARROW, TOKENTYPE_NONE, "ARROW" },
287 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
288 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
289 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
290 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
291 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
292 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
293 { DO, TOKENTYPE_NONE, "DO" },
294 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
295 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
296 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
297 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
298 { ELSE, TOKENTYPE_NONE, "ELSE" },
299 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
300 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
301 { FOR, TOKENTYPE_IVAL, "FOR" },
302 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
303 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
304 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
305 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
306 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
307 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
308 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
309 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
310 { IF, TOKENTYPE_IVAL, "IF" },
311 { LABEL, TOKENTYPE_PVAL, "LABEL" },
312 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
313 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
314 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
315 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
316 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
317 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
318 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
319 { MY, TOKENTYPE_IVAL, "MY" },
320 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
321 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
322 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
323 { OROP, TOKENTYPE_IVAL, "OROP" },
324 { OROR, TOKENTYPE_NONE, "OROR" },
325 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
326 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
327 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
328 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
329 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
330 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
331 { PREINC, TOKENTYPE_NONE, "PREINC" },
332 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
333 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
334 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
335 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
336 { SUB, TOKENTYPE_NONE, "SUB" },
337 { THING, TOKENTYPE_OPVAL, "THING" },
338 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
339 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
340 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
341 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
342 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
343 { USE, TOKENTYPE_IVAL, "USE" },
344 { WHEN, TOKENTYPE_IVAL, "WHEN" },
345 { WHILE, TOKENTYPE_IVAL, "WHILE" },
346 { WORD, TOKENTYPE_OPVAL, "WORD" },
347 { 0, TOKENTYPE_NONE, NULL }
350 /* dump the returned token in rv, plus any optional arg in yylval */
353 S_tokereport(pTHX_ I32 rv)
357 const char *name = NULL;
358 enum token_type type = TOKENTYPE_NONE;
359 const struct debug_tokens *p;
360 SV* const report = newSVpvs("<== ");
362 for (p = debug_tokens; p->token; p++) {
363 if (p->token == (int)rv) {
370 Perl_sv_catpv(aTHX_ report, name);
371 else if ((char)rv > ' ' && (char)rv < '~')
372 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
374 sv_catpvs(report, "EOF");
376 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
379 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
382 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
384 case TOKENTYPE_OPNUM:
385 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
386 PL_op_name[yylval.ival]);
389 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
391 case TOKENTYPE_OPVAL:
393 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
394 PL_op_name[yylval.opval->op_type]);
395 if (yylval.opval->op_type == OP_CONST) {
396 Perl_sv_catpvf(aTHX_ report, " %s",
397 SvPEEK(cSVOPx_sv(yylval.opval)));
402 sv_catpvs(report, "(opval=null)");
405 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
411 /* print the buffer with suitable escapes */
414 S_printbuf(pTHX_ const char* fmt, const char* s)
416 SV* const tmp = newSVpvs("");
417 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
426 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
427 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
431 S_ao(pTHX_ int toketype)
434 if (*PL_bufptr == '=') {
436 if (toketype == ANDAND)
437 yylval.ival = OP_ANDASSIGN;
438 else if (toketype == OROR)
439 yylval.ival = OP_ORASSIGN;
440 else if (toketype == DORDOR)
441 yylval.ival = OP_DORASSIGN;
449 * When Perl expects an operator and finds something else, no_op
450 * prints the warning. It always prints "<something> found where
451 * operator expected. It prints "Missing semicolon on previous line?"
452 * if the surprise occurs at the start of the line. "do you need to
453 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
454 * where the compiler doesn't know if foo is a method call or a function.
455 * It prints "Missing operator before end of line" if there's nothing
456 * after the missing operator, or "... before <...>" if there is something
457 * after the missing operator.
461 S_no_op(pTHX_ const char *what, char *s)
464 char * const oldbp = PL_bufptr;
465 const bool is_first = (PL_oldbufptr == PL_linestart);
471 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
472 if (ckWARN_d(WARN_SYNTAX)) {
474 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
475 "\t(Missing semicolon on previous line?)\n");
476 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
478 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
480 if (t < PL_bufptr && isSPACE(*t))
481 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
482 "\t(Do you need to predeclare %.*s?)\n",
483 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
487 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
488 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
496 * Complain about missing quote/regexp/heredoc terminator.
497 * If it's called with NULL then it cauterizes the line buffer.
498 * If we're in a delimited string and the delimiter is a control
499 * character, it's reformatted into a two-char sequence like ^C.
504 S_missingterm(pTHX_ char *s)
510 char * const nl = strrchr(s,'\n');
516 iscntrl(PL_multi_close)
518 PL_multi_close < 32 || PL_multi_close == 127
522 tmpbuf[1] = (char)toCTRL(PL_multi_close);
527 *tmpbuf = (char)PL_multi_close;
531 q = strchr(s,'"') ? '\'' : '"';
532 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
535 #define FEATURE_IS_ENABLED(name) \
536 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
537 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
539 * S_feature_is_enabled
540 * Check whether the named feature is enabled.
543 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
546 HV * const hinthv = GvHV(PL_hintgv);
547 char he_name[32] = "feature_";
548 (void) my_strlcpy(&he_name[8], name, 24);
550 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
558 Perl_deprecate(pTHX_ const char *s)
560 if (ckWARN(WARN_DEPRECATED))
561 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
565 Perl_deprecate_old(pTHX_ const char *s)
567 /* This function should NOT be called for any new deprecated warnings */
568 /* Use Perl_deprecate instead */
570 /* It is here to maintain backward compatibility with the pre-5.8 */
571 /* warnings category hierarchy. The "deprecated" category used to */
572 /* live under the "syntax" category. It is now a top-level category */
573 /* in its own right. */
575 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
576 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
577 "Use of %s is deprecated", s);
581 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
582 * utf16-to-utf8-reversed.
585 #ifdef PERL_CR_FILTER
589 register const char *s = SvPVX_const(sv);
590 register const char * const e = s + SvCUR(sv);
591 /* outer loop optimized to do nothing if there are no CR-LFs */
593 if (*s++ == '\r' && *s == '\n') {
594 /* hit a CR-LF, need to copy the rest */
595 register char *d = s - 1;
598 if (*s == '\r' && s[1] == '\n')
609 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
611 const I32 count = FILTER_READ(idx+1, sv, maxlen);
612 if (count > 0 && !maxlen)
622 * Create a parser object and initialise its parser and lexer fields
626 Perl_lex_start(pTHX_ SV *line)
629 const char *s = NULL;
633 /* create and initialise a parser */
635 Newxz(parser, 1, yy_parser);
636 parser->old_parser = PL_parser;
639 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
640 parser->ps = parser->stack;
641 parser->stack_size = YYINITDEPTH;
643 parser->stack->state = 0;
644 parser->yyerrstatus = 0;
645 parser->yychar = YYEMPTY; /* Cause a token to be read. */
647 /* on scope exit, free this parser and restore any outer one */
650 /* initialise lexer state */
652 SAVEI8(PL_lex_state);
654 if (PL_lex_state == LEX_KNOWNEXT) {
655 I32 toke = parser->old_parser->lasttoke;
656 while (--toke >= 0) {
657 SAVEI32(PL_nexttoke[toke].next_type);
658 SAVEVPTR(PL_nexttoke[toke].next_val);
660 SAVEVPTR(PL_nexttoke[toke].next_mad);
663 SAVEI32(PL_curforce);
666 if (PL_lex_state == LEX_KNOWNEXT) {
667 I32 toke = PL_nexttoke;
668 while (--toke >= 0) {
669 SAVEI32(PL_nexttype[toke]);
670 SAVEVPTR(PL_nextval[toke]);
672 SAVEI32(PL_nexttoke);
675 SAVECOPLINE(PL_curcop);
678 SAVEPPTR(PL_oldbufptr);
679 SAVEPPTR(PL_oldoldbufptr);
680 SAVEPPTR(PL_last_lop);
681 SAVEPPTR(PL_last_uni);
682 SAVEPPTR(PL_linestart);
683 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
685 parser->copline = NOLINE;
686 PL_lex_state = LEX_NORMAL;
687 parser->expect = XSTATE;
688 Newx(parser->lex_brackstack, 120, char);
689 Newx(parser->lex_casestack, 12, char);
690 *parser->lex_casestack = '\0';
696 s = SvPV_const(line, len);
702 parser->linestr = newSVpvs("\n;");
703 } else if (SvREADONLY(line) || s[len-1] != ';') {
704 parser->linestr = newSVsv(line);
706 sv_catpvs(parser->linestr, "\n;");
709 SvREFCNT_inc_simple_void_NN(line);
710 parser->linestr = line;
712 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(parser->linestr);
713 PL_bufend = PL_bufptr + SvCUR(parser->linestr);
714 PL_last_lop = PL_last_uni = NULL;
719 /* delete a parser object */
722 Perl_parser_free(pTHX_ const yy_parser *parser)
724 SvREFCNT_dec(parser->linestr);
726 Safefree(parser->stack);
727 Safefree(parser->lex_brackstack);
728 Safefree(parser->lex_casestack);
729 PL_parser = parser->old_parser;
736 * Finalizer for lexing operations. Must be called when the parser is
737 * done with the lexer.
744 PL_doextract = FALSE;
749 * This subroutine has nothing to do with tilting, whether at windmills
750 * or pinball tables. Its name is short for "increment line". It
751 * increments the current line number in CopLINE(PL_curcop) and checks
752 * to see whether the line starts with a comment of the form
753 * # line 500 "foo.pm"
754 * If so, it sets the current line number and file to the values in the comment.
758 S_incline(pTHX_ const char *s)
765 CopLINE_inc(PL_curcop);
768 while (SPACE_OR_TAB(*s))
770 if (strnEQ(s, "line", 4))
774 if (SPACE_OR_TAB(*s))
778 while (SPACE_OR_TAB(*s))
786 while (SPACE_OR_TAB(*s))
788 if (*s == '"' && (t = strchr(s+1, '"'))) {
798 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
800 if (*e != '\n' && *e != '\0')
801 return; /* false alarm */
804 const STRLEN len = t - s;
806 const char * const cf = CopFILE(PL_curcop);
807 STRLEN tmplen = cf ? strlen(cf) : 0;
808 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
809 /* must copy *{"::_<(eval N)[oldfilename:L]"}
810 * to *{"::_<newfilename"} */
811 /* However, the long form of evals is only turned on by the
812 debugger - usually they're "(eval %lu)" */
816 STRLEN tmplen2 = len;
817 if (tmplen + 2 <= sizeof smallbuf)
820 Newx(tmpbuf, tmplen + 2, char);
823 memcpy(tmpbuf + 2, cf, tmplen);
825 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
830 if (tmplen2 + 2 <= sizeof smallbuf)
833 Newx(tmpbuf2, tmplen2 + 2, char);
835 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
836 /* Either they malloc'd it, or we malloc'd it,
837 so no prefix is present in ours. */
842 memcpy(tmpbuf2 + 2, s, tmplen2);
845 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
847 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
848 /* adjust ${"::_<newfilename"} to store the new file name */
849 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
850 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
851 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
854 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
856 if (tmpbuf != smallbuf) Safefree(tmpbuf);
859 CopFILE_free(PL_curcop);
860 CopFILE_setn(PL_curcop, s, len);
862 CopLINE_set(PL_curcop, atoi(n)-1);
866 /* skip space before PL_thistoken */
869 S_skipspace0(pTHX_ register char *s)
876 PL_thiswhite = newSVpvs("");
877 sv_catsv(PL_thiswhite, PL_skipwhite);
878 sv_free(PL_skipwhite);
881 PL_realtokenstart = s - SvPVX(PL_linestr);
885 /* skip space after PL_thistoken */
888 S_skipspace1(pTHX_ register char *s)
890 const char *start = s;
891 I32 startoff = start - SvPVX(PL_linestr);
896 start = SvPVX(PL_linestr) + startoff;
897 if (!PL_thistoken && PL_realtokenstart >= 0) {
898 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
899 PL_thistoken = newSVpvn(tstart, start - tstart);
901 PL_realtokenstart = -1;
904 PL_nextwhite = newSVpvs("");
905 sv_catsv(PL_nextwhite, PL_skipwhite);
906 sv_free(PL_skipwhite);
913 S_skipspace2(pTHX_ register char *s, SV **svp)
916 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
917 const I32 startoff = s - SvPVX(PL_linestr);
920 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
921 if (!PL_madskills || !svp)
923 start = SvPVX(PL_linestr) + startoff;
924 if (!PL_thistoken && PL_realtokenstart >= 0) {
925 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
926 PL_thistoken = newSVpvn(tstart, start - tstart);
927 PL_realtokenstart = -1;
932 sv_setsv(*svp, PL_skipwhite);
933 sv_free(PL_skipwhite);
942 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
944 AV *av = CopFILEAVx(PL_curcop);
946 SV * const sv = newSV_type(SVt_PVMG);
948 sv_setsv(sv, orig_sv);
950 sv_setpvn(sv, buf, len);
953 av_store(av, (I32)CopLINE(PL_curcop), sv);
959 * Called to gobble the appropriate amount and type of whitespace.
960 * Skips comments as well.
964 S_skipspace(pTHX_ register char *s)
969 int startoff = s - SvPVX(PL_linestr);
972 sv_free(PL_skipwhite);
977 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
978 while (s < PL_bufend && SPACE_OR_TAB(*s))
988 SSize_t oldprevlen, oldoldprevlen;
989 SSize_t oldloplen = 0, oldunilen = 0;
990 while (s < PL_bufend && isSPACE(*s)) {
991 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
996 if (s < PL_bufend && *s == '#') {
997 while (s < PL_bufend && *s != '\n')
1001 if (PL_in_eval && !PL_rsfp) {
1008 /* only continue to recharge the buffer if we're at the end
1009 * of the buffer, we're not reading from a source filter, and
1010 * we're in normal lexing mode
1012 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1013 PL_lex_state == LEX_FORMLINE)
1020 /* try to recharge the buffer */
1022 curoff = s - SvPVX(PL_linestr);
1025 if ((s = filter_gets(PL_linestr, PL_rsfp,
1026 (prevlen = SvCUR(PL_linestr)))) == NULL)
1029 if (PL_madskills && curoff != startoff) {
1031 PL_skipwhite = newSVpvs("");
1032 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1036 /* mustn't throw out old stuff yet if madpropping */
1037 SvCUR(PL_linestr) = curoff;
1038 s = SvPVX(PL_linestr) + curoff;
1040 if (curoff && s[-1] == '\n')
1044 /* end of file. Add on the -p or -n magic */
1045 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1048 sv_catpvs(PL_linestr,
1049 ";}continue{print or die qq(-p destination: $!\\n);}");
1051 sv_setpvs(PL_linestr,
1052 ";}continue{print or die qq(-p destination: $!\\n);}");
1054 PL_minus_n = PL_minus_p = 0;
1056 else if (PL_minus_n) {
1058 sv_catpvn(PL_linestr, ";}", 2);
1060 sv_setpvn(PL_linestr, ";}", 2);
1066 sv_catpvn(PL_linestr,";", 1);
1068 sv_setpvn(PL_linestr,";", 1);
1071 /* reset variables for next time we lex */
1072 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1078 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1079 PL_last_lop = PL_last_uni = NULL;
1081 /* Close the filehandle. Could be from -P preprocessor,
1082 * STDIN, or a regular file. If we were reading code from
1083 * STDIN (because the commandline held no -e or filename)
1084 * then we don't close it, we reset it so the code can
1085 * read from STDIN too.
1088 if (PL_preprocess && !PL_in_eval)
1089 (void)PerlProc_pclose(PL_rsfp);
1090 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1091 PerlIO_clearerr(PL_rsfp);
1093 (void)PerlIO_close(PL_rsfp);
1098 /* not at end of file, so we only read another line */
1099 /* make corresponding updates to old pointers, for yyerror() */
1100 oldprevlen = PL_oldbufptr - PL_bufend;
1101 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1103 oldunilen = PL_last_uni - PL_bufend;
1105 oldloplen = PL_last_lop - PL_bufend;
1106 PL_linestart = PL_bufptr = s + prevlen;
1107 PL_bufend = s + SvCUR(PL_linestr);
1109 PL_oldbufptr = s + oldprevlen;
1110 PL_oldoldbufptr = s + oldoldprevlen;
1112 PL_last_uni = s + oldunilen;
1114 PL_last_lop = s + oldloplen;
1117 /* debugger active and we're not compiling the debugger code,
1118 * so store the line into the debugger's array of lines
1120 if (PERLDB_LINE && PL_curstash != PL_debstash)
1121 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1128 PL_skipwhite = newSVpvs("");
1129 curoff = s - SvPVX(PL_linestr);
1130 if (curoff - startoff)
1131 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1140 * Check the unary operators to ensure there's no ambiguity in how they're
1141 * used. An ambiguous piece of code would be:
1143 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1144 * the +5 is its argument.
1154 if (PL_oldoldbufptr != PL_last_uni)
1156 while (isSPACE(*PL_last_uni))
1159 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1161 if ((t = strchr(s, '(')) && t < PL_bufptr)
1164 if (ckWARN_d(WARN_AMBIGUOUS)){
1165 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1166 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1167 (int)(s - PL_last_uni), PL_last_uni);
1172 * LOP : macro to build a list operator. Its behaviour has been replaced
1173 * with a subroutine, S_lop() for which LOP is just another name.
1176 #define LOP(f,x) return lop(f,x,s)
1180 * Build a list operator (or something that might be one). The rules:
1181 * - if we have a next token, then it's a list operator [why?]
1182 * - if the next thing is an opening paren, then it's a function
1183 * - else it's a list operator
1187 S_lop(pTHX_ I32 f, int x, char *s)
1194 PL_last_lop = PL_oldbufptr;
1195 PL_last_lop_op = (OPCODE)f;
1198 return REPORT(LSTOP);
1201 return REPORT(LSTOP);
1204 return REPORT(FUNC);
1207 return REPORT(FUNC);
1209 return REPORT(LSTOP);
1215 * Sets up for an eventual force_next(). start_force(0) basically does
1216 * an unshift, while start_force(-1) does a push. yylex removes items
1221 S_start_force(pTHX_ int where)
1225 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1226 where = PL_lasttoke;
1227 assert(PL_curforce < 0 || PL_curforce == where);
1228 if (PL_curforce != where) {
1229 for (i = PL_lasttoke; i > where; --i) {
1230 PL_nexttoke[i] = PL_nexttoke[i-1];
1234 if (PL_curforce < 0) /* in case of duplicate start_force() */
1235 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1236 PL_curforce = where;
1239 curmad('^', newSVpvs(""));
1240 CURMAD('_', PL_nextwhite);
1245 S_curmad(pTHX_ char slot, SV *sv)
1251 if (PL_curforce < 0)
1252 where = &PL_thismad;
1254 where = &PL_nexttoke[PL_curforce].next_mad;
1257 sv_setpvn(sv, "", 0);
1260 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1262 else if (PL_encoding) {
1263 sv_recode_to_utf8(sv, PL_encoding);
1268 /* keep a slot open for the head of the list? */
1269 if (slot != '_' && *where && (*where)->mad_key == '^') {
1270 (*where)->mad_key = slot;
1271 sv_free((*where)->mad_val);
1272 (*where)->mad_val = (void*)sv;
1275 addmad(newMADsv(slot, sv), where, 0);
1278 # define start_force(where) NOOP
1279 # define curmad(slot, sv) NOOP
1284 * When the lexer realizes it knows the next token (for instance,
1285 * it is reordering tokens for the parser) then it can call S_force_next
1286 * to know what token to return the next time the lexer is called. Caller
1287 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1288 * and possibly PL_expect to ensure the lexer handles the token correctly.
1292 S_force_next(pTHX_ I32 type)
1296 if (PL_curforce < 0)
1297 start_force(PL_lasttoke);
1298 PL_nexttoke[PL_curforce].next_type = type;
1299 if (PL_lex_state != LEX_KNOWNEXT)
1300 PL_lex_defer = PL_lex_state;
1301 PL_lex_state = LEX_KNOWNEXT;
1302 PL_lex_expect = PL_expect;
1305 PL_nexttype[PL_nexttoke] = type;
1307 if (PL_lex_state != LEX_KNOWNEXT) {
1308 PL_lex_defer = PL_lex_state;
1309 PL_lex_expect = PL_expect;
1310 PL_lex_state = LEX_KNOWNEXT;
1316 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1319 SV * const sv = newSVpvn(start,len);
1320 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1327 * When the lexer knows the next thing is a word (for instance, it has
1328 * just seen -> and it knows that the next char is a word char, then
1329 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1333 * char *start : buffer position (must be within PL_linestr)
1334 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1335 * int check_keyword : if true, Perl checks to make sure the word isn't
1336 * a keyword (do this if the word is a label, e.g. goto FOO)
1337 * int allow_pack : if true, : characters will also be allowed (require,
1338 * use, etc. do this)
1339 * int allow_initial_tick : used by the "sub" lexer only.
1343 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1349 start = SKIPSPACE1(start);
1351 if (isIDFIRST_lazy_if(s,UTF) ||
1352 (allow_pack && *s == ':') ||
1353 (allow_initial_tick && *s == '\'') )
1355 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1356 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1358 start_force(PL_curforce);
1360 curmad('X', newSVpvn(start,s-start));
1361 if (token == METHOD) {
1366 PL_expect = XOPERATOR;
1370 curmad('g', newSVpvs( "forced" ));
1371 NEXTVAL_NEXTTOKE.opval
1372 = (OP*)newSVOP(OP_CONST,0,
1373 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1374 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1382 * Called when the lexer wants $foo *foo &foo etc, but the program
1383 * text only contains the "foo" portion. The first argument is a pointer
1384 * to the "foo", and the second argument is the type symbol to prefix.
1385 * Forces the next token to be a "WORD".
1386 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1390 S_force_ident(pTHX_ register const char *s, int kind)
1394 const STRLEN len = strlen(s);
1395 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1396 start_force(PL_curforce);
1397 NEXTVAL_NEXTTOKE.opval = o;
1400 o->op_private = OPpCONST_ENTERED;
1401 /* XXX see note in pp_entereval() for why we forgo typo
1402 warnings if the symbol must be introduced in an eval.
1404 gv_fetchpvn_flags(s, len,
1405 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1407 kind == '$' ? SVt_PV :
1408 kind == '@' ? SVt_PVAV :
1409 kind == '%' ? SVt_PVHV :
1417 Perl_str_to_version(pTHX_ SV *sv)
1422 const char *start = SvPV_const(sv,len);
1423 const char * const end = start + len;
1424 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1425 while (start < end) {
1429 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1434 retval += ((NV)n)/nshift;
1443 * Forces the next token to be a version number.
1444 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1445 * and if "guessing" is TRUE, then no new token is created (and the caller
1446 * must use an alternative parsing method).
1450 S_force_version(pTHX_ char *s, int guessing)
1456 I32 startoff = s - SvPVX(PL_linestr);
1465 while (isDIGIT(*d) || *d == '_' || *d == '.')
1469 start_force(PL_curforce);
1470 curmad('X', newSVpvn(s,d-s));
1473 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1475 s = scan_num(s, &yylval);
1476 version = yylval.opval;
1477 ver = cSVOPx(version)->op_sv;
1478 if (SvPOK(ver) && !SvNIOK(ver)) {
1479 SvUPGRADE(ver, SVt_PVNV);
1480 SvNV_set(ver, str_to_version(ver));
1481 SvNOK_on(ver); /* hint that it is a version */
1484 else if (guessing) {
1487 sv_free(PL_nextwhite); /* let next token collect whitespace */
1489 s = SvPVX(PL_linestr) + startoff;
1497 if (PL_madskills && !version) {
1498 sv_free(PL_nextwhite); /* let next token collect whitespace */
1500 s = SvPVX(PL_linestr) + startoff;
1503 /* NOTE: The parser sees the package name and the VERSION swapped */
1504 start_force(PL_curforce);
1505 NEXTVAL_NEXTTOKE.opval = version;
1513 * Tokenize a quoted string passed in as an SV. It finds the next
1514 * chunk, up to end of string or a backslash. It may make a new
1515 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1520 S_tokeq(pTHX_ SV *sv)
1524 register char *send;
1532 s = SvPV_force(sv, len);
1533 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1536 while (s < send && *s != '\\')
1541 if ( PL_hints & HINT_NEW_STRING ) {
1542 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1548 if (s + 1 < send && (s[1] == '\\'))
1549 s++; /* all that, just for this */
1554 SvCUR_set(sv, d - SvPVX_const(sv));
1556 if ( PL_hints & HINT_NEW_STRING )
1557 return new_constant(NULL, 0, "q", sv, pv, "q");
1562 * Now come three functions related to double-quote context,
1563 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1564 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1565 * interact with PL_lex_state, and create fake ( ... ) argument lists
1566 * to handle functions and concatenation.
1567 * They assume that whoever calls them will be setting up a fake
1568 * join call, because each subthing puts a ',' after it. This lets
1571 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1573 * (I'm not sure whether the spurious commas at the end of lcfirst's
1574 * arguments and join's arguments are created or not).
1579 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1581 * Pattern matching will set PL_lex_op to the pattern-matching op to
1582 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1584 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1586 * Everything else becomes a FUNC.
1588 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1589 * had an OP_CONST or OP_READLINE). This just sets us up for a
1590 * call to S_sublex_push().
1594 S_sublex_start(pTHX)
1597 register const I32 op_type = yylval.ival;
1599 if (op_type == OP_NULL) {
1600 yylval.opval = PL_lex_op;
1604 if (op_type == OP_CONST || op_type == OP_READLINE) {
1605 SV *sv = tokeq(PL_lex_stuff);
1607 if (SvTYPE(sv) == SVt_PVIV) {
1608 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1610 const char * const p = SvPV_const(sv, len);
1611 SV * const nsv = newSVpvn(p, len);
1617 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1618 PL_lex_stuff = NULL;
1619 /* Allow <FH> // "foo" */
1620 if (op_type == OP_READLINE)
1621 PL_expect = XTERMORDORDOR;
1624 else if (op_type == OP_BACKTICK && PL_lex_op) {
1625 /* readpipe() vas overriden */
1626 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1627 yylval.opval = PL_lex_op;
1629 PL_lex_stuff = NULL;
1633 PL_sublex_info.super_state = PL_lex_state;
1634 PL_sublex_info.sub_inwhat = (U16)op_type;
1635 PL_sublex_info.sub_op = PL_lex_op;
1636 PL_lex_state = LEX_INTERPPUSH;
1640 yylval.opval = PL_lex_op;
1650 * Create a new scope to save the lexing state. The scope will be
1651 * ended in S_sublex_done. Returns a '(', starting the function arguments
1652 * to the uc, lc, etc. found before.
1653 * Sets PL_lex_state to LEX_INTERPCONCAT.
1662 PL_lex_state = PL_sublex_info.super_state;
1663 SAVEBOOL(PL_lex_dojoin);
1664 SAVEI32(PL_lex_brackets);
1665 SAVEI32(PL_lex_casemods);
1666 SAVEI32(PL_lex_starts);
1667 SAVEI8(PL_lex_state);
1668 SAVEVPTR(PL_lex_inpat);
1669 SAVEI16(PL_lex_inwhat);
1670 SAVECOPLINE(PL_curcop);
1671 SAVEPPTR(PL_bufptr);
1672 SAVEPPTR(PL_bufend);
1673 SAVEPPTR(PL_oldbufptr);
1674 SAVEPPTR(PL_oldoldbufptr);
1675 SAVEPPTR(PL_last_lop);
1676 SAVEPPTR(PL_last_uni);
1677 SAVEPPTR(PL_linestart);
1678 SAVESPTR(PL_linestr);
1679 SAVEGENERICPV(PL_lex_brackstack);
1680 SAVEGENERICPV(PL_lex_casestack);
1682 PL_linestr = PL_lex_stuff;
1683 PL_lex_stuff = NULL;
1685 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1686 = SvPVX(PL_linestr);
1687 PL_bufend += SvCUR(PL_linestr);
1688 PL_last_lop = PL_last_uni = NULL;
1689 SAVEFREESV(PL_linestr);
1691 PL_lex_dojoin = FALSE;
1692 PL_lex_brackets = 0;
1693 Newx(PL_lex_brackstack, 120, char);
1694 Newx(PL_lex_casestack, 12, char);
1695 PL_lex_casemods = 0;
1696 *PL_lex_casestack = '\0';
1698 PL_lex_state = LEX_INTERPCONCAT;
1699 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1701 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1702 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1703 PL_lex_inpat = PL_sublex_info.sub_op;
1705 PL_lex_inpat = NULL;
1712 * Restores lexer state after a S_sublex_push.
1719 if (!PL_lex_starts++) {
1720 SV * const sv = newSVpvs("");
1721 if (SvUTF8(PL_linestr))
1723 PL_expect = XOPERATOR;
1724 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1728 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1729 PL_lex_state = LEX_INTERPCASEMOD;
1733 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1734 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1735 PL_linestr = PL_lex_repl;
1737 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1738 PL_bufend += SvCUR(PL_linestr);
1739 PL_last_lop = PL_last_uni = NULL;
1740 SAVEFREESV(PL_linestr);
1741 PL_lex_dojoin = FALSE;
1742 PL_lex_brackets = 0;
1743 PL_lex_casemods = 0;
1744 *PL_lex_casestack = '\0';
1746 if (SvEVALED(PL_lex_repl)) {
1747 PL_lex_state = LEX_INTERPNORMAL;
1749 /* we don't clear PL_lex_repl here, so that we can check later
1750 whether this is an evalled subst; that means we rely on the
1751 logic to ensure sublex_done() is called again only via the
1752 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1755 PL_lex_state = LEX_INTERPCONCAT;
1765 PL_endwhite = newSVpvs("");
1766 sv_catsv(PL_endwhite, PL_thiswhite);
1770 sv_setpvn(PL_thistoken,"",0);
1772 PL_realtokenstart = -1;
1776 PL_bufend = SvPVX(PL_linestr);
1777 PL_bufend += SvCUR(PL_linestr);
1778 PL_expect = XOPERATOR;
1779 PL_sublex_info.sub_inwhat = 0;
1787 Extracts a pattern, double-quoted string, or transliteration. This
1790 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1791 processing a pattern (PL_lex_inpat is true), a transliteration
1792 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1794 Returns a pointer to the character scanned up to. If this is
1795 advanced from the start pointer supplied (i.e. if anything was
1796 successfully parsed), will leave an OP for the substring scanned
1797 in yylval. Caller must intuit reason for not parsing further
1798 by looking at the next characters herself.
1802 double-quoted style: \r and \n
1803 regexp special ones: \D \s
1806 case and quoting: \U \Q \E
1807 stops on @ and $, but not for $ as tail anchor
1809 In transliterations:
1810 characters are VERY literal, except for - not at the start or end
1811 of the string, which indicates a range. If the range is in bytes,
1812 scan_const expands the range to the full set of intermediate
1813 characters. If the range is in utf8, the hyphen is replaced with
1814 a certain range mark which will be handled by pmtrans() in op.c.
1816 In double-quoted strings:
1818 double-quoted style: \r and \n
1820 deprecated backrefs: \1 (in substitution replacements)
1821 case and quoting: \U \Q \E
1824 scan_const does *not* construct ops to handle interpolated strings.
1825 It stops processing as soon as it finds an embedded $ or @ variable
1826 and leaves it to the caller to work out what's going on.
1828 embedded arrays (whether in pattern or not) could be:
1829 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1831 $ in double-quoted strings must be the symbol of an embedded scalar.
1833 $ in pattern could be $foo or could be tail anchor. Assumption:
1834 it's a tail anchor if $ is the last thing in the string, or if it's
1835 followed by one of "()| \r\n\t"
1837 \1 (backreferences) are turned into $1
1839 The structure of the code is
1840 while (there's a character to process) {
1841 handle transliteration ranges
1842 skip regexp comments /(?#comment)/ and codes /(?{code})/
1843 skip #-initiated comments in //x patterns
1844 check for embedded arrays
1845 check for embedded scalars
1847 leave intact backslashes from leaveit (below)
1848 deprecate \1 in substitution replacements
1849 handle string-changing backslashes \l \U \Q \E, etc.
1850 switch (what was escaped) {
1851 handle \- in a transliteration (becomes a literal -)
1852 handle \132 (octal characters)
1853 handle \x15 and \x{1234} (hex characters)
1854 handle \N{name} (named characters)
1855 handle \cV (control characters)
1856 handle printf-style backslashes (\f, \r, \n, etc)
1858 } (end if backslash)
1859 } (end while character to read)
1864 S_scan_const(pTHX_ char *start)
1867 register char *send = PL_bufend; /* end of the constant */
1868 SV *sv = newSV(send - start); /* sv for the constant */
1869 register char *s = start; /* start of the constant */
1870 register char *d = SvPVX(sv); /* destination for copies */
1871 bool dorange = FALSE; /* are we in a translit range? */
1872 bool didrange = FALSE; /* did we just finish a range? */
1873 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1874 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1877 UV literal_endpoint = 0;
1878 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1881 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1882 /* If we are doing a trans and we know we want UTF8 set expectation */
1883 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1884 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1888 while (s < send || dorange) {
1889 /* get transliterations out of the way (they're most literal) */
1890 if (PL_lex_inwhat == OP_TRANS) {
1891 /* expand a range A-Z to the full set of characters. AIE! */
1893 I32 i; /* current expanded character */
1894 I32 min; /* first character in range */
1895 I32 max; /* last character in range */
1906 char * const c = (char*)utf8_hop((U8*)d, -1);
1910 *c = (char)UTF_TO_NATIVE(0xff);
1911 /* mark the range as done, and continue */
1917 i = d - SvPVX_const(sv); /* remember current offset */
1920 SvLEN(sv) + (has_utf8 ?
1921 (512 - UTF_CONTINUATION_MARK +
1924 /* How many two-byte within 0..255: 128 in UTF-8,
1925 * 96 in UTF-8-mod. */
1927 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1929 d = SvPVX(sv) + i; /* refresh d after realloc */
1933 for (j = 0; j <= 1; j++) {
1934 char * const c = (char*)utf8_hop((U8*)d, -1);
1935 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1941 max = (U8)0xff; /* only to \xff */
1942 uvmax = uv; /* \x{100} to uvmax */
1944 d = c; /* eat endpoint chars */
1949 d -= 2; /* eat the first char and the - */
1950 min = (U8)*d; /* first char in range */
1951 max = (U8)d[1]; /* last char in range */
1958 "Invalid range \"%c-%c\" in transliteration operator",
1959 (char)min, (char)max);
1963 if (literal_endpoint == 2 &&
1964 ((isLOWER(min) && isLOWER(max)) ||
1965 (isUPPER(min) && isUPPER(max)))) {
1967 for (i = min; i <= max; i++)
1969 *d++ = NATIVE_TO_NEED(has_utf8,i);
1971 for (i = min; i <= max; i++)
1973 *d++ = NATIVE_TO_NEED(has_utf8,i);
1978 for (i = min; i <= max; i++)
1981 const U8 ch = (U8)NATIVE_TO_UTF(i);
1982 if (UNI_IS_INVARIANT(ch))
1985 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1986 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1995 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1997 *d++ = (char)UTF_TO_NATIVE(0xff);
1999 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2003 /* mark the range as done, and continue */
2007 literal_endpoint = 0;
2012 /* range begins (ignore - as first or last char) */
2013 else if (*s == '-' && s+1 < send && s != start) {
2015 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2022 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2032 literal_endpoint = 0;
2033 native_range = TRUE;
2038 /* if we get here, we're not doing a transliteration */
2040 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2041 except for the last char, which will be done separately. */
2042 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2044 while (s+1 < send && *s != ')')
2045 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2047 else if (s[2] == '{' /* This should match regcomp.c */
2048 || (s[2] == '?' && s[3] == '{'))
2051 char *regparse = s + (s[2] == '{' ? 3 : 4);
2054 while (count && (c = *regparse)) {
2055 if (c == '\\' && regparse[1])
2063 if (*regparse != ')')
2064 regparse--; /* Leave one char for continuation. */
2065 while (s < regparse)
2066 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2070 /* likewise skip #-initiated comments in //x patterns */
2071 else if (*s == '#' && PL_lex_inpat &&
2072 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2073 while (s+1 < send && *s != '\n')
2074 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2077 /* check for embedded arrays
2078 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2080 else if (*s == '@' && s[1]) {
2081 if (isALNUM_lazy_if(s+1,UTF))
2083 if (strchr(":'{$", s[1]))
2085 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2086 break; /* in regexp, neither @+ nor @- are interpolated */
2089 /* check for embedded scalars. only stop if we're sure it's a
2092 else if (*s == '$') {
2093 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2095 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2096 break; /* in regexp, $ might be tail anchor */
2099 /* End of else if chain - OP_TRANS rejoin rest */
2102 if (*s == '\\' && s+1 < send) {
2105 /* deprecate \1 in strings and substitution replacements */
2106 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2107 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2109 if (ckWARN(WARN_SYNTAX))
2110 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2115 /* string-change backslash escapes */
2116 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2120 /* skip any other backslash escapes in a pattern */
2121 else if (PL_lex_inpat) {
2122 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2123 goto default_action;
2126 /* if we get here, it's either a quoted -, or a digit */
2129 /* quoted - in transliterations */
2131 if (PL_lex_inwhat == OP_TRANS) {
2138 if ((isALPHA(*s) || isDIGIT(*s)) &&
2140 Perl_warner(aTHX_ packWARN(WARN_MISC),
2141 "Unrecognized escape \\%c passed through",
2143 /* default action is to copy the quoted character */
2144 goto default_action;
2147 /* \132 indicates an octal constant */
2148 case '0': case '1': case '2': case '3':
2149 case '4': case '5': case '6': case '7':
2153 uv = grok_oct(s, &len, &flags, NULL);
2156 goto NUM_ESCAPE_INSERT;
2158 /* \x24 indicates a hex constant */
2162 char* const e = strchr(s, '}');
2163 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2164 PERL_SCAN_DISALLOW_PREFIX;
2169 yyerror("Missing right brace on \\x{}");
2173 uv = grok_hex(s, &len, &flags, NULL);
2179 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2180 uv = grok_hex(s, &len, &flags, NULL);
2186 /* Insert oct or hex escaped character.
2187 * There will always enough room in sv since such
2188 * escapes will be longer than any UTF-8 sequence
2189 * they can end up as. */
2191 /* We need to map to chars to ASCII before doing the tests
2194 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2195 if (!has_utf8 && uv > 255) {
2196 /* Might need to recode whatever we have
2197 * accumulated so far if it contains any
2200 * (Can't we keep track of that and avoid
2201 * this rescan? --jhi)
2205 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2206 if (!NATIVE_IS_INVARIANT(*c)) {
2211 const STRLEN offset = d - SvPVX_const(sv);
2213 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2217 while (src >= (const U8 *)SvPVX_const(sv)) {
2218 if (!NATIVE_IS_INVARIANT(*src)) {
2219 const U8 ch = NATIVE_TO_ASCII(*src);
2220 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2221 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2231 if (has_utf8 || uv > 255) {
2232 d = (char*)uvchr_to_utf8((U8*)d, uv);
2234 if (PL_lex_inwhat == OP_TRANS &&
2235 PL_sublex_info.sub_op) {
2236 PL_sublex_info.sub_op->op_private |=
2237 (PL_lex_repl ? OPpTRANS_FROM_UTF
2241 if (uv > 255 && !dorange)
2242 native_range = FALSE;
2254 /* \N{LATIN SMALL LETTER A} is a named character */
2258 char* e = strchr(s, '}');
2265 yyerror("Missing right brace on \\N{}");
2269 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2271 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2272 PERL_SCAN_DISALLOW_PREFIX;
2275 uv = grok_hex(s, &len, &flags, NULL);
2276 if ( e > s && len != (STRLEN)(e - s) ) {
2280 goto NUM_ESCAPE_INSERT;
2282 res = newSVpvn(s + 1, e - s - 1);
2283 type = newSVpvn(s - 2,e - s + 3);
2284 res = new_constant( NULL, 0, "charnames",
2285 res, NULL, SvPVX(type) );
2288 sv_utf8_upgrade(res);
2289 str = SvPV_const(res,len);
2290 #ifdef EBCDIC_NEVER_MIND
2291 /* charnames uses pack U and that has been
2292 * recently changed to do the below uni->native
2293 * mapping, so this would be redundant (and wrong,
2294 * the code point would be doubly converted).
2295 * But leave this in just in case the pack U change
2296 * gets revoked, but the semantics is still
2297 * desireable for charnames. --jhi */
2299 UV uv = utf8_to_uvchr((const U8*)str, 0);
2302 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2304 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2305 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2306 str = SvPV_const(res, len);
2310 if (!has_utf8 && SvUTF8(res)) {
2311 const char * const ostart = SvPVX_const(sv);
2312 SvCUR_set(sv, d - ostart);
2315 sv_utf8_upgrade(sv);
2316 /* this just broke our allocation above... */
2317 SvGROW(sv, (STRLEN)(send - start));
2318 d = SvPVX(sv) + SvCUR(sv);
2321 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2322 const char * const odest = SvPVX_const(sv);
2324 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2325 d = SvPVX(sv) + (d - odest);
2329 native_range = FALSE; /* \N{} is guessed to be Unicode */
2331 Copy(str, d, len, char);
2338 yyerror("Missing braces on \\N{}");
2341 /* \c is a control character */
2350 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2353 yyerror("Missing control char name in \\c");
2357 /* printf-style backslashes, formfeeds, newlines, etc */
2359 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2362 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2365 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2368 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2371 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2374 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2377 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2383 } /* end if (backslash) */
2390 /* If we started with encoded form, or already know we want it
2391 and then encode the next character */
2392 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2394 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2395 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2398 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2399 const STRLEN off = d - SvPVX_const(sv);
2400 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2402 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2405 if (uv > 255 && !dorange)
2406 native_range = FALSE;
2410 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2412 } /* while loop to process each character */
2414 /* terminate the string and set up the sv */
2416 SvCUR_set(sv, d - SvPVX_const(sv));
2417 if (SvCUR(sv) >= SvLEN(sv))
2418 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2421 if (PL_encoding && !has_utf8) {
2422 sv_recode_to_utf8(sv, PL_encoding);
2428 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2429 PL_sublex_info.sub_op->op_private |=
2430 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2434 /* shrink the sv if we allocated more than we used */
2435 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2436 SvPV_shrink_to_cur(sv);
2439 /* return the substring (via yylval) only if we parsed anything */
2440 if (s > PL_bufptr) {
2441 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2442 sv = new_constant(start, s - start,
2443 (const char *)(PL_lex_inpat ? "qr" : "q"),
2446 (( PL_lex_inwhat == OP_TRANS
2448 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2451 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2458 * Returns TRUE if there's more to the expression (e.g., a subscript),
2461 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2463 * ->[ and ->{ return TRUE
2464 * { and [ outside a pattern are always subscripts, so return TRUE
2465 * if we're outside a pattern and it's not { or [, then return FALSE
2466 * if we're in a pattern and the first char is a {
2467 * {4,5} (any digits around the comma) returns FALSE
2468 * if we're in a pattern and the first char is a [
2470 * [SOMETHING] has a funky algorithm to decide whether it's a
2471 * character class or not. It has to deal with things like
2472 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2473 * anything else returns TRUE
2476 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2479 S_intuit_more(pTHX_ register char *s)
2482 if (PL_lex_brackets)
2484 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2486 if (*s != '{' && *s != '[')
2491 /* In a pattern, so maybe we have {n,m}. */
2508 /* On the other hand, maybe we have a character class */
2511 if (*s == ']' || *s == '^')
2514 /* this is terrifying, and it works */
2515 int weight = 2; /* let's weigh the evidence */
2517 unsigned char un_char = 255, last_un_char;
2518 const char * const send = strchr(s,']');
2519 char tmpbuf[sizeof PL_tokenbuf * 4];
2521 if (!send) /* has to be an expression */
2524 Zero(seen,256,char);
2527 else if (isDIGIT(*s)) {
2529 if (isDIGIT(s[1]) && s[2] == ']')
2535 for (; s < send; s++) {
2536 last_un_char = un_char;
2537 un_char = (unsigned char)*s;
2542 weight -= seen[un_char] * 10;
2543 if (isALNUM_lazy_if(s+1,UTF)) {
2545 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2546 len = (int)strlen(tmpbuf);
2547 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2552 else if (*s == '$' && s[1] &&
2553 strchr("[#!%*<>()-=",s[1])) {
2554 if (/*{*/ strchr("])} =",s[2]))
2563 if (strchr("wds]",s[1]))
2565 else if (seen[(U8)'\''] || seen[(U8)'"'])
2567 else if (strchr("rnftbxcav",s[1]))
2569 else if (isDIGIT(s[1])) {
2571 while (s[1] && isDIGIT(s[1]))
2581 if (strchr("aA01! ",last_un_char))
2583 if (strchr("zZ79~",s[1]))
2585 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2586 weight -= 5; /* cope with negative subscript */
2589 if (!isALNUM(last_un_char)
2590 && !(last_un_char == '$' || last_un_char == '@'
2591 || last_un_char == '&')
2592 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2597 if (keyword(tmpbuf, d - tmpbuf, 0))
2600 if (un_char == last_un_char + 1)
2602 weight -= seen[un_char];
2607 if (weight >= 0) /* probably a character class */
2617 * Does all the checking to disambiguate
2619 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2620 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2622 * First argument is the stuff after the first token, e.g. "bar".
2624 * Not a method if bar is a filehandle.
2625 * Not a method if foo is a subroutine prototyped to take a filehandle.
2626 * Not a method if it's really "Foo $bar"
2627 * Method if it's "foo $bar"
2628 * Not a method if it's really "print foo $bar"
2629 * Method if it's really "foo package::" (interpreted as package->foo)
2630 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2631 * Not a method if bar is a filehandle or package, but is quoted with
2636 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2639 char *s = start + (*start == '$');
2640 char tmpbuf[sizeof PL_tokenbuf];
2648 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2652 const char *proto = SvPVX_const(cv);
2663 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2664 /* start is the beginning of the possible filehandle/object,
2665 * and s is the end of it
2666 * tmpbuf is a copy of it
2669 if (*start == '$') {
2670 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2671 isUPPER(*PL_tokenbuf))
2674 len = start - SvPVX(PL_linestr);
2678 start = SvPVX(PL_linestr) + len;
2682 return *s == '(' ? FUNCMETH : METHOD;
2684 if (!keyword(tmpbuf, len, 0)) {
2685 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2689 soff = s - SvPVX(PL_linestr);
2693 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2694 if (indirgv && GvCVu(indirgv))
2696 /* filehandle or package name makes it a method */
2697 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2699 soff = s - SvPVX(PL_linestr);
2702 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2703 return 0; /* no assumptions -- "=>" quotes bearword */
2705 start_force(PL_curforce);
2706 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2707 newSVpvn(tmpbuf,len));
2708 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2710 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2715 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2717 return *s == '(' ? FUNCMETH : METHOD;
2725 * Return a string of Perl code to load the debugger. If PERL5DB
2726 * is set, it will return the contents of that, otherwise a
2727 * compile-time require of perl5db.pl.
2735 const char * const pdb = PerlEnv_getenv("PERL5DB");
2739 SETERRNO(0,SS_NORMAL);
2740 return "BEGIN { require 'perl5db.pl' }";
2746 /* Encoded script support. filter_add() effectively inserts a
2747 * 'pre-processing' function into the current source input stream.
2748 * Note that the filter function only applies to the current source file
2749 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2751 * The datasv parameter (which may be NULL) can be used to pass
2752 * private data to this instance of the filter. The filter function
2753 * can recover the SV using the FILTER_DATA macro and use it to
2754 * store private buffers and state information.
2756 * The supplied datasv parameter is upgraded to a PVIO type
2757 * and the IoDIRP/IoANY field is used to store the function pointer,
2758 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2759 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2760 * private use must be set using malloc'd pointers.
2764 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2770 if (!PL_rsfp_filters)
2771 PL_rsfp_filters = newAV();
2774 SvUPGRADE(datasv, SVt_PVIO);
2775 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2776 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2777 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2778 FPTR2DPTR(void *, IoANY(datasv)),
2779 SvPV_nolen(datasv)));
2780 av_unshift(PL_rsfp_filters, 1);
2781 av_store(PL_rsfp_filters, 0, datasv) ;
2786 /* Delete most recently added instance of this filter function. */
2788 Perl_filter_del(pTHX_ filter_t funcp)
2794 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2795 FPTR2DPTR(void*, funcp)));
2797 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2799 /* if filter is on top of stack (usual case) just pop it off */
2800 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2801 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2802 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2803 IoANY(datasv) = (void *)NULL;
2804 sv_free(av_pop(PL_rsfp_filters));
2808 /* we need to search for the correct entry and clear it */
2809 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2813 /* Invoke the idxth filter function for the current rsfp. */
2814 /* maxlen 0 = read one text line */
2816 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2821 /* This API is bad. It should have been using unsigned int for maxlen.
2822 Not sure if we want to change the API, but if not we should sanity
2823 check the value here. */
2824 const unsigned int correct_length
2833 if (!PL_rsfp_filters)
2835 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2836 /* Provide a default input filter to make life easy. */
2837 /* Note that we append to the line. This is handy. */
2838 DEBUG_P(PerlIO_printf(Perl_debug_log,
2839 "filter_read %d: from rsfp\n", idx));
2840 if (correct_length) {
2843 const int old_len = SvCUR(buf_sv);
2845 /* ensure buf_sv is large enough */
2846 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2847 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2848 correct_length)) <= 0) {
2849 if (PerlIO_error(PL_rsfp))
2850 return -1; /* error */
2852 return 0 ; /* end of file */
2854 SvCUR_set(buf_sv, old_len + len) ;
2857 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2858 if (PerlIO_error(PL_rsfp))
2859 return -1; /* error */
2861 return 0 ; /* end of file */
2864 return SvCUR(buf_sv);
2866 /* Skip this filter slot if filter has been deleted */
2867 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2868 DEBUG_P(PerlIO_printf(Perl_debug_log,
2869 "filter_read %d: skipped (filter deleted)\n",
2871 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2873 /* Get function pointer hidden within datasv */
2874 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2875 DEBUG_P(PerlIO_printf(Perl_debug_log,
2876 "filter_read %d: via function %p (%s)\n",
2877 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2878 /* Call function. The function is expected to */
2879 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2880 /* Return: <0:error, =0:eof, >0:not eof */
2881 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2885 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2888 #ifdef PERL_CR_FILTER
2889 if (!PL_rsfp_filters) {
2890 filter_add(S_cr_textfilter,NULL);
2893 if (PL_rsfp_filters) {
2895 SvCUR_set(sv, 0); /* start with empty line */
2896 if (FILTER_READ(0, sv, 0) > 0)
2897 return ( SvPVX(sv) ) ;
2902 return (sv_gets(sv, fp, append));
2906 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2911 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2915 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2916 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2918 return GvHV(gv); /* Foo:: */
2921 /* use constant CLASS => 'MyClass' */
2922 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2923 if (gv && GvCV(gv)) {
2924 SV * const sv = cv_const_sv(GvCV(gv));
2926 pkgname = SvPV_nolen_const(sv);
2929 return gv_stashpv(pkgname, 0);
2933 * S_readpipe_override
2934 * Check whether readpipe() is overriden, and generates the appropriate
2935 * optree, provided sublex_start() is called afterwards.
2938 S_readpipe_override(pTHX)
2941 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2942 yylval.ival = OP_BACKTICK;
2944 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2946 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2947 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2948 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2950 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2951 append_elem(OP_LIST,
2952 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2953 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2963 * The intent of this yylex wrapper is to minimize the changes to the
2964 * tokener when we aren't interested in collecting madprops. It remains
2965 * to be seen how successful this strategy will be...
2972 char *s = PL_bufptr;
2974 /* make sure PL_thiswhite is initialized */
2978 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2979 if (PL_pending_ident)
2980 return S_pending_ident(aTHX);
2982 /* previous token ate up our whitespace? */
2983 if (!PL_lasttoke && PL_nextwhite) {
2984 PL_thiswhite = PL_nextwhite;
2988 /* isolate the token, and figure out where it is without whitespace */
2989 PL_realtokenstart = -1;
2993 assert(PL_curforce < 0);
2995 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2996 if (!PL_thistoken) {
2997 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2998 PL_thistoken = newSVpvs("");
3000 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3001 PL_thistoken = newSVpvn(tstart, s - tstart);
3004 if (PL_thismad) /* install head */
3005 CURMAD('X', PL_thistoken);
3008 /* last whitespace of a sublex? */
3009 if (optype == ')' && PL_endwhite) {
3010 CURMAD('X', PL_endwhite);
3015 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3016 if (!PL_thiswhite && !PL_endwhite && !optype) {
3017 sv_free(PL_thistoken);
3022 /* put off final whitespace till peg */
3023 if (optype == ';' && !PL_rsfp) {
3024 PL_nextwhite = PL_thiswhite;
3027 else if (PL_thisopen) {
3028 CURMAD('q', PL_thisopen);
3030 sv_free(PL_thistoken);
3034 /* Store actual token text as madprop X */
3035 CURMAD('X', PL_thistoken);
3039 /* add preceding whitespace as madprop _ */
3040 CURMAD('_', PL_thiswhite);
3044 /* add quoted material as madprop = */
3045 CURMAD('=', PL_thisstuff);
3049 /* add terminating quote as madprop Q */
3050 CURMAD('Q', PL_thisclose);
3054 /* special processing based on optype */
3058 /* opval doesn't need a TOKEN since it can already store mp */
3069 append_madprops(PL_thismad, yylval.opval, 0);
3077 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3086 /* remember any fake bracket that lexer is about to discard */
3087 if (PL_lex_brackets == 1 &&
3088 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3091 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3094 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3095 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3098 break; /* don't bother looking for trailing comment */
3107 /* attach a trailing comment to its statement instead of next token */
3111 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3113 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3115 if (*s == '\n' || *s == '#') {
3116 while (s < PL_bufend && *s != '\n')
3120 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3121 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3138 /* Create new token struct. Note: opvals return early above. */
3139 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3146 S_tokenize_use(pTHX_ int is_use, char *s) {
3148 if (PL_expect != XSTATE)
3149 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3150 is_use ? "use" : "no"));
3152 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3153 s = force_version(s, TRUE);
3154 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3155 start_force(PL_curforce);
3156 NEXTVAL_NEXTTOKE.opval = NULL;
3159 else if (*s == 'v') {
3160 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3161 s = force_version(s, FALSE);
3165 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3166 s = force_version(s, FALSE);
3168 yylval.ival = is_use;
3172 static const char* const exp_name[] =
3173 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3174 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3181 Works out what to call the token just pulled out of the input
3182 stream. The yacc parser takes care of taking the ops we return and
3183 stitching them into a tree.
3189 if read an identifier
3190 if we're in a my declaration
3191 croak if they tried to say my($foo::bar)
3192 build the ops for a my() declaration
3193 if it's an access to a my() variable
3194 are we in a sort block?
3195 croak if my($a); $a <=> $b
3196 build ops for access to a my() variable
3197 if in a dq string, and they've said @foo and we can't find @foo
3199 build ops for a bareword
3200 if we already built the token before, use it.
3205 #pragma segment Perl_yylex
3211 register char *s = PL_bufptr;
3216 /* orig_keyword, gvp, and gv are initialized here because
3217 * jump to the label just_a_word_zero can bypass their
3218 * initialization later. */
3219 I32 orig_keyword = 0;
3224 SV* tmp = newSVpvs("");
3225 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3226 (IV)CopLINE(PL_curcop),
3227 lex_state_names[PL_lex_state],
3228 exp_name[PL_expect],
3229 pv_display(tmp, s, strlen(s), 0, 60));
3232 /* check if there's an identifier for us to look at */
3233 if (PL_pending_ident)
3234 return REPORT(S_pending_ident(aTHX));
3236 /* no identifier pending identification */
3238 switch (PL_lex_state) {
3240 case LEX_NORMAL: /* Some compilers will produce faster */
3241 case LEX_INTERPNORMAL: /* code if we comment these out. */
3245 /* when we've already built the next token, just pull it out of the queue */
3249 yylval = PL_nexttoke[PL_lasttoke].next_val;
3251 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3252 PL_nexttoke[PL_lasttoke].next_mad = 0;
3253 if (PL_thismad && PL_thismad->mad_key == '_') {
3254 PL_thiswhite = (SV*)PL_thismad->mad_val;
3255 PL_thismad->mad_val = 0;
3256 mad_free(PL_thismad);
3261 PL_lex_state = PL_lex_defer;
3262 PL_expect = PL_lex_expect;
3263 PL_lex_defer = LEX_NORMAL;
3264 if (!PL_nexttoke[PL_lasttoke].next_type)
3269 yylval = PL_nextval[PL_nexttoke];
3271 PL_lex_state = PL_lex_defer;
3272 PL_expect = PL_lex_expect;
3273 PL_lex_defer = LEX_NORMAL;
3277 /* FIXME - can these be merged? */
3278 return(PL_nexttoke[PL_lasttoke].next_type);
3280 return REPORT(PL_nexttype[PL_nexttoke]);
3283 /* interpolated case modifiers like \L \U, including \Q and \E.
3284 when we get here, PL_bufptr is at the \
3286 case LEX_INTERPCASEMOD:
3288 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3289 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3291 /* handle \E or end of string */
3292 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3294 if (PL_lex_casemods) {
3295 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3296 PL_lex_casestack[PL_lex_casemods] = '\0';
3298 if (PL_bufptr != PL_bufend
3299 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3301 PL_lex_state = LEX_INTERPCONCAT;
3304 PL_thistoken = newSVpvs("\\E");
3310 while (PL_bufptr != PL_bufend &&
3311 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3313 PL_thiswhite = newSVpvs("");
3314 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3318 if (PL_bufptr != PL_bufend)
3321 PL_lex_state = LEX_INTERPCONCAT;
3325 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3326 "### Saw case modifier\n"); });
3328 if (s[1] == '\\' && s[2] == 'E') {
3331 PL_thiswhite = newSVpvs("");
3332 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3335 PL_lex_state = LEX_INTERPCONCAT;
3340 if (!PL_madskills) /* when just compiling don't need correct */
3341 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3342 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3343 if ((*s == 'L' || *s == 'U') &&
3344 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3345 PL_lex_casestack[--PL_lex_casemods] = '\0';
3348 if (PL_lex_casemods > 10)
3349 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3350 PL_lex_casestack[PL_lex_casemods++] = *s;
3351 PL_lex_casestack[PL_lex_casemods] = '\0';
3352 PL_lex_state = LEX_INTERPCONCAT;
3353 start_force(PL_curforce);
3354 NEXTVAL_NEXTTOKE.ival = 0;
3356 start_force(PL_curforce);
3358 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3360 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3362 NEXTVAL_NEXTTOKE.ival = OP_LC;
3364 NEXTVAL_NEXTTOKE.ival = OP_UC;
3366 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3368 Perl_croak(aTHX_ "panic: yylex");
3370 SV* const tmpsv = newSVpvs("");
3371 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3377 if (PL_lex_starts) {
3383 sv_free(PL_thistoken);
3384 PL_thistoken = newSVpvs("");
3387 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3388 if (PL_lex_casemods == 1 && PL_lex_inpat)
3397 case LEX_INTERPPUSH:
3398 return REPORT(sublex_push());
3400 case LEX_INTERPSTART:
3401 if (PL_bufptr == PL_bufend)
3402 return REPORT(sublex_done());
3403 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3404 "### Interpolated variable\n"); });
3406 PL_lex_dojoin = (*PL_bufptr == '@');
3407 PL_lex_state = LEX_INTERPNORMAL;
3408 if (PL_lex_dojoin) {
3409 start_force(PL_curforce);
3410 NEXTVAL_NEXTTOKE.ival = 0;
3412 start_force(PL_curforce);
3413 force_ident("\"", '$');
3414 start_force(PL_curforce);
3415 NEXTVAL_NEXTTOKE.ival = 0;
3417 start_force(PL_curforce);
3418 NEXTVAL_NEXTTOKE.ival = 0;
3420 start_force(PL_curforce);
3421 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3424 if (PL_lex_starts++) {
3429 sv_free(PL_thistoken);
3430 PL_thistoken = newSVpvs("");
3433 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3434 if (!PL_lex_casemods && PL_lex_inpat)
3441 case LEX_INTERPENDMAYBE:
3442 if (intuit_more(PL_bufptr)) {
3443 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3449 if (PL_lex_dojoin) {
3450 PL_lex_dojoin = FALSE;
3451 PL_lex_state = LEX_INTERPCONCAT;
3455 sv_free(PL_thistoken);
3456 PL_thistoken = newSVpvs("");
3461 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3462 && SvEVALED(PL_lex_repl))
3464 if (PL_bufptr != PL_bufend)
3465 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3469 case LEX_INTERPCONCAT:
3471 if (PL_lex_brackets)
3472 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3474 if (PL_bufptr == PL_bufend)
3475 return REPORT(sublex_done());
3477 if (SvIVX(PL_linestr) == '\'') {
3478 SV *sv = newSVsv(PL_linestr);
3481 else if ( PL_hints & HINT_NEW_RE )
3482 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3483 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3487 s = scan_const(PL_bufptr);
3489 PL_lex_state = LEX_INTERPCASEMOD;
3491 PL_lex_state = LEX_INTERPSTART;
3494 if (s != PL_bufptr) {
3495 start_force(PL_curforce);
3497 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3499 NEXTVAL_NEXTTOKE = yylval;
3502 if (PL_lex_starts++) {
3506 sv_free(PL_thistoken);
3507 PL_thistoken = newSVpvs("");
3510 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3511 if (!PL_lex_casemods && PL_lex_inpat)
3524 PL_lex_state = LEX_NORMAL;
3525 s = scan_formline(PL_bufptr);
3526 if (!PL_lex_formbrack)
3532 PL_oldoldbufptr = PL_oldbufptr;
3538 sv_free(PL_thistoken);
3541 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3545 if (isIDFIRST_lazy_if(s,UTF))
3547 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3550 goto fake_eof; /* emulate EOF on ^D or ^Z */
3559 if (PL_lex_brackets) {
3560 yyerror((const char *)
3562 ? "Format not terminated"
3563 : "Missing right curly or square bracket"));
3565 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3566 "### Tokener got EOF\n");
3570 if (s++ < PL_bufend)
3571 goto retry; /* ignore stray nulls */
3574 if (!PL_in_eval && !PL_preambled) {
3575 PL_preambled = TRUE;
3580 sv_setpv(PL_linestr,incl_perldb());
3581 if (SvCUR(PL_linestr))
3582 sv_catpvs(PL_linestr,";");
3584 while(AvFILLp(PL_preambleav) >= 0) {
3585 SV *tmpsv = av_shift(PL_preambleav);
3586 sv_catsv(PL_linestr, tmpsv);
3587 sv_catpvs(PL_linestr, ";");
3590 sv_free((SV*)PL_preambleav);
3591 PL_preambleav = NULL;
3593 if (PL_minus_n || PL_minus_p) {
3594 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3596 sv_catpvs(PL_linestr,"chomp;");
3599 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3600 || *PL_splitstr == '"')
3601 && strchr(PL_splitstr + 1, *PL_splitstr))
3602 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3604 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3605 bytes can be used as quoting characters. :-) */
3606 const char *splits = PL_splitstr;
3607 sv_catpvs(PL_linestr, "our @F=split(q\0");
3610 if (*splits == '\\')
3611 sv_catpvn(PL_linestr, splits, 1);
3612 sv_catpvn(PL_linestr, splits, 1);
3613 } while (*splits++);
3614 /* This loop will embed the trailing NUL of
3615 PL_linestr as the last thing it does before
3617 sv_catpvs(PL_linestr, ");");
3621 sv_catpvs(PL_linestr,"our @F=split(' ');");
3625 sv_catpvs(PL_linestr,"use feature ':5.10';");
3626 sv_catpvs(PL_linestr, "\n");
3627 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3628 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3629 PL_last_lop = PL_last_uni = NULL;
3630 if (PERLDB_LINE && PL_curstash != PL_debstash)
3631 update_debugger_info(PL_linestr, NULL, 0);
3635 bof = PL_rsfp ? TRUE : FALSE;
3636 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3639 PL_realtokenstart = -1;
3642 if (PL_preprocess && !PL_in_eval)
3643 (void)PerlProc_pclose(PL_rsfp);
3644 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3645 PerlIO_clearerr(PL_rsfp);
3647 (void)PerlIO_close(PL_rsfp);
3649 PL_doextract = FALSE;
3651 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3656 sv_setpv(PL_linestr,
3659 ? ";}continue{print;}" : ";}"));
3660 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3661 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3662 PL_last_lop = PL_last_uni = NULL;
3663 PL_minus_n = PL_minus_p = 0;
3666 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3667 PL_last_lop = PL_last_uni = NULL;
3668 sv_setpvn(PL_linestr,"",0);
3669 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3671 /* If it looks like the start of a BOM or raw UTF-16,
3672 * check if it in fact is. */
3678 #ifdef PERLIO_IS_STDIO
3679 # ifdef __GNU_LIBRARY__
3680 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3681 # define FTELL_FOR_PIPE_IS_BROKEN
3685 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3686 # define FTELL_FOR_PIPE_IS_BROKEN
3691 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3692 /* This loses the possibility to detect the bof
3693 * situation on perl -P when the libc5 is being used.
3694 * Workaround? Maybe attach some extra state to PL_rsfp?
3697 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3699 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3702 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3703 s = swallow_bom((U8*)s);
3707 /* Incest with pod. */
3710 sv_catsv(PL_thiswhite, PL_linestr);
3712 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3713 sv_setpvn(PL_linestr, "", 0);
3714 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3715 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3716 PL_last_lop = PL_last_uni = NULL;
3717 PL_doextract = FALSE;
3721 } while (PL_doextract);
3722 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3723 if (PERLDB_LINE && PL_curstash != PL_debstash)
3724 update_debugger_info(PL_linestr, NULL, 0);
3725 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3726 PL_last_lop = PL_last_uni = NULL;
3727 if (CopLINE(PL_curcop) == 1) {
3728 while (s < PL_bufend && isSPACE(*s))
3730 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3734 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3738 if (*s == '#' && *(s+1) == '!')
3740 #ifdef ALTERNATE_SHEBANG
3742 static char const as[] = ALTERNATE_SHEBANG;
3743 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3744 d = s + (sizeof(as) - 1);
3746 #endif /* ALTERNATE_SHEBANG */
3755 while (*d && !isSPACE(*d))
3759 #ifdef ARG_ZERO_IS_SCRIPT
3760 if (ipathend > ipath) {
3762 * HP-UX (at least) sets argv[0] to the script name,
3763 * which makes $^X incorrect. And Digital UNIX and Linux,
3764 * at least, set argv[0] to the basename of the Perl
3765 * interpreter. So, having found "#!", we'll set it right.
3767 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3769 assert(SvPOK(x) || SvGMAGICAL(x));
3770 if (sv_eq(x, CopFILESV(PL_curcop))) {
3771 sv_setpvn(x, ipath, ipathend - ipath);
3777 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3778 const char * const lstart = SvPV_const(x,llen);
3780 bstart += blen - llen;
3781 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3782 sv_setpvn(x, ipath, ipathend - ipath);
3787 TAINT_NOT; /* $^X is always tainted, but that's OK */
3789 #endif /* ARG_ZERO_IS_SCRIPT */
3794 d = instr(s,"perl -");
3796 d = instr(s,"perl");
3798 /* avoid getting into infinite loops when shebang
3799 * line contains "Perl" rather than "perl" */
3801 for (d = ipathend-4; d >= ipath; --d) {
3802 if ((*d == 'p' || *d == 'P')
3803 && !ibcmp(d, "perl", 4))
3813 #ifdef ALTERNATE_SHEBANG
3815 * If the ALTERNATE_SHEBANG on this system starts with a
3816 * character that can be part of a Perl expression, then if
3817 * we see it but not "perl", we're probably looking at the
3818 * start of Perl code, not a request to hand off to some
3819 * other interpreter. Similarly, if "perl" is there, but
3820 * not in the first 'word' of the line, we assume the line
3821 * contains the start of the Perl program.
3823 if (d && *s != '#') {
3824 const char *c = ipath;
3825 while (*c && !strchr("; \t\r\n\f\v#", *c))
3828 d = NULL; /* "perl" not in first word; ignore */
3830 *s = '#'; /* Don't try to parse shebang line */
3832 #endif /* ALTERNATE_SHEBANG */
3833 #ifndef MACOS_TRADITIONAL
3838 !instr(s,"indir") &&
3839 instr(PL_origargv[0],"perl"))
3846 while (s < PL_bufend && isSPACE(*s))
3848 if (s < PL_bufend) {
3849 Newxz(newargv,PL_origargc+3,char*);
3851 while (s < PL_bufend && !isSPACE(*s))
3854 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3857 newargv = PL_origargv;
3860 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3862 Perl_croak(aTHX_ "Can't exec %s", ipath);
3866 while (*d && !isSPACE(*d))
3868 while (SPACE_OR_TAB(*d))
3872 const bool switches_done = PL_doswitches;
3873 const U32 oldpdb = PL_perldb;
3874 const bool oldn = PL_minus_n;
3875 const bool oldp = PL_minus_p;
3878 if (*d == 'M' || *d == 'm' || *d == 'C') {
3879 const char * const m = d;
3880 while (*d && !isSPACE(*d))
3882 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3885 d = moreswitches(d);
3887 if (PL_doswitches && !switches_done) {
3888 int argc = PL_origargc;
3889 char **argv = PL_origargv;
3892 } while (argc && argv[0][0] == '-' && argv[0][1]);
3893 init_argv_symbols(argc,argv);
3895 if ((PERLDB_LINE && !oldpdb) ||
3896 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3897 /* if we have already added "LINE: while (<>) {",
3898 we must not do it again */
3900 sv_setpvn(PL_linestr, "", 0);
3901 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3902 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3903 PL_last_lop = PL_last_uni = NULL;
3904 PL_preambled = FALSE;
3906 (void)gv_fetchfile(PL_origfilename);
3913 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3915 PL_lex_state = LEX_FORMLINE;
3920 #ifdef PERL_STRICT_CR
3921 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3923 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3925 case ' ': case '\t': case '\f': case 013:
3926 #ifdef MACOS_TRADITIONAL
3930 PL_realtokenstart = -1;
3932 PL_thiswhite = newSVpvs("");
3933 sv_catpvn(PL_thiswhite, s, 1);
3940 PL_realtokenstart = -1;
3944 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3945 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3946 /* handle eval qq[#line 1 "foo"\n ...] */
3947 CopLINE_dec(PL_curcop);
3950 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3952 if (!PL_in_eval || PL_rsfp)
3957 while (d < PL_bufend && *d != '\n')
3961 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3962 Perl_croak(aTHX_ "panic: input overflow");
3965 PL_thiswhite = newSVpvn(s, d - s);
3970 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3972 PL_lex_state = LEX_FORMLINE;
3978 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3979 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3982 TOKEN(PEG); /* make sure any #! line is accessible */
3987 /* if (PL_madskills && PL_lex_formbrack) { */
3989 while (d < PL_bufend && *d != '\n')
3993 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3994 Perl_croak(aTHX_ "panic: input overflow");
3995 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3997 PL_thiswhite = newSVpvs("");
3998 if (CopLINE(PL_curcop) == 1) {
3999 sv_setpvn(PL_thiswhite, "", 0);
4002 sv_catpvn(PL_thiswhite, s, d - s);
4016 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4024 while (s < PL_bufend && SPACE_OR_TAB(*s))
4027 if (strnEQ(s,"=>",2)) {
4028 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4029 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4030 OPERATOR('-'); /* unary minus */
4032 PL_last_uni = PL_oldbufptr;
4034 case 'r': ftst = OP_FTEREAD; break;
4035 case 'w': ftst = OP_FTEWRITE; break;
4036 case 'x': ftst = OP_FTEEXEC; break;
4037 case 'o': ftst = OP_FTEOWNED; break;
4038 case 'R': ftst = OP_FTRREAD; break;
4039 case 'W': ftst = OP_FTRWRITE; break;
4040 case 'X': ftst = OP_FTREXEC; break;
4041 case 'O': ftst = OP_FTROWNED; break;
4042 case 'e': ftst = OP_FTIS; break;
4043 case 'z': ftst = OP_FTZERO; break;
4044 case 's': ftst = OP_FTSIZE; break;
4045 case 'f': ftst = OP_FTFILE; break;
4046 case 'd': ftst = OP_FTDIR; break;
4047 case 'l': ftst = OP_FTLINK; break;
4048 case 'p': ftst = OP_FTPIPE; break;
4049 case 'S': ftst = OP_FTSOCK; break;
4050 case 'u': ftst = OP_FTSUID; break;
4051 case 'g': ftst = OP_FTSGID; break;
4052 case 'k': ftst = OP_FTSVTX; break;
4053 case 'b': ftst = OP_FTBLK; break;
4054 case 'c': ftst = OP_FTCHR; break;
4055 case 't': ftst = OP_FTTTY; break;
4056 case 'T': ftst = OP_FTTEXT; break;
4057 case 'B': ftst = OP_FTBINARY; break;
4058 case 'M': case 'A': case 'C':
4059 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4061 case 'M': ftst = OP_FTMTIME; break;
4062 case 'A': ftst = OP_FTATIME; break;
4063 case 'C': ftst = OP_FTCTIME; break;
4071 PL_last_lop_op = (OPCODE)ftst;
4072 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4073 "### Saw file test %c\n", (int)tmp);
4078 /* Assume it was a minus followed by a one-letter named
4079 * subroutine call (or a -bareword), then. */
4080 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4081 "### '-%c' looked like a file test but was not\n",
4088 const char tmp = *s++;
4091 if (PL_expect == XOPERATOR)
4096 else if (*s == '>') {
4099 if (isIDFIRST_lazy_if(s,UTF)) {
4100 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4108 if (PL_expect == XOPERATOR)
4111 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4113 OPERATOR('-'); /* unary minus */
4119 const char tmp = *s++;
4122 if (PL_expect == XOPERATOR)
4127 if (PL_expect == XOPERATOR)
4130 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4137 if (PL_expect != XOPERATOR) {
4138 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4139 PL_expect = XOPERATOR;
4140 force_ident(PL_tokenbuf, '*');
4153 if (PL_expect == XOPERATOR) {
4157 PL_tokenbuf[0] = '%';
4158 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4159 sizeof PL_tokenbuf - 1, FALSE);
4160 if (!PL_tokenbuf[1]) {
4163 PL_pending_ident = '%';
4174 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4181 const char tmp = *s++;
4187 goto just_a_word_zero_gv;
4190 switch (PL_expect) {
4196 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4198 PL_bufptr = s; /* update in case we back off */
4204 PL_expect = XTERMBLOCK;
4207 stuffstart = s - SvPVX(PL_linestr) - 1;
4211 while (isIDFIRST_lazy_if(s,UTF)) {
4214 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4215 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4216 if (tmp < 0) tmp = -tmp;
4231 sv = newSVpvn(s, len);
4233 d = scan_str(d,TRUE,TRUE);
4235 /* MUST advance bufptr here to avoid bogus
4236 "at end of line" context messages from yyerror().
4238 PL_bufptr = s + len;
4239 yyerror("Unterminated attribute parameter in attribute list");
4243 return REPORT(0); /* EOF indicator */
4247 sv_catsv(sv, PL_lex_stuff);
4248 attrs = append_elem(OP_LIST, attrs,
4249 newSVOP(OP_CONST, 0, sv));
4250 SvREFCNT_dec(PL_lex_stuff);
4251 PL_lex_stuff = NULL;
4254 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4256 if (PL_in_my == KEY_our) {
4258 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4260 /* skip to avoid loading attributes.pm */
4262 deprecate(":unique");
4265 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4268 /* NOTE: any CV attrs applied here need to be part of
4269 the CVf_BUILTIN_ATTRS define in cv.h! */
4270 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4272 CvLVALUE_on(PL_compcv);
4274 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4276 CvLOCKED_on(PL_compcv);
4278 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4280 CvMETHOD_on(PL_compcv);
4282 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4284 CvASSERTION_on(PL_compcv);
4286 /* After we've set the flags, it could be argued that
4287 we don't need to do the attributes.pm-based setting
4288 process, and shouldn't bother appending recognized
4289 flags. To experiment with that, uncomment the
4290 following "else". (Note that's already been
4291 uncommented. That keeps the above-applied built-in
4292 attributes from being intercepted (and possibly
4293 rejected) by a package's attribute routines, but is
4294 justified by the performance win for the common case
4295 of applying only built-in attributes.) */
4297 attrs = append_elem(OP_LIST, attrs,
4298 newSVOP(OP_CONST, 0,
4302 if (*s == ':' && s[1] != ':')
4305 break; /* require real whitespace or :'s */
4306 /* XXX losing whitespace on sequential attributes here */
4310 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4311 if (*s != ';' && *s != '}' && *s != tmp
4312 && (tmp != '=' || *s != ')')) {
4313 const char q = ((*s == '\'') ? '"' : '\'');
4314 /* If here for an expression, and parsed no attrs, back
4316 if (tmp == '=' && !attrs) {
4320 /* MUST advance bufptr here to avoid bogus "at end of line"
4321 context messages from yyerror().
4324 yyerror( (const char *)
4326 ? Perl_form(aTHX_ "Invalid separator character "
4327 "%c%c%c in attribute list", q, *s, q)
4328 : "Unterminated attribute list" ) );
4336 start_force(PL_curforce);
4337 NEXTVAL_NEXTTOKE.opval = attrs;
4338 CURMAD('_', PL_nextwhite);
4343 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4344 (s - SvPVX(PL_linestr)) - stuffstart);
4352 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4353 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4361 const char tmp = *s++;
4366 const char tmp = *s++;
4374 if (PL_lex_brackets <= 0)
4375 yyerror("Unmatched right square bracket");
4378 if (PL_lex_state == LEX_INTERPNORMAL) {
4379 if (PL_lex_brackets == 0) {
4380 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4381 PL_lex_state = LEX_INTERPEND;
4388 if (PL_lex_brackets > 100) {
4389 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4391 switch (PL_expect) {
4393 if (PL_lex_formbrack) {
4397 if (PL_oldoldbufptr == PL_last_lop)
4398 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4400 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4401 OPERATOR(HASHBRACK);
4403 while (s < PL_bufend && SPACE_OR_TAB(*s))
4406 PL_tokenbuf[0] = '\0';
4407 if (d < PL_bufend && *d == '-') {
4408 PL_tokenbuf[0] = '-';
4410 while (d < PL_bufend && SPACE_OR_TAB(*d))
4413 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4414 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4416 while (d < PL_bufend && SPACE_OR_TAB(*d))
4419 const char minus = (PL_tokenbuf[0] == '-');
4420 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4428 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4433 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4438 if (PL_oldoldbufptr == PL_last_lop)
4439 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4441 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4444 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4446 /* This hack is to get the ${} in the message. */
4448 yyerror("syntax error");
4451 OPERATOR(HASHBRACK);
4453 /* This hack serves to disambiguate a pair of curlies
4454 * as being a block or an anon hash. Normally, expectation
4455 * determines that, but in cases where we're not in a
4456 * position to expect anything in particular (like inside
4457 * eval"") we have to resolve the ambiguity. This code
4458 * covers the case where the first term in the curlies is a
4459 * quoted string. Most other cases need to be explicitly
4460 * disambiguated by prepending a "+" before the opening
4461 * curly in order to force resolution as an anon hash.
4463 * XXX should probably propagate the outer expectation
4464 * into eval"" to rely less on this hack, but that could
4465 * potentially break current behavior of eval"".
4469 if (*s == '\'' || *s == '"' || *s == '`') {
4470 /* common case: get past first string, handling escapes */
4471 for (t++; t < PL_bufend && *t != *s;)
4472 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4476 else if (*s == 'q') {
4479 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4482 /* skip q//-like construct */
4484 char open, close, term;
4487 while (t < PL_bufend && isSPACE(*t))
4489 /* check for q => */
4490 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4491 OPERATOR(HASHBRACK);
4495 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4499 for (t++; t < PL_bufend; t++) {
4500 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4502 else if (*t == open)
4506 for (t++; t < PL_bufend; t++) {
4507 if (*t == '\\' && t+1 < PL_bufend)
4509 else if (*t == close && --brackets <= 0)
4511 else if (*t == open)
4518 /* skip plain q word */
4519 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4522 else if (isALNUM_lazy_if(t,UTF)) {
4524 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4527 while (t < PL_bufend && isSPACE(*t))
4529 /* if comma follows first term, call it an anon hash */
4530 /* XXX it could be a comma expression with loop modifiers */
4531 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4532 || (*t == '=' && t[1] == '>')))
4533 OPERATOR(HASHBRACK);
4534 if (PL_expect == XREF)
4537 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4543 yylval.ival = CopLINE(PL_curcop);
4544 if (isSPACE(*s) || *s == '#')
4545 PL_copline = NOLINE; /* invalidate current command line number */
4550 if (PL_lex_brackets <= 0)
4551 yyerror("Unmatched right curly bracket");
4553 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4554 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4555 PL_lex_formbrack = 0;
4556 if (PL_lex_state == LEX_INTERPNORMAL) {
4557 if (PL_lex_brackets == 0) {
4558 if (PL_expect & XFAKEBRACK) {
4559 PL_expect &= XENUMMASK;
4560 PL_lex_state = LEX_INTERPEND;
4565 PL_thiswhite = newSVpvs("");
4566 sv_catpvn(PL_thiswhite,"}",1);
4569 return yylex(); /* ignore fake brackets */
4571 if (*s == '-' && s[1] == '>')
4572 PL_lex_state = LEX_INTERPENDMAYBE;
4573 else if (*s != '[' && *s != '{')
4574 PL_lex_state = LEX_INTERPEND;
4577 if (PL_expect & XFAKEBRACK) {
4578 PL_expect &= XENUMMASK;
4580 return yylex(); /* ignore fake brackets */
4582 start_force(PL_curforce);
4584 curmad('X', newSVpvn(s-1,1));
4585 CURMAD('_', PL_thiswhite);
4590 PL_thistoken = newSVpvs("");
4598 if (PL_expect == XOPERATOR) {
4599 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4600 && isIDFIRST_lazy_if(s,UTF))
4602 CopLINE_dec(PL_curcop);
4603 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4604 CopLINE_inc(PL_curcop);
4609 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4611 PL_expect = XOPERATOR;
4612 force_ident(PL_tokenbuf, '&');
4616 yylval.ival = (OPpENTERSUB_AMPER<<8);
4628 const char tmp = *s++;
4635 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4636 && strchr("+-*/%.^&|<",tmp))
4637 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4638 "Reversed %c= operator",(int)tmp);
4640 if (PL_expect == XSTATE && isALPHA(tmp) &&
4641 (s == PL_linestart+1 || s[-2] == '\n') )
4643 if (PL_in_eval && !PL_rsfp) {
4648 if (strnEQ(s,"=cut",4)) {
4664 PL_thiswhite = newSVpvs("");
4665 sv_catpvn(PL_thiswhite, PL_linestart,
4666 PL_bufend - PL_linestart);
4670 PL_doextract = TRUE;
4674 if (PL_lex_brackets < PL_lex_formbrack) {
4676 #ifdef PERL_STRICT_CR
4677 while (SPACE_OR_TAB(*t))
4679 while (SPACE_OR_TAB(*t) || *t == '\r')
4682 if (*t == '\n' || *t == '#') {
4693 const char tmp = *s++;
4695 /* was this !=~ where !~ was meant?
4696 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4698 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4699 const char *t = s+1;
4701 while (t < PL_bufend && isSPACE(*t))
4704 if (*t == '/' || *t == '?' ||
4705 ((*t == 'm' || *t == 's' || *t == 'y')
4706 && !isALNUM(t[1])) ||
4707 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4708 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4709 "!=~ should be !~");
4719 if (PL_expect != XOPERATOR) {
4720 if (s[1] != '<' && !strchr(s,'>'))
4723 s = scan_heredoc(s);
4725 s = scan_inputsymbol(s);
4726 TERM(sublex_start());
4732 SHop(OP_LEFT_SHIFT);
4746 const char tmp = *s++;
4748 SHop(OP_RIGHT_SHIFT);
4749 else if (tmp == '=')
4758 if (PL_expect == XOPERATOR) {
4759 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4761 deprecate_old(commaless_variable_list);
4762 return REPORT(','); /* grandfather non-comma-format format */
4766 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4767 PL_tokenbuf[0] = '@';
4768 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4769 sizeof PL_tokenbuf - 1, FALSE);
4770 if (PL_expect == XOPERATOR)
4771 no_op("Array length", s);
4772 if (!PL_tokenbuf[1])
4774 PL_expect = XOPERATOR;
4775 PL_pending_ident = '#';
4779 PL_tokenbuf[0] = '$';
4780 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4781 sizeof PL_tokenbuf - 1, FALSE);
4782 if (PL_expect == XOPERATOR)
4784 if (!PL_tokenbuf[1]) {
4786 yyerror("Final $ should be \\$ or $name");
4790 /* This kludge not intended to be bulletproof. */
4791 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4792 yylval.opval = newSVOP(OP_CONST, 0,
4793 newSViv(CopARYBASE_get(&PL_compiling)));
4794 yylval.opval->op_private = OPpCONST_ARYBASE;
4800 const char tmp = *s;
4801 if (PL_lex_state == LEX_NORMAL)
4804 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4805 && intuit_more(s)) {
4807 PL_tokenbuf[0] = '@';
4808 if (ckWARN(WARN_SYNTAX)) {
4811 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4814 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4815 while (t < PL_bufend && *t != ']')
4817 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4818 "Multidimensional syntax %.*s not supported",
4819 (int)((t - PL_bufptr) + 1), PL_bufptr);
4823 else if (*s == '{') {
4825 PL_tokenbuf[0] = '%';
4826 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4827 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4829 char tmpbuf[sizeof PL_tokenbuf];
4832 } while (isSPACE(*t));
4833 if (isIDFIRST_lazy_if(t,UTF)) {
4835 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4839 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4840 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4841 "You need to quote \"%s\"",
4848 PL_expect = XOPERATOR;
4849 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4850 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4851 if (!islop || PL_last_lop_op == OP_GREPSTART)
4852 PL_expect = XOPERATOR;
4853 else if (strchr("$@\"'`q", *s))
4854 PL_expect = XTERM; /* e.g. print $fh "foo" */
4855 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4856 PL_expect = XTERM; /* e.g. print $fh &sub */
4857 else if (isIDFIRST_lazy_if(s,UTF)) {
4858 char tmpbuf[sizeof PL_tokenbuf];
4860 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4861 if ((t2 = keyword(tmpbuf, len, 0))) {
4862 /* binary operators exclude handle interpretations */
4874 PL_expect = XTERM; /* e.g. print $fh length() */
4879 PL_expect = XTERM; /* e.g. print $fh subr() */
4882 else if (isDIGIT(*s))
4883 PL_expect = XTERM; /* e.g. print $fh 3 */
4884 else if (*s == '.' && isDIGIT(s[1]))
4885 PL_expect = XTERM; /* e.g. print $fh .3 */
4886 else if ((*s == '?' || *s == '-' || *s == '+')
4887 && !isSPACE(s[1]) && s[1] != '=')
4888 PL_expect = XTERM; /* e.g. print $fh -1 */
4889 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4891 PL_expect = XTERM; /* e.g. print $fh /.../
4892 XXX except DORDOR operator
4894 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4896 PL_expect = XTERM; /* print $fh <<"EOF" */
4899 PL_pending_ident = '$';
4903 if (PL_expect == XOPERATOR)
4905 PL_tokenbuf[0] = '@';
4906 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4907 if (!PL_tokenbuf[1]) {
4910 if (PL_lex_state == LEX_NORMAL)
4912 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4914 PL_tokenbuf[0] = '%';
4916 /* Warn about @ where they meant $. */
4917 if (*s == '[' || *s == '{') {
4918 if (ckWARN(WARN_SYNTAX)) {
4919 const char *t = s + 1;
4920 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4922 if (*t == '}' || *t == ']') {
4924 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4925 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4926 "Scalar value %.*s better written as $%.*s",
4927 (int)(t-PL_bufptr), PL_bufptr,
4928 (int)(t-PL_bufptr-1), PL_bufptr+1);
4933 PL_pending_ident = '@';
4936 case '/': /* may be division, defined-or, or pattern */
4937 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4941 case '?': /* may either be conditional or pattern */
4942 if(PL_expect == XOPERATOR) {
4950 /* A // operator. */
4960 /* Disable warning on "study /blah/" */
4961 if (PL_oldoldbufptr == PL_last_uni
4962 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4963 || memNE(PL_last_uni, "study", 5)
4964 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4967 s = scan_pat(s,OP_MATCH);
4968 TERM(sublex_start());
4972 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4973 #ifdef PERL_STRICT_CR
4976 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4978 && (s == PL_linestart || s[-1] == '\n') )
4980 PL_lex_formbrack = 0;
4984 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4990 yylval.ival = OPf_SPECIAL;
4996 if (PL_expect != XOPERATOR)
5001 case '0': case '1': case '2': case '3': case '4':
5002 case '5': case '6': case '7': case '8': case '9':
5003 s = scan_num(s, &yylval);
5004 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5005 if (PL_expect == XOPERATOR)
5010 s = scan_str(s,!!PL_madskills,FALSE);
5011 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5012 if (PL_expect == XOPERATOR) {
5013 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5015 deprecate_old(commaless_variable_list);
5016 return REPORT(','); /* grandfather non-comma-format format */
5023 yylval.ival = OP_CONST;
5024 TERM(sublex_start());
5027 s = scan_str(s,!!PL_madskills,FALSE);
5028 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5029 if (PL_expect == XOPERATOR) {
5030 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5032 deprecate_old(commaless_variable_list);
5033 return REPORT(','); /* grandfather non-comma-format format */
5040 yylval.ival = OP_CONST;
5041 /* FIXME. I think that this can be const if char *d is replaced by
5042 more localised variables. */
5043 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5044 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5045 yylval.ival = OP_STRINGIFY;
5049 TERM(sublex_start());
5052 s = scan_str(s,!!PL_madskills,FALSE);
5053 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5054 if (PL_expect == XOPERATOR)
5055 no_op("Backticks",s);
5058 readpipe_override();
5059 TERM(sublex_start());
5063 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5064 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5066 if (PL_expect == XOPERATOR)
5067 no_op("Backslash",s);
5071 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5072 char *start = s + 2;
5073 while (isDIGIT(*start) || *start == '_')
5075 if (*start == '.' && isDIGIT(start[1])) {
5076 s = scan_num(s, &yylval);
5079 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5080 else if (!isALPHA(*start) && (PL_expect == XTERM
5081 || PL_expect == XREF || PL_expect == XSTATE
5082 || PL_expect == XTERMORDORDOR)) {
5083 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5084 const char c = *start;
5087 gv = gv_fetchpv(s, 0, SVt_PVCV);
5090 s = scan_num(s, &yylval);
5097 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5139 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5141 /* Some keywords can be followed by any delimiter, including ':' */
5142 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5143 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5144 (PL_tokenbuf[0] == 'q' &&
5145 strchr("qwxr", PL_tokenbuf[1])))));
5147 /* x::* is just a word, unless x is "CORE" */
5148 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5152 while (d < PL_bufend && isSPACE(*d))
5153 d++; /* no comments skipped here, or s### is misparsed */
5155 /* Is this a label? */
5156 if (!tmp && PL_expect == XSTATE
5157 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5159 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5164 /* Check for keywords */
5165 tmp = keyword(PL_tokenbuf, len, 0);
5167 /* Is this a word before a => operator? */
5168 if (*d == '=' && d[1] == '>') {
5171 = (OP*)newSVOP(OP_CONST, 0,
5172 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5173 yylval.opval->op_private = OPpCONST_BARE;
5177 if (tmp < 0) { /* second-class keyword? */
5178 GV *ogv = NULL; /* override (winner) */
5179 GV *hgv = NULL; /* hidden (loser) */
5180 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5182 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5185 if (GvIMPORTED_CV(gv))
5187 else if (! CvMETHOD(cv))
5191 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5192 (gv = *gvp) && isGV_with_GP(gv) &&
5193 GvCVu(gv) && GvIMPORTED_CV(gv))
5200 tmp = 0; /* overridden by import or by GLOBAL */
5203 && -tmp==KEY_lock /* XXX generalizable kludge */
5206 tmp = 0; /* any sub overrides "weak" keyword */
5208 else { /* no override */
5210 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5211 Perl_warner(aTHX_ packWARN(WARN_MISC),
5212 "dump() better written as CORE::dump()");
5216 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5217 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5218 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5219 "Ambiguous call resolved as CORE::%s(), %s",
5220 GvENAME(hgv), "qualify as such or use &");
5227 default: /* not a keyword */
5228 /* Trade off - by using this evil construction we can pull the
5229 variable gv into the block labelled keylookup. If not, then
5230 we have to give it function scope so that the goto from the
5231 earlier ':' case doesn't bypass the initialisation. */
5233 just_a_word_zero_gv:
5241 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5244 SV *nextPL_nextwhite = 0;
5248 /* Get the rest if it looks like a package qualifier */
5250 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5252 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5255 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5256 *s == '\'' ? "'" : "::");
5261 if (PL_expect == XOPERATOR) {
5262 if (PL_bufptr == PL_linestart) {
5263 CopLINE_dec(PL_curcop);
5264 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5265 CopLINE_inc(PL_curcop);
5268 no_op("Bareword",s);
5271 /* Look for a subroutine with this name in current package,
5272 unless name is "Foo::", in which case Foo is a bearword
5273 (and a package name). */
5275 if (len > 2 && !PL_madskills &&
5276 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5278 if (ckWARN(WARN_BAREWORD)
5279 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5280 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5281 "Bareword \"%s\" refers to nonexistent package",
5284 PL_tokenbuf[len] = '\0';
5290 /* Mustn't actually add anything to a symbol table.
5291 But also don't want to "initialise" any placeholder
5292 constants that might already be there into full
5293 blown PVGVs with attached PVCV. */
5294 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5295 GV_NOADD_NOINIT, SVt_PVCV);
5300 /* if we saw a global override before, get the right name */
5303 sv = newSVpvs("CORE::GLOBAL::");
5304 sv_catpv(sv,PL_tokenbuf);
5307 /* If len is 0, newSVpv does strlen(), which is correct.
5308 If len is non-zero, then it will be the true length,
5309 and so the scalar will be created correctly. */
5310 sv = newSVpv(PL_tokenbuf,len);
5313 if (PL_madskills && !PL_thistoken) {
5314 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5315 PL_thistoken = newSVpv(start,s - start);
5316 PL_realtokenstart = s - SvPVX(PL_linestr);
5320 /* Presume this is going to be a bareword of some sort. */
5323 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5324 yylval.opval->op_private = OPpCONST_BARE;
5325 /* UTF-8 package name? */
5326 if (UTF && !IN_BYTES &&
5327 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5330 /* And if "Foo::", then that's what it certainly is. */
5335 /* Do the explicit type check so that we don't need to force
5336 the initialisation of the symbol table to have a real GV.
5337 Beware - gv may not really be a PVGV, cv may not really be
5338 a PVCV, (because of the space optimisations that gv_init
5339 understands) But they're true if for this symbol there is
5340 respectively a typeglob and a subroutine.
5342 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5343 /* Real typeglob, so get the real subroutine: */
5345 /* A proxy for a subroutine in this package? */
5346 : SvOK(gv) ? (CV *) gv : NULL)
5349 /* See if it's the indirect object for a list operator. */
5351 if (PL_oldoldbufptr &&
5352 PL_oldoldbufptr < PL_bufptr &&
5353 (PL_oldoldbufptr == PL_last_lop
5354 || PL_oldoldbufptr == PL_last_uni) &&
5355 /* NO SKIPSPACE BEFORE HERE! */
5356 (PL_expect == XREF ||
5357 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5359 bool immediate_paren = *s == '(';
5361 /* (Now we can afford to cross potential line boundary.) */
5362 s = SKIPSPACE2(s,nextPL_nextwhite);
5364 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5367 /* Two barewords in a row may indicate method call. */
5369 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5370 (tmp = intuit_method(s, gv, cv)))
5373 /* If not a declared subroutine, it's an indirect object. */
5374 /* (But it's an indir obj regardless for sort.) */
5375 /* Also, if "_" follows a filetest operator, it's a bareword */
5378 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5380 (PL_last_lop_op != OP_MAPSTART &&
5381 PL_last_lop_op != OP_GREPSTART))))
5382 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5383 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5386 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5391 PL_expect = XOPERATOR;
5394 s = SKIPSPACE2(s,nextPL_nextwhite);
5395 PL_nextwhite = nextPL_nextwhite;
5400 /* Is this a word before a => operator? */
5401 if (*s == '=' && s[1] == '>' && !pkgname) {
5403 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5404 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5405 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5409 /* If followed by a paren, it's certainly a subroutine. */
5414 while (SPACE_OR_TAB(*d))
5416 if (*d == ')' && (sv = gv_const_sv(gv))) {
5420 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5421 sv_catpvn(PL_thistoken, par, s - par);
5423 sv_free(PL_nextwhite);
5434 PL_nextwhite = PL_thiswhite;
5437 start_force(PL_curforce);
5439 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5440 PL_expect = XOPERATOR;
5443 PL_nextwhite = nextPL_nextwhite;
5444 curmad('X', PL_thistoken);
5445 PL_thistoken = newSVpvs("");
5453 /* If followed by var or block, call it a method (unless sub) */
5455 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5456 PL_last_lop = PL_oldbufptr;
5457 PL_last_lop_op = OP_METHOD;
5461 /* If followed by a bareword, see if it looks like indir obj. */
5464 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5465 && (tmp = intuit_method(s, gv, cv)))
5468 /* Not a method, so call it a subroutine (if defined) */
5471 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5472 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5473 "Ambiguous use of -%s resolved as -&%s()",
5474 PL_tokenbuf, PL_tokenbuf);
5475 /* Check for a constant sub */
5476 if ((sv = gv_const_sv(gv)) && !PL_madskills) {
5478 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5479 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5480 yylval.opval->op_private = 0;
5484 /* Resolve to GV now. */
5485 if (SvTYPE(gv) != SVt_PVGV) {
5486 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5487 assert (SvTYPE(gv) == SVt_PVGV);
5488 /* cv must have been some sort of placeholder, so
5489 now needs replacing with a real code reference. */
5493 op_free(yylval.opval);
5494 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5495 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5496 PL_last_lop = PL_oldbufptr;
5497 PL_last_lop_op = OP_ENTERSUB;
5498 /* Is there a prototype? */
5506 const char *proto = SvPV_const((SV*)cv, protolen);
5509 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5511 while (*proto == ';')
5513 if (*proto == '&' && *s == '{') {
5514 sv_setpv(PL_subname,
5517 "__ANON__" : "__ANON__::__ANON__"));
5524 PL_nextwhite = PL_thiswhite;
5527 start_force(PL_curforce);
5528 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5531 PL_nextwhite = nextPL_nextwhite;
5532 curmad('X', PL_thistoken);
5533 PL_thistoken = newSVpvs("");
5540 /* Guess harder when madskills require "best effort". */
5541 if (PL_madskills && (!gv || !GvCVu(gv))) {
5542 int probable_sub = 0;
5543 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5545 else if (isALPHA(*s)) {
5549 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5550 if (!keyword(tmpbuf, tmplen, 0))
5553 while (d < PL_bufend && isSPACE(*d))
5555 if (*d == '=' && d[1] == '>')
5560 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5561 op_free(yylval.opval);
5562 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5563 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5564 PL_last_lop = PL_oldbufptr;
5565 PL_last_lop_op = OP_ENTERSUB;
5566 PL_nextwhite = PL_thiswhite;
5568 start_force(PL_curforce);
5569 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5571 PL_nextwhite = nextPL_nextwhite;
5572 curmad('X', PL_thistoken);
5573 PL_thistoken = newSVpvs("");
5578 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5585 /* Call it a bare word */
5587 if (PL_hints & HINT_STRICT_SUBS)
5588 yylval.opval->op_private |= OPpCONST_STRICT;
5591 if (lastchar != '-') {
5592 if (ckWARN(WARN_RESERVED)) {
5596 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5597 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5604 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5605 && ckWARN_d(WARN_AMBIGUOUS)) {
5606 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5607 "Operator or semicolon missing before %c%s",
5608 lastchar, PL_tokenbuf);
5609 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5610 "Ambiguous use of %c resolved as operator %c",
5611 lastchar, lastchar);
5617 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5618 newSVpv(CopFILE(PL_curcop),0));
5622 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5623 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5626 case KEY___PACKAGE__:
5627 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5629 ? newSVhek(HvNAME_HEK(PL_curstash))
5636 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5637 const char *pname = "main";
5638 if (PL_tokenbuf[2] == 'D')
5639 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5640 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5644 GvIOp(gv) = newIO();
5645 IoIFP(GvIOp(gv)) = PL_rsfp;
5646 #if defined(HAS_FCNTL) && defined(F_SETFD)
5648 const int fd = PerlIO_fileno(PL_rsfp);
5649 fcntl(fd,F_SETFD,fd >= 3);
5652 /* Mark this internal pseudo-handle as clean */
5653 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5655 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5656 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5657 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5659 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5660 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5661 /* if the script was opened in binmode, we need to revert
5662 * it to text mode for compatibility; but only iff it has CRs
5663 * XXX this is a questionable hack at best. */
5664 if (PL_bufend-PL_bufptr > 2
5665 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5668 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5669 loc = PerlIO_tell(PL_rsfp);
5670 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5673 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5675 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5676 #endif /* NETWARE */
5677 #ifdef PERLIO_IS_STDIO /* really? */
5678 # if defined(__BORLANDC__)
5679 /* XXX see note in do_binmode() */
5680 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5684 PerlIO_seek(PL_rsfp, loc, 0);
5688 #ifdef PERLIO_LAYERS
5691 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5692 else if (PL_encoding) {
5699 XPUSHs(PL_encoding);
5701 call_method("name", G_SCALAR);
5705 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5706 Perl_form(aTHX_ ":encoding(%"SVf")",
5715 if (PL_realtokenstart >= 0) {
5716 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5718 PL_endwhite = newSVpvs("");
5719 sv_catsv(PL_endwhite, PL_thiswhite);
5721 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5722 PL_realtokenstart = -1;
5724 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5725 SvCUR(PL_endwhite))) != Nullch) ;
5740 if (PL_expect == XSTATE) {
5747 if (*s == ':' && s[1] == ':') {
5750 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5751 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5752 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5755 else if (tmp == KEY_require || tmp == KEY_do)
5756 /* that's a way to remember we saw "CORE::" */
5769 LOP(OP_ACCEPT,XTERM);
5775 LOP(OP_ATAN2,XTERM);
5781 LOP(OP_BINMODE,XTERM);
5784 LOP(OP_BLESS,XTERM);
5793 /* When 'use switch' is in effect, continue has a dual
5794 life as a control operator. */
5796 if (!FEATURE_IS_ENABLED("switch"))
5799 /* We have to disambiguate the two senses of
5800 "continue". If the next token is a '{' then
5801 treat it as the start of a continue block;
5802 otherwise treat it as a control operator.
5814 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5831 if (!PL_cryptseen) {
5832 PL_cryptseen = TRUE;
5836 LOP(OP_CRYPT,XTERM);
5839 LOP(OP_CHMOD,XTERM);
5842 LOP(OP_CHOWN,XTERM);
5845 LOP(OP_CONNECT,XTERM);
5864 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5865 if (orig_keyword == KEY_do) {
5874 PL_hints |= HINT_BLOCK_SCOPE;
5884 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5885 LOP(OP_DBMOPEN,XTERM);
5891 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5898 yylval.ival = CopLINE(PL_curcop);
5914 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5915 UNIBRACK(OP_ENTEREVAL);
5933 case KEY_endhostent:
5939 case KEY_endservent:
5942 case KEY_endprotoent:
5953 yylval.ival = CopLINE(PL_curcop);
5955 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5958 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5961 if ((PL_bufend - p) >= 3 &&
5962 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5964 else if ((PL_bufend - p) >= 4 &&
5965 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5968 if (isIDFIRST_lazy_if(p,UTF)) {
5969 p = scan_ident(p, PL_bufend,
5970 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5974 Perl_croak(aTHX_ "Missing $ on loop variable");
5976 s = SvPVX(PL_linestr) + soff;
5982 LOP(OP_FORMLINE,XTERM);
5988 LOP(OP_FCNTL,XTERM);
5994 LOP(OP_FLOCK,XTERM);
6003 LOP(OP_GREPSTART, XREF);
6006 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6021 case KEY_getpriority:
6022 LOP(OP_GETPRIORITY,XTERM);
6024 case KEY_getprotobyname:
6027 case KEY_getprotobynumber:
6028 LOP(OP_GPBYNUMBER,XTERM);
6030 case KEY_getprotoent:
6042 case KEY_getpeername:
6043 UNI(OP_GETPEERNAME);
6045 case KEY_gethostbyname:
6048 case KEY_gethostbyaddr:
6049 LOP(OP_GHBYADDR,XTERM);
6051 case KEY_gethostent:
6054 case KEY_getnetbyname:
6057 case KEY_getnetbyaddr:
6058 LOP(OP_GNBYADDR,XTERM);
6063 case KEY_getservbyname:
6064 LOP(OP_GSBYNAME,XTERM);
6066 case KEY_getservbyport:
6067 LOP(OP_GSBYPORT,XTERM);
6069 case KEY_getservent:
6072 case KEY_getsockname:
6073 UNI(OP_GETSOCKNAME);
6075 case KEY_getsockopt:
6076 LOP(OP_GSOCKOPT,XTERM);
6091 yylval.ival = CopLINE(PL_curcop);
6102 yylval.ival = CopLINE(PL_curcop);
6106 LOP(OP_INDEX,XTERM);
6112 LOP(OP_IOCTL,XTERM);
6124 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6156 LOP(OP_LISTEN,XTERM);
6165 s = scan_pat(s,OP_MATCH);
6166 TERM(sublex_start());
6169 LOP(OP_MAPSTART, XREF);
6172 LOP(OP_MKDIR,XTERM);
6175 LOP(OP_MSGCTL,XTERM);
6178 LOP(OP_MSGGET,XTERM);
6181 LOP(OP_MSGRCV,XTERM);
6184 LOP(OP_MSGSND,XTERM);
6189 PL_in_my = (U16)tmp;
6191 if (isIDFIRST_lazy_if(s,UTF)) {
6195 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6196 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6198 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6199 if (!PL_in_my_stash) {
6202 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6206 if (PL_madskills) { /* just add type to declarator token */
6207 sv_catsv(PL_thistoken, PL_nextwhite);
6209 sv_catpvn(PL_thistoken, start, s - start);
6217 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6224 s = tokenize_use(0, s);
6228 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6235 if (isIDFIRST_lazy_if(s,UTF)) {
6237 for (d = s; isALNUM_lazy_if(d,UTF);)
6239 for (t=d; isSPACE(*t);)
6241 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6243 && !(t[0] == '=' && t[1] == '>')
6245 int parms_len = (int)(d-s);
6246 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6247 "Precedence problem: open %.*s should be open(%.*s)",
6248 parms_len, s, parms_len, s);
6254 yylval.ival = OP_OR;
6264 LOP(OP_OPEN_DIR,XTERM);
6267 checkcomma(s,PL_tokenbuf,"filehandle");
6271 checkcomma(s,PL_tokenbuf,"filehandle");
6290 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6294 LOP(OP_PIPE_OP,XTERM);
6297 s = scan_str(s,!!PL_madskills,FALSE);
6300 yylval.ival = OP_CONST;
6301 TERM(sublex_start());
6307 s = scan_str(s,!!PL_madskills,FALSE);
6310 PL_expect = XOPERATOR;
6312 if (SvCUR(PL_lex_stuff)) {
6315 d = SvPV_force(PL_lex_stuff, len);
6317 for (; isSPACE(*d) && len; --len, ++d)
6322 if (!warned && ckWARN(WARN_QW)) {
6323 for (; !isSPACE(*d) && len; --len, ++d) {
6325 Perl_warner(aTHX_ packWARN(WARN_QW),
6326 "Possible attempt to separate words with commas");
6329 else if (*d == '#') {
6330 Perl_warner(aTHX_ packWARN(WARN_QW),
6331 "Possible attempt to put comments in qw() list");
6337 for (; !isSPACE(*d) && len; --len, ++d)
6340 sv = newSVpvn(b, d-b);
6341 if (DO_UTF8(PL_lex_stuff))
6343 words = append_elem(OP_LIST, words,
6344 newSVOP(OP_CONST, 0, tokeq(sv)));
6348 start_force(PL_curforce);
6349 NEXTVAL_NEXTTOKE.opval = words;
6354 SvREFCNT_dec(PL_lex_stuff);
6355 PL_lex_stuff = NULL;
6361 s = scan_str(s,!!PL_madskills,FALSE);
6364 yylval.ival = OP_STRINGIFY;
6365 if (SvIVX(PL_lex_stuff) == '\'')
6366 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6367 TERM(sublex_start());
6370 s = scan_pat(s,OP_QR);
6371 TERM(sublex_start());
6374 s = scan_str(s,!!PL_madskills,FALSE);
6377 readpipe_override();
6378 TERM(sublex_start());
6386 s = force_version(s, FALSE);
6388 else if (*s != 'v' || !isDIGIT(s[1])
6389 || (s = force_version(s, TRUE), *s == 'v'))
6391 *PL_tokenbuf = '\0';
6392 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6393 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6394 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6396 yyerror("<> should be quotes");
6398 if (orig_keyword == KEY_require) {
6406 PL_last_uni = PL_oldbufptr;
6407 PL_last_lop_op = OP_REQUIRE;
6409 return REPORT( (int)REQUIRE );
6415 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6419 LOP(OP_RENAME,XTERM);
6428 LOP(OP_RINDEX,XTERM);
6438 UNIDOR(OP_READLINE);
6442 UNIDOR(OP_BACKTICK);
6451 LOP(OP_REVERSE,XTERM);
6454 UNIDOR(OP_READLINK);
6462 TERM(sublex_start());
6464 TOKEN(1); /* force error */
6467 checkcomma(s,PL_tokenbuf,"filehandle");
6477 LOP(OP_SELECT,XTERM);
6483 LOP(OP_SEMCTL,XTERM);
6486 LOP(OP_SEMGET,XTERM);
6489 LOP(OP_SEMOP,XTERM);
6495 LOP(OP_SETPGRP,XTERM);
6497 case KEY_setpriority:
6498 LOP(OP_SETPRIORITY,XTERM);
6500 case KEY_sethostent:
6506 case KEY_setservent:
6509 case KEY_setprotoent:
6519 LOP(OP_SEEKDIR,XTERM);
6521 case KEY_setsockopt:
6522 LOP(OP_SSOCKOPT,XTERM);
6528 LOP(OP_SHMCTL,XTERM);
6531 LOP(OP_SHMGET,XTERM);
6534 LOP(OP_SHMREAD,XTERM);
6537 LOP(OP_SHMWRITE,XTERM);
6540 LOP(OP_SHUTDOWN,XTERM);
6549 LOP(OP_SOCKET,XTERM);
6551 case KEY_socketpair:
6552 LOP(OP_SOCKPAIR,XTERM);
6555 checkcomma(s,PL_tokenbuf,"subroutine name");
6557 if (*s == ';' || *s == ')') /* probably a close */
6558 Perl_croak(aTHX_ "sort is now a reserved word");
6560 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6564 LOP(OP_SPLIT,XTERM);
6567 LOP(OP_SPRINTF,XTERM);
6570 LOP(OP_SPLICE,XTERM);
6585 LOP(OP_SUBSTR,XTERM);
6591 char tmpbuf[sizeof PL_tokenbuf];
6592 SSize_t tboffset = 0;
6593 expectation attrful;
6594 bool have_name, have_proto;
6595 const int key = tmp;
6600 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6601 SV *subtoken = newSVpvn(tstart, s - tstart);
6605 s = SKIPSPACE2(s,tmpwhite);
6610 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6611 (*s == ':' && s[1] == ':'))
6618 attrful = XATTRBLOCK;
6619 /* remember buffer pos'n for later force_word */
6620 tboffset = s - PL_oldbufptr;
6621 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6624 nametoke = newSVpvn(s, d - s);
6626 if (memchr(tmpbuf, ':', len))
6627 sv_setpvn(PL_subname, tmpbuf, len);
6629 sv_setsv(PL_subname,PL_curstname);
6630 sv_catpvs(PL_subname,"::");
6631 sv_catpvn(PL_subname,tmpbuf,len);
6638 CURMAD('X', nametoke);
6639 CURMAD('_', tmpwhite);
6640 (void) force_word(PL_oldbufptr + tboffset, WORD,
6643 s = SKIPSPACE2(d,tmpwhite);
6650 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6651 PL_expect = XTERMBLOCK;
6652 attrful = XATTRTERM;
6653 sv_setpvn(PL_subname,"?",1);
6657 if (key == KEY_format) {
6659 PL_lex_formbrack = PL_lex_brackets + 1;
6661 PL_thistoken = subtoken;
6665 (void) force_word(PL_oldbufptr + tboffset, WORD,
6671 /* Look for a prototype */
6674 bool bad_proto = FALSE;
6675 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6677 s = scan_str(s,!!PL_madskills,FALSE);
6679 Perl_croak(aTHX_ "Prototype not terminated");
6680 /* strip spaces and check for bad characters */
6681 d = SvPVX(PL_lex_stuff);
6683 for (p = d; *p; ++p) {
6686 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6692 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6693 "Illegal character in prototype for %"SVf" : %s",
6694 SVfARG(PL_subname), d);
6695 SvCUR_set(PL_lex_stuff, tmp);
6700 CURMAD('q', PL_thisopen);
6701 CURMAD('_', tmpwhite);
6702 CURMAD('=', PL_thisstuff);
6703 CURMAD('Q', PL_thisclose);
6704 NEXTVAL_NEXTTOKE.opval =
6705 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6706 PL_lex_stuff = Nullsv;
6709 s = SKIPSPACE2(s,tmpwhite);
6717 if (*s == ':' && s[1] != ':')
6718 PL_expect = attrful;
6719 else if (*s != '{' && key == KEY_sub) {
6721 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6723 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6730 curmad('^', newSVpvs(""));
6731 CURMAD('_', tmpwhite);
6735 PL_thistoken = subtoken;
6738 NEXTVAL_NEXTTOKE.opval =
6739 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6740 PL_lex_stuff = NULL;
6745 sv_setpv(PL_subname,
6747 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6751 (void) force_word(PL_oldbufptr + tboffset, WORD,
6761 LOP(OP_SYSTEM,XREF);
6764 LOP(OP_SYMLINK,XTERM);
6767 LOP(OP_SYSCALL,XTERM);
6770 LOP(OP_SYSOPEN,XTERM);
6773 LOP(OP_SYSSEEK,XTERM);
6776 LOP(OP_SYSREAD,XTERM);
6779 LOP(OP_SYSWRITE,XTERM);
6783 TERM(sublex_start());
6804 LOP(OP_TRUNCATE,XTERM);
6816 yylval.ival = CopLINE(PL_curcop);
6820 yylval.ival = CopLINE(PL_curcop);
6824 LOP(OP_UNLINK,XTERM);
6830 LOP(OP_UNPACK,XTERM);
6833 LOP(OP_UTIME,XTERM);
6839 LOP(OP_UNSHIFT,XTERM);
6842 s = tokenize_use(1, s);
6852 yylval.ival = CopLINE(PL_curcop);
6856 yylval.ival = CopLINE(PL_curcop);
6860 PL_hints |= HINT_BLOCK_SCOPE;
6867 LOP(OP_WAITPID,XTERM);
6876 ctl_l[0] = toCTRL('L');
6878 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6881 /* Make sure $^L is defined */
6882 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6887 if (PL_expect == XOPERATOR)
6893 yylval.ival = OP_XOR;
6898 TERM(sublex_start());
6903 #pragma segment Main
6907 S_pending_ident(pTHX)
6912 /* pit holds the identifier we read and pending_ident is reset */
6913 char pit = PL_pending_ident;
6914 PL_pending_ident = 0;
6916 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6917 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6918 "### Pending identifier '%s'\n", PL_tokenbuf); });
6920 /* if we're in a my(), we can't allow dynamics here.
6921 $foo'bar has already been turned into $foo::bar, so
6922 just check for colons.
6924 if it's a legal name, the OP is a PADANY.
6927 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6928 if (strchr(PL_tokenbuf,':'))
6929 yyerror(Perl_form(aTHX_ "No package name allowed for "
6930 "variable %s in \"our\"",
6932 tmp = allocmy(PL_tokenbuf);
6935 if (strchr(PL_tokenbuf,':'))
6936 yyerror(Perl_form(aTHX_ PL_no_myglob,
6937 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6939 yylval.opval = newOP(OP_PADANY, 0);
6940 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6946 build the ops for accesses to a my() variable.
6948 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6949 then used in a comparison. This catches most, but not
6950 all cases. For instance, it catches
6951 sort { my($a); $a <=> $b }
6953 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6954 (although why you'd do that is anyone's guess).
6957 if (!strchr(PL_tokenbuf,':')) {
6959 tmp = pad_findmy(PL_tokenbuf);
6960 if (tmp != NOT_IN_PAD) {
6961 /* might be an "our" variable" */
6962 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6963 /* build ops for a bareword */
6964 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6965 HEK * const stashname = HvNAME_HEK(stash);
6966 SV * const sym = newSVhek(stashname);
6967 sv_catpvs(sym, "::");
6968 sv_catpv(sym, PL_tokenbuf+1);
6969 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6970 yylval.opval->op_private = OPpCONST_ENTERED;
6973 ? (GV_ADDMULTI | GV_ADDINEVAL)
6976 ((PL_tokenbuf[0] == '$') ? SVt_PV
6977 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6982 /* if it's a sort block and they're naming $a or $b */
6983 if (PL_last_lop_op == OP_SORT &&
6984 PL_tokenbuf[0] == '$' &&
6985 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6988 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6989 d < PL_bufend && *d != '\n';
6992 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6993 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6999 yylval.opval = newOP(OP_PADANY, 0);
7000 yylval.opval->op_targ = tmp;
7006 Whine if they've said @foo in a doublequoted string,
7007 and @foo isn't a variable we can find in the symbol
7010 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7011 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
7012 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7013 && ckWARN(WARN_AMBIGUOUS)
7014 /* DO NOT warn for @- and @+ */
7015 && !( PL_tokenbuf[2] == '\0' &&
7016 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7019 /* Downgraded from fatal to warning 20000522 mjd */
7020 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7021 "Possible unintended interpolation of %s in string",
7026 /* build ops for a bareword */
7027 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7028 yylval.opval->op_private = OPpCONST_ENTERED;
7031 /* If the identifier refers to a stash, don't autovivify it.
7032 * Change 24660 had the side effect of causing symbol table
7033 * hashes to always be defined, even if they were freshly
7034 * created and the only reference in the entire program was
7035 * the single statement with the defined %foo::bar:: test.
7036 * It appears that all code in the wild doing this actually
7037 * wants to know whether sub-packages have been loaded, so
7038 * by avoiding auto-vivifying symbol tables, we ensure that
7039 * defined %foo::bar:: continues to be false, and the existing
7040 * tests still give the expected answers, even though what
7041 * they're actually testing has now changed subtly.
7043 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7045 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7046 ((PL_tokenbuf[0] == '$') ? SVt_PV
7047 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7053 * The following code was generated by perl_keyword.pl.
7057 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7062 case 1: /* 5 tokens of length 1 */
7094 case 2: /* 18 tokens of length 2 */
7240 case 3: /* 29 tokens of length 3 */
7244 if (name[1] == 'N' &&
7307 if (name[1] == 'i' &&
7329 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7347 if (name[1] == 'o' &&
7356 if (name[1] == 'e' &&
7365 if (name[1] == 'n' &&
7374 if (name[1] == 'o' &&
7383 if (name[1] == 'a' &&
7392 if (name[1] == 'o' &&
7454 if (name[1] == 'e' &&
7468 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7494 if (name[1] == 'i' &&
7503 if (name[1] == 's' &&
7512 if (name[1] == 'e' &&
7521 if (name[1] == 'o' &&
7533 case 4: /* 41 tokens of length 4 */
7537 if (name[1] == 'O' &&
7547 if (name[1] == 'N' &&
7557 if (name[1] == 'i' &&
7567 if (name[1] == 'h' &&
7577 if (name[1] == 'u' &&
7590 if (name[2] == 'c' &&
7599 if (name[2] == 's' &&
7608 if (name[2] == 'a' &&
7644 if (name[1] == 'o' &&
7657 if (name[2] == 't' &&
7666 if (name[2] == 'o' &&
7675 if (name[2] == 't' &&
7684 if (name[2] == 'e' &&
7697 if (name[1] == 'o' &&
7710 if (name[2] == 'y' &&
7719 if (name[2] == 'l' &&
7735 if (name[2] == 's' &&
7744 if (name[2] == 'n' &&
7753 if (name[2] == 'c' &&
7766 if (name[1] == 'e' &&
7776 if (name[1] == 'p' &&
7789 if (name[2] == 'c' &&
7798 if (name[2] == 'p' &&
7807 if (name[2] == 's' &&
7823 if (name[2] == 'n' &&
7893 if (name[2] == 'r' &&
7902 if (name[2] == 'r' &&
7911 if (name[2] == 'a' &&
7927 if (name[2] == 'l' &&
7989 if (name[2] == 'e' &&
7992 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8005 case 5: /* 39 tokens of length 5 */
8009 if (name[1] == 'E' &&
8020 if (name[1] == 'H' &&
8034 if (name[2] == 'a' &&
8044 if (name[2] == 'a' &&
8061 if (name[2] == 'e' &&
8071 if (name[2] == 'e' &&
8075 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8091 if (name[3] == 'i' &&
8100 if (name[3] == 'o' &&
8136 if (name[2] == 'o' &&
8146 if (name[2] == 'y' &&
8160 if (name[1] == 'l' &&
8174 if (name[2] == 'n' &&
8184 if (name[2] == 'o' &&
8198 if (name[1] == 'i' &&
8203 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8212 if (name[2] == 'd' &&
8222 if (name[2] == 'c' &&
8239 if (name[2] == 'c' &&
8249 if (name[2] == 't' &&
8263 if (name[1] == 'k' &&
8274 if (name[1] == 'r' &&
8288 if (name[2] == 's' &&
8298 if (name[2] == 'd' &&
8315 if (name[2] == 'm' &&
8325 if (name[2] == 'i' &&
8335 if (name[2] == 'e' &&
8345 if (name[2] == 'l' &&
8355 if (name[2] == 'a' &&
8368 if (name[3] == 't' &&
8371 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8377 if (name[3] == 'd' &&
8394 if (name[1] == 'i' &&
8408 if (name[2] == 'a' &&
8421 if (name[3] == 'e' &&
8456 if (name[2] == 'i' &&
8473 if (name[2] == 'i' &&
8483 if (name[2] == 'i' &&
8500 case 6: /* 33 tokens of length 6 */
8504 if (name[1] == 'c' &&
8519 if (name[2] == 'l' &&
8530 if (name[2] == 'r' &&
8545 if (name[1] == 'e' &&
8560 if (name[2] == 's' &&
8565 if(ckWARN_d(WARN_SYNTAX))
8566 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8572 if (name[2] == 'i' &&
8590 if (name[2] == 'l' &&
8601 if (name[2] == 'r' &&
8616 if (name[1] == 'm' &&
8631 if (name[2] == 'n' &&
8642 if (name[2] == 's' &&
8657 if (name[1] == 's' &&
8663 if (name[4] == 't' &&
8672 if (name[4] == 'e' &&
8681 if (name[4] == 'c' &&
8690 if (name[4] == 'n' &&
8706 if (name[1] == 'r' &&
8724 if (name[3] == 'a' &&
8734 if (name[3] == 'u' &&
8748 if (name[2] == 'n' &&
8766 if (name[2] == 'a' &&
8780 if (name[3] == 'e' &&
8793 if (name[4] == 't' &&
8802 if (name[4] == 'e' &&
8824 if (name[4] == 't' &&
8833 if (name[4] == 'e' &&
8849 if (name[2] == 'c' &&
8860 if (name[2] == 'l' &&
8871 if (name[2] == 'b' &&
8882 if (name[2] == 's' &&
8905 if (name[4] == 's' &&
8914 if (name[4] == 'n' &&
8927 if (name[3] == 'a' &&
8944 if (name[1] == 'a' &&
8959 case 7: /* 29 tokens of length 7 */
8963 if (name[1] == 'E' &&
8976 if (name[1] == '_' &&
8989 if (name[1] == 'i' &&
8996 return -KEY_binmode;
9002 if (name[1] == 'o' &&
9009 return -KEY_connect;
9018 if (name[2] == 'm' &&
9024 return -KEY_dbmopen;
9035 if (name[4] == 'u' &&
9039 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9045 if (name[4] == 'n' &&
9066 if (name[1] == 'o' &&
9079 if (name[1] == 'e' &&
9086 if (name[5] == 'r' &&
9089 return -KEY_getpgrp;
9095 if (name[5] == 'i' &&
9098 return -KEY_getppid;
9111 if (name[1] == 'c' &&
9118 return -KEY_lcfirst;
9124 if (name[1] == 'p' &&
9131 return -KEY_opendir;
9137 if (name[1] == 'a' &&
9155 if (name[3] == 'd' &&
9160 return -KEY_readdir;
9166 if (name[3] == 'u' &&
9177 if (name[3] == 'e' &&
9182 return -KEY_reverse;
9201 if (name[3] == 'k' &&
9206 return -KEY_seekdir;
9212 if (name[3] == 'p' &&
9217 return -KEY_setpgrp;
9227 if (name[2] == 'm' &&
9233 return -KEY_shmread;
9239 if (name[2] == 'r' &&
9245 return -KEY_sprintf;
9254 if (name[3] == 'l' &&
9259 return -KEY_symlink;
9268 if (name[4] == 'a' &&
9272 return -KEY_syscall;
9278 if (name[4] == 'p' &&
9282 return -KEY_sysopen;
9288 if (name[4] == 'e' &&
9292 return -KEY_sysread;
9298 if (name[4] == 'e' &&
9302 return -KEY_sysseek;
9320 if (name[1] == 'e' &&
9327 return -KEY_telldir;
9336 if (name[2] == 'f' &&
9342 return -KEY_ucfirst;
9348 if (name[2] == 's' &&
9354 return -KEY_unshift;
9364 if (name[1] == 'a' &&
9371 return -KEY_waitpid;
9380 case 8: /* 26 tokens of length 8 */
9384 if (name[1] == 'U' &&
9392 return KEY_AUTOLOAD;
9403 if (name[3] == 'A' &&
9409 return KEY___DATA__;
9415 if (name[3] == 'I' &&
9421 return -KEY___FILE__;
9427 if (name[3] == 'I' &&
9433 return -KEY___LINE__;
9449 if (name[2] == 'o' &&
9456 return -KEY_closedir;
9462 if (name[2] == 'n' &&
9469 return -KEY_continue;
9479 if (name[1] == 'b' &&
9487 return -KEY_dbmclose;
9493 if (name[1] == 'n' &&
9499 if (name[4] == 'r' &&
9504 return -KEY_endgrent;
9510 if (name[4] == 'w' &&
9515 return -KEY_endpwent;
9528 if (name[1] == 'o' &&
9536 return -KEY_formline;
9542 if (name[1] == 'e' &&
9553 if (name[6] == 'n' &&
9556 return -KEY_getgrent;
9562 if (name[6] == 'i' &&
9565 return -KEY_getgrgid;
9571 if (name[6] == 'a' &&
9574 return -KEY_getgrnam;
9587 if (name[4] == 'o' &&
9592 return -KEY_getlogin;
9603 if (name[6] == 'n' &&
9606 return -KEY_getpwent;
9612 if (name[6] == 'a' &&
9615 return -KEY_getpwnam;
9621 if (name[6] == 'i' &&
9624 return -KEY_getpwuid;
9644 if (name[1] == 'e' &&
9651 if (name[5] == 'i' &&
9658 return -KEY_readline;
9663 return -KEY_readlink;
9674 if (name[5] == 'i' &&
9678 return -KEY_readpipe;
9699 if (name[4] == 'r' &&
9704 return -KEY_setgrent;
9710 if (name[4] == 'w' &&
9715 return -KEY_setpwent;
9731 if (name[3] == 'w' &&
9737 return -KEY_shmwrite;
9743 if (name[3] == 't' &&
9749 return -KEY_shutdown;
9759 if (name[2] == 's' &&
9766 return -KEY_syswrite;
9776 if (name[1] == 'r' &&
9784 return -KEY_truncate;
9793 case 9: /* 9 tokens of length 9 */
9797 if (name[1] == 'N' &&
9806 return KEY_UNITCHECK;
9812 if (name[1] == 'n' &&
9821 return -KEY_endnetent;
9827 if (name[1] == 'e' &&
9836 return -KEY_getnetent;
9842 if (name[1] == 'o' &&
9851 return -KEY_localtime;
9857 if (name[1] == 'r' &&
9866 return KEY_prototype;
9872 if (name[1] == 'u' &&
9881 return -KEY_quotemeta;
9887 if (name[1] == 'e' &&
9896 return -KEY_rewinddir;
9902 if (name[1] == 'e' &&
9911 return -KEY_setnetent;
9917 if (name[1] == 'a' &&
9926 return -KEY_wantarray;
9935 case 10: /* 9 tokens of length 10 */
9939 if (name[1] == 'n' &&
9945 if (name[4] == 'o' &&
9952 return -KEY_endhostent;
9958 if (name[4] == 'e' &&
9965 return -KEY_endservent;
9978 if (name[1] == 'e' &&
9984 if (name[4] == 'o' &&
9991 return -KEY_gethostent;
10000 if (name[5] == 'r' &&
10006 return -KEY_getservent;
10012 if (name[5] == 'c' &&
10018 return -KEY_getsockopt;
10038 if (name[2] == 't')
10043 if (name[4] == 'o' &&
10050 return -KEY_sethostent;
10059 if (name[5] == 'r' &&
10065 return -KEY_setservent;
10071 if (name[5] == 'c' &&
10077 return -KEY_setsockopt;
10094 if (name[2] == 'c' &&
10103 return -KEY_socketpair;
10116 case 11: /* 8 tokens of length 11 */
10120 if (name[1] == '_' &&
10130 { /* __PACKAGE__ */
10131 return -KEY___PACKAGE__;
10137 if (name[1] == 'n' &&
10147 { /* endprotoent */
10148 return -KEY_endprotoent;
10154 if (name[1] == 'e' &&
10163 if (name[5] == 'e' &&
10169 { /* getpeername */
10170 return -KEY_getpeername;
10179 if (name[6] == 'o' &&
10184 { /* getpriority */
10185 return -KEY_getpriority;
10191 if (name[6] == 't' &&
10196 { /* getprotoent */
10197 return -KEY_getprotoent;
10211 if (name[4] == 'o' &&
10218 { /* getsockname */
10219 return -KEY_getsockname;
10232 if (name[1] == 'e' &&
10240 if (name[6] == 'o' &&
10245 { /* setpriority */
10246 return -KEY_setpriority;
10252 if (name[6] == 't' &&
10257 { /* setprotoent */
10258 return -KEY_setprotoent;
10274 case 12: /* 2 tokens of length 12 */
10275 if (name[0] == 'g' &&
10287 if (name[9] == 'd' &&
10290 { /* getnetbyaddr */
10291 return -KEY_getnetbyaddr;
10297 if (name[9] == 'a' &&
10300 { /* getnetbyname */
10301 return -KEY_getnetbyname;
10313 case 13: /* 4 tokens of length 13 */
10314 if (name[0] == 'g' &&
10321 if (name[4] == 'o' &&
10330 if (name[10] == 'd' &&
10333 { /* gethostbyaddr */
10334 return -KEY_gethostbyaddr;
10340 if (name[10] == 'a' &&
10343 { /* gethostbyname */
10344 return -KEY_gethostbyname;
10357 if (name[4] == 'e' &&
10366 if (name[10] == 'a' &&
10369 { /* getservbyname */
10370 return -KEY_getservbyname;
10376 if (name[10] == 'o' &&
10379 { /* getservbyport */
10380 return -KEY_getservbyport;
10399 case 14: /* 1 tokens of length 14 */
10400 if (name[0] == 'g' &&
10414 { /* getprotobyname */
10415 return -KEY_getprotobyname;
10420 case 16: /* 1 tokens of length 16 */
10421 if (name[0] == 'g' &&
10437 { /* getprotobynumber */
10438 return -KEY_getprotobynumber;
10452 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10456 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10457 if (ckWARN(WARN_SYNTAX)) {
10460 for (w = s+2; *w && level; w++) {
10463 else if (*w == ')')
10466 while (isSPACE(*w))
10468 /* the list of chars below is for end of statements or
10469 * block / parens, boolean operators (&&, ||, //) and branch
10470 * constructs (or, and, if, until, unless, while, err, for).
10471 * Not a very solid hack... */
10472 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10473 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10474 "%s (...) interpreted as function",name);
10477 while (s < PL_bufend && isSPACE(*s))
10481 while (s < PL_bufend && isSPACE(*s))
10483 if (isIDFIRST_lazy_if(s,UTF)) {
10484 const char * const w = s++;
10485 while (isALNUM_lazy_if(s,UTF))
10487 while (s < PL_bufend && isSPACE(*s))
10491 if (keyword(w, s - w, 0))
10494 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10495 if (gv && GvCVu(gv))
10497 Perl_croak(aTHX_ "No comma allowed after %s", what);
10502 /* Either returns sv, or mortalizes sv and returns a new SV*.
10503 Best used as sv=new_constant(..., sv, ...).
10504 If s, pv are NULL, calls subroutine with one argument,
10505 and type is used with error messages only. */
10508 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10512 HV * const table = GvHV(PL_hintgv); /* ^H */
10516 const char *why1 = "", *why2 = "", *why3 = "";
10518 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10521 why2 = (const char *)
10522 (strEQ(key,"charnames")
10523 ? "(possibly a missing \"use charnames ...\")"
10525 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10526 (type ? type: "undef"), why2);
10528 /* This is convoluted and evil ("goto considered harmful")
10529 * but I do not understand the intricacies of all the different
10530 * failure modes of %^H in here. The goal here is to make
10531 * the most probable error message user-friendly. --jhi */
10536 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10537 (type ? type: "undef"), why1, why2, why3);
10539 yyerror(SvPVX_const(msg));
10543 cvp = hv_fetch(table, key, strlen(key), FALSE);
10544 if (!cvp || !SvOK(*cvp)) {
10547 why3 = "} is not defined";
10550 sv_2mortal(sv); /* Parent created it permanently */
10553 pv = sv_2mortal(newSVpvn(s, len));
10555 typesv = sv_2mortal(newSVpv(type, 0));
10557 typesv = &PL_sv_undef;
10559 PUSHSTACKi(PERLSI_OVERLOAD);
10571 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10575 /* Check the eval first */
10576 if (!PL_in_eval && SvTRUE(ERRSV)) {
10577 sv_catpvs(ERRSV, "Propagated");
10578 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10580 res = SvREFCNT_inc_simple(sv);
10584 SvREFCNT_inc_simple_void(res);
10593 why1 = "Call to &{$^H{";
10595 why3 = "}} did not return a defined value";
10603 /* Returns a NUL terminated string, with the length of the string written to
10607 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10610 register char *d = dest;
10611 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10614 Perl_croak(aTHX_ ident_too_long);
10615 if (isALNUM(*s)) /* UTF handled below */
10617 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10622 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10626 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10627 char *t = s + UTF8SKIP(s);
10629 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10633 Perl_croak(aTHX_ ident_too_long);
10634 Copy(s, d, len, char);
10647 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10650 char *bracket = NULL;
10652 register char *d = dest;
10653 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10658 while (isDIGIT(*s)) {
10660 Perl_croak(aTHX_ ident_too_long);
10667 Perl_croak(aTHX_ ident_too_long);
10668 if (isALNUM(*s)) /* UTF handled below */
10670 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10675 else if (*s == ':' && s[1] == ':') {
10679 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10680 char *t = s + UTF8SKIP(s);
10681 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10683 if (d + (t - s) > e)
10684 Perl_croak(aTHX_ ident_too_long);
10685 Copy(s, d, t - s, char);
10696 if (PL_lex_state != LEX_NORMAL)
10697 PL_lex_state = LEX_INTERPENDMAYBE;
10700 if (*s == '$' && s[1] &&
10701 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10714 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10719 if (isSPACE(s[-1])) {
10721 const char ch = *s++;
10722 if (!SPACE_OR_TAB(ch)) {
10728 if (isIDFIRST_lazy_if(d,UTF)) {
10732 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10733 end += UTF8SKIP(end);
10734 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10735 end += UTF8SKIP(end);
10737 Copy(s, d, end - s, char);
10742 while ((isALNUM(*s) || *s == ':') && d < e)
10745 Perl_croak(aTHX_ ident_too_long);
10748 while (s < send && SPACE_OR_TAB(*s))
10750 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10751 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10752 const char * const brack =
10754 ((*s == '[') ? "[...]" : "{...}");
10755 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10756 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10757 funny, dest, brack, funny, dest, brack);
10760 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10764 /* Handle extended ${^Foo} variables
10765 * 1999-02-27 mjd-perl-patch@plover.com */
10766 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10770 while (isALNUM(*s) && d < e) {
10774 Perl_croak(aTHX_ ident_too_long);
10779 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10780 PL_lex_state = LEX_INTERPEND;
10783 if (PL_lex_state == LEX_NORMAL) {
10784 if (ckWARN(WARN_AMBIGUOUS) &&
10785 (keyword(dest, d - dest, 0)
10786 || get_cvn_flags(dest, d - dest, 0)))
10790 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10791 "Ambiguous use of %c{%s} resolved to %c%s",
10792 funny, dest, funny, dest);
10797 s = bracket; /* let the parser handle it */
10801 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10802 PL_lex_state = LEX_INTERPEND;
10807 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10809 PERL_UNUSED_CONTEXT;
10813 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10814 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10815 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10816 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10817 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10823 S_scan_pat(pTHX_ char *start, I32 type)
10827 char *s = scan_str(start,!!PL_madskills,FALSE);
10828 const char * const valid_flags =
10829 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10836 const char * const delimiter = skipspace(start);
10840 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10841 : "Search pattern not terminated" ));
10844 pm = (PMOP*)newPMOP(type, 0);
10845 if (PL_multi_open == '?') {
10846 /* This is the only point in the code that sets PMf_ONCE: */
10847 pm->op_pmflags |= PMf_ONCE;
10849 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10850 allows us to restrict the list needed by reset to just the ??
10852 assert(type != OP_TRANS);
10854 MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10857 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10860 elements = mg->mg_len / sizeof(PMOP**);
10861 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10862 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10863 mg->mg_len = elements * sizeof(PMOP**);
10864 PmopSTASH_set(pm,PL_curstash);
10870 while (*s && strchr(valid_flags, *s))
10871 pmflag(&pm->op_pmflags,*s++);
10873 if (PL_madskills && modstart != s) {
10874 SV* tmptoken = newSVpvn(modstart, s - modstart);
10875 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10878 /* issue a warning if /c is specified,but /g is not */
10879 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10880 && ckWARN(WARN_REGEXP))
10882 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10883 "Use of /c modifier is meaningless without /g" );
10886 PL_lex_op = (OP*)pm;
10887 yylval.ival = OP_MATCH;
10892 S_scan_subst(pTHX_ char *start)
10903 yylval.ival = OP_NULL;
10905 s = scan_str(start,!!PL_madskills,FALSE);
10908 Perl_croak(aTHX_ "Substitution pattern not terminated");
10910 if (s[-1] == PL_multi_open)
10913 if (PL_madskills) {
10914 CURMAD('q', PL_thisopen);
10915 CURMAD('_', PL_thiswhite);
10916 CURMAD('E', PL_thisstuff);
10917 CURMAD('Q', PL_thisclose);
10918 PL_realtokenstart = s - SvPVX(PL_linestr);
10922 first_start = PL_multi_start;
10923 s = scan_str(s,!!PL_madskills,FALSE);
10925 if (PL_lex_stuff) {
10926 SvREFCNT_dec(PL_lex_stuff);
10927 PL_lex_stuff = NULL;
10929 Perl_croak(aTHX_ "Substitution replacement not terminated");
10931 PL_multi_start = first_start; /* so whole substitution is taken together */
10933 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10936 if (PL_madskills) {
10937 CURMAD('z', PL_thisopen);
10938 CURMAD('R', PL_thisstuff);
10939 CURMAD('Z', PL_thisclose);
10945 if (*s == EXEC_PAT_MOD) {
10949 else if (strchr(S_PAT_MODS, *s))
10950 pmflag(&pm->op_pmflags,*s++);
10956 if (PL_madskills) {
10958 curmad('m', newSVpvn(modstart, s - modstart));
10959 append_madprops(PL_thismad, (OP*)pm, 0);
10963 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10964 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10968 SV * const repl = newSVpvs("");
10970 PL_sublex_info.super_bufptr = s;
10971 PL_sublex_info.super_bufend = PL_bufend;
10973 pm->op_pmflags |= PMf_EVAL;
10975 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10976 sv_catpvs(repl, "{");
10977 sv_catsv(repl, PL_lex_repl);
10978 if (strchr(SvPVX(PL_lex_repl), '#'))
10979 sv_catpvs(repl, "\n");
10980 sv_catpvs(repl, "}");
10982 SvREFCNT_dec(PL_lex_repl);
10983 PL_lex_repl = repl;
10986 PL_lex_op = (OP*)pm;
10987 yylval.ival = OP_SUBST;
10992 S_scan_trans(pTHX_ char *start)
11005 yylval.ival = OP_NULL;
11007 s = scan_str(start,!!PL_madskills,FALSE);
11009 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11011 if (s[-1] == PL_multi_open)
11014 if (PL_madskills) {
11015 CURMAD('q', PL_thisopen);
11016 CURMAD('_', PL_thiswhite);
11017 CURMAD('E', PL_thisstuff);
11018 CURMAD('Q', PL_thisclose);
11019 PL_realtokenstart = s - SvPVX(PL_linestr);
11023 s = scan_str(s,!!PL_madskills,FALSE);
11025 if (PL_lex_stuff) {
11026 SvREFCNT_dec(PL_lex_stuff);
11027 PL_lex_stuff = NULL;
11029 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11031 if (PL_madskills) {
11032 CURMAD('z', PL_thisopen);
11033 CURMAD('R', PL_thisstuff);
11034 CURMAD('Z', PL_thisclose);
11037 complement = del = squash = 0;
11044 complement = OPpTRANS_COMPLEMENT;
11047 del = OPpTRANS_DELETE;
11050 squash = OPpTRANS_SQUASH;
11059 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11060 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11061 o->op_private &= ~OPpTRANS_ALL;
11062 o->op_private |= del|squash|complement|
11063 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11064 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11067 yylval.ival = OP_TRANS;
11070 if (PL_madskills) {
11072 curmad('m', newSVpvn(modstart, s - modstart));
11073 append_madprops(PL_thismad, o, 0);
11082 S_scan_heredoc(pTHX_ register char *s)
11086 I32 op_type = OP_SCALAR;
11090 const char *found_newline;
11094 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11096 I32 stuffstart = s - SvPVX(PL_linestr);
11099 PL_realtokenstart = -1;
11104 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11108 while (SPACE_OR_TAB(*peek))
11110 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11113 s = delimcpy(d, e, s, PL_bufend, term, &len);
11123 if (!isALNUM_lazy_if(s,UTF))
11124 deprecate_old("bare << to mean <<\"\"");
11125 for (; isALNUM_lazy_if(s,UTF); s++) {
11130 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11131 Perl_croak(aTHX_ "Delimiter for here document is too long");
11134 len = d - PL_tokenbuf;
11137 if (PL_madskills) {
11138 tstart = PL_tokenbuf + !outer;
11139 PL_thisclose = newSVpvn(tstart, len - !outer);
11140 tstart = SvPVX(PL_linestr) + stuffstart;
11141 PL_thisopen = newSVpvn(tstart, s - tstart);
11142 stuffstart = s - SvPVX(PL_linestr);
11145 #ifndef PERL_STRICT_CR
11146 d = strchr(s, '\r');
11148 char * const olds = s;
11150 while (s < PL_bufend) {
11156 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11165 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11172 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11173 herewas = newSVpvn(s,PL_bufend-s);
11177 herewas = newSVpvn(s-1,found_newline-s+1);
11180 herewas = newSVpvn(s,found_newline-s);
11184 if (PL_madskills) {
11185 tstart = SvPVX(PL_linestr) + stuffstart;
11187 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11189 PL_thisstuff = newSVpvn(tstart, s - tstart);
11192 s += SvCUR(herewas);
11195 stuffstart = s - SvPVX(PL_linestr);
11201 tmpstr = newSV_type(SVt_PVIV);
11202 SvGROW(tmpstr, 80);
11203 if (term == '\'') {
11204 op_type = OP_CONST;
11205 SvIV_set(tmpstr, -1);
11207 else if (term == '`') {
11208 op_type = OP_BACKTICK;
11209 SvIV_set(tmpstr, '\\');
11213 PL_multi_start = CopLINE(PL_curcop);
11214 PL_multi_open = PL_multi_close = '<';
11215 term = *PL_tokenbuf;
11216 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11217 char * const bufptr = PL_sublex_info.super_bufptr;
11218 char * const bufend = PL_sublex_info.super_bufend;
11219 char * const olds = s - SvCUR(herewas);
11220 s = strchr(bufptr, '\n');
11224 while (s < bufend &&
11225 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11227 CopLINE_inc(PL_curcop);
11230 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11231 missingterm(PL_tokenbuf);
11233 sv_setpvn(herewas,bufptr,d-bufptr+1);
11234 sv_setpvn(tmpstr,d+1,s-d);
11236 sv_catpvn(herewas,s,bufend-s);
11237 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11244 while (s < PL_bufend &&
11245 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11247 CopLINE_inc(PL_curcop);
11249 if (s >= PL_bufend) {
11250 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11251 missingterm(PL_tokenbuf);
11253 sv_setpvn(tmpstr,d+1,s-d);
11255 if (PL_madskills) {
11257 sv_catpvn(PL_thisstuff, d + 1, s - d);
11259 PL_thisstuff = newSVpvn(d + 1, s - d);
11260 stuffstart = s - SvPVX(PL_linestr);
11264 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11266 sv_catpvn(herewas,s,PL_bufend-s);
11267 sv_setsv(PL_linestr,herewas);
11268 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11269 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11270 PL_last_lop = PL_last_uni = NULL;
11273 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11274 while (s >= PL_bufend) { /* multiple line string? */
11276 if (PL_madskills) {
11277 tstart = SvPVX(PL_linestr) + stuffstart;
11279 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11281 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11285 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11286 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11287 missingterm(PL_tokenbuf);
11290 stuffstart = s - SvPVX(PL_linestr);
11292 CopLINE_inc(PL_curcop);
11293 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11294 PL_last_lop = PL_last_uni = NULL;
11295 #ifndef PERL_STRICT_CR
11296 if (PL_bufend - PL_linestart >= 2) {
11297 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11298 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11300 PL_bufend[-2] = '\n';
11302 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11304 else if (PL_bufend[-1] == '\r')
11305 PL_bufend[-1] = '\n';
11307 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11308 PL_bufend[-1] = '\n';
11310 if (PERLDB_LINE && PL_curstash != PL_debstash)
11311 update_debugger_info(PL_linestr, NULL, 0);
11312 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11313 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11314 *(SvPVX(PL_linestr) + off ) = ' ';
11315 sv_catsv(PL_linestr,herewas);
11316 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11317 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11321 sv_catsv(tmpstr,PL_linestr);
11326 PL_multi_end = CopLINE(PL_curcop);
11327 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11328 SvPV_shrink_to_cur(tmpstr);
11330 SvREFCNT_dec(herewas);
11332 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11334 else if (PL_encoding)
11335 sv_recode_to_utf8(tmpstr, PL_encoding);
11337 PL_lex_stuff = tmpstr;
11338 yylval.ival = op_type;
11342 /* scan_inputsymbol
11343 takes: current position in input buffer
11344 returns: new position in input buffer
11345 side-effects: yylval and lex_op are set.
11350 <FH> read from filehandle
11351 <pkg::FH> read from package qualified filehandle
11352 <pkg'FH> read from package qualified filehandle
11353 <$fh> read from filehandle in $fh
11354 <*.h> filename glob
11359 S_scan_inputsymbol(pTHX_ char *start)
11362 register char *s = start; /* current position in buffer */
11366 char *d = PL_tokenbuf; /* start of temp holding space */
11367 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11369 end = strchr(s, '\n');
11372 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11374 /* die if we didn't have space for the contents of the <>,
11375 or if it didn't end, or if we see a newline
11378 if (len >= (I32)sizeof PL_tokenbuf)
11379 Perl_croak(aTHX_ "Excessively long <> operator");
11381 Perl_croak(aTHX_ "Unterminated <> operator");
11386 Remember, only scalar variables are interpreted as filehandles by
11387 this code. Anything more complex (e.g., <$fh{$num}>) will be
11388 treated as a glob() call.
11389 This code makes use of the fact that except for the $ at the front,
11390 a scalar variable and a filehandle look the same.
11392 if (*d == '$' && d[1]) d++;
11394 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11395 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11398 /* If we've tried to read what we allow filehandles to look like, and
11399 there's still text left, then it must be a glob() and not a getline.
11400 Use scan_str to pull out the stuff between the <> and treat it
11401 as nothing more than a string.
11404 if (d - PL_tokenbuf != len) {
11405 yylval.ival = OP_GLOB;
11407 s = scan_str(start,!!PL_madskills,FALSE);
11409 Perl_croak(aTHX_ "Glob not terminated");
11413 bool readline_overriden = FALSE;
11416 /* we're in a filehandle read situation */
11419 /* turn <> into <ARGV> */
11421 Copy("ARGV",d,5,char);
11423 /* Check whether readline() is overriden */
11424 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11426 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11428 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11429 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11430 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11431 readline_overriden = TRUE;
11433 /* if <$fh>, create the ops to turn the variable into a
11437 /* try to find it in the pad for this block, otherwise find
11438 add symbol table ops
11440 const PADOFFSET tmp = pad_findmy(d);
11441 if (tmp != NOT_IN_PAD) {
11442 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11443 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11444 HEK * const stashname = HvNAME_HEK(stash);
11445 SV * const sym = sv_2mortal(newSVhek(stashname));
11446 sv_catpvs(sym, "::");
11447 sv_catpv(sym, d+1);
11452 OP * const o = newOP(OP_PADSV, 0);
11454 PL_lex_op = readline_overriden
11455 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11456 append_elem(OP_LIST, o,
11457 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11458 : (OP*)newUNOP(OP_READLINE, 0, o);
11467 ? (GV_ADDMULTI | GV_ADDINEVAL)
11470 PL_lex_op = readline_overriden
11471 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11472 append_elem(OP_LIST,
11473 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11474 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11475 : (OP*)newUNOP(OP_READLINE, 0,
11476 newUNOP(OP_RV2SV, 0,
11477 newGVOP(OP_GV, 0, gv)));
11479 if (!readline_overriden)
11480 PL_lex_op->op_flags |= OPf_SPECIAL;
11481 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11482 yylval.ival = OP_NULL;
11485 /* If it's none of the above, it must be a literal filehandle
11486 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11488 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11489 PL_lex_op = readline_overriden
11490 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11491 append_elem(OP_LIST,
11492 newGVOP(OP_GV, 0, gv),
11493 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11494 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11495 yylval.ival = OP_NULL;
11504 takes: start position in buffer
11505 keep_quoted preserve \ on the embedded delimiter(s)
11506 keep_delims preserve the delimiters around the string
11507 returns: position to continue reading from buffer
11508 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11509 updates the read buffer.
11511 This subroutine pulls a string out of the input. It is called for:
11512 q single quotes q(literal text)
11513 ' single quotes 'literal text'
11514 qq double quotes qq(interpolate $here please)
11515 " double quotes "interpolate $here please"
11516 qx backticks qx(/bin/ls -l)
11517 ` backticks `/bin/ls -l`
11518 qw quote words @EXPORT_OK = qw( func() $spam )
11519 m// regexp match m/this/
11520 s/// regexp substitute s/this/that/
11521 tr/// string transliterate tr/this/that/
11522 y/// string transliterate y/this/that/
11523 ($*@) sub prototypes sub foo ($)
11524 (stuff) sub attr parameters sub foo : attr(stuff)
11525 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11527 In most of these cases (all but <>, patterns and transliterate)
11528 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11529 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11530 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11533 It skips whitespace before the string starts, and treats the first
11534 character as the delimiter. If the delimiter is one of ([{< then
11535 the corresponding "close" character )]}> is used as the closing
11536 delimiter. It allows quoting of delimiters, and if the string has
11537 balanced delimiters ([{<>}]) it allows nesting.
11539 On success, the SV with the resulting string is put into lex_stuff or,
11540 if that is already non-NULL, into lex_repl. The second case occurs only
11541 when parsing the RHS of the special constructs s/// and tr/// (y///).
11542 For convenience, the terminating delimiter character is stuffed into
11547 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11550 SV *sv; /* scalar value: string */
11551 const char *tmps; /* temp string, used for delimiter matching */
11552 register char *s = start; /* current position in the buffer */
11553 register char term; /* terminating character */
11554 register char *to; /* current position in the sv's data */
11555 I32 brackets = 1; /* bracket nesting level */
11556 bool has_utf8 = FALSE; /* is there any utf8 content? */
11557 I32 termcode; /* terminating char. code */
11558 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11559 STRLEN termlen; /* length of terminating string */
11560 int last_off = 0; /* last position for nesting bracket */
11566 /* skip space before the delimiter */
11572 if (PL_realtokenstart >= 0) {
11573 stuffstart = PL_realtokenstart;
11574 PL_realtokenstart = -1;
11577 stuffstart = start - SvPVX(PL_linestr);
11579 /* mark where we are, in case we need to report errors */
11582 /* after skipping whitespace, the next character is the terminator */
11585 termcode = termstr[0] = term;
11589 termcode = utf8_to_uvchr((U8*)s, &termlen);
11590 Copy(s, termstr, termlen, U8);
11591 if (!UTF8_IS_INVARIANT(term))
11595 /* mark where we are */
11596 PL_multi_start = CopLINE(PL_curcop);
11597 PL_multi_open = term;
11599 /* find corresponding closing delimiter */
11600 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11601 termcode = termstr[0] = term = tmps[5];
11603 PL_multi_close = term;
11605 /* create a new SV to hold the contents. 79 is the SV's initial length.
11606 What a random number. */
11607 sv = newSV_type(SVt_PVIV);
11609 SvIV_set(sv, termcode);
11610 (void)SvPOK_only(sv); /* validate pointer */
11612 /* move past delimiter and try to read a complete string */
11614 sv_catpvn(sv, s, termlen);
11617 tstart = SvPVX(PL_linestr) + stuffstart;
11618 if (!PL_thisopen && !keep_delims) {
11619 PL_thisopen = newSVpvn(tstart, s - tstart);
11620 stuffstart = s - SvPVX(PL_linestr);
11624 if (PL_encoding && !UTF) {
11628 int offset = s - SvPVX_const(PL_linestr);
11629 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11630 &offset, (char*)termstr, termlen);
11631 const char * const ns = SvPVX_const(PL_linestr) + offset;
11632 char * const svlast = SvEND(sv) - 1;
11634 for (; s < ns; s++) {
11635 if (*s == '\n' && !PL_rsfp)
11636 CopLINE_inc(PL_curcop);
11639 goto read_more_line;
11641 /* handle quoted delimiters */
11642 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11644 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11646 if ((svlast-1 - t) % 2) {
11647 if (!keep_quoted) {
11648 *(svlast-1) = term;
11650 SvCUR_set(sv, SvCUR(sv) - 1);
11655 if (PL_multi_open == PL_multi_close) {
11661 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11662 /* At here, all closes are "was quoted" one,
11663 so we don't check PL_multi_close. */
11665 if (!keep_quoted && *(t+1) == PL_multi_open)
11670 else if (*t == PL_multi_open)
11678 SvCUR_set(sv, w - SvPVX_const(sv));
11680 last_off = w - SvPVX(sv);
11681 if (--brackets <= 0)
11686 if (!keep_delims) {
11687 SvCUR_set(sv, SvCUR(sv) - 1);
11693 /* extend sv if need be */
11694 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11695 /* set 'to' to the next character in the sv's string */
11696 to = SvPVX(sv)+SvCUR(sv);
11698 /* if open delimiter is the close delimiter read unbridle */
11699 if (PL_multi_open == PL_multi_close) {
11700 for (; s < PL_bufend; s++,to++) {
11701 /* embedded newlines increment the current line number */
11702 if (*s == '\n' && !PL_rsfp)
11703 CopLINE_inc(PL_curcop);
11704 /* handle quoted delimiters */
11705 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11706 if (!keep_quoted && s[1] == term)
11708 /* any other quotes are simply copied straight through */
11712 /* terminate when run out of buffer (the for() condition), or
11713 have found the terminator */
11714 else if (*s == term) {
11717 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11720 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11726 /* if the terminator isn't the same as the start character (e.g.,
11727 matched brackets), we have to allow more in the quoting, and
11728 be prepared for nested brackets.
11731 /* read until we run out of string, or we find the terminator */
11732 for (; s < PL_bufend; s++,to++) {
11733 /* embedded newlines increment the line count */
11734 if (*s == '\n' && !PL_rsfp)
11735 CopLINE_inc(PL_curcop);
11736 /* backslashes can escape the open or closing characters */
11737 if (*s == '\\' && s+1 < PL_bufend) {
11738 if (!keep_quoted &&
11739 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11744 /* allow nested opens and closes */
11745 else if (*s == PL_multi_close && --brackets <= 0)
11747 else if (*s == PL_multi_open)
11749 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11754 /* terminate the copied string and update the sv's end-of-string */
11756 SvCUR_set(sv, to - SvPVX_const(sv));
11759 * this next chunk reads more into the buffer if we're not done yet
11763 break; /* handle case where we are done yet :-) */
11765 #ifndef PERL_STRICT_CR
11766 if (to - SvPVX_const(sv) >= 2) {
11767 if ((to[-2] == '\r' && to[-1] == '\n') ||
11768 (to[-2] == '\n' && to[-1] == '\r'))
11772 SvCUR_set(sv, to - SvPVX_const(sv));
11774 else if (to[-1] == '\r')
11777 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11782 /* if we're out of file, or a read fails, bail and reset the current
11783 line marker so we can report where the unterminated string began
11786 if (PL_madskills) {
11787 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11789 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11791 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11795 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11797 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11803 /* we read a line, so increment our line counter */
11804 CopLINE_inc(PL_curcop);
11806 /* update debugger info */
11807 if (PERLDB_LINE && PL_curstash != PL_debstash)
11808 update_debugger_info(PL_linestr, NULL, 0);
11810 /* having changed the buffer, we must update PL_bufend */
11811 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11812 PL_last_lop = PL_last_uni = NULL;
11815 /* at this point, we have successfully read the delimited string */
11817 if (!PL_encoding || UTF) {
11819 if (PL_madskills) {
11820 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11821 const int len = s - tstart;
11823 sv_catpvn(PL_thisstuff, tstart, len);
11825 PL_thisstuff = newSVpvn(tstart, len);
11826 if (!PL_thisclose && !keep_delims)
11827 PL_thisclose = newSVpvn(s,termlen);
11832 sv_catpvn(sv, s, termlen);
11837 if (PL_madskills) {
11838 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11839 const int len = s - tstart - termlen;
11841 sv_catpvn(PL_thisstuff, tstart, len);
11843 PL_thisstuff = newSVpvn(tstart, len);
11844 if (!PL_thisclose && !keep_delims)
11845 PL_thisclose = newSVpvn(s - termlen,termlen);
11849 if (has_utf8 || PL_encoding)
11852 PL_multi_end = CopLINE(PL_curcop);
11854 /* if we allocated too much space, give some back */
11855 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11856 SvLEN_set(sv, SvCUR(sv) + 1);
11857 SvPV_renew(sv, SvLEN(sv));
11860 /* decide whether this is the first or second quoted string we've read
11873 takes: pointer to position in buffer
11874 returns: pointer to new position in buffer
11875 side-effects: builds ops for the constant in yylval.op
11877 Read a number in any of the formats that Perl accepts:
11879 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11880 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11883 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11885 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11888 If it reads a number without a decimal point or an exponent, it will
11889 try converting the number to an integer and see if it can do so
11890 without loss of precision.
11894 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11897 register const char *s = start; /* current position in buffer */
11898 register char *d; /* destination in temp buffer */
11899 register char *e; /* end of temp buffer */
11900 NV nv; /* number read, as a double */
11901 SV *sv = NULL; /* place to put the converted number */
11902 bool floatit; /* boolean: int or float? */
11903 const char *lastub = NULL; /* position of last underbar */
11904 static char const number_too_long[] = "Number too long";
11906 /* We use the first character to decide what type of number this is */
11910 Perl_croak(aTHX_ "panic: scan_num");
11912 /* if it starts with a 0, it could be an octal number, a decimal in
11913 0.13 disguise, or a hexadecimal number, or a binary number. */
11917 u holds the "number so far"
11918 shift the power of 2 of the base
11919 (hex == 4, octal == 3, binary == 1)
11920 overflowed was the number more than we can hold?
11922 Shift is used when we add a digit. It also serves as an "are
11923 we in octal/hex/binary?" indicator to disallow hex characters
11924 when in octal mode.
11929 bool overflowed = FALSE;
11930 bool just_zero = TRUE; /* just plain 0 or binary number? */
11931 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11932 static const char* const bases[5] =
11933 { "", "binary", "", "octal", "hexadecimal" };
11934 static const char* const Bases[5] =
11935 { "", "Binary", "", "Octal", "Hexadecimal" };
11936 static const char* const maxima[5] =
11938 "0b11111111111111111111111111111111",
11942 const char *base, *Base, *max;
11944 /* check for hex */
11949 } else if (s[1] == 'b') {
11954 /* check for a decimal in disguise */
11955 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11957 /* so it must be octal */
11964 if (ckWARN(WARN_SYNTAX))
11965 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11966 "Misplaced _ in number");
11970 base = bases[shift];
11971 Base = Bases[shift];
11972 max = maxima[shift];
11974 /* read the rest of the number */
11976 /* x is used in the overflow test,
11977 b is the digit we're adding on. */
11982 /* if we don't mention it, we're done */
11986 /* _ are ignored -- but warned about if consecutive */
11988 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11989 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11990 "Misplaced _ in number");
11994 /* 8 and 9 are not octal */
11995 case '8': case '9':
11997 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12001 case '2': case '3': case '4':
12002 case '5': case '6': case '7':
12004 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12007 case '0': case '1':
12008 b = *s++ & 15; /* ASCII digit -> value of digit */
12012 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12013 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12014 /* make sure they said 0x */
12017 b = (*s++ & 7) + 9;
12019 /* Prepare to put the digit we have onto the end
12020 of the number so far. We check for overflows.
12026 x = u << shift; /* make room for the digit */
12028 if ((x >> shift) != u
12029 && !(PL_hints & HINT_NEW_BINARY)) {
12032 if (ckWARN_d(WARN_OVERFLOW))
12033 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12034 "Integer overflow in %s number",
12037 u = x | b; /* add the digit to the end */
12040 n *= nvshift[shift];
12041 /* If an NV has not enough bits in its
12042 * mantissa to represent an UV this summing of
12043 * small low-order numbers is a waste of time
12044 * (because the NV cannot preserve the
12045 * low-order bits anyway): we could just
12046 * remember when did we overflow and in the
12047 * end just multiply n by the right
12055 /* if we get here, we had success: make a scalar value from
12060 /* final misplaced underbar check */
12061 if (s[-1] == '_') {
12062 if (ckWARN(WARN_SYNTAX))
12063 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12068 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12069 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12070 "%s number > %s non-portable",
12076 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12077 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12078 "%s number > %s non-portable",
12083 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12084 sv = new_constant(start, s - start, "integer",
12086 else if (PL_hints & HINT_NEW_BINARY)
12087 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12092 handle decimal numbers.
12093 we're also sent here when we read a 0 as the first digit
12095 case '1': case '2': case '3': case '4': case '5':
12096 case '6': case '7': case '8': case '9': case '.':
12099 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12102 /* read next group of digits and _ and copy into d */
12103 while (isDIGIT(*s) || *s == '_') {
12104 /* skip underscores, checking for misplaced ones
12108 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12109 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12110 "Misplaced _ in number");
12114 /* check for end of fixed-length buffer */
12116 Perl_croak(aTHX_ number_too_long);
12117 /* if we're ok, copy the character */
12122 /* final misplaced underbar check */
12123 if (lastub && s == lastub + 1) {
12124 if (ckWARN(WARN_SYNTAX))
12125 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12128 /* read a decimal portion if there is one. avoid
12129 3..5 being interpreted as the number 3. followed
12132 if (*s == '.' && s[1] != '.') {
12137 if (ckWARN(WARN_SYNTAX))
12138 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12139 "Misplaced _ in number");
12143 /* copy, ignoring underbars, until we run out of digits.
12145 for (; isDIGIT(*s) || *s == '_'; s++) {
12146 /* fixed length buffer check */
12148 Perl_croak(aTHX_ number_too_long);
12150 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12151 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12152 "Misplaced _ in number");
12158 /* fractional part ending in underbar? */
12159 if (s[-1] == '_') {
12160 if (ckWARN(WARN_SYNTAX))
12161 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12162 "Misplaced _ in number");
12164 if (*s == '.' && isDIGIT(s[1])) {
12165 /* oops, it's really a v-string, but without the "v" */
12171 /* read exponent part, if present */
12172 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12176 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12177 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12179 /* stray preinitial _ */
12181 if (ckWARN(WARN_SYNTAX))
12182 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12183 "Misplaced _ in number");
12187 /* allow positive or negative exponent */
12188 if (*s == '+' || *s == '-')
12191 /* stray initial _ */
12193 if (ckWARN(WARN_SYNTAX))
12194 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12195 "Misplaced _ in number");
12199 /* read digits of exponent */
12200 while (isDIGIT(*s) || *s == '_') {
12203 Perl_croak(aTHX_ number_too_long);
12207 if (((lastub && s == lastub + 1) ||
12208 (!isDIGIT(s[1]) && s[1] != '_'))
12209 && ckWARN(WARN_SYNTAX))
12210 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12211 "Misplaced _ in number");
12218 /* make an sv from the string */
12222 We try to do an integer conversion first if no characters
12223 indicating "float" have been found.
12228 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12230 if (flags == IS_NUMBER_IN_UV) {
12232 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12235 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12236 if (uv <= (UV) IV_MIN)
12237 sv_setiv(sv, -(IV)uv);
12244 /* terminate the string */
12246 nv = Atof(PL_tokenbuf);
12250 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12251 (PL_hints & HINT_NEW_INTEGER) )
12252 sv = new_constant(PL_tokenbuf,
12255 (floatit ? "float" : "integer"),
12259 /* if it starts with a v, it could be a v-string */
12262 sv = newSV(5); /* preallocate storage space */
12263 s = scan_vstring(s, PL_bufend, sv);
12267 /* make the op for the constant and return */
12270 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12272 lvalp->opval = NULL;
12278 S_scan_formline(pTHX_ register char *s)
12281 register char *eol;
12283 SV * const stuff = newSVpvs("");
12284 bool needargs = FALSE;
12285 bool eofmt = FALSE;
12287 char *tokenstart = s;
12290 if (PL_madskills) {
12291 savewhite = PL_thiswhite;
12296 while (!needargs) {
12299 #ifdef PERL_STRICT_CR
12300 while (SPACE_OR_TAB(*t))
12303 while (SPACE_OR_TAB(*t) || *t == '\r')
12306 if (*t == '\n' || t == PL_bufend) {
12311 if (PL_in_eval && !PL_rsfp) {
12312 eol = (char *) memchr(s,'\n',PL_bufend-s);
12317 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12319 for (t = s; t < eol; t++) {
12320 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12322 goto enough; /* ~~ must be first line in formline */
12324 if (*t == '@' || *t == '^')
12328 sv_catpvn(stuff, s, eol-s);
12329 #ifndef PERL_STRICT_CR
12330 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12331 char *end = SvPVX(stuff) + SvCUR(stuff);
12334 SvCUR_set(stuff, SvCUR(stuff) - 1);
12344 if (PL_madskills) {
12346 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12348 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12351 s = filter_gets(PL_linestr, PL_rsfp, 0);
12353 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12355 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12357 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12358 PL_last_lop = PL_last_uni = NULL;
12367 if (SvCUR(stuff)) {
12370 PL_lex_state = LEX_NORMAL;
12371 start_force(PL_curforce);
12372 NEXTVAL_NEXTTOKE.ival = 0;
12376 PL_lex_state = LEX_FORMLINE;
12378 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12380 else if (PL_encoding)
12381 sv_recode_to_utf8(stuff, PL_encoding);
12383 start_force(PL_curforce);
12384 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12386 start_force(PL_curforce);
12387 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12391 SvREFCNT_dec(stuff);
12393 PL_lex_formbrack = 0;
12397 if (PL_madskills) {
12399 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12401 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12402 PL_thiswhite = savewhite;
12414 PL_cshlen = strlen(PL_cshname);
12416 #if defined(USE_ITHREADS)
12417 PERL_UNUSED_CONTEXT;
12423 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12426 const I32 oldsavestack_ix = PL_savestack_ix;
12427 CV* const outsidecv = PL_compcv;
12430 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12432 SAVEI32(PL_subline);
12433 save_item(PL_subname);
12434 SAVESPTR(PL_compcv);
12436 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12437 CvFLAGS(PL_compcv) |= flags;
12439 PL_subline = CopLINE(PL_curcop);
12440 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12441 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12442 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12444 return oldsavestack_ix;
12448 #pragma segment Perl_yylex
12451 Perl_yywarn(pTHX_ const char *s)
12454 PL_in_eval |= EVAL_WARNONLY;
12456 PL_in_eval &= ~EVAL_WARNONLY;
12461 Perl_yyerror(pTHX_ const char *s)
12464 const char *where = NULL;
12465 const char *context = NULL;
12468 int yychar = PL_parser->yychar;
12470 if (!yychar || (yychar == ';' && !PL_rsfp))
12472 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12473 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12474 PL_oldbufptr != PL_bufptr) {
12477 The code below is removed for NetWare because it abends/crashes on NetWare
12478 when the script has error such as not having the closing quotes like:
12479 if ($var eq "value)
12480 Checking of white spaces is anyway done in NetWare code.
12483 while (isSPACE(*PL_oldoldbufptr))
12486 context = PL_oldoldbufptr;
12487 contlen = PL_bufptr - PL_oldoldbufptr;
12489 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12490 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12493 The code below is removed for NetWare because it abends/crashes on NetWare
12494 when the script has error such as not having the closing quotes like:
12495 if ($var eq "value)
12496 Checking of white spaces is anyway done in NetWare code.
12499 while (isSPACE(*PL_oldbufptr))
12502 context = PL_oldbufptr;
12503 contlen = PL_bufptr - PL_oldbufptr;
12505 else if (yychar > 255)
12506 where = "next token ???";
12507 else if (yychar == -2) { /* YYEMPTY */
12508 if (PL_lex_state == LEX_NORMAL ||
12509 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12510 where = "at end of line";
12511 else if (PL_lex_inpat)
12512 where = "within pattern";
12514 where = "within string";
12517 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12519 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12520 else if (isPRINT_LC(yychar))
12521 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12523 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12524 where = SvPVX_const(where_sv);
12526 msg = sv_2mortal(newSVpv(s, 0));
12527 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12528 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12530 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12532 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12533 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12534 Perl_sv_catpvf(aTHX_ msg,
12535 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12536 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12539 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12543 if (PL_error_count >= 10) {
12544 if (PL_in_eval && SvCUR(ERRSV))
12545 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12546 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12548 Perl_croak(aTHX_ "%s has too many errors.\n",
12549 OutCopFILE(PL_curcop));
12552 PL_in_my_stash = NULL;
12556 #pragma segment Main
12560 S_swallow_bom(pTHX_ U8 *s)
12563 const STRLEN slen = SvCUR(PL_linestr);
12566 if (s[1] == 0xFE) {
12567 /* UTF-16 little-endian? (or UTF32-LE?) */
12568 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12569 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12570 #ifndef PERL_NO_UTF16_FILTER
12571 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12574 if (PL_bufend > (char*)s) {
12578 filter_add(utf16rev_textfilter, NULL);
12579 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12580 utf16_to_utf8_reversed(s, news,
12581 PL_bufend - (char*)s - 1,
12583 sv_setpvn(PL_linestr, (const char*)news, newlen);
12585 s = (U8*)SvPVX(PL_linestr);
12586 Copy(news, s, newlen, U8);
12590 SvUTF8_on(PL_linestr);
12591 s = (U8*)SvPVX(PL_linestr);
12593 /* FIXME - is this a general bug fix? */
12596 PL_bufend = SvPVX(PL_linestr) + newlen;
12599 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12604 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12605 #ifndef PERL_NO_UTF16_FILTER
12606 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12609 if (PL_bufend > (char *)s) {
12613 filter_add(utf16_textfilter, NULL);
12614 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12615 utf16_to_utf8(s, news,
12616 PL_bufend - (char*)s,
12618 sv_setpvn(PL_linestr, (const char*)news, newlen);
12620 SvUTF8_on(PL_linestr);
12621 s = (U8*)SvPVX(PL_linestr);
12622 PL_bufend = SvPVX(PL_linestr) + newlen;
12625 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12630 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12631 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12632 s += 3; /* UTF-8 */
12638 if (s[2] == 0xFE && s[3] == 0xFF) {
12639 /* UTF-32 big-endian */
12640 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12643 else if (s[2] == 0 && s[3] != 0) {
12646 * are a good indicator of UTF-16BE. */
12647 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12653 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12654 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12655 s += 4; /* UTF-8 */
12661 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12664 * are a good indicator of UTF-16LE. */
12665 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12674 * Restore a source filter.
12678 restore_rsfp(pTHX_ void *f)
12681 PerlIO * const fp = (PerlIO*)f;
12683 if (PL_rsfp == PerlIO_stdin())
12684 PerlIO_clearerr(PL_rsfp);
12685 else if (PL_rsfp && (PL_rsfp != fp))
12686 PerlIO_close(PL_rsfp);
12690 #ifndef PERL_NO_UTF16_FILTER
12692 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12695 const STRLEN old = SvCUR(sv);
12696 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12697 DEBUG_P(PerlIO_printf(Perl_debug_log,
12698 "utf16_textfilter(%p): %d %d (%d)\n",
12699 FPTR2DPTR(void *, utf16_textfilter),
12700 idx, maxlen, (int) count));
12704 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12705 Copy(SvPVX_const(sv), tmps, old, char);
12706 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12707 SvCUR(sv) - old, &newlen);
12708 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12710 DEBUG_P({sv_dump(sv);});
12715 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12718 const STRLEN old = SvCUR(sv);
12719 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12720 DEBUG_P(PerlIO_printf(Perl_debug_log,
12721 "utf16rev_textfilter(%p): %d %d (%d)\n",
12722 FPTR2DPTR(void *, utf16rev_textfilter),
12723 idx, maxlen, (int) count));
12727 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12728 Copy(SvPVX_const(sv), tmps, old, char);
12729 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12730 SvCUR(sv) - old, &newlen);
12731 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12733 DEBUG_P({ sv_dump(sv); });
12739 Returns a pointer to the next character after the parsed
12740 vstring, as well as updating the passed in sv.
12742 Function must be called like
12745 s = scan_vstring(s,e,sv);
12747 where s and e are the start and end of the string.
12748 The sv should already be large enough to store the vstring
12749 passed in, for performance reasons.
12754 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12757 const char *pos = s;
12758 const char *start = s;
12759 if (*pos == 'v') pos++; /* get past 'v' */
12760 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12762 if ( *pos != '.') {
12763 /* this may not be a v-string if followed by => */
12764 const char *next = pos;
12765 while (next < e && isSPACE(*next))
12767 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12768 /* return string not v-string */
12769 sv_setpvn(sv,(char *)s,pos-s);
12770 return (char *)pos;
12774 if (!isALPHA(*pos)) {
12775 U8 tmpbuf[UTF8_MAXBYTES+1];
12778 s++; /* get past 'v' */
12780 sv_setpvn(sv, "", 0);
12783 /* this is atoi() that tolerates underscores */
12786 const char *end = pos;
12788 while (--end >= s) {
12790 const UV orev = rev;
12791 rev += (*end - '0') * mult;
12793 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12794 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12795 "Integer overflow in decimal number");
12799 if (rev > 0x7FFFFFFF)
12800 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12802 /* Append native character for the rev point */
12803 tmpend = uvchr_to_utf8(tmpbuf, rev);
12804 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12805 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12807 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12813 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12817 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12825 * c-indentation-style: bsd
12826 * c-basic-offset: 4
12827 * indent-tabs-mode: t
12830 * ex: set ts=8 sts=4 sw=4 noet: