3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 /* XXX temporary backwards compatibility */
32 #define PL_lex_brackets (PL_parser->lex_brackets)
33 #define PL_lex_brackstack (PL_parser->lex_brackstack)
34 #define PL_lex_casemods (PL_parser->lex_casemods)
35 #define PL_lex_casestack (PL_parser->lex_casestack)
36 #define PL_lex_defer (PL_parser->lex_defer)
37 #define PL_lex_dojoin (PL_parser->lex_dojoin)
38 #define PL_lex_expect (PL_parser->lex_expect)
39 #define PL_lex_formbrack (PL_parser->lex_formbrack)
40 #define PL_lex_inpat (PL_parser->lex_inpat)
41 #define PL_lex_inwhat (PL_parser->lex_inwhat)
42 #define PL_lex_op (PL_parser->lex_op)
43 #define PL_lex_repl (PL_parser->lex_repl)
44 #define PL_lex_starts (PL_parser->lex_starts)
45 #define PL_lex_stuff (PL_parser->lex_stuff)
46 #define PL_multi_start (PL_parser->multi_start)
47 #define PL_multi_open (PL_parser->multi_open)
48 #define PL_multi_close (PL_parser->multi_close)
49 #define PL_pending_ident (PL_parser->pending_ident)
50 #define PL_preambled (PL_parser->preambled)
51 #define PL_sublex_info (PL_parser->sublex_info)
52 #define PL_linestr (PL_parser->linestr)
56 # define PL_endwhite (PL_parser->endwhite)
57 # define PL_faketokens (PL_parser->faketokens)
58 # define PL_lasttoke (PL_parser->lasttoke)
59 # define PL_nextwhite (PL_parser->nextwhite)
60 # define PL_realtokenstart (PL_parser->realtokenstart)
61 # define PL_skipwhite (PL_parser->skipwhite)
62 # define PL_thisclose (PL_parser->thisclose)
63 # define PL_thismad (PL_parser->thismad)
64 # define PL_thisopen (PL_parser->thisopen)
65 # define PL_thisstuff (PL_parser->thisstuff)
66 # define PL_thistoken (PL_parser->thistoken)
67 # define PL_thiswhite (PL_parser->thiswhite)
71 S_pending_ident(pTHX);
73 static const char ident_too_long[] = "Identifier too long";
74 static const char commaless_variable_list[] = "comma-less variable list";
76 static void restore_rsfp(pTHX_ void *f);
77 #ifndef PERL_NO_UTF16_FILTER
78 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
79 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
83 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
84 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
86 # define CURMAD(slot,sv)
87 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
90 #define XFAKEBRACK 128
93 #ifdef USE_UTF8_SCRIPTS
94 # define UTF (!IN_BYTES)
96 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
99 /* In variables named $^X, these are the legal values for X.
100 * 1999-02-27 mjd-perl-patch@plover.com */
101 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
103 /* On MacOS, respect nonbreaking spaces */
104 #ifdef MACOS_TRADITIONAL
105 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
107 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
110 /* LEX_* are values for PL_lex_state, the state of the lexer.
111 * They are arranged oddly so that the guard on the switch statement
112 * can get by with a single comparison (if the compiler is smart enough).
115 /* #define LEX_NOTPARSING 11 is done in perl.h. */
117 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
118 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
119 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
120 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
121 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
123 /* at end of code, eg "$x" followed by: */
124 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
125 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
127 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
128 string or after \E, $foo, etc */
129 #define LEX_INTERPCONST 2 /* NOT USED */
130 #define LEX_FORMLINE 1 /* expecting a format line */
131 #define LEX_KNOWNEXT 0 /* next token known; just return it */
135 static const char* const lex_state_names[] = {
154 #include "keywords.h"
156 /* CLINE is a macro that ensures PL_copline has a sane value */
161 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
164 # define SKIPSPACE0(s) skipspace0(s)
165 # define SKIPSPACE1(s) skipspace1(s)
166 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
167 # define PEEKSPACE(s) skipspace2(s,0)
169 # define SKIPSPACE0(s) skipspace(s)
170 # define SKIPSPACE1(s) skipspace(s)
171 # define SKIPSPACE2(s,tsv) skipspace(s)
172 # define PEEKSPACE(s) skipspace(s)
176 * Convenience functions to return different tokens and prime the
177 * lexer for the next token. They all take an argument.
179 * TOKEN : generic token (used for '(', DOLSHARP, etc)
180 * OPERATOR : generic operator
181 * AOPERATOR : assignment operator
182 * PREBLOCK : beginning the block after an if, while, foreach, ...
183 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
184 * PREREF : *EXPR where EXPR is not a simple identifier
185 * TERM : expression term
186 * LOOPX : loop exiting command (goto, last, dump, etc)
187 * FTST : file test operator
188 * FUN0 : zero-argument function
189 * FUN1 : not used, except for not, which isn't a UNIOP
190 * BOop : bitwise or or xor
192 * SHop : shift operator
193 * PWop : power operator
194 * PMop : pattern-matching operator
195 * Aop : addition-level operator
196 * Mop : multiplication-level operator
197 * Eop : equality-testing operator
198 * Rop : relational operator <= != gt
200 * Also see LOP and lop() below.
203 #ifdef DEBUGGING /* Serve -DT. */
204 # define REPORT(retval) tokereport((I32)retval)
206 # define REPORT(retval) (retval)
209 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
210 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
211 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
212 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
213 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
214 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
215 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
216 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
217 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
218 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
219 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
220 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
221 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
222 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
223 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
224 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
225 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
226 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
227 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
228 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
230 /* This bit of chicanery makes a unary function followed by
231 * a parenthesis into a function with one argument, highest precedence.
232 * The UNIDOR macro is for unary functions that can be followed by the //
233 * operator (such as C<shift // 0>).
235 #define UNI2(f,x) { \
239 PL_last_uni = PL_oldbufptr; \
240 PL_last_lop_op = f; \
242 return REPORT( (int)FUNC1 ); \
244 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
246 #define UNI(f) UNI2(f,XTERM)
247 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
249 #define UNIBRACK(f) { \
252 PL_last_uni = PL_oldbufptr; \
254 return REPORT( (int)FUNC1 ); \
256 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
259 /* grandfather return to old style */
260 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
264 /* how to interpret the yylval associated with the token */
268 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
274 static struct debug_tokens {
276 enum token_type type;
278 } const debug_tokens[] =
280 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
281 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
282 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
283 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
284 { ARROW, TOKENTYPE_NONE, "ARROW" },
285 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
286 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
287 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
288 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
289 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
290 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
291 { DO, TOKENTYPE_NONE, "DO" },
292 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
293 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
294 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
295 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
296 { ELSE, TOKENTYPE_NONE, "ELSE" },
297 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
298 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
299 { FOR, TOKENTYPE_IVAL, "FOR" },
300 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
301 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
302 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
303 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
304 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
305 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
306 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
307 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
308 { IF, TOKENTYPE_IVAL, "IF" },
309 { LABEL, TOKENTYPE_PVAL, "LABEL" },
310 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
311 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
312 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
313 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
314 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
315 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
316 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
317 { MY, TOKENTYPE_IVAL, "MY" },
318 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
319 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
320 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
321 { OROP, TOKENTYPE_IVAL, "OROP" },
322 { OROR, TOKENTYPE_NONE, "OROR" },
323 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
324 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
325 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
326 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
327 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
328 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
329 { PREINC, TOKENTYPE_NONE, "PREINC" },
330 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
331 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
332 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
333 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
334 { SUB, TOKENTYPE_NONE, "SUB" },
335 { THING, TOKENTYPE_OPVAL, "THING" },
336 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
337 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
338 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
339 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
340 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
341 { USE, TOKENTYPE_IVAL, "USE" },
342 { WHEN, TOKENTYPE_IVAL, "WHEN" },
343 { WHILE, TOKENTYPE_IVAL, "WHILE" },
344 { WORD, TOKENTYPE_OPVAL, "WORD" },
345 { 0, TOKENTYPE_NONE, NULL }
348 /* dump the returned token in rv, plus any optional arg in yylval */
351 S_tokereport(pTHX_ I32 rv)
355 const char *name = NULL;
356 enum token_type type = TOKENTYPE_NONE;
357 const struct debug_tokens *p;
358 SV* const report = newSVpvs("<== ");
360 for (p = debug_tokens; p->token; p++) {
361 if (p->token == (int)rv) {
368 Perl_sv_catpv(aTHX_ report, name);
369 else if ((char)rv > ' ' && (char)rv < '~')
370 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
372 sv_catpvs(report, "EOF");
374 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
377 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
380 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
382 case TOKENTYPE_OPNUM:
383 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
384 PL_op_name[yylval.ival]);
387 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
389 case TOKENTYPE_OPVAL:
391 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
392 PL_op_name[yylval.opval->op_type]);
393 if (yylval.opval->op_type == OP_CONST) {
394 Perl_sv_catpvf(aTHX_ report, " %s",
395 SvPEEK(cSVOPx_sv(yylval.opval)));
400 sv_catpvs(report, "(opval=null)");
403 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
409 /* print the buffer with suitable escapes */
412 S_printbuf(pTHX_ const char* fmt, const char* s)
414 SV* const tmp = newSVpvs("");
415 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
424 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
425 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
429 S_ao(pTHX_ int toketype)
432 if (*PL_bufptr == '=') {
434 if (toketype == ANDAND)
435 yylval.ival = OP_ANDASSIGN;
436 else if (toketype == OROR)
437 yylval.ival = OP_ORASSIGN;
438 else if (toketype == DORDOR)
439 yylval.ival = OP_DORASSIGN;
447 * When Perl expects an operator and finds something else, no_op
448 * prints the warning. It always prints "<something> found where
449 * operator expected. It prints "Missing semicolon on previous line?"
450 * if the surprise occurs at the start of the line. "do you need to
451 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
452 * where the compiler doesn't know if foo is a method call or a function.
453 * It prints "Missing operator before end of line" if there's nothing
454 * after the missing operator, or "... before <...>" if there is something
455 * after the missing operator.
459 S_no_op(pTHX_ const char *what, char *s)
462 char * const oldbp = PL_bufptr;
463 const bool is_first = (PL_oldbufptr == PL_linestart);
469 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
470 if (ckWARN_d(WARN_SYNTAX)) {
472 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
473 "\t(Missing semicolon on previous line?)\n");
474 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
476 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
478 if (t < PL_bufptr && isSPACE(*t))
479 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
480 "\t(Do you need to predeclare %.*s?)\n",
481 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
485 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
486 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
494 * Complain about missing quote/regexp/heredoc terminator.
495 * If it's called with NULL then it cauterizes the line buffer.
496 * If we're in a delimited string and the delimiter is a control
497 * character, it's reformatted into a two-char sequence like ^C.
502 S_missingterm(pTHX_ char *s)
508 char * const nl = strrchr(s,'\n');
514 iscntrl(PL_multi_close)
516 PL_multi_close < 32 || PL_multi_close == 127
520 tmpbuf[1] = (char)toCTRL(PL_multi_close);
525 *tmpbuf = (char)PL_multi_close;
529 q = strchr(s,'"') ? '\'' : '"';
530 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
533 #define FEATURE_IS_ENABLED(name) \
534 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
535 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
537 * S_feature_is_enabled
538 * Check whether the named feature is enabled.
541 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
544 HV * const hinthv = GvHV(PL_hintgv);
545 char he_name[32] = "feature_";
546 (void) my_strlcpy(&he_name[8], name, 24);
548 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
556 Perl_deprecate(pTHX_ const char *s)
558 if (ckWARN(WARN_DEPRECATED))
559 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
563 Perl_deprecate_old(pTHX_ const char *s)
565 /* This function should NOT be called for any new deprecated warnings */
566 /* Use Perl_deprecate instead */
568 /* It is here to maintain backward compatibility with the pre-5.8 */
569 /* warnings category hierarchy. The "deprecated" category used to */
570 /* live under the "syntax" category. It is now a top-level category */
571 /* in its own right. */
573 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
574 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
575 "Use of %s is deprecated", s);
579 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
580 * utf16-to-utf8-reversed.
583 #ifdef PERL_CR_FILTER
587 register const char *s = SvPVX_const(sv);
588 register const char * const e = s + SvCUR(sv);
589 /* outer loop optimized to do nothing if there are no CR-LFs */
591 if (*s++ == '\r' && *s == '\n') {
592 /* hit a CR-LF, need to copy the rest */
593 register char *d = s - 1;
596 if (*s == '\r' && s[1] == '\n')
607 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
609 const I32 count = FILTER_READ(idx+1, sv, maxlen);
610 if (count > 0 && !maxlen)
620 * Create a parser object and initialise its parser and lexer fields
624 Perl_lex_start(pTHX_ SV *line)
627 const char *s = NULL;
631 /* create and initialise a parser */
633 Newxz(parser, 1, yy_parser);
634 parser->old_parser = PL_parser;
637 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
638 parser->ps = parser->stack;
639 parser->stack_size = YYINITDEPTH;
641 parser->stack->state = 0;
642 parser->yyerrstatus = 0;
643 parser->yychar = YYEMPTY; /* Cause a token to be read. */
645 /* on scope exit, free this parser and restore any outer one */
648 /* initialise lexer state */
650 SAVEI8(PL_lex_state);
652 if (PL_lex_state == LEX_KNOWNEXT) {
653 I32 toke = parser->old_parser->lasttoke;
654 while (--toke >= 0) {
655 SAVEI32(PL_nexttoke[toke].next_type);
656 SAVEVPTR(PL_nexttoke[toke].next_val);
658 SAVEVPTR(PL_nexttoke[toke].next_mad);
661 SAVEI32(PL_curforce);
664 if (PL_lex_state == LEX_KNOWNEXT) {
665 I32 toke = PL_nexttoke;
666 while (--toke >= 0) {
667 SAVEI32(PL_nexttype[toke]);
668 SAVEVPTR(PL_nextval[toke]);
670 SAVEI32(PL_nexttoke);
673 SAVECOPLINE(PL_curcop);
676 SAVEPPTR(PL_oldbufptr);
677 SAVEPPTR(PL_oldoldbufptr);
678 SAVEPPTR(PL_last_lop);
679 SAVEPPTR(PL_last_uni);
680 SAVEPPTR(PL_linestart);
681 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
685 PL_lex_state = LEX_NORMAL;
687 Newx(parser->lex_brackstack, 120, char);
688 Newx(parser->lex_casestack, 12, char);
689 *parser->lex_casestack = '\0';
695 s = SvPV_const(line, len);
701 parser->linestr = newSVpvs("\n;");
702 } else if (SvREADONLY(line) || s[len-1] != ';') {
703 parser->linestr = newSVsv(line);
705 sv_catpvs(parser->linestr, "\n;");
708 SvREFCNT_inc_simple_void_NN(line);
709 parser->linestr = line;
711 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(parser->linestr);
712 PL_bufend = PL_bufptr + SvCUR(parser->linestr);
713 PL_last_lop = PL_last_uni = NULL;
718 /* delete a parser object */
721 Perl_parser_free(pTHX_ const yy_parser *parser)
723 SvREFCNT_dec(parser->linestr);
725 Safefree(parser->stack);
726 Safefree(parser->lex_brackstack);
727 Safefree(parser->lex_casestack);
728 PL_parser = parser->old_parser;
735 * Finalizer for lexing operations. Must be called when the parser is
736 * done with the lexer.
743 PL_doextract = FALSE;
748 * This subroutine has nothing to do with tilting, whether at windmills
749 * or pinball tables. Its name is short for "increment line". It
750 * increments the current line number in CopLINE(PL_curcop) and checks
751 * to see whether the line starts with a comment of the form
752 * # line 500 "foo.pm"
753 * If so, it sets the current line number and file to the values in the comment.
757 S_incline(pTHX_ const char *s)
764 CopLINE_inc(PL_curcop);
767 while (SPACE_OR_TAB(*s))
769 if (strnEQ(s, "line", 4))
773 if (SPACE_OR_TAB(*s))
777 while (SPACE_OR_TAB(*s))
785 while (SPACE_OR_TAB(*s))
787 if (*s == '"' && (t = strchr(s+1, '"'))) {
797 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
799 if (*e != '\n' && *e != '\0')
800 return; /* false alarm */
803 const STRLEN len = t - s;
805 const char * const cf = CopFILE(PL_curcop);
806 STRLEN tmplen = cf ? strlen(cf) : 0;
807 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
808 /* must copy *{"::_<(eval N)[oldfilename:L]"}
809 * to *{"::_<newfilename"} */
810 /* However, the long form of evals is only turned on by the
811 debugger - usually they're "(eval %lu)" */
815 STRLEN tmplen2 = len;
816 if (tmplen + 2 <= sizeof smallbuf)
819 Newx(tmpbuf, tmplen + 2, char);
822 memcpy(tmpbuf + 2, cf, tmplen);
824 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
829 if (tmplen2 + 2 <= sizeof smallbuf)
832 Newx(tmpbuf2, tmplen2 + 2, char);
834 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
835 /* Either they malloc'd it, or we malloc'd it,
836 so no prefix is present in ours. */
841 memcpy(tmpbuf2 + 2, s, tmplen2);
844 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
846 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
847 /* adjust ${"::_<newfilename"} to store the new file name */
848 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
849 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
850 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
853 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
855 if (tmpbuf != smallbuf) Safefree(tmpbuf);
858 CopFILE_free(PL_curcop);
859 CopFILE_setn(PL_curcop, s, len);
861 CopLINE_set(PL_curcop, atoi(n)-1);
865 /* skip space before PL_thistoken */
868 S_skipspace0(pTHX_ register char *s)
875 PL_thiswhite = newSVpvs("");
876 sv_catsv(PL_thiswhite, PL_skipwhite);
877 sv_free(PL_skipwhite);
880 PL_realtokenstart = s - SvPVX(PL_linestr);
884 /* skip space after PL_thistoken */
887 S_skipspace1(pTHX_ register char *s)
889 const char *start = s;
890 I32 startoff = start - SvPVX(PL_linestr);
895 start = SvPVX(PL_linestr) + startoff;
896 if (!PL_thistoken && PL_realtokenstart >= 0) {
897 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
898 PL_thistoken = newSVpvn(tstart, start - tstart);
900 PL_realtokenstart = -1;
903 PL_nextwhite = newSVpvs("");
904 sv_catsv(PL_nextwhite, PL_skipwhite);
905 sv_free(PL_skipwhite);
912 S_skipspace2(pTHX_ register char *s, SV **svp)
915 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
916 const I32 startoff = s - SvPVX(PL_linestr);
919 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
920 if (!PL_madskills || !svp)
922 start = SvPVX(PL_linestr) + startoff;
923 if (!PL_thistoken && PL_realtokenstart >= 0) {
924 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
925 PL_thistoken = newSVpvn(tstart, start - tstart);
926 PL_realtokenstart = -1;
931 sv_setsv(*svp, PL_skipwhite);
932 sv_free(PL_skipwhite);
941 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
943 AV *av = CopFILEAVx(PL_curcop);
945 SV * const sv = newSV_type(SVt_PVMG);
947 sv_setsv(sv, orig_sv);
949 sv_setpvn(sv, buf, len);
952 av_store(av, (I32)CopLINE(PL_curcop), sv);
958 * Called to gobble the appropriate amount and type of whitespace.
959 * Skips comments as well.
963 S_skipspace(pTHX_ register char *s)
968 int startoff = s - SvPVX(PL_linestr);
971 sv_free(PL_skipwhite);
976 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
977 while (s < PL_bufend && SPACE_OR_TAB(*s))
987 SSize_t oldprevlen, oldoldprevlen;
988 SSize_t oldloplen = 0, oldunilen = 0;
989 while (s < PL_bufend && isSPACE(*s)) {
990 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
995 if (s < PL_bufend && *s == '#') {
996 while (s < PL_bufend && *s != '\n')
1000 if (PL_in_eval && !PL_rsfp) {
1007 /* only continue to recharge the buffer if we're at the end
1008 * of the buffer, we're not reading from a source filter, and
1009 * we're in normal lexing mode
1011 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1012 PL_lex_state == LEX_FORMLINE)
1019 /* try to recharge the buffer */
1021 curoff = s - SvPVX(PL_linestr);
1024 if ((s = filter_gets(PL_linestr, PL_rsfp,
1025 (prevlen = SvCUR(PL_linestr)))) == NULL)
1028 if (PL_madskills && curoff != startoff) {
1030 PL_skipwhite = newSVpvs("");
1031 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1035 /* mustn't throw out old stuff yet if madpropping */
1036 SvCUR(PL_linestr) = curoff;
1037 s = SvPVX(PL_linestr) + curoff;
1039 if (curoff && s[-1] == '\n')
1043 /* end of file. Add on the -p or -n magic */
1044 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1047 sv_catpvs(PL_linestr,
1048 ";}continue{print or die qq(-p destination: $!\\n);}");
1050 sv_setpvs(PL_linestr,
1051 ";}continue{print or die qq(-p destination: $!\\n);}");
1053 PL_minus_n = PL_minus_p = 0;
1055 else if (PL_minus_n) {
1057 sv_catpvn(PL_linestr, ";}", 2);
1059 sv_setpvn(PL_linestr, ";}", 2);
1065 sv_catpvn(PL_linestr,";", 1);
1067 sv_setpvn(PL_linestr,";", 1);
1070 /* reset variables for next time we lex */
1071 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1077 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1078 PL_last_lop = PL_last_uni = NULL;
1080 /* Close the filehandle. Could be from -P preprocessor,
1081 * STDIN, or a regular file. If we were reading code from
1082 * STDIN (because the commandline held no -e or filename)
1083 * then we don't close it, we reset it so the code can
1084 * read from STDIN too.
1087 if (PL_preprocess && !PL_in_eval)
1088 (void)PerlProc_pclose(PL_rsfp);
1089 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1090 PerlIO_clearerr(PL_rsfp);
1092 (void)PerlIO_close(PL_rsfp);
1097 /* not at end of file, so we only read another line */
1098 /* make corresponding updates to old pointers, for yyerror() */
1099 oldprevlen = PL_oldbufptr - PL_bufend;
1100 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1102 oldunilen = PL_last_uni - PL_bufend;
1104 oldloplen = PL_last_lop - PL_bufend;
1105 PL_linestart = PL_bufptr = s + prevlen;
1106 PL_bufend = s + SvCUR(PL_linestr);
1108 PL_oldbufptr = s + oldprevlen;
1109 PL_oldoldbufptr = s + oldoldprevlen;
1111 PL_last_uni = s + oldunilen;
1113 PL_last_lop = s + oldloplen;
1116 /* debugger active and we're not compiling the debugger code,
1117 * so store the line into the debugger's array of lines
1119 if (PERLDB_LINE && PL_curstash != PL_debstash)
1120 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1127 PL_skipwhite = newSVpvs("");
1128 curoff = s - SvPVX(PL_linestr);
1129 if (curoff - startoff)
1130 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1139 * Check the unary operators to ensure there's no ambiguity in how they're
1140 * used. An ambiguous piece of code would be:
1142 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1143 * the +5 is its argument.
1153 if (PL_oldoldbufptr != PL_last_uni)
1155 while (isSPACE(*PL_last_uni))
1158 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1160 if ((t = strchr(s, '(')) && t < PL_bufptr)
1163 if (ckWARN_d(WARN_AMBIGUOUS)){
1164 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1165 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1166 (int)(s - PL_last_uni), PL_last_uni);
1171 * LOP : macro to build a list operator. Its behaviour has been replaced
1172 * with a subroutine, S_lop() for which LOP is just another name.
1175 #define LOP(f,x) return lop(f,x,s)
1179 * Build a list operator (or something that might be one). The rules:
1180 * - if we have a next token, then it's a list operator [why?]
1181 * - if the next thing is an opening paren, then it's a function
1182 * - else it's a list operator
1186 S_lop(pTHX_ I32 f, int x, char *s)
1193 PL_last_lop = PL_oldbufptr;
1194 PL_last_lop_op = (OPCODE)f;
1197 return REPORT(LSTOP);
1200 return REPORT(LSTOP);
1203 return REPORT(FUNC);
1206 return REPORT(FUNC);
1208 return REPORT(LSTOP);
1214 * Sets up for an eventual force_next(). start_force(0) basically does
1215 * an unshift, while start_force(-1) does a push. yylex removes items
1220 S_start_force(pTHX_ int where)
1224 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1225 where = PL_lasttoke;
1226 assert(PL_curforce < 0 || PL_curforce == where);
1227 if (PL_curforce != where) {
1228 for (i = PL_lasttoke; i > where; --i) {
1229 PL_nexttoke[i] = PL_nexttoke[i-1];
1233 if (PL_curforce < 0) /* in case of duplicate start_force() */
1234 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1235 PL_curforce = where;
1238 curmad('^', newSVpvs(""));
1239 CURMAD('_', PL_nextwhite);
1244 S_curmad(pTHX_ char slot, SV *sv)
1250 if (PL_curforce < 0)
1251 where = &PL_thismad;
1253 where = &PL_nexttoke[PL_curforce].next_mad;
1256 sv_setpvn(sv, "", 0);
1259 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1261 else if (PL_encoding) {
1262 sv_recode_to_utf8(sv, PL_encoding);
1267 /* keep a slot open for the head of the list? */
1268 if (slot != '_' && *where && (*where)->mad_key == '^') {
1269 (*where)->mad_key = slot;
1270 sv_free((*where)->mad_val);
1271 (*where)->mad_val = (void*)sv;
1274 addmad(newMADsv(slot, sv), where, 0);
1277 # define start_force(where) NOOP
1278 # define curmad(slot, sv) NOOP
1283 * When the lexer realizes it knows the next token (for instance,
1284 * it is reordering tokens for the parser) then it can call S_force_next
1285 * to know what token to return the next time the lexer is called. Caller
1286 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1287 * and possibly PL_expect to ensure the lexer handles the token correctly.
1291 S_force_next(pTHX_ I32 type)
1295 if (PL_curforce < 0)
1296 start_force(PL_lasttoke);
1297 PL_nexttoke[PL_curforce].next_type = type;
1298 if (PL_lex_state != LEX_KNOWNEXT)
1299 PL_lex_defer = PL_lex_state;
1300 PL_lex_state = LEX_KNOWNEXT;
1301 PL_lex_expect = PL_expect;
1304 PL_nexttype[PL_nexttoke] = type;
1306 if (PL_lex_state != LEX_KNOWNEXT) {
1307 PL_lex_defer = PL_lex_state;
1308 PL_lex_expect = PL_expect;
1309 PL_lex_state = LEX_KNOWNEXT;
1315 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1318 SV * const sv = newSVpvn(start,len);
1319 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1326 * When the lexer knows the next thing is a word (for instance, it has
1327 * just seen -> and it knows that the next char is a word char, then
1328 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1332 * char *start : buffer position (must be within PL_linestr)
1333 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1334 * int check_keyword : if true, Perl checks to make sure the word isn't
1335 * a keyword (do this if the word is a label, e.g. goto FOO)
1336 * int allow_pack : if true, : characters will also be allowed (require,
1337 * use, etc. do this)
1338 * int allow_initial_tick : used by the "sub" lexer only.
1342 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1348 start = SKIPSPACE1(start);
1350 if (isIDFIRST_lazy_if(s,UTF) ||
1351 (allow_pack && *s == ':') ||
1352 (allow_initial_tick && *s == '\'') )
1354 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1355 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1357 start_force(PL_curforce);
1359 curmad('X', newSVpvn(start,s-start));
1360 if (token == METHOD) {
1365 PL_expect = XOPERATOR;
1369 curmad('g', newSVpvs( "forced" ));
1370 NEXTVAL_NEXTTOKE.opval
1371 = (OP*)newSVOP(OP_CONST,0,
1372 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1373 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1381 * Called when the lexer wants $foo *foo &foo etc, but the program
1382 * text only contains the "foo" portion. The first argument is a pointer
1383 * to the "foo", and the second argument is the type symbol to prefix.
1384 * Forces the next token to be a "WORD".
1385 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1389 S_force_ident(pTHX_ register const char *s, int kind)
1393 const STRLEN len = strlen(s);
1394 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1395 start_force(PL_curforce);
1396 NEXTVAL_NEXTTOKE.opval = o;
1399 o->op_private = OPpCONST_ENTERED;
1400 /* XXX see note in pp_entereval() for why we forgo typo
1401 warnings if the symbol must be introduced in an eval.
1403 gv_fetchpvn_flags(s, len,
1404 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1406 kind == '$' ? SVt_PV :
1407 kind == '@' ? SVt_PVAV :
1408 kind == '%' ? SVt_PVHV :
1416 Perl_str_to_version(pTHX_ SV *sv)
1421 const char *start = SvPV_const(sv,len);
1422 const char * const end = start + len;
1423 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1424 while (start < end) {
1428 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1433 retval += ((NV)n)/nshift;
1442 * Forces the next token to be a version number.
1443 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1444 * and if "guessing" is TRUE, then no new token is created (and the caller
1445 * must use an alternative parsing method).
1449 S_force_version(pTHX_ char *s, int guessing)
1455 I32 startoff = s - SvPVX(PL_linestr);
1464 while (isDIGIT(*d) || *d == '_' || *d == '.')
1468 start_force(PL_curforce);
1469 curmad('X', newSVpvn(s,d-s));
1472 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1474 s = scan_num(s, &yylval);
1475 version = yylval.opval;
1476 ver = cSVOPx(version)->op_sv;
1477 if (SvPOK(ver) && !SvNIOK(ver)) {
1478 SvUPGRADE(ver, SVt_PVNV);
1479 SvNV_set(ver, str_to_version(ver));
1480 SvNOK_on(ver); /* hint that it is a version */
1483 else if (guessing) {
1486 sv_free(PL_nextwhite); /* let next token collect whitespace */
1488 s = SvPVX(PL_linestr) + startoff;
1496 if (PL_madskills && !version) {
1497 sv_free(PL_nextwhite); /* let next token collect whitespace */
1499 s = SvPVX(PL_linestr) + startoff;
1502 /* NOTE: The parser sees the package name and the VERSION swapped */
1503 start_force(PL_curforce);
1504 NEXTVAL_NEXTTOKE.opval = version;
1512 * Tokenize a quoted string passed in as an SV. It finds the next
1513 * chunk, up to end of string or a backslash. It may make a new
1514 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1519 S_tokeq(pTHX_ SV *sv)
1523 register char *send;
1531 s = SvPV_force(sv, len);
1532 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1535 while (s < send && *s != '\\')
1540 if ( PL_hints & HINT_NEW_STRING ) {
1541 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1547 if (s + 1 < send && (s[1] == '\\'))
1548 s++; /* all that, just for this */
1553 SvCUR_set(sv, d - SvPVX_const(sv));
1555 if ( PL_hints & HINT_NEW_STRING )
1556 return new_constant(NULL, 0, "q", sv, pv, "q");
1561 * Now come three functions related to double-quote context,
1562 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1563 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1564 * interact with PL_lex_state, and create fake ( ... ) argument lists
1565 * to handle functions and concatenation.
1566 * They assume that whoever calls them will be setting up a fake
1567 * join call, because each subthing puts a ',' after it. This lets
1570 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1572 * (I'm not sure whether the spurious commas at the end of lcfirst's
1573 * arguments and join's arguments are created or not).
1578 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1580 * Pattern matching will set PL_lex_op to the pattern-matching op to
1581 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1583 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1585 * Everything else becomes a FUNC.
1587 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1588 * had an OP_CONST or OP_READLINE). This just sets us up for a
1589 * call to S_sublex_push().
1593 S_sublex_start(pTHX)
1596 register const I32 op_type = yylval.ival;
1598 if (op_type == OP_NULL) {
1599 yylval.opval = PL_lex_op;
1603 if (op_type == OP_CONST || op_type == OP_READLINE) {
1604 SV *sv = tokeq(PL_lex_stuff);
1606 if (SvTYPE(sv) == SVt_PVIV) {
1607 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1609 const char * const p = SvPV_const(sv, len);
1610 SV * const nsv = newSVpvn(p, len);
1616 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1617 PL_lex_stuff = NULL;
1618 /* Allow <FH> // "foo" */
1619 if (op_type == OP_READLINE)
1620 PL_expect = XTERMORDORDOR;
1623 else if (op_type == OP_BACKTICK && PL_lex_op) {
1624 /* readpipe() vas overriden */
1625 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1626 yylval.opval = PL_lex_op;
1628 PL_lex_stuff = NULL;
1632 PL_sublex_info.super_state = PL_lex_state;
1633 PL_sublex_info.sub_inwhat = (U16)op_type;
1634 PL_sublex_info.sub_op = PL_lex_op;
1635 PL_lex_state = LEX_INTERPPUSH;
1639 yylval.opval = PL_lex_op;
1649 * Create a new scope to save the lexing state. The scope will be
1650 * ended in S_sublex_done. Returns a '(', starting the function arguments
1651 * to the uc, lc, etc. found before.
1652 * Sets PL_lex_state to LEX_INTERPCONCAT.
1661 PL_lex_state = PL_sublex_info.super_state;
1662 SAVEBOOL(PL_lex_dojoin);
1663 SAVEI32(PL_lex_brackets);
1664 SAVEI32(PL_lex_casemods);
1665 SAVEI32(PL_lex_starts);
1666 SAVEI8(PL_lex_state);
1667 SAVEVPTR(PL_lex_inpat);
1668 SAVEI16(PL_lex_inwhat);
1669 SAVECOPLINE(PL_curcop);
1670 SAVEPPTR(PL_bufptr);
1671 SAVEPPTR(PL_bufend);
1672 SAVEPPTR(PL_oldbufptr);
1673 SAVEPPTR(PL_oldoldbufptr);
1674 SAVEPPTR(PL_last_lop);
1675 SAVEPPTR(PL_last_uni);
1676 SAVEPPTR(PL_linestart);
1677 SAVESPTR(PL_linestr);
1678 SAVEGENERICPV(PL_lex_brackstack);
1679 SAVEGENERICPV(PL_lex_casestack);
1681 PL_linestr = PL_lex_stuff;
1682 PL_lex_stuff = NULL;
1684 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1685 = SvPVX(PL_linestr);
1686 PL_bufend += SvCUR(PL_linestr);
1687 PL_last_lop = PL_last_uni = NULL;
1688 SAVEFREESV(PL_linestr);
1690 PL_lex_dojoin = FALSE;
1691 PL_lex_brackets = 0;
1692 Newx(PL_lex_brackstack, 120, char);
1693 Newx(PL_lex_casestack, 12, char);
1694 PL_lex_casemods = 0;
1695 *PL_lex_casestack = '\0';
1697 PL_lex_state = LEX_INTERPCONCAT;
1698 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1700 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1701 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1702 PL_lex_inpat = PL_sublex_info.sub_op;
1704 PL_lex_inpat = NULL;
1711 * Restores lexer state after a S_sublex_push.
1718 if (!PL_lex_starts++) {
1719 SV * const sv = newSVpvs("");
1720 if (SvUTF8(PL_linestr))
1722 PL_expect = XOPERATOR;
1723 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1727 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1728 PL_lex_state = LEX_INTERPCASEMOD;
1732 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1733 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1734 PL_linestr = PL_lex_repl;
1736 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1737 PL_bufend += SvCUR(PL_linestr);
1738 PL_last_lop = PL_last_uni = NULL;
1739 SAVEFREESV(PL_linestr);
1740 PL_lex_dojoin = FALSE;
1741 PL_lex_brackets = 0;
1742 PL_lex_casemods = 0;
1743 *PL_lex_casestack = '\0';
1745 if (SvEVALED(PL_lex_repl)) {
1746 PL_lex_state = LEX_INTERPNORMAL;
1748 /* we don't clear PL_lex_repl here, so that we can check later
1749 whether this is an evalled subst; that means we rely on the
1750 logic to ensure sublex_done() is called again only via the
1751 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1754 PL_lex_state = LEX_INTERPCONCAT;
1764 PL_endwhite = newSVpvs("");
1765 sv_catsv(PL_endwhite, PL_thiswhite);
1769 sv_setpvn(PL_thistoken,"",0);
1771 PL_realtokenstart = -1;
1775 PL_bufend = SvPVX(PL_linestr);
1776 PL_bufend += SvCUR(PL_linestr);
1777 PL_expect = XOPERATOR;
1778 PL_sublex_info.sub_inwhat = 0;
1786 Extracts a pattern, double-quoted string, or transliteration. This
1789 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1790 processing a pattern (PL_lex_inpat is true), a transliteration
1791 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1793 Returns a pointer to the character scanned up to. If this is
1794 advanced from the start pointer supplied (i.e. if anything was
1795 successfully parsed), will leave an OP for the substring scanned
1796 in yylval. Caller must intuit reason for not parsing further
1797 by looking at the next characters herself.
1801 double-quoted style: \r and \n
1802 regexp special ones: \D \s
1805 case and quoting: \U \Q \E
1806 stops on @ and $, but not for $ as tail anchor
1808 In transliterations:
1809 characters are VERY literal, except for - not at the start or end
1810 of the string, which indicates a range. If the range is in bytes,
1811 scan_const expands the range to the full set of intermediate
1812 characters. If the range is in utf8, the hyphen is replaced with
1813 a certain range mark which will be handled by pmtrans() in op.c.
1815 In double-quoted strings:
1817 double-quoted style: \r and \n
1819 deprecated backrefs: \1 (in substitution replacements)
1820 case and quoting: \U \Q \E
1823 scan_const does *not* construct ops to handle interpolated strings.
1824 It stops processing as soon as it finds an embedded $ or @ variable
1825 and leaves it to the caller to work out what's going on.
1827 embedded arrays (whether in pattern or not) could be:
1828 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1830 $ in double-quoted strings must be the symbol of an embedded scalar.
1832 $ in pattern could be $foo or could be tail anchor. Assumption:
1833 it's a tail anchor if $ is the last thing in the string, or if it's
1834 followed by one of "()| \r\n\t"
1836 \1 (backreferences) are turned into $1
1838 The structure of the code is
1839 while (there's a character to process) {
1840 handle transliteration ranges
1841 skip regexp comments /(?#comment)/ and codes /(?{code})/
1842 skip #-initiated comments in //x patterns
1843 check for embedded arrays
1844 check for embedded scalars
1846 leave intact backslashes from leaveit (below)
1847 deprecate \1 in substitution replacements
1848 handle string-changing backslashes \l \U \Q \E, etc.
1849 switch (what was escaped) {
1850 handle \- in a transliteration (becomes a literal -)
1851 handle \132 (octal characters)
1852 handle \x15 and \x{1234} (hex characters)
1853 handle \N{name} (named characters)
1854 handle \cV (control characters)
1855 handle printf-style backslashes (\f, \r, \n, etc)
1857 } (end if backslash)
1858 } (end while character to read)
1863 S_scan_const(pTHX_ char *start)
1866 register char *send = PL_bufend; /* end of the constant */
1867 SV *sv = newSV(send - start); /* sv for the constant */
1868 register char *s = start; /* start of the constant */
1869 register char *d = SvPVX(sv); /* destination for copies */
1870 bool dorange = FALSE; /* are we in a translit range? */
1871 bool didrange = FALSE; /* did we just finish a range? */
1872 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1873 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1876 UV literal_endpoint = 0;
1877 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1880 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1881 /* If we are doing a trans and we know we want UTF8 set expectation */
1882 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1883 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1887 while (s < send || dorange) {
1888 /* get transliterations out of the way (they're most literal) */
1889 if (PL_lex_inwhat == OP_TRANS) {
1890 /* expand a range A-Z to the full set of characters. AIE! */
1892 I32 i; /* current expanded character */
1893 I32 min; /* first character in range */
1894 I32 max; /* last character in range */
1905 char * const c = (char*)utf8_hop((U8*)d, -1);
1909 *c = (char)UTF_TO_NATIVE(0xff);
1910 /* mark the range as done, and continue */
1916 i = d - SvPVX_const(sv); /* remember current offset */
1919 SvLEN(sv) + (has_utf8 ?
1920 (512 - UTF_CONTINUATION_MARK +
1923 /* How many two-byte within 0..255: 128 in UTF-8,
1924 * 96 in UTF-8-mod. */
1926 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1928 d = SvPVX(sv) + i; /* refresh d after realloc */
1932 for (j = 0; j <= 1; j++) {
1933 char * const c = (char*)utf8_hop((U8*)d, -1);
1934 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1940 max = (U8)0xff; /* only to \xff */
1941 uvmax = uv; /* \x{100} to uvmax */
1943 d = c; /* eat endpoint chars */
1948 d -= 2; /* eat the first char and the - */
1949 min = (U8)*d; /* first char in range */
1950 max = (U8)d[1]; /* last char in range */
1957 "Invalid range \"%c-%c\" in transliteration operator",
1958 (char)min, (char)max);
1962 if (literal_endpoint == 2 &&
1963 ((isLOWER(min) && isLOWER(max)) ||
1964 (isUPPER(min) && isUPPER(max)))) {
1966 for (i = min; i <= max; i++)
1968 *d++ = NATIVE_TO_NEED(has_utf8,i);
1970 for (i = min; i <= max; i++)
1972 *d++ = NATIVE_TO_NEED(has_utf8,i);
1977 for (i = min; i <= max; i++)
1980 const U8 ch = (U8)NATIVE_TO_UTF(i);
1981 if (UNI_IS_INVARIANT(ch))
1984 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1985 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1994 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1996 *d++ = (char)UTF_TO_NATIVE(0xff);
1998 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2002 /* mark the range as done, and continue */
2006 literal_endpoint = 0;
2011 /* range begins (ignore - as first or last char) */
2012 else if (*s == '-' && s+1 < send && s != start) {
2014 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2021 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2031 literal_endpoint = 0;
2032 native_range = TRUE;
2037 /* if we get here, we're not doing a transliteration */
2039 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2040 except for the last char, which will be done separately. */
2041 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2043 while (s+1 < send && *s != ')')
2044 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2046 else if (s[2] == '{' /* This should match regcomp.c */
2047 || (s[2] == '?' && s[3] == '{'))
2050 char *regparse = s + (s[2] == '{' ? 3 : 4);
2053 while (count && (c = *regparse)) {
2054 if (c == '\\' && regparse[1])
2062 if (*regparse != ')')
2063 regparse--; /* Leave one char for continuation. */
2064 while (s < regparse)
2065 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2069 /* likewise skip #-initiated comments in //x patterns */
2070 else if (*s == '#' && PL_lex_inpat &&
2071 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2072 while (s+1 < send && *s != '\n')
2073 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2076 /* check for embedded arrays
2077 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2079 else if (*s == '@' && s[1]) {
2080 if (isALNUM_lazy_if(s+1,UTF))
2082 if (strchr(":'{$", s[1]))
2084 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2085 break; /* in regexp, neither @+ nor @- are interpolated */
2088 /* check for embedded scalars. only stop if we're sure it's a
2091 else if (*s == '$') {
2092 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2094 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2095 break; /* in regexp, $ might be tail anchor */
2098 /* End of else if chain - OP_TRANS rejoin rest */
2101 if (*s == '\\' && s+1 < send) {
2104 /* deprecate \1 in strings and substitution replacements */
2105 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2106 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2108 if (ckWARN(WARN_SYNTAX))
2109 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2114 /* string-change backslash escapes */
2115 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2119 /* skip any other backslash escapes in a pattern */
2120 else if (PL_lex_inpat) {
2121 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2122 goto default_action;
2125 /* if we get here, it's either a quoted -, or a digit */
2128 /* quoted - in transliterations */
2130 if (PL_lex_inwhat == OP_TRANS) {
2137 if ((isALPHA(*s) || isDIGIT(*s)) &&
2139 Perl_warner(aTHX_ packWARN(WARN_MISC),
2140 "Unrecognized escape \\%c passed through",
2142 /* default action is to copy the quoted character */
2143 goto default_action;
2146 /* \132 indicates an octal constant */
2147 case '0': case '1': case '2': case '3':
2148 case '4': case '5': case '6': case '7':
2152 uv = grok_oct(s, &len, &flags, NULL);
2155 goto NUM_ESCAPE_INSERT;
2157 /* \x24 indicates a hex constant */
2161 char* const e = strchr(s, '}');
2162 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2163 PERL_SCAN_DISALLOW_PREFIX;
2168 yyerror("Missing right brace on \\x{}");
2172 uv = grok_hex(s, &len, &flags, NULL);
2178 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2179 uv = grok_hex(s, &len, &flags, NULL);
2185 /* Insert oct or hex escaped character.
2186 * There will always enough room in sv since such
2187 * escapes will be longer than any UTF-8 sequence
2188 * they can end up as. */
2190 /* We need to map to chars to ASCII before doing the tests
2193 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2194 if (!has_utf8 && uv > 255) {
2195 /* Might need to recode whatever we have
2196 * accumulated so far if it contains any
2199 * (Can't we keep track of that and avoid
2200 * this rescan? --jhi)
2204 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2205 if (!NATIVE_IS_INVARIANT(*c)) {
2210 const STRLEN offset = d - SvPVX_const(sv);
2212 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2216 while (src >= (const U8 *)SvPVX_const(sv)) {
2217 if (!NATIVE_IS_INVARIANT(*src)) {
2218 const U8 ch = NATIVE_TO_ASCII(*src);
2219 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2220 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2230 if (has_utf8 || uv > 255) {
2231 d = (char*)uvchr_to_utf8((U8*)d, uv);
2233 if (PL_lex_inwhat == OP_TRANS &&
2234 PL_sublex_info.sub_op) {
2235 PL_sublex_info.sub_op->op_private |=
2236 (PL_lex_repl ? OPpTRANS_FROM_UTF
2240 if (uv > 255 && !dorange)
2241 native_range = FALSE;
2253 /* \N{LATIN SMALL LETTER A} is a named character */
2257 char* e = strchr(s, '}');
2264 yyerror("Missing right brace on \\N{}");
2268 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2270 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2271 PERL_SCAN_DISALLOW_PREFIX;
2274 uv = grok_hex(s, &len, &flags, NULL);
2275 if ( e > s && len != (STRLEN)(e - s) ) {
2279 goto NUM_ESCAPE_INSERT;
2281 res = newSVpvn(s + 1, e - s - 1);
2282 type = newSVpvn(s - 2,e - s + 3);
2283 res = new_constant( NULL, 0, "charnames",
2284 res, NULL, SvPVX(type) );
2287 sv_utf8_upgrade(res);
2288 str = SvPV_const(res,len);
2289 #ifdef EBCDIC_NEVER_MIND
2290 /* charnames uses pack U and that has been
2291 * recently changed to do the below uni->native
2292 * mapping, so this would be redundant (and wrong,
2293 * the code point would be doubly converted).
2294 * But leave this in just in case the pack U change
2295 * gets revoked, but the semantics is still
2296 * desireable for charnames. --jhi */
2298 UV uv = utf8_to_uvchr((const U8*)str, 0);
2301 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2303 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2304 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2305 str = SvPV_const(res, len);
2309 if (!has_utf8 && SvUTF8(res)) {
2310 const char * const ostart = SvPVX_const(sv);
2311 SvCUR_set(sv, d - ostart);
2314 sv_utf8_upgrade(sv);
2315 /* this just broke our allocation above... */
2316 SvGROW(sv, (STRLEN)(send - start));
2317 d = SvPVX(sv) + SvCUR(sv);
2320 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2321 const char * const odest = SvPVX_const(sv);
2323 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2324 d = SvPVX(sv) + (d - odest);
2328 native_range = FALSE; /* \N{} is guessed to be Unicode */
2330 Copy(str, d, len, char);
2337 yyerror("Missing braces on \\N{}");
2340 /* \c is a control character */
2349 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2352 yyerror("Missing control char name in \\c");
2356 /* printf-style backslashes, formfeeds, newlines, etc */
2358 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2361 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2364 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2367 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2370 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2373 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2376 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2382 } /* end if (backslash) */
2389 /* If we started with encoded form, or already know we want it
2390 and then encode the next character */
2391 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2393 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2394 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2397 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2398 const STRLEN off = d - SvPVX_const(sv);
2399 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2401 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2404 if (uv > 255 && !dorange)
2405 native_range = FALSE;
2409 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2411 } /* while loop to process each character */
2413 /* terminate the string and set up the sv */
2415 SvCUR_set(sv, d - SvPVX_const(sv));
2416 if (SvCUR(sv) >= SvLEN(sv))
2417 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2420 if (PL_encoding && !has_utf8) {
2421 sv_recode_to_utf8(sv, PL_encoding);
2427 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2428 PL_sublex_info.sub_op->op_private |=
2429 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2433 /* shrink the sv if we allocated more than we used */
2434 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2435 SvPV_shrink_to_cur(sv);
2438 /* return the substring (via yylval) only if we parsed anything */
2439 if (s > PL_bufptr) {
2440 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2441 sv = new_constant(start, s - start,
2442 (const char *)(PL_lex_inpat ? "qr" : "q"),
2445 (( PL_lex_inwhat == OP_TRANS
2447 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2450 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2457 * Returns TRUE if there's more to the expression (e.g., a subscript),
2460 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2462 * ->[ and ->{ return TRUE
2463 * { and [ outside a pattern are always subscripts, so return TRUE
2464 * if we're outside a pattern and it's not { or [, then return FALSE
2465 * if we're in a pattern and the first char is a {
2466 * {4,5} (any digits around the comma) returns FALSE
2467 * if we're in a pattern and the first char is a [
2469 * [SOMETHING] has a funky algorithm to decide whether it's a
2470 * character class or not. It has to deal with things like
2471 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2472 * anything else returns TRUE
2475 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2478 S_intuit_more(pTHX_ register char *s)
2481 if (PL_lex_brackets)
2483 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2485 if (*s != '{' && *s != '[')
2490 /* In a pattern, so maybe we have {n,m}. */
2507 /* On the other hand, maybe we have a character class */
2510 if (*s == ']' || *s == '^')
2513 /* this is terrifying, and it works */
2514 int weight = 2; /* let's weigh the evidence */
2516 unsigned char un_char = 255, last_un_char;
2517 const char * const send = strchr(s,']');
2518 char tmpbuf[sizeof PL_tokenbuf * 4];
2520 if (!send) /* has to be an expression */
2523 Zero(seen,256,char);
2526 else if (isDIGIT(*s)) {
2528 if (isDIGIT(s[1]) && s[2] == ']')
2534 for (; s < send; s++) {
2535 last_un_char = un_char;
2536 un_char = (unsigned char)*s;
2541 weight -= seen[un_char] * 10;
2542 if (isALNUM_lazy_if(s+1,UTF)) {
2544 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2545 len = (int)strlen(tmpbuf);
2546 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2551 else if (*s == '$' && s[1] &&
2552 strchr("[#!%*<>()-=",s[1])) {
2553 if (/*{*/ strchr("])} =",s[2]))
2562 if (strchr("wds]",s[1]))
2564 else if (seen[(U8)'\''] || seen[(U8)'"'])
2566 else if (strchr("rnftbxcav",s[1]))
2568 else if (isDIGIT(s[1])) {
2570 while (s[1] && isDIGIT(s[1]))
2580 if (strchr("aA01! ",last_un_char))
2582 if (strchr("zZ79~",s[1]))
2584 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2585 weight -= 5; /* cope with negative subscript */
2588 if (!isALNUM(last_un_char)
2589 && !(last_un_char == '$' || last_un_char == '@'
2590 || last_un_char == '&')
2591 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2596 if (keyword(tmpbuf, d - tmpbuf, 0))
2599 if (un_char == last_un_char + 1)
2601 weight -= seen[un_char];
2606 if (weight >= 0) /* probably a character class */
2616 * Does all the checking to disambiguate
2618 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2619 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2621 * First argument is the stuff after the first token, e.g. "bar".
2623 * Not a method if bar is a filehandle.
2624 * Not a method if foo is a subroutine prototyped to take a filehandle.
2625 * Not a method if it's really "Foo $bar"
2626 * Method if it's "foo $bar"
2627 * Not a method if it's really "print foo $bar"
2628 * Method if it's really "foo package::" (interpreted as package->foo)
2629 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2630 * Not a method if bar is a filehandle or package, but is quoted with
2635 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2638 char *s = start + (*start == '$');
2639 char tmpbuf[sizeof PL_tokenbuf];
2647 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2651 const char *proto = SvPVX_const(cv);
2662 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2663 /* start is the beginning of the possible filehandle/object,
2664 * and s is the end of it
2665 * tmpbuf is a copy of it
2668 if (*start == '$') {
2669 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2670 isUPPER(*PL_tokenbuf))
2673 len = start - SvPVX(PL_linestr);
2677 start = SvPVX(PL_linestr) + len;
2681 return *s == '(' ? FUNCMETH : METHOD;
2683 if (!keyword(tmpbuf, len, 0)) {
2684 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2688 soff = s - SvPVX(PL_linestr);
2692 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2693 if (indirgv && GvCVu(indirgv))
2695 /* filehandle or package name makes it a method */
2696 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2698 soff = s - SvPVX(PL_linestr);
2701 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2702 return 0; /* no assumptions -- "=>" quotes bearword */
2704 start_force(PL_curforce);
2705 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2706 newSVpvn(tmpbuf,len));
2707 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2709 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2714 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2716 return *s == '(' ? FUNCMETH : METHOD;
2724 * Return a string of Perl code to load the debugger. If PERL5DB
2725 * is set, it will return the contents of that, otherwise a
2726 * compile-time require of perl5db.pl.
2734 const char * const pdb = PerlEnv_getenv("PERL5DB");
2738 SETERRNO(0,SS_NORMAL);
2739 return "BEGIN { require 'perl5db.pl' }";
2745 /* Encoded script support. filter_add() effectively inserts a
2746 * 'pre-processing' function into the current source input stream.
2747 * Note that the filter function only applies to the current source file
2748 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2750 * The datasv parameter (which may be NULL) can be used to pass
2751 * private data to this instance of the filter. The filter function
2752 * can recover the SV using the FILTER_DATA macro and use it to
2753 * store private buffers and state information.
2755 * The supplied datasv parameter is upgraded to a PVIO type
2756 * and the IoDIRP/IoANY field is used to store the function pointer,
2757 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2758 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2759 * private use must be set using malloc'd pointers.
2763 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2769 if (!PL_rsfp_filters)
2770 PL_rsfp_filters = newAV();
2773 SvUPGRADE(datasv, SVt_PVIO);
2774 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2775 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2776 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2777 FPTR2DPTR(void *, IoANY(datasv)),
2778 SvPV_nolen(datasv)));
2779 av_unshift(PL_rsfp_filters, 1);
2780 av_store(PL_rsfp_filters, 0, datasv) ;
2785 /* Delete most recently added instance of this filter function. */
2787 Perl_filter_del(pTHX_ filter_t funcp)
2793 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2794 FPTR2DPTR(void*, funcp)));
2796 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2798 /* if filter is on top of stack (usual case) just pop it off */
2799 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2800 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2801 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2802 IoANY(datasv) = (void *)NULL;
2803 sv_free(av_pop(PL_rsfp_filters));
2807 /* we need to search for the correct entry and clear it */
2808 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2812 /* Invoke the idxth filter function for the current rsfp. */
2813 /* maxlen 0 = read one text line */
2815 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2820 /* This API is bad. It should have been using unsigned int for maxlen.
2821 Not sure if we want to change the API, but if not we should sanity
2822 check the value here. */
2823 const unsigned int correct_length
2832 if (!PL_rsfp_filters)
2834 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2835 /* Provide a default input filter to make life easy. */
2836 /* Note that we append to the line. This is handy. */
2837 DEBUG_P(PerlIO_printf(Perl_debug_log,
2838 "filter_read %d: from rsfp\n", idx));
2839 if (correct_length) {
2842 const int old_len = SvCUR(buf_sv);
2844 /* ensure buf_sv is large enough */
2845 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2846 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2847 correct_length)) <= 0) {
2848 if (PerlIO_error(PL_rsfp))
2849 return -1; /* error */
2851 return 0 ; /* end of file */
2853 SvCUR_set(buf_sv, old_len + len) ;
2856 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2857 if (PerlIO_error(PL_rsfp))
2858 return -1; /* error */
2860 return 0 ; /* end of file */
2863 return SvCUR(buf_sv);
2865 /* Skip this filter slot if filter has been deleted */
2866 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2867 DEBUG_P(PerlIO_printf(Perl_debug_log,
2868 "filter_read %d: skipped (filter deleted)\n",
2870 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2872 /* Get function pointer hidden within datasv */
2873 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2874 DEBUG_P(PerlIO_printf(Perl_debug_log,
2875 "filter_read %d: via function %p (%s)\n",
2876 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2877 /* Call function. The function is expected to */
2878 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2879 /* Return: <0:error, =0:eof, >0:not eof */
2880 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2884 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2887 #ifdef PERL_CR_FILTER
2888 if (!PL_rsfp_filters) {
2889 filter_add(S_cr_textfilter,NULL);
2892 if (PL_rsfp_filters) {
2894 SvCUR_set(sv, 0); /* start with empty line */
2895 if (FILTER_READ(0, sv, 0) > 0)
2896 return ( SvPVX(sv) ) ;
2901 return (sv_gets(sv, fp, append));
2905 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2910 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2914 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2915 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2917 return GvHV(gv); /* Foo:: */
2920 /* use constant CLASS => 'MyClass' */
2921 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2922 if (gv && GvCV(gv)) {
2923 SV * const sv = cv_const_sv(GvCV(gv));
2925 pkgname = SvPV_nolen_const(sv);
2928 return gv_stashpv(pkgname, 0);
2932 * S_readpipe_override
2933 * Check whether readpipe() is overriden, and generates the appropriate
2934 * optree, provided sublex_start() is called afterwards.
2937 S_readpipe_override(pTHX)
2940 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2941 yylval.ival = OP_BACKTICK;
2943 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2945 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2946 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2947 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2949 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2950 append_elem(OP_LIST,
2951 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2952 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2962 * The intent of this yylex wrapper is to minimize the changes to the
2963 * tokener when we aren't interested in collecting madprops. It remains
2964 * to be seen how successful this strategy will be...
2971 char *s = PL_bufptr;
2973 /* make sure PL_thiswhite is initialized */
2977 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2978 if (PL_pending_ident)
2979 return S_pending_ident(aTHX);
2981 /* previous token ate up our whitespace? */
2982 if (!PL_lasttoke && PL_nextwhite) {
2983 PL_thiswhite = PL_nextwhite;
2987 /* isolate the token, and figure out where it is without whitespace */
2988 PL_realtokenstart = -1;
2992 assert(PL_curforce < 0);
2994 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2995 if (!PL_thistoken) {
2996 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2997 PL_thistoken = newSVpvs("");
2999 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3000 PL_thistoken = newSVpvn(tstart, s - tstart);
3003 if (PL_thismad) /* install head */
3004 CURMAD('X', PL_thistoken);
3007 /* last whitespace of a sublex? */
3008 if (optype == ')' && PL_endwhite) {
3009 CURMAD('X', PL_endwhite);
3014 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3015 if (!PL_thiswhite && !PL_endwhite && !optype) {
3016 sv_free(PL_thistoken);
3021 /* put off final whitespace till peg */
3022 if (optype == ';' && !PL_rsfp) {
3023 PL_nextwhite = PL_thiswhite;
3026 else if (PL_thisopen) {
3027 CURMAD('q', PL_thisopen);
3029 sv_free(PL_thistoken);
3033 /* Store actual token text as madprop X */
3034 CURMAD('X', PL_thistoken);
3038 /* add preceding whitespace as madprop _ */
3039 CURMAD('_', PL_thiswhite);
3043 /* add quoted material as madprop = */
3044 CURMAD('=', PL_thisstuff);
3048 /* add terminating quote as madprop Q */
3049 CURMAD('Q', PL_thisclose);
3053 /* special processing based on optype */
3057 /* opval doesn't need a TOKEN since it can already store mp */
3068 append_madprops(PL_thismad, yylval.opval, 0);
3076 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3085 /* remember any fake bracket that lexer is about to discard */
3086 if (PL_lex_brackets == 1 &&
3087 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3090 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3093 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3094 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3097 break; /* don't bother looking for trailing comment */
3106 /* attach a trailing comment to its statement instead of next token */
3110 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3112 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3114 if (*s == '\n' || *s == '#') {
3115 while (s < PL_bufend && *s != '\n')
3119 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3120 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3137 /* Create new token struct. Note: opvals return early above. */
3138 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3145 S_tokenize_use(pTHX_ int is_use, char *s) {
3147 if (PL_expect != XSTATE)
3148 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3149 is_use ? "use" : "no"));
3151 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3152 s = force_version(s, TRUE);
3153 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3154 start_force(PL_curforce);
3155 NEXTVAL_NEXTTOKE.opval = NULL;
3158 else if (*s == 'v') {
3159 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3160 s = force_version(s, FALSE);
3164 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3165 s = force_version(s, FALSE);
3167 yylval.ival = is_use;
3171 static const char* const exp_name[] =
3172 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3173 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3180 Works out what to call the token just pulled out of the input
3181 stream. The yacc parser takes care of taking the ops we return and
3182 stitching them into a tree.
3188 if read an identifier
3189 if we're in a my declaration
3190 croak if they tried to say my($foo::bar)
3191 build the ops for a my() declaration
3192 if it's an access to a my() variable
3193 are we in a sort block?
3194 croak if my($a); $a <=> $b
3195 build ops for access to a my() variable
3196 if in a dq string, and they've said @foo and we can't find @foo
3198 build ops for a bareword
3199 if we already built the token before, use it.
3204 #pragma segment Perl_yylex
3210 register char *s = PL_bufptr;
3215 /* orig_keyword, gvp, and gv are initialized here because
3216 * jump to the label just_a_word_zero can bypass their
3217 * initialization later. */
3218 I32 orig_keyword = 0;
3223 SV* tmp = newSVpvs("");
3224 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3225 (IV)CopLINE(PL_curcop),
3226 lex_state_names[PL_lex_state],
3227 exp_name[PL_expect],
3228 pv_display(tmp, s, strlen(s), 0, 60));
3231 /* check if there's an identifier for us to look at */
3232 if (PL_pending_ident)
3233 return REPORT(S_pending_ident(aTHX));
3235 /* no identifier pending identification */
3237 switch (PL_lex_state) {
3239 case LEX_NORMAL: /* Some compilers will produce faster */
3240 case LEX_INTERPNORMAL: /* code if we comment these out. */
3244 /* when we've already built the next token, just pull it out of the queue */
3248 yylval = PL_nexttoke[PL_lasttoke].next_val;
3250 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3251 PL_nexttoke[PL_lasttoke].next_mad = 0;
3252 if (PL_thismad && PL_thismad->mad_key == '_') {
3253 PL_thiswhite = (SV*)PL_thismad->mad_val;
3254 PL_thismad->mad_val = 0;
3255 mad_free(PL_thismad);
3260 PL_lex_state = PL_lex_defer;
3261 PL_expect = PL_lex_expect;
3262 PL_lex_defer = LEX_NORMAL;
3263 if (!PL_nexttoke[PL_lasttoke].next_type)
3268 yylval = PL_nextval[PL_nexttoke];
3270 PL_lex_state = PL_lex_defer;
3271 PL_expect = PL_lex_expect;
3272 PL_lex_defer = LEX_NORMAL;
3276 /* FIXME - can these be merged? */
3277 return(PL_nexttoke[PL_lasttoke].next_type);
3279 return REPORT(PL_nexttype[PL_nexttoke]);
3282 /* interpolated case modifiers like \L \U, including \Q and \E.
3283 when we get here, PL_bufptr is at the \
3285 case LEX_INTERPCASEMOD:
3287 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3288 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3290 /* handle \E or end of string */
3291 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3293 if (PL_lex_casemods) {
3294 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3295 PL_lex_casestack[PL_lex_casemods] = '\0';
3297 if (PL_bufptr != PL_bufend
3298 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3300 PL_lex_state = LEX_INTERPCONCAT;
3303 PL_thistoken = newSVpvs("\\E");
3309 while (PL_bufptr != PL_bufend &&
3310 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3312 PL_thiswhite = newSVpvs("");
3313 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3317 if (PL_bufptr != PL_bufend)
3320 PL_lex_state = LEX_INTERPCONCAT;
3324 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3325 "### Saw case modifier\n"); });
3327 if (s[1] == '\\' && s[2] == 'E') {
3330 PL_thiswhite = newSVpvs("");
3331 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3334 PL_lex_state = LEX_INTERPCONCAT;
3339 if (!PL_madskills) /* when just compiling don't need correct */
3340 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3341 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3342 if ((*s == 'L' || *s == 'U') &&
3343 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3344 PL_lex_casestack[--PL_lex_casemods] = '\0';
3347 if (PL_lex_casemods > 10)
3348 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3349 PL_lex_casestack[PL_lex_casemods++] = *s;
3350 PL_lex_casestack[PL_lex_casemods] = '\0';
3351 PL_lex_state = LEX_INTERPCONCAT;
3352 start_force(PL_curforce);
3353 NEXTVAL_NEXTTOKE.ival = 0;
3355 start_force(PL_curforce);
3357 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3359 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3361 NEXTVAL_NEXTTOKE.ival = OP_LC;
3363 NEXTVAL_NEXTTOKE.ival = OP_UC;
3365 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3367 Perl_croak(aTHX_ "panic: yylex");
3369 SV* const tmpsv = newSVpvs("");
3370 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3376 if (PL_lex_starts) {
3382 sv_free(PL_thistoken);
3383 PL_thistoken = newSVpvs("");
3386 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3387 if (PL_lex_casemods == 1 && PL_lex_inpat)
3396 case LEX_INTERPPUSH:
3397 return REPORT(sublex_push());
3399 case LEX_INTERPSTART:
3400 if (PL_bufptr == PL_bufend)
3401 return REPORT(sublex_done());
3402 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3403 "### Interpolated variable\n"); });
3405 PL_lex_dojoin = (*PL_bufptr == '@');
3406 PL_lex_state = LEX_INTERPNORMAL;
3407 if (PL_lex_dojoin) {
3408 start_force(PL_curforce);
3409 NEXTVAL_NEXTTOKE.ival = 0;
3411 start_force(PL_curforce);
3412 force_ident("\"", '$');
3413 start_force(PL_curforce);
3414 NEXTVAL_NEXTTOKE.ival = 0;
3416 start_force(PL_curforce);
3417 NEXTVAL_NEXTTOKE.ival = 0;
3419 start_force(PL_curforce);
3420 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3423 if (PL_lex_starts++) {
3428 sv_free(PL_thistoken);
3429 PL_thistoken = newSVpvs("");
3432 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3433 if (!PL_lex_casemods && PL_lex_inpat)
3440 case LEX_INTERPENDMAYBE:
3441 if (intuit_more(PL_bufptr)) {
3442 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3448 if (PL_lex_dojoin) {
3449 PL_lex_dojoin = FALSE;
3450 PL_lex_state = LEX_INTERPCONCAT;
3454 sv_free(PL_thistoken);
3455 PL_thistoken = newSVpvs("");
3460 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3461 && SvEVALED(PL_lex_repl))
3463 if (PL_bufptr != PL_bufend)
3464 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3468 case LEX_INTERPCONCAT:
3470 if (PL_lex_brackets)
3471 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3473 if (PL_bufptr == PL_bufend)
3474 return REPORT(sublex_done());
3476 if (SvIVX(PL_linestr) == '\'') {
3477 SV *sv = newSVsv(PL_linestr);
3480 else if ( PL_hints & HINT_NEW_RE )
3481 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3482 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3486 s = scan_const(PL_bufptr);
3488 PL_lex_state = LEX_INTERPCASEMOD;
3490 PL_lex_state = LEX_INTERPSTART;
3493 if (s != PL_bufptr) {
3494 start_force(PL_curforce);
3496 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3498 NEXTVAL_NEXTTOKE = yylval;
3501 if (PL_lex_starts++) {
3505 sv_free(PL_thistoken);
3506 PL_thistoken = newSVpvs("");
3509 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3510 if (!PL_lex_casemods && PL_lex_inpat)
3523 PL_lex_state = LEX_NORMAL;
3524 s = scan_formline(PL_bufptr);
3525 if (!PL_lex_formbrack)
3531 PL_oldoldbufptr = PL_oldbufptr;
3537 sv_free(PL_thistoken);
3540 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3544 if (isIDFIRST_lazy_if(s,UTF))
3546 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3549 goto fake_eof; /* emulate EOF on ^D or ^Z */
3558 if (PL_lex_brackets) {
3559 yyerror((const char *)
3561 ? "Format not terminated"
3562 : "Missing right curly or square bracket"));
3564 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3565 "### Tokener got EOF\n");
3569 if (s++ < PL_bufend)
3570 goto retry; /* ignore stray nulls */
3573 if (!PL_in_eval && !PL_preambled) {
3574 PL_preambled = TRUE;
3579 sv_setpv(PL_linestr,incl_perldb());
3580 if (SvCUR(PL_linestr))
3581 sv_catpvs(PL_linestr,";");
3583 while(AvFILLp(PL_preambleav) >= 0) {
3584 SV *tmpsv = av_shift(PL_preambleav);
3585 sv_catsv(PL_linestr, tmpsv);
3586 sv_catpvs(PL_linestr, ";");
3589 sv_free((SV*)PL_preambleav);
3590 PL_preambleav = NULL;
3592 if (PL_minus_n || PL_minus_p) {
3593 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3595 sv_catpvs(PL_linestr,"chomp;");
3598 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3599 || *PL_splitstr == '"')
3600 && strchr(PL_splitstr + 1, *PL_splitstr))
3601 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3603 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3604 bytes can be used as quoting characters. :-) */
3605 const char *splits = PL_splitstr;
3606 sv_catpvs(PL_linestr, "our @F=split(q\0");
3609 if (*splits == '\\')
3610 sv_catpvn(PL_linestr, splits, 1);
3611 sv_catpvn(PL_linestr, splits, 1);
3612 } while (*splits++);
3613 /* This loop will embed the trailing NUL of
3614 PL_linestr as the last thing it does before
3616 sv_catpvs(PL_linestr, ");");
3620 sv_catpvs(PL_linestr,"our @F=split(' ');");
3624 sv_catpvs(PL_linestr,"use feature ':5.10';");
3625 sv_catpvs(PL_linestr, "\n");
3626 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3627 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3628 PL_last_lop = PL_last_uni = NULL;
3629 if (PERLDB_LINE && PL_curstash != PL_debstash)
3630 update_debugger_info(PL_linestr, NULL, 0);
3634 bof = PL_rsfp ? TRUE : FALSE;
3635 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3638 PL_realtokenstart = -1;
3641 if (PL_preprocess && !PL_in_eval)
3642 (void)PerlProc_pclose(PL_rsfp);
3643 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3644 PerlIO_clearerr(PL_rsfp);
3646 (void)PerlIO_close(PL_rsfp);
3648 PL_doextract = FALSE;
3650 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3655 sv_setpv(PL_linestr,
3658 ? ";}continue{print;}" : ";}"));
3659 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3660 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3661 PL_last_lop = PL_last_uni = NULL;
3662 PL_minus_n = PL_minus_p = 0;
3665 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3666 PL_last_lop = PL_last_uni = NULL;
3667 sv_setpvn(PL_linestr,"",0);
3668 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3670 /* If it looks like the start of a BOM or raw UTF-16,
3671 * check if it in fact is. */
3677 #ifdef PERLIO_IS_STDIO
3678 # ifdef __GNU_LIBRARY__
3679 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3680 # define FTELL_FOR_PIPE_IS_BROKEN
3684 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3685 # define FTELL_FOR_PIPE_IS_BROKEN
3690 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3691 /* This loses the possibility to detect the bof
3692 * situation on perl -P when the libc5 is being used.
3693 * Workaround? Maybe attach some extra state to PL_rsfp?
3696 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3698 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3701 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3702 s = swallow_bom((U8*)s);
3706 /* Incest with pod. */
3709 sv_catsv(PL_thiswhite, PL_linestr);
3711 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3712 sv_setpvn(PL_linestr, "", 0);
3713 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3714 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3715 PL_last_lop = PL_last_uni = NULL;
3716 PL_doextract = FALSE;
3720 } while (PL_doextract);
3721 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3722 if (PERLDB_LINE && PL_curstash != PL_debstash)
3723 update_debugger_info(PL_linestr, NULL, 0);
3724 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3725 PL_last_lop = PL_last_uni = NULL;
3726 if (CopLINE(PL_curcop) == 1) {
3727 while (s < PL_bufend && isSPACE(*s))
3729 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3733 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3737 if (*s == '#' && *(s+1) == '!')
3739 #ifdef ALTERNATE_SHEBANG
3741 static char const as[] = ALTERNATE_SHEBANG;
3742 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3743 d = s + (sizeof(as) - 1);
3745 #endif /* ALTERNATE_SHEBANG */
3754 while (*d && !isSPACE(*d))
3758 #ifdef ARG_ZERO_IS_SCRIPT
3759 if (ipathend > ipath) {
3761 * HP-UX (at least) sets argv[0] to the script name,
3762 * which makes $^X incorrect. And Digital UNIX and Linux,
3763 * at least, set argv[0] to the basename of the Perl
3764 * interpreter. So, having found "#!", we'll set it right.
3766 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3768 assert(SvPOK(x) || SvGMAGICAL(x));
3769 if (sv_eq(x, CopFILESV(PL_curcop))) {
3770 sv_setpvn(x, ipath, ipathend - ipath);
3776 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3777 const char * const lstart = SvPV_const(x,llen);
3779 bstart += blen - llen;
3780 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3781 sv_setpvn(x, ipath, ipathend - ipath);
3786 TAINT_NOT; /* $^X is always tainted, but that's OK */
3788 #endif /* ARG_ZERO_IS_SCRIPT */
3793 d = instr(s,"perl -");
3795 d = instr(s,"perl");
3797 /* avoid getting into infinite loops when shebang
3798 * line contains "Perl" rather than "perl" */
3800 for (d = ipathend-4; d >= ipath; --d) {
3801 if ((*d == 'p' || *d == 'P')
3802 && !ibcmp(d, "perl", 4))
3812 #ifdef ALTERNATE_SHEBANG
3814 * If the ALTERNATE_SHEBANG on this system starts with a
3815 * character that can be part of a Perl expression, then if
3816 * we see it but not "perl", we're probably looking at the
3817 * start of Perl code, not a request to hand off to some
3818 * other interpreter. Similarly, if "perl" is there, but
3819 * not in the first 'word' of the line, we assume the line
3820 * contains the start of the Perl program.
3822 if (d && *s != '#') {
3823 const char *c = ipath;
3824 while (*c && !strchr("; \t\r\n\f\v#", *c))
3827 d = NULL; /* "perl" not in first word; ignore */
3829 *s = '#'; /* Don't try to parse shebang line */
3831 #endif /* ALTERNATE_SHEBANG */
3832 #ifndef MACOS_TRADITIONAL
3837 !instr(s,"indir") &&
3838 instr(PL_origargv[0],"perl"))
3845 while (s < PL_bufend && isSPACE(*s))
3847 if (s < PL_bufend) {
3848 Newxz(newargv,PL_origargc+3,char*);
3850 while (s < PL_bufend && !isSPACE(*s))
3853 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3856 newargv = PL_origargv;
3859 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3861 Perl_croak(aTHX_ "Can't exec %s", ipath);
3865 while (*d && !isSPACE(*d))
3867 while (SPACE_OR_TAB(*d))
3871 const bool switches_done = PL_doswitches;
3872 const U32 oldpdb = PL_perldb;
3873 const bool oldn = PL_minus_n;
3874 const bool oldp = PL_minus_p;
3877 if (*d == 'M' || *d == 'm' || *d == 'C') {
3878 const char * const m = d;
3879 while (*d && !isSPACE(*d))
3881 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3884 d = moreswitches(d);
3886 if (PL_doswitches && !switches_done) {
3887 int argc = PL_origargc;
3888 char **argv = PL_origargv;
3891 } while (argc && argv[0][0] == '-' && argv[0][1]);
3892 init_argv_symbols(argc,argv);
3894 if ((PERLDB_LINE && !oldpdb) ||
3895 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3896 /* if we have already added "LINE: while (<>) {",
3897 we must not do it again */
3899 sv_setpvn(PL_linestr, "", 0);
3900 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3901 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3902 PL_last_lop = PL_last_uni = NULL;
3903 PL_preambled = FALSE;
3905 (void)gv_fetchfile(PL_origfilename);
3912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3914 PL_lex_state = LEX_FORMLINE;
3919 #ifdef PERL_STRICT_CR
3920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3924 case ' ': case '\t': case '\f': case 013:
3925 #ifdef MACOS_TRADITIONAL
3929 PL_realtokenstart = -1;
3931 PL_thiswhite = newSVpvs("");
3932 sv_catpvn(PL_thiswhite, s, 1);
3939 PL_realtokenstart = -1;
3943 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3944 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3945 /* handle eval qq[#line 1 "foo"\n ...] */
3946 CopLINE_dec(PL_curcop);
3949 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3951 if (!PL_in_eval || PL_rsfp)
3956 while (d < PL_bufend && *d != '\n')
3960 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3961 Perl_croak(aTHX_ "panic: input overflow");
3964 PL_thiswhite = newSVpvn(s, d - s);
3969 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3971 PL_lex_state = LEX_FORMLINE;
3977 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3978 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3981 TOKEN(PEG); /* make sure any #! line is accessible */
3986 /* if (PL_madskills && PL_lex_formbrack) { */
3988 while (d < PL_bufend && *d != '\n')
3992 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3993 Perl_croak(aTHX_ "panic: input overflow");
3994 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3996 PL_thiswhite = newSVpvs("");
3997 if (CopLINE(PL_curcop) == 1) {
3998 sv_setpvn(PL_thiswhite, "", 0);
4001 sv_catpvn(PL_thiswhite, s, d - s);
4015 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4023 while (s < PL_bufend && SPACE_OR_TAB(*s))
4026 if (strnEQ(s,"=>",2)) {
4027 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4028 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4029 OPERATOR('-'); /* unary minus */
4031 PL_last_uni = PL_oldbufptr;
4033 case 'r': ftst = OP_FTEREAD; break;
4034 case 'w': ftst = OP_FTEWRITE; break;
4035 case 'x': ftst = OP_FTEEXEC; break;
4036 case 'o': ftst = OP_FTEOWNED; break;
4037 case 'R': ftst = OP_FTRREAD; break;
4038 case 'W': ftst = OP_FTRWRITE; break;
4039 case 'X': ftst = OP_FTREXEC; break;
4040 case 'O': ftst = OP_FTROWNED; break;
4041 case 'e': ftst = OP_FTIS; break;
4042 case 'z': ftst = OP_FTZERO; break;
4043 case 's': ftst = OP_FTSIZE; break;
4044 case 'f': ftst = OP_FTFILE; break;
4045 case 'd': ftst = OP_FTDIR; break;
4046 case 'l': ftst = OP_FTLINK; break;
4047 case 'p': ftst = OP_FTPIPE; break;
4048 case 'S': ftst = OP_FTSOCK; break;
4049 case 'u': ftst = OP_FTSUID; break;
4050 case 'g': ftst = OP_FTSGID; break;
4051 case 'k': ftst = OP_FTSVTX; break;
4052 case 'b': ftst = OP_FTBLK; break;
4053 case 'c': ftst = OP_FTCHR; break;
4054 case 't': ftst = OP_FTTTY; break;
4055 case 'T': ftst = OP_FTTEXT; break;
4056 case 'B': ftst = OP_FTBINARY; break;
4057 case 'M': case 'A': case 'C':
4058 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4060 case 'M': ftst = OP_FTMTIME; break;
4061 case 'A': ftst = OP_FTATIME; break;
4062 case 'C': ftst = OP_FTCTIME; break;
4070 PL_last_lop_op = (OPCODE)ftst;
4071 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4072 "### Saw file test %c\n", (int)tmp);
4077 /* Assume it was a minus followed by a one-letter named
4078 * subroutine call (or a -bareword), then. */
4079 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4080 "### '-%c' looked like a file test but was not\n",
4087 const char tmp = *s++;
4090 if (PL_expect == XOPERATOR)
4095 else if (*s == '>') {
4098 if (isIDFIRST_lazy_if(s,UTF)) {
4099 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4107 if (PL_expect == XOPERATOR)
4110 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4112 OPERATOR('-'); /* unary minus */
4118 const char tmp = *s++;
4121 if (PL_expect == XOPERATOR)
4126 if (PL_expect == XOPERATOR)
4129 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4136 if (PL_expect != XOPERATOR) {
4137 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4138 PL_expect = XOPERATOR;
4139 force_ident(PL_tokenbuf, '*');
4152 if (PL_expect == XOPERATOR) {
4156 PL_tokenbuf[0] = '%';
4157 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4158 sizeof PL_tokenbuf - 1, FALSE);
4159 if (!PL_tokenbuf[1]) {
4162 PL_pending_ident = '%';
4173 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4180 const char tmp = *s++;
4186 goto just_a_word_zero_gv;
4189 switch (PL_expect) {
4195 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4197 PL_bufptr = s; /* update in case we back off */
4203 PL_expect = XTERMBLOCK;
4206 stuffstart = s - SvPVX(PL_linestr) - 1;
4210 while (isIDFIRST_lazy_if(s,UTF)) {
4213 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4214 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4215 if (tmp < 0) tmp = -tmp;
4230 sv = newSVpvn(s, len);
4232 d = scan_str(d,TRUE,TRUE);
4234 /* MUST advance bufptr here to avoid bogus
4235 "at end of line" context messages from yyerror().
4237 PL_bufptr = s + len;
4238 yyerror("Unterminated attribute parameter in attribute list");
4242 return REPORT(0); /* EOF indicator */
4246 sv_catsv(sv, PL_lex_stuff);
4247 attrs = append_elem(OP_LIST, attrs,
4248 newSVOP(OP_CONST, 0, sv));
4249 SvREFCNT_dec(PL_lex_stuff);
4250 PL_lex_stuff = NULL;
4253 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4255 if (PL_in_my == KEY_our) {
4257 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4259 /* skip to avoid loading attributes.pm */
4261 deprecate(":unique");
4264 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4267 /* NOTE: any CV attrs applied here need to be part of
4268 the CVf_BUILTIN_ATTRS define in cv.h! */
4269 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4271 CvLVALUE_on(PL_compcv);
4273 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4275 CvLOCKED_on(PL_compcv);
4277 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4279 CvMETHOD_on(PL_compcv);
4281 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4283 CvASSERTION_on(PL_compcv);
4285 /* After we've set the flags, it could be argued that
4286 we don't need to do the attributes.pm-based setting
4287 process, and shouldn't bother appending recognized
4288 flags. To experiment with that, uncomment the
4289 following "else". (Note that's already been
4290 uncommented. That keeps the above-applied built-in
4291 attributes from being intercepted (and possibly
4292 rejected) by a package's attribute routines, but is
4293 justified by the performance win for the common case
4294 of applying only built-in attributes.) */
4296 attrs = append_elem(OP_LIST, attrs,
4297 newSVOP(OP_CONST, 0,
4301 if (*s == ':' && s[1] != ':')
4304 break; /* require real whitespace or :'s */
4305 /* XXX losing whitespace on sequential attributes here */
4309 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4310 if (*s != ';' && *s != '}' && *s != tmp
4311 && (tmp != '=' || *s != ')')) {
4312 const char q = ((*s == '\'') ? '"' : '\'');
4313 /* If here for an expression, and parsed no attrs, back
4315 if (tmp == '=' && !attrs) {
4319 /* MUST advance bufptr here to avoid bogus "at end of line"
4320 context messages from yyerror().
4323 yyerror( (const char *)
4325 ? Perl_form(aTHX_ "Invalid separator character "
4326 "%c%c%c in attribute list", q, *s, q)
4327 : "Unterminated attribute list" ) );
4335 start_force(PL_curforce);
4336 NEXTVAL_NEXTTOKE.opval = attrs;
4337 CURMAD('_', PL_nextwhite);
4342 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4343 (s - SvPVX(PL_linestr)) - stuffstart);
4351 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4352 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4360 const char tmp = *s++;
4365 const char tmp = *s++;
4373 if (PL_lex_brackets <= 0)
4374 yyerror("Unmatched right square bracket");
4377 if (PL_lex_state == LEX_INTERPNORMAL) {
4378 if (PL_lex_brackets == 0) {
4379 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4380 PL_lex_state = LEX_INTERPEND;
4387 if (PL_lex_brackets > 100) {
4388 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4390 switch (PL_expect) {
4392 if (PL_lex_formbrack) {
4396 if (PL_oldoldbufptr == PL_last_lop)
4397 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4399 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4400 OPERATOR(HASHBRACK);
4402 while (s < PL_bufend && SPACE_OR_TAB(*s))
4405 PL_tokenbuf[0] = '\0';
4406 if (d < PL_bufend && *d == '-') {
4407 PL_tokenbuf[0] = '-';
4409 while (d < PL_bufend && SPACE_OR_TAB(*d))
4412 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4413 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4415 while (d < PL_bufend && SPACE_OR_TAB(*d))
4418 const char minus = (PL_tokenbuf[0] == '-');
4419 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4427 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4432 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4437 if (PL_oldoldbufptr == PL_last_lop)
4438 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4440 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4443 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4445 /* This hack is to get the ${} in the message. */
4447 yyerror("syntax error");
4450 OPERATOR(HASHBRACK);
4452 /* This hack serves to disambiguate a pair of curlies
4453 * as being a block or an anon hash. Normally, expectation
4454 * determines that, but in cases where we're not in a
4455 * position to expect anything in particular (like inside
4456 * eval"") we have to resolve the ambiguity. This code
4457 * covers the case where the first term in the curlies is a
4458 * quoted string. Most other cases need to be explicitly
4459 * disambiguated by prepending a "+" before the opening
4460 * curly in order to force resolution as an anon hash.
4462 * XXX should probably propagate the outer expectation
4463 * into eval"" to rely less on this hack, but that could
4464 * potentially break current behavior of eval"".
4468 if (*s == '\'' || *s == '"' || *s == '`') {
4469 /* common case: get past first string, handling escapes */
4470 for (t++; t < PL_bufend && *t != *s;)
4471 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4475 else if (*s == 'q') {
4478 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4481 /* skip q//-like construct */
4483 char open, close, term;
4486 while (t < PL_bufend && isSPACE(*t))
4488 /* check for q => */
4489 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4490 OPERATOR(HASHBRACK);
4494 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4498 for (t++; t < PL_bufend; t++) {
4499 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4501 else if (*t == open)
4505 for (t++; t < PL_bufend; t++) {
4506 if (*t == '\\' && t+1 < PL_bufend)
4508 else if (*t == close && --brackets <= 0)
4510 else if (*t == open)
4517 /* skip plain q word */
4518 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4521 else if (isALNUM_lazy_if(t,UTF)) {
4523 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4526 while (t < PL_bufend && isSPACE(*t))
4528 /* if comma follows first term, call it an anon hash */
4529 /* XXX it could be a comma expression with loop modifiers */
4530 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4531 || (*t == '=' && t[1] == '>')))
4532 OPERATOR(HASHBRACK);
4533 if (PL_expect == XREF)
4536 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4542 yylval.ival = CopLINE(PL_curcop);
4543 if (isSPACE(*s) || *s == '#')
4544 PL_copline = NOLINE; /* invalidate current command line number */
4549 if (PL_lex_brackets <= 0)
4550 yyerror("Unmatched right curly bracket");
4552 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4553 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4554 PL_lex_formbrack = 0;
4555 if (PL_lex_state == LEX_INTERPNORMAL) {
4556 if (PL_lex_brackets == 0) {
4557 if (PL_expect & XFAKEBRACK) {
4558 PL_expect &= XENUMMASK;
4559 PL_lex_state = LEX_INTERPEND;
4564 PL_thiswhite = newSVpvs("");
4565 sv_catpvn(PL_thiswhite,"}",1);
4568 return yylex(); /* ignore fake brackets */
4570 if (*s == '-' && s[1] == '>')
4571 PL_lex_state = LEX_INTERPENDMAYBE;
4572 else if (*s != '[' && *s != '{')
4573 PL_lex_state = LEX_INTERPEND;
4576 if (PL_expect & XFAKEBRACK) {
4577 PL_expect &= XENUMMASK;
4579 return yylex(); /* ignore fake brackets */
4581 start_force(PL_curforce);
4583 curmad('X', newSVpvn(s-1,1));
4584 CURMAD('_', PL_thiswhite);
4589 PL_thistoken = newSVpvs("");
4597 if (PL_expect == XOPERATOR) {
4598 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4599 && isIDFIRST_lazy_if(s,UTF))
4601 CopLINE_dec(PL_curcop);
4602 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4603 CopLINE_inc(PL_curcop);
4608 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4610 PL_expect = XOPERATOR;
4611 force_ident(PL_tokenbuf, '&');
4615 yylval.ival = (OPpENTERSUB_AMPER<<8);
4627 const char tmp = *s++;
4634 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4635 && strchr("+-*/%.^&|<",tmp))
4636 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4637 "Reversed %c= operator",(int)tmp);
4639 if (PL_expect == XSTATE && isALPHA(tmp) &&
4640 (s == PL_linestart+1 || s[-2] == '\n') )
4642 if (PL_in_eval && !PL_rsfp) {
4647 if (strnEQ(s,"=cut",4)) {
4663 PL_thiswhite = newSVpvs("");
4664 sv_catpvn(PL_thiswhite, PL_linestart,
4665 PL_bufend - PL_linestart);
4669 PL_doextract = TRUE;
4673 if (PL_lex_brackets < PL_lex_formbrack) {
4675 #ifdef PERL_STRICT_CR
4676 while (SPACE_OR_TAB(*t))
4678 while (SPACE_OR_TAB(*t) || *t == '\r')
4681 if (*t == '\n' || *t == '#') {
4692 const char tmp = *s++;
4694 /* was this !=~ where !~ was meant?
4695 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4697 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4698 const char *t = s+1;
4700 while (t < PL_bufend && isSPACE(*t))
4703 if (*t == '/' || *t == '?' ||
4704 ((*t == 'm' || *t == 's' || *t == 'y')
4705 && !isALNUM(t[1])) ||
4706 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4707 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4708 "!=~ should be !~");
4718 if (PL_expect != XOPERATOR) {
4719 if (s[1] != '<' && !strchr(s,'>'))
4722 s = scan_heredoc(s);
4724 s = scan_inputsymbol(s);
4725 TERM(sublex_start());
4731 SHop(OP_LEFT_SHIFT);
4745 const char tmp = *s++;
4747 SHop(OP_RIGHT_SHIFT);
4748 else if (tmp == '=')
4757 if (PL_expect == XOPERATOR) {
4758 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4760 deprecate_old(commaless_variable_list);
4761 return REPORT(','); /* grandfather non-comma-format format */
4765 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4766 PL_tokenbuf[0] = '@';
4767 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4768 sizeof PL_tokenbuf - 1, FALSE);
4769 if (PL_expect == XOPERATOR)
4770 no_op("Array length", s);
4771 if (!PL_tokenbuf[1])
4773 PL_expect = XOPERATOR;
4774 PL_pending_ident = '#';
4778 PL_tokenbuf[0] = '$';
4779 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4780 sizeof PL_tokenbuf - 1, FALSE);
4781 if (PL_expect == XOPERATOR)
4783 if (!PL_tokenbuf[1]) {
4785 yyerror("Final $ should be \\$ or $name");
4789 /* This kludge not intended to be bulletproof. */
4790 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4791 yylval.opval = newSVOP(OP_CONST, 0,
4792 newSViv(CopARYBASE_get(&PL_compiling)));
4793 yylval.opval->op_private = OPpCONST_ARYBASE;
4799 const char tmp = *s;
4800 if (PL_lex_state == LEX_NORMAL)
4803 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4804 && intuit_more(s)) {
4806 PL_tokenbuf[0] = '@';
4807 if (ckWARN(WARN_SYNTAX)) {
4810 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4813 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4814 while (t < PL_bufend && *t != ']')
4816 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4817 "Multidimensional syntax %.*s not supported",
4818 (int)((t - PL_bufptr) + 1), PL_bufptr);
4822 else if (*s == '{') {
4824 PL_tokenbuf[0] = '%';
4825 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4826 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4828 char tmpbuf[sizeof PL_tokenbuf];
4831 } while (isSPACE(*t));
4832 if (isIDFIRST_lazy_if(t,UTF)) {
4834 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4838 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4839 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4840 "You need to quote \"%s\"",
4847 PL_expect = XOPERATOR;
4848 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4849 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4850 if (!islop || PL_last_lop_op == OP_GREPSTART)
4851 PL_expect = XOPERATOR;
4852 else if (strchr("$@\"'`q", *s))
4853 PL_expect = XTERM; /* e.g. print $fh "foo" */
4854 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4855 PL_expect = XTERM; /* e.g. print $fh &sub */
4856 else if (isIDFIRST_lazy_if(s,UTF)) {
4857 char tmpbuf[sizeof PL_tokenbuf];
4859 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4860 if ((t2 = keyword(tmpbuf, len, 0))) {
4861 /* binary operators exclude handle interpretations */
4873 PL_expect = XTERM; /* e.g. print $fh length() */
4878 PL_expect = XTERM; /* e.g. print $fh subr() */
4881 else if (isDIGIT(*s))
4882 PL_expect = XTERM; /* e.g. print $fh 3 */
4883 else if (*s == '.' && isDIGIT(s[1]))
4884 PL_expect = XTERM; /* e.g. print $fh .3 */
4885 else if ((*s == '?' || *s == '-' || *s == '+')
4886 && !isSPACE(s[1]) && s[1] != '=')
4887 PL_expect = XTERM; /* e.g. print $fh -1 */
4888 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4890 PL_expect = XTERM; /* e.g. print $fh /.../
4891 XXX except DORDOR operator
4893 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4895 PL_expect = XTERM; /* print $fh <<"EOF" */
4898 PL_pending_ident = '$';
4902 if (PL_expect == XOPERATOR)
4904 PL_tokenbuf[0] = '@';
4905 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4906 if (!PL_tokenbuf[1]) {
4909 if (PL_lex_state == LEX_NORMAL)
4911 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4913 PL_tokenbuf[0] = '%';
4915 /* Warn about @ where they meant $. */
4916 if (*s == '[' || *s == '{') {
4917 if (ckWARN(WARN_SYNTAX)) {
4918 const char *t = s + 1;
4919 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4921 if (*t == '}' || *t == ']') {
4923 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4924 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4925 "Scalar value %.*s better written as $%.*s",
4926 (int)(t-PL_bufptr), PL_bufptr,
4927 (int)(t-PL_bufptr-1), PL_bufptr+1);
4932 PL_pending_ident = '@';
4935 case '/': /* may be division, defined-or, or pattern */
4936 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4940 case '?': /* may either be conditional or pattern */
4941 if(PL_expect == XOPERATOR) {
4949 /* A // operator. */
4959 /* Disable warning on "study /blah/" */
4960 if (PL_oldoldbufptr == PL_last_uni
4961 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4962 || memNE(PL_last_uni, "study", 5)
4963 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4966 s = scan_pat(s,OP_MATCH);
4967 TERM(sublex_start());
4971 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4972 #ifdef PERL_STRICT_CR
4975 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4977 && (s == PL_linestart || s[-1] == '\n') )
4979 PL_lex_formbrack = 0;
4983 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4989 yylval.ival = OPf_SPECIAL;
4995 if (PL_expect != XOPERATOR)
5000 case '0': case '1': case '2': case '3': case '4':
5001 case '5': case '6': case '7': case '8': case '9':
5002 s = scan_num(s, &yylval);
5003 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5004 if (PL_expect == XOPERATOR)
5009 s = scan_str(s,!!PL_madskills,FALSE);
5010 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5011 if (PL_expect == XOPERATOR) {
5012 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5014 deprecate_old(commaless_variable_list);
5015 return REPORT(','); /* grandfather non-comma-format format */
5022 yylval.ival = OP_CONST;
5023 TERM(sublex_start());
5026 s = scan_str(s,!!PL_madskills,FALSE);
5027 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5028 if (PL_expect == XOPERATOR) {
5029 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5031 deprecate_old(commaless_variable_list);
5032 return REPORT(','); /* grandfather non-comma-format format */
5039 yylval.ival = OP_CONST;
5040 /* FIXME. I think that this can be const if char *d is replaced by
5041 more localised variables. */
5042 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5043 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5044 yylval.ival = OP_STRINGIFY;
5048 TERM(sublex_start());
5051 s = scan_str(s,!!PL_madskills,FALSE);
5052 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5053 if (PL_expect == XOPERATOR)
5054 no_op("Backticks",s);
5057 readpipe_override();
5058 TERM(sublex_start());
5062 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5063 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5065 if (PL_expect == XOPERATOR)
5066 no_op("Backslash",s);
5070 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5071 char *start = s + 2;
5072 while (isDIGIT(*start) || *start == '_')
5074 if (*start == '.' && isDIGIT(start[1])) {
5075 s = scan_num(s, &yylval);
5078 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5079 else if (!isALPHA(*start) && (PL_expect == XTERM
5080 || PL_expect == XREF || PL_expect == XSTATE
5081 || PL_expect == XTERMORDORDOR)) {
5082 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5083 const char c = *start;
5086 gv = gv_fetchpv(s, 0, SVt_PVCV);
5089 s = scan_num(s, &yylval);
5096 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5138 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5140 /* Some keywords can be followed by any delimiter, including ':' */
5141 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5142 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5143 (PL_tokenbuf[0] == 'q' &&
5144 strchr("qwxr", PL_tokenbuf[1])))));
5146 /* x::* is just a word, unless x is "CORE" */
5147 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5151 while (d < PL_bufend && isSPACE(*d))
5152 d++; /* no comments skipped here, or s### is misparsed */
5154 /* Is this a label? */
5155 if (!tmp && PL_expect == XSTATE
5156 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5158 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5163 /* Check for keywords */
5164 tmp = keyword(PL_tokenbuf, len, 0);
5166 /* Is this a word before a => operator? */
5167 if (*d == '=' && d[1] == '>') {
5170 = (OP*)newSVOP(OP_CONST, 0,
5171 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5172 yylval.opval->op_private = OPpCONST_BARE;
5176 if (tmp < 0) { /* second-class keyword? */
5177 GV *ogv = NULL; /* override (winner) */
5178 GV *hgv = NULL; /* hidden (loser) */
5179 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5181 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5184 if (GvIMPORTED_CV(gv))
5186 else if (! CvMETHOD(cv))
5190 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5191 (gv = *gvp) && isGV_with_GP(gv) &&
5192 GvCVu(gv) && GvIMPORTED_CV(gv))
5199 tmp = 0; /* overridden by import or by GLOBAL */
5202 && -tmp==KEY_lock /* XXX generalizable kludge */
5205 tmp = 0; /* any sub overrides "weak" keyword */
5207 else { /* no override */
5209 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5210 Perl_warner(aTHX_ packWARN(WARN_MISC),
5211 "dump() better written as CORE::dump()");
5215 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5216 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5217 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5218 "Ambiguous call resolved as CORE::%s(), %s",
5219 GvENAME(hgv), "qualify as such or use &");
5226 default: /* not a keyword */
5227 /* Trade off - by using this evil construction we can pull the
5228 variable gv into the block labelled keylookup. If not, then
5229 we have to give it function scope so that the goto from the
5230 earlier ':' case doesn't bypass the initialisation. */
5232 just_a_word_zero_gv:
5240 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5243 SV *nextPL_nextwhite = 0;
5247 /* Get the rest if it looks like a package qualifier */
5249 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5251 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5254 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5255 *s == '\'' ? "'" : "::");
5260 if (PL_expect == XOPERATOR) {
5261 if (PL_bufptr == PL_linestart) {
5262 CopLINE_dec(PL_curcop);
5263 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5264 CopLINE_inc(PL_curcop);
5267 no_op("Bareword",s);
5270 /* Look for a subroutine with this name in current package,
5271 unless name is "Foo::", in which case Foo is a bearword
5272 (and a package name). */
5274 if (len > 2 && !PL_madskills &&
5275 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5277 if (ckWARN(WARN_BAREWORD)
5278 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5279 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5280 "Bareword \"%s\" refers to nonexistent package",
5283 PL_tokenbuf[len] = '\0';
5289 /* Mustn't actually add anything to a symbol table.
5290 But also don't want to "initialise" any placeholder
5291 constants that might already be there into full
5292 blown PVGVs with attached PVCV. */
5293 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5294 GV_NOADD_NOINIT, SVt_PVCV);
5299 /* if we saw a global override before, get the right name */
5302 sv = newSVpvs("CORE::GLOBAL::");
5303 sv_catpv(sv,PL_tokenbuf);
5306 /* If len is 0, newSVpv does strlen(), which is correct.
5307 If len is non-zero, then it will be the true length,
5308 and so the scalar will be created correctly. */
5309 sv = newSVpv(PL_tokenbuf,len);
5312 if (PL_madskills && !PL_thistoken) {
5313 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5314 PL_thistoken = newSVpv(start,s - start);
5315 PL_realtokenstart = s - SvPVX(PL_linestr);
5319 /* Presume this is going to be a bareword of some sort. */
5322 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5323 yylval.opval->op_private = OPpCONST_BARE;
5324 /* UTF-8 package name? */
5325 if (UTF && !IN_BYTES &&
5326 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5329 /* And if "Foo::", then that's what it certainly is. */
5334 /* Do the explicit type check so that we don't need to force
5335 the initialisation of the symbol table to have a real GV.
5336 Beware - gv may not really be a PVGV, cv may not really be
5337 a PVCV, (because of the space optimisations that gv_init
5338 understands) But they're true if for this symbol there is
5339 respectively a typeglob and a subroutine.
5341 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5342 /* Real typeglob, so get the real subroutine: */
5344 /* A proxy for a subroutine in this package? */
5345 : SvOK(gv) ? (CV *) gv : NULL)
5348 /* See if it's the indirect object for a list operator. */
5350 if (PL_oldoldbufptr &&
5351 PL_oldoldbufptr < PL_bufptr &&
5352 (PL_oldoldbufptr == PL_last_lop
5353 || PL_oldoldbufptr == PL_last_uni) &&
5354 /* NO SKIPSPACE BEFORE HERE! */
5355 (PL_expect == XREF ||
5356 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5358 bool immediate_paren = *s == '(';
5360 /* (Now we can afford to cross potential line boundary.) */
5361 s = SKIPSPACE2(s,nextPL_nextwhite);
5363 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5366 /* Two barewords in a row may indicate method call. */
5368 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5369 (tmp = intuit_method(s, gv, cv)))
5372 /* If not a declared subroutine, it's an indirect object. */
5373 /* (But it's an indir obj regardless for sort.) */
5374 /* Also, if "_" follows a filetest operator, it's a bareword */
5377 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5379 (PL_last_lop_op != OP_MAPSTART &&
5380 PL_last_lop_op != OP_GREPSTART))))
5381 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5382 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5385 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5390 PL_expect = XOPERATOR;
5393 s = SKIPSPACE2(s,nextPL_nextwhite);
5394 PL_nextwhite = nextPL_nextwhite;
5399 /* Is this a word before a => operator? */
5400 if (*s == '=' && s[1] == '>' && !pkgname) {
5402 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5403 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5404 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5408 /* If followed by a paren, it's certainly a subroutine. */
5413 while (SPACE_OR_TAB(*d))
5415 if (*d == ')' && (sv = gv_const_sv(gv))) {
5419 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5420 sv_catpvn(PL_thistoken, par, s - par);
5422 sv_free(PL_nextwhite);
5433 PL_nextwhite = PL_thiswhite;
5436 start_force(PL_curforce);
5438 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5439 PL_expect = XOPERATOR;
5442 PL_nextwhite = nextPL_nextwhite;
5443 curmad('X', PL_thistoken);
5444 PL_thistoken = newSVpvs("");
5452 /* If followed by var or block, call it a method (unless sub) */
5454 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5455 PL_last_lop = PL_oldbufptr;
5456 PL_last_lop_op = OP_METHOD;
5460 /* If followed by a bareword, see if it looks like indir obj. */
5463 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5464 && (tmp = intuit_method(s, gv, cv)))
5467 /* Not a method, so call it a subroutine (if defined) */
5470 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5471 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5472 "Ambiguous use of -%s resolved as -&%s()",
5473 PL_tokenbuf, PL_tokenbuf);
5474 /* Check for a constant sub */
5475 if ((sv = gv_const_sv(gv)) && !PL_madskills) {
5477 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5478 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5479 yylval.opval->op_private = 0;
5483 /* Resolve to GV now. */
5484 if (SvTYPE(gv) != SVt_PVGV) {
5485 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5486 assert (SvTYPE(gv) == SVt_PVGV);
5487 /* cv must have been some sort of placeholder, so
5488 now needs replacing with a real code reference. */
5492 op_free(yylval.opval);
5493 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5494 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5495 PL_last_lop = PL_oldbufptr;
5496 PL_last_lop_op = OP_ENTERSUB;
5497 /* Is there a prototype? */
5505 const char *proto = SvPV_const((SV*)cv, protolen);
5508 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5510 while (*proto == ';')
5512 if (*proto == '&' && *s == '{') {
5513 sv_setpv(PL_subname,
5516 "__ANON__" : "__ANON__::__ANON__"));
5523 PL_nextwhite = PL_thiswhite;
5526 start_force(PL_curforce);
5527 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5530 PL_nextwhite = nextPL_nextwhite;
5531 curmad('X', PL_thistoken);
5532 PL_thistoken = newSVpvs("");
5539 /* Guess harder when madskills require "best effort". */
5540 if (PL_madskills && (!gv || !GvCVu(gv))) {
5541 int probable_sub = 0;
5542 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5544 else if (isALPHA(*s)) {
5548 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5549 if (!keyword(tmpbuf, tmplen, 0))
5552 while (d < PL_bufend && isSPACE(*d))
5554 if (*d == '=' && d[1] == '>')
5559 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5560 op_free(yylval.opval);
5561 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5562 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5563 PL_last_lop = PL_oldbufptr;
5564 PL_last_lop_op = OP_ENTERSUB;
5565 PL_nextwhite = PL_thiswhite;
5567 start_force(PL_curforce);
5568 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5570 PL_nextwhite = nextPL_nextwhite;
5571 curmad('X', PL_thistoken);
5572 PL_thistoken = newSVpvs("");
5577 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5584 /* Call it a bare word */
5586 if (PL_hints & HINT_STRICT_SUBS)
5587 yylval.opval->op_private |= OPpCONST_STRICT;
5590 if (lastchar != '-') {
5591 if (ckWARN(WARN_RESERVED)) {
5595 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5596 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5604 && ckWARN_d(WARN_AMBIGUOUS)) {
5605 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5606 "Operator or semicolon missing before %c%s",
5607 lastchar, PL_tokenbuf);
5608 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5609 "Ambiguous use of %c resolved as operator %c",
5610 lastchar, lastchar);
5616 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5617 newSVpv(CopFILE(PL_curcop),0));
5621 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5622 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5625 case KEY___PACKAGE__:
5626 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5628 ? newSVhek(HvNAME_HEK(PL_curstash))
5635 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5636 const char *pname = "main";
5637 if (PL_tokenbuf[2] == 'D')
5638 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5639 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5643 GvIOp(gv) = newIO();
5644 IoIFP(GvIOp(gv)) = PL_rsfp;
5645 #if defined(HAS_FCNTL) && defined(F_SETFD)
5647 const int fd = PerlIO_fileno(PL_rsfp);
5648 fcntl(fd,F_SETFD,fd >= 3);
5651 /* Mark this internal pseudo-handle as clean */
5652 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5654 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5655 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5656 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5658 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5659 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5660 /* if the script was opened in binmode, we need to revert
5661 * it to text mode for compatibility; but only iff it has CRs
5662 * XXX this is a questionable hack at best. */
5663 if (PL_bufend-PL_bufptr > 2
5664 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5667 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5668 loc = PerlIO_tell(PL_rsfp);
5669 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5672 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5674 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5675 #endif /* NETWARE */
5676 #ifdef PERLIO_IS_STDIO /* really? */
5677 # if defined(__BORLANDC__)
5678 /* XXX see note in do_binmode() */
5679 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5683 PerlIO_seek(PL_rsfp, loc, 0);
5687 #ifdef PERLIO_LAYERS
5690 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5691 else if (PL_encoding) {
5698 XPUSHs(PL_encoding);
5700 call_method("name", G_SCALAR);
5704 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5705 Perl_form(aTHX_ ":encoding(%"SVf")",
5714 if (PL_realtokenstart >= 0) {
5715 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5717 PL_endwhite = newSVpvs("");
5718 sv_catsv(PL_endwhite, PL_thiswhite);
5720 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5721 PL_realtokenstart = -1;
5723 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5724 SvCUR(PL_endwhite))) != Nullch) ;
5739 if (PL_expect == XSTATE) {
5746 if (*s == ':' && s[1] == ':') {
5749 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5750 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5751 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5754 else if (tmp == KEY_require || tmp == KEY_do)
5755 /* that's a way to remember we saw "CORE::" */
5768 LOP(OP_ACCEPT,XTERM);
5774 LOP(OP_ATAN2,XTERM);
5780 LOP(OP_BINMODE,XTERM);
5783 LOP(OP_BLESS,XTERM);
5792 /* When 'use switch' is in effect, continue has a dual
5793 life as a control operator. */
5795 if (!FEATURE_IS_ENABLED("switch"))
5798 /* We have to disambiguate the two senses of
5799 "continue". If the next token is a '{' then
5800 treat it as the start of a continue block;
5801 otherwise treat it as a control operator.
5813 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5830 if (!PL_cryptseen) {
5831 PL_cryptseen = TRUE;
5835 LOP(OP_CRYPT,XTERM);
5838 LOP(OP_CHMOD,XTERM);
5841 LOP(OP_CHOWN,XTERM);
5844 LOP(OP_CONNECT,XTERM);
5863 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5864 if (orig_keyword == KEY_do) {
5873 PL_hints |= HINT_BLOCK_SCOPE;
5883 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5884 LOP(OP_DBMOPEN,XTERM);
5890 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5897 yylval.ival = CopLINE(PL_curcop);
5913 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5914 UNIBRACK(OP_ENTEREVAL);
5932 case KEY_endhostent:
5938 case KEY_endservent:
5941 case KEY_endprotoent:
5952 yylval.ival = CopLINE(PL_curcop);
5954 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5957 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5960 if ((PL_bufend - p) >= 3 &&
5961 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5963 else if ((PL_bufend - p) >= 4 &&
5964 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5967 if (isIDFIRST_lazy_if(p,UTF)) {
5968 p = scan_ident(p, PL_bufend,
5969 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5973 Perl_croak(aTHX_ "Missing $ on loop variable");
5975 s = SvPVX(PL_linestr) + soff;
5981 LOP(OP_FORMLINE,XTERM);
5987 LOP(OP_FCNTL,XTERM);
5993 LOP(OP_FLOCK,XTERM);
6002 LOP(OP_GREPSTART, XREF);
6005 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6020 case KEY_getpriority:
6021 LOP(OP_GETPRIORITY,XTERM);
6023 case KEY_getprotobyname:
6026 case KEY_getprotobynumber:
6027 LOP(OP_GPBYNUMBER,XTERM);
6029 case KEY_getprotoent:
6041 case KEY_getpeername:
6042 UNI(OP_GETPEERNAME);
6044 case KEY_gethostbyname:
6047 case KEY_gethostbyaddr:
6048 LOP(OP_GHBYADDR,XTERM);
6050 case KEY_gethostent:
6053 case KEY_getnetbyname:
6056 case KEY_getnetbyaddr:
6057 LOP(OP_GNBYADDR,XTERM);
6062 case KEY_getservbyname:
6063 LOP(OP_GSBYNAME,XTERM);
6065 case KEY_getservbyport:
6066 LOP(OP_GSBYPORT,XTERM);
6068 case KEY_getservent:
6071 case KEY_getsockname:
6072 UNI(OP_GETSOCKNAME);
6074 case KEY_getsockopt:
6075 LOP(OP_GSOCKOPT,XTERM);
6090 yylval.ival = CopLINE(PL_curcop);
6101 yylval.ival = CopLINE(PL_curcop);
6105 LOP(OP_INDEX,XTERM);
6111 LOP(OP_IOCTL,XTERM);
6123 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6155 LOP(OP_LISTEN,XTERM);
6164 s = scan_pat(s,OP_MATCH);
6165 TERM(sublex_start());
6168 LOP(OP_MAPSTART, XREF);
6171 LOP(OP_MKDIR,XTERM);
6174 LOP(OP_MSGCTL,XTERM);
6177 LOP(OP_MSGGET,XTERM);
6180 LOP(OP_MSGRCV,XTERM);
6183 LOP(OP_MSGSND,XTERM);
6188 PL_in_my = (U16)tmp;
6190 if (isIDFIRST_lazy_if(s,UTF)) {
6194 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6195 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6197 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6198 if (!PL_in_my_stash) {
6201 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6205 if (PL_madskills) { /* just add type to declarator token */
6206 sv_catsv(PL_thistoken, PL_nextwhite);
6208 sv_catpvn(PL_thistoken, start, s - start);
6216 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6223 s = tokenize_use(0, s);
6227 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6234 if (isIDFIRST_lazy_if(s,UTF)) {
6236 for (d = s; isALNUM_lazy_if(d,UTF);)
6238 for (t=d; isSPACE(*t);)
6240 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6242 && !(t[0] == '=' && t[1] == '>')
6244 int parms_len = (int)(d-s);
6245 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6246 "Precedence problem: open %.*s should be open(%.*s)",
6247 parms_len, s, parms_len, s);
6253 yylval.ival = OP_OR;
6263 LOP(OP_OPEN_DIR,XTERM);
6266 checkcomma(s,PL_tokenbuf,"filehandle");
6270 checkcomma(s,PL_tokenbuf,"filehandle");
6289 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6293 LOP(OP_PIPE_OP,XTERM);
6296 s = scan_str(s,!!PL_madskills,FALSE);
6299 yylval.ival = OP_CONST;
6300 TERM(sublex_start());
6306 s = scan_str(s,!!PL_madskills,FALSE);
6309 PL_expect = XOPERATOR;
6311 if (SvCUR(PL_lex_stuff)) {
6314 d = SvPV_force(PL_lex_stuff, len);
6316 for (; isSPACE(*d) && len; --len, ++d)
6321 if (!warned && ckWARN(WARN_QW)) {
6322 for (; !isSPACE(*d) && len; --len, ++d) {
6324 Perl_warner(aTHX_ packWARN(WARN_QW),
6325 "Possible attempt to separate words with commas");
6328 else if (*d == '#') {
6329 Perl_warner(aTHX_ packWARN(WARN_QW),
6330 "Possible attempt to put comments in qw() list");
6336 for (; !isSPACE(*d) && len; --len, ++d)
6339 sv = newSVpvn(b, d-b);
6340 if (DO_UTF8(PL_lex_stuff))
6342 words = append_elem(OP_LIST, words,
6343 newSVOP(OP_CONST, 0, tokeq(sv)));
6347 start_force(PL_curforce);
6348 NEXTVAL_NEXTTOKE.opval = words;
6353 SvREFCNT_dec(PL_lex_stuff);
6354 PL_lex_stuff = NULL;
6360 s = scan_str(s,!!PL_madskills,FALSE);
6363 yylval.ival = OP_STRINGIFY;
6364 if (SvIVX(PL_lex_stuff) == '\'')
6365 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6366 TERM(sublex_start());
6369 s = scan_pat(s,OP_QR);
6370 TERM(sublex_start());
6373 s = scan_str(s,!!PL_madskills,FALSE);
6376 readpipe_override();
6377 TERM(sublex_start());
6385 s = force_version(s, FALSE);
6387 else if (*s != 'v' || !isDIGIT(s[1])
6388 || (s = force_version(s, TRUE), *s == 'v'))
6390 *PL_tokenbuf = '\0';
6391 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6392 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6393 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6395 yyerror("<> should be quotes");
6397 if (orig_keyword == KEY_require) {
6405 PL_last_uni = PL_oldbufptr;
6406 PL_last_lop_op = OP_REQUIRE;
6408 return REPORT( (int)REQUIRE );
6414 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6418 LOP(OP_RENAME,XTERM);
6427 LOP(OP_RINDEX,XTERM);
6437 UNIDOR(OP_READLINE);
6441 UNIDOR(OP_BACKTICK);
6450 LOP(OP_REVERSE,XTERM);
6453 UNIDOR(OP_READLINK);
6461 TERM(sublex_start());
6463 TOKEN(1); /* force error */
6466 checkcomma(s,PL_tokenbuf,"filehandle");
6476 LOP(OP_SELECT,XTERM);
6482 LOP(OP_SEMCTL,XTERM);
6485 LOP(OP_SEMGET,XTERM);
6488 LOP(OP_SEMOP,XTERM);
6494 LOP(OP_SETPGRP,XTERM);
6496 case KEY_setpriority:
6497 LOP(OP_SETPRIORITY,XTERM);
6499 case KEY_sethostent:
6505 case KEY_setservent:
6508 case KEY_setprotoent:
6518 LOP(OP_SEEKDIR,XTERM);
6520 case KEY_setsockopt:
6521 LOP(OP_SSOCKOPT,XTERM);
6527 LOP(OP_SHMCTL,XTERM);
6530 LOP(OP_SHMGET,XTERM);
6533 LOP(OP_SHMREAD,XTERM);
6536 LOP(OP_SHMWRITE,XTERM);
6539 LOP(OP_SHUTDOWN,XTERM);
6548 LOP(OP_SOCKET,XTERM);
6550 case KEY_socketpair:
6551 LOP(OP_SOCKPAIR,XTERM);
6554 checkcomma(s,PL_tokenbuf,"subroutine name");
6556 if (*s == ';' || *s == ')') /* probably a close */
6557 Perl_croak(aTHX_ "sort is now a reserved word");
6559 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6563 LOP(OP_SPLIT,XTERM);
6566 LOP(OP_SPRINTF,XTERM);
6569 LOP(OP_SPLICE,XTERM);
6584 LOP(OP_SUBSTR,XTERM);
6590 char tmpbuf[sizeof PL_tokenbuf];
6591 SSize_t tboffset = 0;
6592 expectation attrful;
6593 bool have_name, have_proto;
6594 const int key = tmp;
6599 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6600 SV *subtoken = newSVpvn(tstart, s - tstart);
6604 s = SKIPSPACE2(s,tmpwhite);
6609 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6610 (*s == ':' && s[1] == ':'))
6617 attrful = XATTRBLOCK;
6618 /* remember buffer pos'n for later force_word */
6619 tboffset = s - PL_oldbufptr;
6620 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6623 nametoke = newSVpvn(s, d - s);
6625 if (memchr(tmpbuf, ':', len))
6626 sv_setpvn(PL_subname, tmpbuf, len);
6628 sv_setsv(PL_subname,PL_curstname);
6629 sv_catpvs(PL_subname,"::");
6630 sv_catpvn(PL_subname,tmpbuf,len);
6637 CURMAD('X', nametoke);
6638 CURMAD('_', tmpwhite);
6639 (void) force_word(PL_oldbufptr + tboffset, WORD,
6642 s = SKIPSPACE2(d,tmpwhite);
6649 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6650 PL_expect = XTERMBLOCK;
6651 attrful = XATTRTERM;
6652 sv_setpvn(PL_subname,"?",1);
6656 if (key == KEY_format) {
6658 PL_lex_formbrack = PL_lex_brackets + 1;
6660 PL_thistoken = subtoken;
6664 (void) force_word(PL_oldbufptr + tboffset, WORD,
6670 /* Look for a prototype */
6673 bool bad_proto = FALSE;
6674 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6676 s = scan_str(s,!!PL_madskills,FALSE);
6678 Perl_croak(aTHX_ "Prototype not terminated");
6679 /* strip spaces and check for bad characters */
6680 d = SvPVX(PL_lex_stuff);
6682 for (p = d; *p; ++p) {
6685 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6691 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6692 "Illegal character in prototype for %"SVf" : %s",
6693 SVfARG(PL_subname), d);
6694 SvCUR_set(PL_lex_stuff, tmp);
6699 CURMAD('q', PL_thisopen);
6700 CURMAD('_', tmpwhite);
6701 CURMAD('=', PL_thisstuff);
6702 CURMAD('Q', PL_thisclose);
6703 NEXTVAL_NEXTTOKE.opval =
6704 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6705 PL_lex_stuff = Nullsv;
6708 s = SKIPSPACE2(s,tmpwhite);
6716 if (*s == ':' && s[1] != ':')
6717 PL_expect = attrful;
6718 else if (*s != '{' && key == KEY_sub) {
6720 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6722 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6729 curmad('^', newSVpvs(""));
6730 CURMAD('_', tmpwhite);
6734 PL_thistoken = subtoken;
6737 NEXTVAL_NEXTTOKE.opval =
6738 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6739 PL_lex_stuff = NULL;
6744 sv_setpv(PL_subname,
6746 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6750 (void) force_word(PL_oldbufptr + tboffset, WORD,
6760 LOP(OP_SYSTEM,XREF);
6763 LOP(OP_SYMLINK,XTERM);
6766 LOP(OP_SYSCALL,XTERM);
6769 LOP(OP_SYSOPEN,XTERM);
6772 LOP(OP_SYSSEEK,XTERM);
6775 LOP(OP_SYSREAD,XTERM);
6778 LOP(OP_SYSWRITE,XTERM);
6782 TERM(sublex_start());
6803 LOP(OP_TRUNCATE,XTERM);
6815 yylval.ival = CopLINE(PL_curcop);
6819 yylval.ival = CopLINE(PL_curcop);
6823 LOP(OP_UNLINK,XTERM);
6829 LOP(OP_UNPACK,XTERM);
6832 LOP(OP_UTIME,XTERM);
6838 LOP(OP_UNSHIFT,XTERM);
6841 s = tokenize_use(1, s);
6851 yylval.ival = CopLINE(PL_curcop);
6855 yylval.ival = CopLINE(PL_curcop);
6859 PL_hints |= HINT_BLOCK_SCOPE;
6866 LOP(OP_WAITPID,XTERM);
6875 ctl_l[0] = toCTRL('L');
6877 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6880 /* Make sure $^L is defined */
6881 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6886 if (PL_expect == XOPERATOR)
6892 yylval.ival = OP_XOR;
6897 TERM(sublex_start());
6902 #pragma segment Main
6906 S_pending_ident(pTHX)
6911 /* pit holds the identifier we read and pending_ident is reset */
6912 char pit = PL_pending_ident;
6913 PL_pending_ident = 0;
6915 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6916 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6917 "### Pending identifier '%s'\n", PL_tokenbuf); });
6919 /* if we're in a my(), we can't allow dynamics here.
6920 $foo'bar has already been turned into $foo::bar, so
6921 just check for colons.
6923 if it's a legal name, the OP is a PADANY.
6926 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6927 if (strchr(PL_tokenbuf,':'))
6928 yyerror(Perl_form(aTHX_ "No package name allowed for "
6929 "variable %s in \"our\"",
6931 tmp = allocmy(PL_tokenbuf);
6934 if (strchr(PL_tokenbuf,':'))
6935 yyerror(Perl_form(aTHX_ PL_no_myglob,
6936 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6938 yylval.opval = newOP(OP_PADANY, 0);
6939 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6945 build the ops for accesses to a my() variable.
6947 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6948 then used in a comparison. This catches most, but not
6949 all cases. For instance, it catches
6950 sort { my($a); $a <=> $b }
6952 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6953 (although why you'd do that is anyone's guess).
6956 if (!strchr(PL_tokenbuf,':')) {
6958 tmp = pad_findmy(PL_tokenbuf);
6959 if (tmp != NOT_IN_PAD) {
6960 /* might be an "our" variable" */
6961 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6962 /* build ops for a bareword */
6963 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6964 HEK * const stashname = HvNAME_HEK(stash);
6965 SV * const sym = newSVhek(stashname);
6966 sv_catpvs(sym, "::");
6967 sv_catpv(sym, PL_tokenbuf+1);
6968 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6969 yylval.opval->op_private = OPpCONST_ENTERED;
6972 ? (GV_ADDMULTI | GV_ADDINEVAL)
6975 ((PL_tokenbuf[0] == '$') ? SVt_PV
6976 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6981 /* if it's a sort block and they're naming $a or $b */
6982 if (PL_last_lop_op == OP_SORT &&
6983 PL_tokenbuf[0] == '$' &&
6984 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6987 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6988 d < PL_bufend && *d != '\n';
6991 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6992 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6998 yylval.opval = newOP(OP_PADANY, 0);
6999 yylval.opval->op_targ = tmp;
7005 Whine if they've said @foo in a doublequoted string,
7006 and @foo isn't a variable we can find in the symbol
7009 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7010 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
7011 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7012 && ckWARN(WARN_AMBIGUOUS)
7013 /* DO NOT warn for @- and @+ */
7014 && !( PL_tokenbuf[2] == '\0' &&
7015 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7018 /* Downgraded from fatal to warning 20000522 mjd */
7019 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7020 "Possible unintended interpolation of %s in string",
7025 /* build ops for a bareword */
7026 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7027 yylval.opval->op_private = OPpCONST_ENTERED;
7030 /* If the identifier refers to a stash, don't autovivify it.
7031 * Change 24660 had the side effect of causing symbol table
7032 * hashes to always be defined, even if they were freshly
7033 * created and the only reference in the entire program was
7034 * the single statement with the defined %foo::bar:: test.
7035 * It appears that all code in the wild doing this actually
7036 * wants to know whether sub-packages have been loaded, so
7037 * by avoiding auto-vivifying symbol tables, we ensure that
7038 * defined %foo::bar:: continues to be false, and the existing
7039 * tests still give the expected answers, even though what
7040 * they're actually testing has now changed subtly.
7042 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7044 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7045 ((PL_tokenbuf[0] == '$') ? SVt_PV
7046 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7052 * The following code was generated by perl_keyword.pl.
7056 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7061 case 1: /* 5 tokens of length 1 */
7093 case 2: /* 18 tokens of length 2 */
7239 case 3: /* 29 tokens of length 3 */
7243 if (name[1] == 'N' &&
7306 if (name[1] == 'i' &&
7328 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7346 if (name[1] == 'o' &&
7355 if (name[1] == 'e' &&
7364 if (name[1] == 'n' &&
7373 if (name[1] == 'o' &&
7382 if (name[1] == 'a' &&
7391 if (name[1] == 'o' &&
7453 if (name[1] == 'e' &&
7467 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7493 if (name[1] == 'i' &&
7502 if (name[1] == 's' &&
7511 if (name[1] == 'e' &&
7520 if (name[1] == 'o' &&
7532 case 4: /* 41 tokens of length 4 */
7536 if (name[1] == 'O' &&
7546 if (name[1] == 'N' &&
7556 if (name[1] == 'i' &&
7566 if (name[1] == 'h' &&
7576 if (name[1] == 'u' &&
7589 if (name[2] == 'c' &&
7598 if (name[2] == 's' &&
7607 if (name[2] == 'a' &&
7643 if (name[1] == 'o' &&
7656 if (name[2] == 't' &&
7665 if (name[2] == 'o' &&
7674 if (name[2] == 't' &&
7683 if (name[2] == 'e' &&
7696 if (name[1] == 'o' &&
7709 if (name[2] == 'y' &&
7718 if (name[2] == 'l' &&
7734 if (name[2] == 's' &&
7743 if (name[2] == 'n' &&
7752 if (name[2] == 'c' &&
7765 if (name[1] == 'e' &&
7775 if (name[1] == 'p' &&
7788 if (name[2] == 'c' &&
7797 if (name[2] == 'p' &&
7806 if (name[2] == 's' &&
7822 if (name[2] == 'n' &&
7892 if (name[2] == 'r' &&
7901 if (name[2] == 'r' &&
7910 if (name[2] == 'a' &&
7926 if (name[2] == 'l' &&
7988 if (name[2] == 'e' &&
7991 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8004 case 5: /* 39 tokens of length 5 */
8008 if (name[1] == 'E' &&
8019 if (name[1] == 'H' &&
8033 if (name[2] == 'a' &&
8043 if (name[2] == 'a' &&
8060 if (name[2] == 'e' &&
8070 if (name[2] == 'e' &&
8074 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8090 if (name[3] == 'i' &&
8099 if (name[3] == 'o' &&
8135 if (name[2] == 'o' &&
8145 if (name[2] == 'y' &&
8159 if (name[1] == 'l' &&
8173 if (name[2] == 'n' &&
8183 if (name[2] == 'o' &&
8197 if (name[1] == 'i' &&
8202 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8211 if (name[2] == 'd' &&
8221 if (name[2] == 'c' &&
8238 if (name[2] == 'c' &&
8248 if (name[2] == 't' &&
8262 if (name[1] == 'k' &&
8273 if (name[1] == 'r' &&
8287 if (name[2] == 's' &&
8297 if (name[2] == 'd' &&
8314 if (name[2] == 'm' &&
8324 if (name[2] == 'i' &&
8334 if (name[2] == 'e' &&
8344 if (name[2] == 'l' &&
8354 if (name[2] == 'a' &&
8367 if (name[3] == 't' &&
8370 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8376 if (name[3] == 'd' &&
8393 if (name[1] == 'i' &&
8407 if (name[2] == 'a' &&
8420 if (name[3] == 'e' &&
8455 if (name[2] == 'i' &&
8472 if (name[2] == 'i' &&
8482 if (name[2] == 'i' &&
8499 case 6: /* 33 tokens of length 6 */
8503 if (name[1] == 'c' &&
8518 if (name[2] == 'l' &&
8529 if (name[2] == 'r' &&
8544 if (name[1] == 'e' &&
8559 if (name[2] == 's' &&
8564 if(ckWARN_d(WARN_SYNTAX))
8565 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8571 if (name[2] == 'i' &&
8589 if (name[2] == 'l' &&
8600 if (name[2] == 'r' &&
8615 if (name[1] == 'm' &&
8630 if (name[2] == 'n' &&
8641 if (name[2] == 's' &&
8656 if (name[1] == 's' &&
8662 if (name[4] == 't' &&
8671 if (name[4] == 'e' &&
8680 if (name[4] == 'c' &&
8689 if (name[4] == 'n' &&
8705 if (name[1] == 'r' &&
8723 if (name[3] == 'a' &&
8733 if (name[3] == 'u' &&
8747 if (name[2] == 'n' &&
8765 if (name[2] == 'a' &&
8779 if (name[3] == 'e' &&
8792 if (name[4] == 't' &&
8801 if (name[4] == 'e' &&
8823 if (name[4] == 't' &&
8832 if (name[4] == 'e' &&
8848 if (name[2] == 'c' &&
8859 if (name[2] == 'l' &&
8870 if (name[2] == 'b' &&
8881 if (name[2] == 's' &&
8904 if (name[4] == 's' &&
8913 if (name[4] == 'n' &&
8926 if (name[3] == 'a' &&
8943 if (name[1] == 'a' &&
8958 case 7: /* 29 tokens of length 7 */
8962 if (name[1] == 'E' &&
8975 if (name[1] == '_' &&
8988 if (name[1] == 'i' &&
8995 return -KEY_binmode;
9001 if (name[1] == 'o' &&
9008 return -KEY_connect;
9017 if (name[2] == 'm' &&
9023 return -KEY_dbmopen;
9034 if (name[4] == 'u' &&
9038 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9044 if (name[4] == 'n' &&
9065 if (name[1] == 'o' &&
9078 if (name[1] == 'e' &&
9085 if (name[5] == 'r' &&
9088 return -KEY_getpgrp;
9094 if (name[5] == 'i' &&
9097 return -KEY_getppid;
9110 if (name[1] == 'c' &&
9117 return -KEY_lcfirst;
9123 if (name[1] == 'p' &&
9130 return -KEY_opendir;
9136 if (name[1] == 'a' &&
9154 if (name[3] == 'd' &&
9159 return -KEY_readdir;
9165 if (name[3] == 'u' &&
9176 if (name[3] == 'e' &&
9181 return -KEY_reverse;
9200 if (name[3] == 'k' &&
9205 return -KEY_seekdir;
9211 if (name[3] == 'p' &&
9216 return -KEY_setpgrp;
9226 if (name[2] == 'm' &&
9232 return -KEY_shmread;
9238 if (name[2] == 'r' &&
9244 return -KEY_sprintf;
9253 if (name[3] == 'l' &&
9258 return -KEY_symlink;
9267 if (name[4] == 'a' &&
9271 return -KEY_syscall;
9277 if (name[4] == 'p' &&
9281 return -KEY_sysopen;
9287 if (name[4] == 'e' &&
9291 return -KEY_sysread;
9297 if (name[4] == 'e' &&
9301 return -KEY_sysseek;
9319 if (name[1] == 'e' &&
9326 return -KEY_telldir;
9335 if (name[2] == 'f' &&
9341 return -KEY_ucfirst;
9347 if (name[2] == 's' &&
9353 return -KEY_unshift;
9363 if (name[1] == 'a' &&
9370 return -KEY_waitpid;
9379 case 8: /* 26 tokens of length 8 */
9383 if (name[1] == 'U' &&
9391 return KEY_AUTOLOAD;
9402 if (name[3] == 'A' &&
9408 return KEY___DATA__;
9414 if (name[3] == 'I' &&
9420 return -KEY___FILE__;
9426 if (name[3] == 'I' &&
9432 return -KEY___LINE__;
9448 if (name[2] == 'o' &&
9455 return -KEY_closedir;
9461 if (name[2] == 'n' &&
9468 return -KEY_continue;
9478 if (name[1] == 'b' &&
9486 return -KEY_dbmclose;
9492 if (name[1] == 'n' &&
9498 if (name[4] == 'r' &&
9503 return -KEY_endgrent;
9509 if (name[4] == 'w' &&
9514 return -KEY_endpwent;
9527 if (name[1] == 'o' &&
9535 return -KEY_formline;
9541 if (name[1] == 'e' &&
9552 if (name[6] == 'n' &&
9555 return -KEY_getgrent;
9561 if (name[6] == 'i' &&
9564 return -KEY_getgrgid;
9570 if (name[6] == 'a' &&
9573 return -KEY_getgrnam;
9586 if (name[4] == 'o' &&
9591 return -KEY_getlogin;
9602 if (name[6] == 'n' &&
9605 return -KEY_getpwent;
9611 if (name[6] == 'a' &&
9614 return -KEY_getpwnam;
9620 if (name[6] == 'i' &&
9623 return -KEY_getpwuid;
9643 if (name[1] == 'e' &&
9650 if (name[5] == 'i' &&
9657 return -KEY_readline;
9662 return -KEY_readlink;
9673 if (name[5] == 'i' &&
9677 return -KEY_readpipe;
9698 if (name[4] == 'r' &&
9703 return -KEY_setgrent;
9709 if (name[4] == 'w' &&
9714 return -KEY_setpwent;
9730 if (name[3] == 'w' &&
9736 return -KEY_shmwrite;
9742 if (name[3] == 't' &&
9748 return -KEY_shutdown;
9758 if (name[2] == 's' &&
9765 return -KEY_syswrite;
9775 if (name[1] == 'r' &&
9783 return -KEY_truncate;
9792 case 9: /* 9 tokens of length 9 */
9796 if (name[1] == 'N' &&
9805 return KEY_UNITCHECK;
9811 if (name[1] == 'n' &&
9820 return -KEY_endnetent;
9826 if (name[1] == 'e' &&
9835 return -KEY_getnetent;
9841 if (name[1] == 'o' &&
9850 return -KEY_localtime;
9856 if (name[1] == 'r' &&
9865 return KEY_prototype;
9871 if (name[1] == 'u' &&
9880 return -KEY_quotemeta;
9886 if (name[1] == 'e' &&
9895 return -KEY_rewinddir;
9901 if (name[1] == 'e' &&
9910 return -KEY_setnetent;
9916 if (name[1] == 'a' &&
9925 return -KEY_wantarray;
9934 case 10: /* 9 tokens of length 10 */
9938 if (name[1] == 'n' &&
9944 if (name[4] == 'o' &&
9951 return -KEY_endhostent;
9957 if (name[4] == 'e' &&
9964 return -KEY_endservent;
9977 if (name[1] == 'e' &&
9983 if (name[4] == 'o' &&
9990 return -KEY_gethostent;
9999 if (name[5] == 'r' &&
10005 return -KEY_getservent;
10011 if (name[5] == 'c' &&
10017 return -KEY_getsockopt;
10037 if (name[2] == 't')
10042 if (name[4] == 'o' &&
10049 return -KEY_sethostent;
10058 if (name[5] == 'r' &&
10064 return -KEY_setservent;
10070 if (name[5] == 'c' &&
10076 return -KEY_setsockopt;
10093 if (name[2] == 'c' &&
10102 return -KEY_socketpair;
10115 case 11: /* 8 tokens of length 11 */
10119 if (name[1] == '_' &&
10129 { /* __PACKAGE__ */
10130 return -KEY___PACKAGE__;
10136 if (name[1] == 'n' &&
10146 { /* endprotoent */
10147 return -KEY_endprotoent;
10153 if (name[1] == 'e' &&
10162 if (name[5] == 'e' &&
10168 { /* getpeername */
10169 return -KEY_getpeername;
10178 if (name[6] == 'o' &&
10183 { /* getpriority */
10184 return -KEY_getpriority;
10190 if (name[6] == 't' &&
10195 { /* getprotoent */
10196 return -KEY_getprotoent;
10210 if (name[4] == 'o' &&
10217 { /* getsockname */
10218 return -KEY_getsockname;
10231 if (name[1] == 'e' &&
10239 if (name[6] == 'o' &&
10244 { /* setpriority */
10245 return -KEY_setpriority;
10251 if (name[6] == 't' &&
10256 { /* setprotoent */
10257 return -KEY_setprotoent;
10273 case 12: /* 2 tokens of length 12 */
10274 if (name[0] == 'g' &&
10286 if (name[9] == 'd' &&
10289 { /* getnetbyaddr */
10290 return -KEY_getnetbyaddr;
10296 if (name[9] == 'a' &&
10299 { /* getnetbyname */
10300 return -KEY_getnetbyname;
10312 case 13: /* 4 tokens of length 13 */
10313 if (name[0] == 'g' &&
10320 if (name[4] == 'o' &&
10329 if (name[10] == 'd' &&
10332 { /* gethostbyaddr */
10333 return -KEY_gethostbyaddr;
10339 if (name[10] == 'a' &&
10342 { /* gethostbyname */
10343 return -KEY_gethostbyname;
10356 if (name[4] == 'e' &&
10365 if (name[10] == 'a' &&
10368 { /* getservbyname */
10369 return -KEY_getservbyname;
10375 if (name[10] == 'o' &&
10378 { /* getservbyport */
10379 return -KEY_getservbyport;
10398 case 14: /* 1 tokens of length 14 */
10399 if (name[0] == 'g' &&
10413 { /* getprotobyname */
10414 return -KEY_getprotobyname;
10419 case 16: /* 1 tokens of length 16 */
10420 if (name[0] == 'g' &&
10436 { /* getprotobynumber */
10437 return -KEY_getprotobynumber;
10451 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10455 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10456 if (ckWARN(WARN_SYNTAX)) {
10459 for (w = s+2; *w && level; w++) {
10462 else if (*w == ')')
10465 while (isSPACE(*w))
10467 /* the list of chars below is for end of statements or
10468 * block / parens, boolean operators (&&, ||, //) and branch
10469 * constructs (or, and, if, until, unless, while, err, for).
10470 * Not a very solid hack... */
10471 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10472 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10473 "%s (...) interpreted as function",name);
10476 while (s < PL_bufend && isSPACE(*s))
10480 while (s < PL_bufend && isSPACE(*s))
10482 if (isIDFIRST_lazy_if(s,UTF)) {
10483 const char * const w = s++;
10484 while (isALNUM_lazy_if(s,UTF))
10486 while (s < PL_bufend && isSPACE(*s))
10490 if (keyword(w, s - w, 0))
10493 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10494 if (gv && GvCVu(gv))
10496 Perl_croak(aTHX_ "No comma allowed after %s", what);
10501 /* Either returns sv, or mortalizes sv and returns a new SV*.
10502 Best used as sv=new_constant(..., sv, ...).
10503 If s, pv are NULL, calls subroutine with one argument,
10504 and type is used with error messages only. */
10507 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10511 HV * const table = GvHV(PL_hintgv); /* ^H */
10515 const char *why1 = "", *why2 = "", *why3 = "";
10517 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10520 why2 = (const char *)
10521 (strEQ(key,"charnames")
10522 ? "(possibly a missing \"use charnames ...\")"
10524 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10525 (type ? type: "undef"), why2);
10527 /* This is convoluted and evil ("goto considered harmful")
10528 * but I do not understand the intricacies of all the different
10529 * failure modes of %^H in here. The goal here is to make
10530 * the most probable error message user-friendly. --jhi */
10535 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10536 (type ? type: "undef"), why1, why2, why3);
10538 yyerror(SvPVX_const(msg));
10542 cvp = hv_fetch(table, key, strlen(key), FALSE);
10543 if (!cvp || !SvOK(*cvp)) {
10546 why3 = "} is not defined";
10549 sv_2mortal(sv); /* Parent created it permanently */
10552 pv = sv_2mortal(newSVpvn(s, len));
10554 typesv = sv_2mortal(newSVpv(type, 0));
10556 typesv = &PL_sv_undef;
10558 PUSHSTACKi(PERLSI_OVERLOAD);
10570 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10574 /* Check the eval first */
10575 if (!PL_in_eval && SvTRUE(ERRSV)) {
10576 sv_catpvs(ERRSV, "Propagated");
10577 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10579 res = SvREFCNT_inc_simple(sv);
10583 SvREFCNT_inc_simple_void(res);
10592 why1 = "Call to &{$^H{";
10594 why3 = "}} did not return a defined value";
10602 /* Returns a NUL terminated string, with the length of the string written to
10606 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10609 register char *d = dest;
10610 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10613 Perl_croak(aTHX_ ident_too_long);
10614 if (isALNUM(*s)) /* UTF handled below */
10616 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10621 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10625 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10626 char *t = s + UTF8SKIP(s);
10628 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10632 Perl_croak(aTHX_ ident_too_long);
10633 Copy(s, d, len, char);
10646 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10649 char *bracket = NULL;
10651 register char *d = dest;
10652 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10657 while (isDIGIT(*s)) {
10659 Perl_croak(aTHX_ ident_too_long);
10666 Perl_croak(aTHX_ ident_too_long);
10667 if (isALNUM(*s)) /* UTF handled below */
10669 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10674 else if (*s == ':' && s[1] == ':') {
10678 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10679 char *t = s + UTF8SKIP(s);
10680 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10682 if (d + (t - s) > e)
10683 Perl_croak(aTHX_ ident_too_long);
10684 Copy(s, d, t - s, char);
10695 if (PL_lex_state != LEX_NORMAL)
10696 PL_lex_state = LEX_INTERPENDMAYBE;
10699 if (*s == '$' && s[1] &&
10700 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10713 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10718 if (isSPACE(s[-1])) {
10720 const char ch = *s++;
10721 if (!SPACE_OR_TAB(ch)) {
10727 if (isIDFIRST_lazy_if(d,UTF)) {
10731 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10732 end += UTF8SKIP(end);
10733 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10734 end += UTF8SKIP(end);
10736 Copy(s, d, end - s, char);
10741 while ((isALNUM(*s) || *s == ':') && d < e)
10744 Perl_croak(aTHX_ ident_too_long);
10747 while (s < send && SPACE_OR_TAB(*s))
10749 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10750 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10751 const char * const brack =
10753 ((*s == '[') ? "[...]" : "{...}");
10754 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10755 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10756 funny, dest, brack, funny, dest, brack);
10759 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10763 /* Handle extended ${^Foo} variables
10764 * 1999-02-27 mjd-perl-patch@plover.com */
10765 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10769 while (isALNUM(*s) && d < e) {
10773 Perl_croak(aTHX_ ident_too_long);
10778 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10779 PL_lex_state = LEX_INTERPEND;
10782 if (PL_lex_state == LEX_NORMAL) {
10783 if (ckWARN(WARN_AMBIGUOUS) &&
10784 (keyword(dest, d - dest, 0)
10785 || get_cvn_flags(dest, d - dest, 0)))
10789 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10790 "Ambiguous use of %c{%s} resolved to %c%s",
10791 funny, dest, funny, dest);
10796 s = bracket; /* let the parser handle it */
10800 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10801 PL_lex_state = LEX_INTERPEND;
10806 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10808 PERL_UNUSED_CONTEXT;
10812 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10813 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10814 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10815 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10816 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10822 S_scan_pat(pTHX_ char *start, I32 type)
10826 char *s = scan_str(start,!!PL_madskills,FALSE);
10827 const char * const valid_flags =
10828 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10835 const char * const delimiter = skipspace(start);
10839 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10840 : "Search pattern not terminated" ));
10843 pm = (PMOP*)newPMOP(type, 0);
10844 if (PL_multi_open == '?') {
10845 /* This is the only point in the code that sets PMf_ONCE: */
10846 pm->op_pmflags |= PMf_ONCE;
10848 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10849 allows us to restrict the list needed by reset to just the ??
10851 assert(type != OP_TRANS);
10853 MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10856 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10859 elements = mg->mg_len / sizeof(PMOP**);
10860 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10861 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10862 mg->mg_len = elements * sizeof(PMOP**);
10863 PmopSTASH_set(pm,PL_curstash);
10869 while (*s && strchr(valid_flags, *s))
10870 pmflag(&pm->op_pmflags,*s++);
10872 if (PL_madskills && modstart != s) {
10873 SV* tmptoken = newSVpvn(modstart, s - modstart);
10874 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10877 /* issue a warning if /c is specified,but /g is not */
10878 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10879 && ckWARN(WARN_REGEXP))
10881 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10882 "Use of /c modifier is meaningless without /g" );
10885 PL_lex_op = (OP*)pm;
10886 yylval.ival = OP_MATCH;
10891 S_scan_subst(pTHX_ char *start)
10902 yylval.ival = OP_NULL;
10904 s = scan_str(start,!!PL_madskills,FALSE);
10907 Perl_croak(aTHX_ "Substitution pattern not terminated");
10909 if (s[-1] == PL_multi_open)
10912 if (PL_madskills) {
10913 CURMAD('q', PL_thisopen);
10914 CURMAD('_', PL_thiswhite);
10915 CURMAD('E', PL_thisstuff);
10916 CURMAD('Q', PL_thisclose);
10917 PL_realtokenstart = s - SvPVX(PL_linestr);
10921 first_start = PL_multi_start;
10922 s = scan_str(s,!!PL_madskills,FALSE);
10924 if (PL_lex_stuff) {
10925 SvREFCNT_dec(PL_lex_stuff);
10926 PL_lex_stuff = NULL;
10928 Perl_croak(aTHX_ "Substitution replacement not terminated");
10930 PL_multi_start = first_start; /* so whole substitution is taken together */
10932 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10935 if (PL_madskills) {
10936 CURMAD('z', PL_thisopen);
10937 CURMAD('R', PL_thisstuff);
10938 CURMAD('Z', PL_thisclose);
10944 if (*s == EXEC_PAT_MOD) {
10948 else if (strchr(S_PAT_MODS, *s))
10949 pmflag(&pm->op_pmflags,*s++);
10955 if (PL_madskills) {
10957 curmad('m', newSVpvn(modstart, s - modstart));
10958 append_madprops(PL_thismad, (OP*)pm, 0);
10962 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10963 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10967 SV * const repl = newSVpvs("");
10969 PL_sublex_info.super_bufptr = s;
10970 PL_sublex_info.super_bufend = PL_bufend;
10972 pm->op_pmflags |= PMf_EVAL;
10974 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10975 sv_catpvs(repl, "{");
10976 sv_catsv(repl, PL_lex_repl);
10977 if (strchr(SvPVX(PL_lex_repl), '#'))
10978 sv_catpvs(repl, "\n");
10979 sv_catpvs(repl, "}");
10981 SvREFCNT_dec(PL_lex_repl);
10982 PL_lex_repl = repl;
10985 PL_lex_op = (OP*)pm;
10986 yylval.ival = OP_SUBST;
10991 S_scan_trans(pTHX_ char *start)
11004 yylval.ival = OP_NULL;
11006 s = scan_str(start,!!PL_madskills,FALSE);
11008 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11010 if (s[-1] == PL_multi_open)
11013 if (PL_madskills) {
11014 CURMAD('q', PL_thisopen);
11015 CURMAD('_', PL_thiswhite);
11016 CURMAD('E', PL_thisstuff);
11017 CURMAD('Q', PL_thisclose);
11018 PL_realtokenstart = s - SvPVX(PL_linestr);
11022 s = scan_str(s,!!PL_madskills,FALSE);
11024 if (PL_lex_stuff) {
11025 SvREFCNT_dec(PL_lex_stuff);
11026 PL_lex_stuff = NULL;
11028 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11030 if (PL_madskills) {
11031 CURMAD('z', PL_thisopen);
11032 CURMAD('R', PL_thisstuff);
11033 CURMAD('Z', PL_thisclose);
11036 complement = del = squash = 0;
11043 complement = OPpTRANS_COMPLEMENT;
11046 del = OPpTRANS_DELETE;
11049 squash = OPpTRANS_SQUASH;
11058 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11059 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11060 o->op_private &= ~OPpTRANS_ALL;
11061 o->op_private |= del|squash|complement|
11062 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11063 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11066 yylval.ival = OP_TRANS;
11069 if (PL_madskills) {
11071 curmad('m', newSVpvn(modstart, s - modstart));
11072 append_madprops(PL_thismad, o, 0);
11081 S_scan_heredoc(pTHX_ register char *s)
11085 I32 op_type = OP_SCALAR;
11089 const char *found_newline;
11093 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11095 I32 stuffstart = s - SvPVX(PL_linestr);
11098 PL_realtokenstart = -1;
11103 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11107 while (SPACE_OR_TAB(*peek))
11109 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11112 s = delimcpy(d, e, s, PL_bufend, term, &len);
11122 if (!isALNUM_lazy_if(s,UTF))
11123 deprecate_old("bare << to mean <<\"\"");
11124 for (; isALNUM_lazy_if(s,UTF); s++) {
11129 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11130 Perl_croak(aTHX_ "Delimiter for here document is too long");
11133 len = d - PL_tokenbuf;
11136 if (PL_madskills) {
11137 tstart = PL_tokenbuf + !outer;
11138 PL_thisclose = newSVpvn(tstart, len - !outer);
11139 tstart = SvPVX(PL_linestr) + stuffstart;
11140 PL_thisopen = newSVpvn(tstart, s - tstart);
11141 stuffstart = s - SvPVX(PL_linestr);
11144 #ifndef PERL_STRICT_CR
11145 d = strchr(s, '\r');
11147 char * const olds = s;
11149 while (s < PL_bufend) {
11155 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11164 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11171 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11172 herewas = newSVpvn(s,PL_bufend-s);
11176 herewas = newSVpvn(s-1,found_newline-s+1);
11179 herewas = newSVpvn(s,found_newline-s);
11183 if (PL_madskills) {
11184 tstart = SvPVX(PL_linestr) + stuffstart;
11186 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11188 PL_thisstuff = newSVpvn(tstart, s - tstart);
11191 s += SvCUR(herewas);
11194 stuffstart = s - SvPVX(PL_linestr);
11200 tmpstr = newSV_type(SVt_PVIV);
11201 SvGROW(tmpstr, 80);
11202 if (term == '\'') {
11203 op_type = OP_CONST;
11204 SvIV_set(tmpstr, -1);
11206 else if (term == '`') {
11207 op_type = OP_BACKTICK;
11208 SvIV_set(tmpstr, '\\');
11212 PL_multi_start = CopLINE(PL_curcop);
11213 PL_multi_open = PL_multi_close = '<';
11214 term = *PL_tokenbuf;
11215 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11216 char * const bufptr = PL_sublex_info.super_bufptr;
11217 char * const bufend = PL_sublex_info.super_bufend;
11218 char * const olds = s - SvCUR(herewas);
11219 s = strchr(bufptr, '\n');
11223 while (s < bufend &&
11224 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11226 CopLINE_inc(PL_curcop);
11229 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11230 missingterm(PL_tokenbuf);
11232 sv_setpvn(herewas,bufptr,d-bufptr+1);
11233 sv_setpvn(tmpstr,d+1,s-d);
11235 sv_catpvn(herewas,s,bufend-s);
11236 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11243 while (s < PL_bufend &&
11244 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11246 CopLINE_inc(PL_curcop);
11248 if (s >= PL_bufend) {
11249 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11250 missingterm(PL_tokenbuf);
11252 sv_setpvn(tmpstr,d+1,s-d);
11254 if (PL_madskills) {
11256 sv_catpvn(PL_thisstuff, d + 1, s - d);
11258 PL_thisstuff = newSVpvn(d + 1, s - d);
11259 stuffstart = s - SvPVX(PL_linestr);
11263 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11265 sv_catpvn(herewas,s,PL_bufend-s);
11266 sv_setsv(PL_linestr,herewas);
11267 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11268 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11269 PL_last_lop = PL_last_uni = NULL;
11272 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11273 while (s >= PL_bufend) { /* multiple line string? */
11275 if (PL_madskills) {
11276 tstart = SvPVX(PL_linestr) + stuffstart;
11278 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11280 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11284 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11285 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11286 missingterm(PL_tokenbuf);
11289 stuffstart = s - SvPVX(PL_linestr);
11291 CopLINE_inc(PL_curcop);
11292 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11293 PL_last_lop = PL_last_uni = NULL;
11294 #ifndef PERL_STRICT_CR
11295 if (PL_bufend - PL_linestart >= 2) {
11296 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11297 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11299 PL_bufend[-2] = '\n';
11301 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11303 else if (PL_bufend[-1] == '\r')
11304 PL_bufend[-1] = '\n';
11306 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11307 PL_bufend[-1] = '\n';
11309 if (PERLDB_LINE && PL_curstash != PL_debstash)
11310 update_debugger_info(PL_linestr, NULL, 0);
11311 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11312 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11313 *(SvPVX(PL_linestr) + off ) = ' ';
11314 sv_catsv(PL_linestr,herewas);
11315 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11316 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11320 sv_catsv(tmpstr,PL_linestr);
11325 PL_multi_end = CopLINE(PL_curcop);
11326 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11327 SvPV_shrink_to_cur(tmpstr);
11329 SvREFCNT_dec(herewas);
11331 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11333 else if (PL_encoding)
11334 sv_recode_to_utf8(tmpstr, PL_encoding);
11336 PL_lex_stuff = tmpstr;
11337 yylval.ival = op_type;
11341 /* scan_inputsymbol
11342 takes: current position in input buffer
11343 returns: new position in input buffer
11344 side-effects: yylval and lex_op are set.
11349 <FH> read from filehandle
11350 <pkg::FH> read from package qualified filehandle
11351 <pkg'FH> read from package qualified filehandle
11352 <$fh> read from filehandle in $fh
11353 <*.h> filename glob
11358 S_scan_inputsymbol(pTHX_ char *start)
11361 register char *s = start; /* current position in buffer */
11365 char *d = PL_tokenbuf; /* start of temp holding space */
11366 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11368 end = strchr(s, '\n');
11371 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11373 /* die if we didn't have space for the contents of the <>,
11374 or if it didn't end, or if we see a newline
11377 if (len >= (I32)sizeof PL_tokenbuf)
11378 Perl_croak(aTHX_ "Excessively long <> operator");
11380 Perl_croak(aTHX_ "Unterminated <> operator");
11385 Remember, only scalar variables are interpreted as filehandles by
11386 this code. Anything more complex (e.g., <$fh{$num}>) will be
11387 treated as a glob() call.
11388 This code makes use of the fact that except for the $ at the front,
11389 a scalar variable and a filehandle look the same.
11391 if (*d == '$' && d[1]) d++;
11393 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11394 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11397 /* If we've tried to read what we allow filehandles to look like, and
11398 there's still text left, then it must be a glob() and not a getline.
11399 Use scan_str to pull out the stuff between the <> and treat it
11400 as nothing more than a string.
11403 if (d - PL_tokenbuf != len) {
11404 yylval.ival = OP_GLOB;
11406 s = scan_str(start,!!PL_madskills,FALSE);
11408 Perl_croak(aTHX_ "Glob not terminated");
11412 bool readline_overriden = FALSE;
11415 /* we're in a filehandle read situation */
11418 /* turn <> into <ARGV> */
11420 Copy("ARGV",d,5,char);
11422 /* Check whether readline() is overriden */
11423 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11425 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11427 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11428 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11429 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11430 readline_overriden = TRUE;
11432 /* if <$fh>, create the ops to turn the variable into a
11436 /* try to find it in the pad for this block, otherwise find
11437 add symbol table ops
11439 const PADOFFSET tmp = pad_findmy(d);
11440 if (tmp != NOT_IN_PAD) {
11441 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11442 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11443 HEK * const stashname = HvNAME_HEK(stash);
11444 SV * const sym = sv_2mortal(newSVhek(stashname));
11445 sv_catpvs(sym, "::");
11446 sv_catpv(sym, d+1);
11451 OP * const o = newOP(OP_PADSV, 0);
11453 PL_lex_op = readline_overriden
11454 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11455 append_elem(OP_LIST, o,
11456 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11457 : (OP*)newUNOP(OP_READLINE, 0, o);
11466 ? (GV_ADDMULTI | GV_ADDINEVAL)
11469 PL_lex_op = readline_overriden
11470 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11471 append_elem(OP_LIST,
11472 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11473 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11474 : (OP*)newUNOP(OP_READLINE, 0,
11475 newUNOP(OP_RV2SV, 0,
11476 newGVOP(OP_GV, 0, gv)));
11478 if (!readline_overriden)
11479 PL_lex_op->op_flags |= OPf_SPECIAL;
11480 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11481 yylval.ival = OP_NULL;
11484 /* If it's none of the above, it must be a literal filehandle
11485 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11487 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11488 PL_lex_op = readline_overriden
11489 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11490 append_elem(OP_LIST,
11491 newGVOP(OP_GV, 0, gv),
11492 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11493 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11494 yylval.ival = OP_NULL;
11503 takes: start position in buffer
11504 keep_quoted preserve \ on the embedded delimiter(s)
11505 keep_delims preserve the delimiters around the string
11506 returns: position to continue reading from buffer
11507 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11508 updates the read buffer.
11510 This subroutine pulls a string out of the input. It is called for:
11511 q single quotes q(literal text)
11512 ' single quotes 'literal text'
11513 qq double quotes qq(interpolate $here please)
11514 " double quotes "interpolate $here please"
11515 qx backticks qx(/bin/ls -l)
11516 ` backticks `/bin/ls -l`
11517 qw quote words @EXPORT_OK = qw( func() $spam )
11518 m// regexp match m/this/
11519 s/// regexp substitute s/this/that/
11520 tr/// string transliterate tr/this/that/
11521 y/// string transliterate y/this/that/
11522 ($*@) sub prototypes sub foo ($)
11523 (stuff) sub attr parameters sub foo : attr(stuff)
11524 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11526 In most of these cases (all but <>, patterns and transliterate)
11527 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11528 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11529 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11532 It skips whitespace before the string starts, and treats the first
11533 character as the delimiter. If the delimiter is one of ([{< then
11534 the corresponding "close" character )]}> is used as the closing
11535 delimiter. It allows quoting of delimiters, and if the string has
11536 balanced delimiters ([{<>}]) it allows nesting.
11538 On success, the SV with the resulting string is put into lex_stuff or,
11539 if that is already non-NULL, into lex_repl. The second case occurs only
11540 when parsing the RHS of the special constructs s/// and tr/// (y///).
11541 For convenience, the terminating delimiter character is stuffed into
11546 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11549 SV *sv; /* scalar value: string */
11550 const char *tmps; /* temp string, used for delimiter matching */
11551 register char *s = start; /* current position in the buffer */
11552 register char term; /* terminating character */
11553 register char *to; /* current position in the sv's data */
11554 I32 brackets = 1; /* bracket nesting level */
11555 bool has_utf8 = FALSE; /* is there any utf8 content? */
11556 I32 termcode; /* terminating char. code */
11557 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11558 STRLEN termlen; /* length of terminating string */
11559 int last_off = 0; /* last position for nesting bracket */
11565 /* skip space before the delimiter */
11571 if (PL_realtokenstart >= 0) {
11572 stuffstart = PL_realtokenstart;
11573 PL_realtokenstart = -1;
11576 stuffstart = start - SvPVX(PL_linestr);
11578 /* mark where we are, in case we need to report errors */
11581 /* after skipping whitespace, the next character is the terminator */
11584 termcode = termstr[0] = term;
11588 termcode = utf8_to_uvchr((U8*)s, &termlen);
11589 Copy(s, termstr, termlen, U8);
11590 if (!UTF8_IS_INVARIANT(term))
11594 /* mark where we are */
11595 PL_multi_start = CopLINE(PL_curcop);
11596 PL_multi_open = term;
11598 /* find corresponding closing delimiter */
11599 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11600 termcode = termstr[0] = term = tmps[5];
11602 PL_multi_close = term;
11604 /* create a new SV to hold the contents. 79 is the SV's initial length.
11605 What a random number. */
11606 sv = newSV_type(SVt_PVIV);
11608 SvIV_set(sv, termcode);
11609 (void)SvPOK_only(sv); /* validate pointer */
11611 /* move past delimiter and try to read a complete string */
11613 sv_catpvn(sv, s, termlen);
11616 tstart = SvPVX(PL_linestr) + stuffstart;
11617 if (!PL_thisopen && !keep_delims) {
11618 PL_thisopen = newSVpvn(tstart, s - tstart);
11619 stuffstart = s - SvPVX(PL_linestr);
11623 if (PL_encoding && !UTF) {
11627 int offset = s - SvPVX_const(PL_linestr);
11628 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11629 &offset, (char*)termstr, termlen);
11630 const char * const ns = SvPVX_const(PL_linestr) + offset;
11631 char * const svlast = SvEND(sv) - 1;
11633 for (; s < ns; s++) {
11634 if (*s == '\n' && !PL_rsfp)
11635 CopLINE_inc(PL_curcop);
11638 goto read_more_line;
11640 /* handle quoted delimiters */
11641 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11643 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11645 if ((svlast-1 - t) % 2) {
11646 if (!keep_quoted) {
11647 *(svlast-1) = term;
11649 SvCUR_set(sv, SvCUR(sv) - 1);
11654 if (PL_multi_open == PL_multi_close) {
11660 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11661 /* At here, all closes are "was quoted" one,
11662 so we don't check PL_multi_close. */
11664 if (!keep_quoted && *(t+1) == PL_multi_open)
11669 else if (*t == PL_multi_open)
11677 SvCUR_set(sv, w - SvPVX_const(sv));
11679 last_off = w - SvPVX(sv);
11680 if (--brackets <= 0)
11685 if (!keep_delims) {
11686 SvCUR_set(sv, SvCUR(sv) - 1);
11692 /* extend sv if need be */
11693 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11694 /* set 'to' to the next character in the sv's string */
11695 to = SvPVX(sv)+SvCUR(sv);
11697 /* if open delimiter is the close delimiter read unbridle */
11698 if (PL_multi_open == PL_multi_close) {
11699 for (; s < PL_bufend; s++,to++) {
11700 /* embedded newlines increment the current line number */
11701 if (*s == '\n' && !PL_rsfp)
11702 CopLINE_inc(PL_curcop);
11703 /* handle quoted delimiters */
11704 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11705 if (!keep_quoted && s[1] == term)
11707 /* any other quotes are simply copied straight through */
11711 /* terminate when run out of buffer (the for() condition), or
11712 have found the terminator */
11713 else if (*s == term) {
11716 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11719 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11725 /* if the terminator isn't the same as the start character (e.g.,
11726 matched brackets), we have to allow more in the quoting, and
11727 be prepared for nested brackets.
11730 /* read until we run out of string, or we find the terminator */
11731 for (; s < PL_bufend; s++,to++) {
11732 /* embedded newlines increment the line count */
11733 if (*s == '\n' && !PL_rsfp)
11734 CopLINE_inc(PL_curcop);
11735 /* backslashes can escape the open or closing characters */
11736 if (*s == '\\' && s+1 < PL_bufend) {
11737 if (!keep_quoted &&
11738 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11743 /* allow nested opens and closes */
11744 else if (*s == PL_multi_close && --brackets <= 0)
11746 else if (*s == PL_multi_open)
11748 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11753 /* terminate the copied string and update the sv's end-of-string */
11755 SvCUR_set(sv, to - SvPVX_const(sv));
11758 * this next chunk reads more into the buffer if we're not done yet
11762 break; /* handle case where we are done yet :-) */
11764 #ifndef PERL_STRICT_CR
11765 if (to - SvPVX_const(sv) >= 2) {
11766 if ((to[-2] == '\r' && to[-1] == '\n') ||
11767 (to[-2] == '\n' && to[-1] == '\r'))
11771 SvCUR_set(sv, to - SvPVX_const(sv));
11773 else if (to[-1] == '\r')
11776 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11781 /* if we're out of file, or a read fails, bail and reset the current
11782 line marker so we can report where the unterminated string began
11785 if (PL_madskills) {
11786 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11788 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11790 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11794 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11796 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11802 /* we read a line, so increment our line counter */
11803 CopLINE_inc(PL_curcop);
11805 /* update debugger info */
11806 if (PERLDB_LINE && PL_curstash != PL_debstash)
11807 update_debugger_info(PL_linestr, NULL, 0);
11809 /* having changed the buffer, we must update PL_bufend */
11810 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11811 PL_last_lop = PL_last_uni = NULL;
11814 /* at this point, we have successfully read the delimited string */
11816 if (!PL_encoding || UTF) {
11818 if (PL_madskills) {
11819 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11820 const int len = s - tstart;
11822 sv_catpvn(PL_thisstuff, tstart, len);
11824 PL_thisstuff = newSVpvn(tstart, len);
11825 if (!PL_thisclose && !keep_delims)
11826 PL_thisclose = newSVpvn(s,termlen);
11831 sv_catpvn(sv, s, termlen);
11836 if (PL_madskills) {
11837 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11838 const int len = s - tstart - termlen;
11840 sv_catpvn(PL_thisstuff, tstart, len);
11842 PL_thisstuff = newSVpvn(tstart, len);
11843 if (!PL_thisclose && !keep_delims)
11844 PL_thisclose = newSVpvn(s - termlen,termlen);
11848 if (has_utf8 || PL_encoding)
11851 PL_multi_end = CopLINE(PL_curcop);
11853 /* if we allocated too much space, give some back */
11854 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11855 SvLEN_set(sv, SvCUR(sv) + 1);
11856 SvPV_renew(sv, SvLEN(sv));
11859 /* decide whether this is the first or second quoted string we've read
11872 takes: pointer to position in buffer
11873 returns: pointer to new position in buffer
11874 side-effects: builds ops for the constant in yylval.op
11876 Read a number in any of the formats that Perl accepts:
11878 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11879 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11882 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11884 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11887 If it reads a number without a decimal point or an exponent, it will
11888 try converting the number to an integer and see if it can do so
11889 without loss of precision.
11893 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11896 register const char *s = start; /* current position in buffer */
11897 register char *d; /* destination in temp buffer */
11898 register char *e; /* end of temp buffer */
11899 NV nv; /* number read, as a double */
11900 SV *sv = NULL; /* place to put the converted number */
11901 bool floatit; /* boolean: int or float? */
11902 const char *lastub = NULL; /* position of last underbar */
11903 static char const number_too_long[] = "Number too long";
11905 /* We use the first character to decide what type of number this is */
11909 Perl_croak(aTHX_ "panic: scan_num");
11911 /* if it starts with a 0, it could be an octal number, a decimal in
11912 0.13 disguise, or a hexadecimal number, or a binary number. */
11916 u holds the "number so far"
11917 shift the power of 2 of the base
11918 (hex == 4, octal == 3, binary == 1)
11919 overflowed was the number more than we can hold?
11921 Shift is used when we add a digit. It also serves as an "are
11922 we in octal/hex/binary?" indicator to disallow hex characters
11923 when in octal mode.
11928 bool overflowed = FALSE;
11929 bool just_zero = TRUE; /* just plain 0 or binary number? */
11930 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11931 static const char* const bases[5] =
11932 { "", "binary", "", "octal", "hexadecimal" };
11933 static const char* const Bases[5] =
11934 { "", "Binary", "", "Octal", "Hexadecimal" };
11935 static const char* const maxima[5] =
11937 "0b11111111111111111111111111111111",
11941 const char *base, *Base, *max;
11943 /* check for hex */
11948 } else if (s[1] == 'b') {
11953 /* check for a decimal in disguise */
11954 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11956 /* so it must be octal */
11963 if (ckWARN(WARN_SYNTAX))
11964 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11965 "Misplaced _ in number");
11969 base = bases[shift];
11970 Base = Bases[shift];
11971 max = maxima[shift];
11973 /* read the rest of the number */
11975 /* x is used in the overflow test,
11976 b is the digit we're adding on. */
11981 /* if we don't mention it, we're done */
11985 /* _ are ignored -- but warned about if consecutive */
11987 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11988 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11989 "Misplaced _ in number");
11993 /* 8 and 9 are not octal */
11994 case '8': case '9':
11996 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12000 case '2': case '3': case '4':
12001 case '5': case '6': case '7':
12003 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12006 case '0': case '1':
12007 b = *s++ & 15; /* ASCII digit -> value of digit */
12011 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12012 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12013 /* make sure they said 0x */
12016 b = (*s++ & 7) + 9;
12018 /* Prepare to put the digit we have onto the end
12019 of the number so far. We check for overflows.
12025 x = u << shift; /* make room for the digit */
12027 if ((x >> shift) != u
12028 && !(PL_hints & HINT_NEW_BINARY)) {
12031 if (ckWARN_d(WARN_OVERFLOW))
12032 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12033 "Integer overflow in %s number",
12036 u = x | b; /* add the digit to the end */
12039 n *= nvshift[shift];
12040 /* If an NV has not enough bits in its
12041 * mantissa to represent an UV this summing of
12042 * small low-order numbers is a waste of time
12043 * (because the NV cannot preserve the
12044 * low-order bits anyway): we could just
12045 * remember when did we overflow and in the
12046 * end just multiply n by the right
12054 /* if we get here, we had success: make a scalar value from
12059 /* final misplaced underbar check */
12060 if (s[-1] == '_') {
12061 if (ckWARN(WARN_SYNTAX))
12062 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12067 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12068 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12069 "%s number > %s non-portable",
12075 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12076 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12077 "%s number > %s non-portable",
12082 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12083 sv = new_constant(start, s - start, "integer",
12085 else if (PL_hints & HINT_NEW_BINARY)
12086 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12091 handle decimal numbers.
12092 we're also sent here when we read a 0 as the first digit
12094 case '1': case '2': case '3': case '4': case '5':
12095 case '6': case '7': case '8': case '9': case '.':
12098 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12101 /* read next group of digits and _ and copy into d */
12102 while (isDIGIT(*s) || *s == '_') {
12103 /* skip underscores, checking for misplaced ones
12107 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12108 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12109 "Misplaced _ in number");
12113 /* check for end of fixed-length buffer */
12115 Perl_croak(aTHX_ number_too_long);
12116 /* if we're ok, copy the character */
12121 /* final misplaced underbar check */
12122 if (lastub && s == lastub + 1) {
12123 if (ckWARN(WARN_SYNTAX))
12124 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12127 /* read a decimal portion if there is one. avoid
12128 3..5 being interpreted as the number 3. followed
12131 if (*s == '.' && s[1] != '.') {
12136 if (ckWARN(WARN_SYNTAX))
12137 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12138 "Misplaced _ in number");
12142 /* copy, ignoring underbars, until we run out of digits.
12144 for (; isDIGIT(*s) || *s == '_'; s++) {
12145 /* fixed length buffer check */
12147 Perl_croak(aTHX_ number_too_long);
12149 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12150 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12151 "Misplaced _ in number");
12157 /* fractional part ending in underbar? */
12158 if (s[-1] == '_') {
12159 if (ckWARN(WARN_SYNTAX))
12160 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12161 "Misplaced _ in number");
12163 if (*s == '.' && isDIGIT(s[1])) {
12164 /* oops, it's really a v-string, but without the "v" */
12170 /* read exponent part, if present */
12171 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12175 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12176 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12178 /* stray preinitial _ */
12180 if (ckWARN(WARN_SYNTAX))
12181 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12182 "Misplaced _ in number");
12186 /* allow positive or negative exponent */
12187 if (*s == '+' || *s == '-')
12190 /* stray initial _ */
12192 if (ckWARN(WARN_SYNTAX))
12193 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12194 "Misplaced _ in number");
12198 /* read digits of exponent */
12199 while (isDIGIT(*s) || *s == '_') {
12202 Perl_croak(aTHX_ number_too_long);
12206 if (((lastub && s == lastub + 1) ||
12207 (!isDIGIT(s[1]) && s[1] != '_'))
12208 && ckWARN(WARN_SYNTAX))
12209 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12210 "Misplaced _ in number");
12217 /* make an sv from the string */
12221 We try to do an integer conversion first if no characters
12222 indicating "float" have been found.
12227 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12229 if (flags == IS_NUMBER_IN_UV) {
12231 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12234 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12235 if (uv <= (UV) IV_MIN)
12236 sv_setiv(sv, -(IV)uv);
12243 /* terminate the string */
12245 nv = Atof(PL_tokenbuf);
12249 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12250 (PL_hints & HINT_NEW_INTEGER) )
12251 sv = new_constant(PL_tokenbuf,
12254 (floatit ? "float" : "integer"),
12258 /* if it starts with a v, it could be a v-string */
12261 sv = newSV(5); /* preallocate storage space */
12262 s = scan_vstring(s, PL_bufend, sv);
12266 /* make the op for the constant and return */
12269 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12271 lvalp->opval = NULL;
12277 S_scan_formline(pTHX_ register char *s)
12280 register char *eol;
12282 SV * const stuff = newSVpvs("");
12283 bool needargs = FALSE;
12284 bool eofmt = FALSE;
12286 char *tokenstart = s;
12289 if (PL_madskills) {
12290 savewhite = PL_thiswhite;
12295 while (!needargs) {
12298 #ifdef PERL_STRICT_CR
12299 while (SPACE_OR_TAB(*t))
12302 while (SPACE_OR_TAB(*t) || *t == '\r')
12305 if (*t == '\n' || t == PL_bufend) {
12310 if (PL_in_eval && !PL_rsfp) {
12311 eol = (char *) memchr(s,'\n',PL_bufend-s);
12316 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12318 for (t = s; t < eol; t++) {
12319 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12321 goto enough; /* ~~ must be first line in formline */
12323 if (*t == '@' || *t == '^')
12327 sv_catpvn(stuff, s, eol-s);
12328 #ifndef PERL_STRICT_CR
12329 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12330 char *end = SvPVX(stuff) + SvCUR(stuff);
12333 SvCUR_set(stuff, SvCUR(stuff) - 1);
12343 if (PL_madskills) {
12345 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12347 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12350 s = filter_gets(PL_linestr, PL_rsfp, 0);
12352 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12354 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12356 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12357 PL_last_lop = PL_last_uni = NULL;
12366 if (SvCUR(stuff)) {
12369 PL_lex_state = LEX_NORMAL;
12370 start_force(PL_curforce);
12371 NEXTVAL_NEXTTOKE.ival = 0;
12375 PL_lex_state = LEX_FORMLINE;
12377 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12379 else if (PL_encoding)
12380 sv_recode_to_utf8(stuff, PL_encoding);
12382 start_force(PL_curforce);
12383 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12385 start_force(PL_curforce);
12386 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12390 SvREFCNT_dec(stuff);
12392 PL_lex_formbrack = 0;
12396 if (PL_madskills) {
12398 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12400 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12401 PL_thiswhite = savewhite;
12413 PL_cshlen = strlen(PL_cshname);
12415 #if defined(USE_ITHREADS)
12416 PERL_UNUSED_CONTEXT;
12422 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12425 const I32 oldsavestack_ix = PL_savestack_ix;
12426 CV* const outsidecv = PL_compcv;
12429 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12431 SAVEI32(PL_subline);
12432 save_item(PL_subname);
12433 SAVESPTR(PL_compcv);
12435 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12436 CvFLAGS(PL_compcv) |= flags;
12438 PL_subline = CopLINE(PL_curcop);
12439 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12440 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12441 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12443 return oldsavestack_ix;
12447 #pragma segment Perl_yylex
12450 Perl_yywarn(pTHX_ const char *s)
12453 PL_in_eval |= EVAL_WARNONLY;
12455 PL_in_eval &= ~EVAL_WARNONLY;
12460 Perl_yyerror(pTHX_ const char *s)
12463 const char *where = NULL;
12464 const char *context = NULL;
12467 int yychar = PL_parser->yychar;
12469 if (!yychar || (yychar == ';' && !PL_rsfp))
12471 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12472 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12473 PL_oldbufptr != PL_bufptr) {
12476 The code below is removed for NetWare because it abends/crashes on NetWare
12477 when the script has error such as not having the closing quotes like:
12478 if ($var eq "value)
12479 Checking of white spaces is anyway done in NetWare code.
12482 while (isSPACE(*PL_oldoldbufptr))
12485 context = PL_oldoldbufptr;
12486 contlen = PL_bufptr - PL_oldoldbufptr;
12488 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12489 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12492 The code below is removed for NetWare because it abends/crashes on NetWare
12493 when the script has error such as not having the closing quotes like:
12494 if ($var eq "value)
12495 Checking of white spaces is anyway done in NetWare code.
12498 while (isSPACE(*PL_oldbufptr))
12501 context = PL_oldbufptr;
12502 contlen = PL_bufptr - PL_oldbufptr;
12504 else if (yychar > 255)
12505 where = "next token ???";
12506 else if (yychar == -2) { /* YYEMPTY */
12507 if (PL_lex_state == LEX_NORMAL ||
12508 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12509 where = "at end of line";
12510 else if (PL_lex_inpat)
12511 where = "within pattern";
12513 where = "within string";
12516 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12518 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12519 else if (isPRINT_LC(yychar))
12520 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12522 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12523 where = SvPVX_const(where_sv);
12525 msg = sv_2mortal(newSVpv(s, 0));
12526 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12527 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12529 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12531 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12532 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12533 Perl_sv_catpvf(aTHX_ msg,
12534 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12535 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12538 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12542 if (PL_error_count >= 10) {
12543 if (PL_in_eval && SvCUR(ERRSV))
12544 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12545 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12547 Perl_croak(aTHX_ "%s has too many errors.\n",
12548 OutCopFILE(PL_curcop));
12551 PL_in_my_stash = NULL;
12555 #pragma segment Main
12559 S_swallow_bom(pTHX_ U8 *s)
12562 const STRLEN slen = SvCUR(PL_linestr);
12565 if (s[1] == 0xFE) {
12566 /* UTF-16 little-endian? (or UTF32-LE?) */
12567 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12568 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12569 #ifndef PERL_NO_UTF16_FILTER
12570 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12573 if (PL_bufend > (char*)s) {
12577 filter_add(utf16rev_textfilter, NULL);
12578 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12579 utf16_to_utf8_reversed(s, news,
12580 PL_bufend - (char*)s - 1,
12582 sv_setpvn(PL_linestr, (const char*)news, newlen);
12584 s = (U8*)SvPVX(PL_linestr);
12585 Copy(news, s, newlen, U8);
12589 SvUTF8_on(PL_linestr);
12590 s = (U8*)SvPVX(PL_linestr);
12592 /* FIXME - is this a general bug fix? */
12595 PL_bufend = SvPVX(PL_linestr) + newlen;
12598 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12603 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12604 #ifndef PERL_NO_UTF16_FILTER
12605 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12608 if (PL_bufend > (char *)s) {
12612 filter_add(utf16_textfilter, NULL);
12613 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12614 utf16_to_utf8(s, news,
12615 PL_bufend - (char*)s,
12617 sv_setpvn(PL_linestr, (const char*)news, newlen);
12619 SvUTF8_on(PL_linestr);
12620 s = (U8*)SvPVX(PL_linestr);
12621 PL_bufend = SvPVX(PL_linestr) + newlen;
12624 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12629 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12630 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12631 s += 3; /* UTF-8 */
12637 if (s[2] == 0xFE && s[3] == 0xFF) {
12638 /* UTF-32 big-endian */
12639 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12642 else if (s[2] == 0 && s[3] != 0) {
12645 * are a good indicator of UTF-16BE. */
12646 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12652 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12653 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12654 s += 4; /* UTF-8 */
12660 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12663 * are a good indicator of UTF-16LE. */
12664 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12673 * Restore a source filter.
12677 restore_rsfp(pTHX_ void *f)
12680 PerlIO * const fp = (PerlIO*)f;
12682 if (PL_rsfp == PerlIO_stdin())
12683 PerlIO_clearerr(PL_rsfp);
12684 else if (PL_rsfp && (PL_rsfp != fp))
12685 PerlIO_close(PL_rsfp);
12689 #ifndef PERL_NO_UTF16_FILTER
12691 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12694 const STRLEN old = SvCUR(sv);
12695 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12696 DEBUG_P(PerlIO_printf(Perl_debug_log,
12697 "utf16_textfilter(%p): %d %d (%d)\n",
12698 FPTR2DPTR(void *, utf16_textfilter),
12699 idx, maxlen, (int) count));
12703 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12704 Copy(SvPVX_const(sv), tmps, old, char);
12705 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12706 SvCUR(sv) - old, &newlen);
12707 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12709 DEBUG_P({sv_dump(sv);});
12714 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12717 const STRLEN old = SvCUR(sv);
12718 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12719 DEBUG_P(PerlIO_printf(Perl_debug_log,
12720 "utf16rev_textfilter(%p): %d %d (%d)\n",
12721 FPTR2DPTR(void *, utf16rev_textfilter),
12722 idx, maxlen, (int) count));
12726 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12727 Copy(SvPVX_const(sv), tmps, old, char);
12728 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12729 SvCUR(sv) - old, &newlen);
12730 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12732 DEBUG_P({ sv_dump(sv); });
12738 Returns a pointer to the next character after the parsed
12739 vstring, as well as updating the passed in sv.
12741 Function must be called like
12744 s = scan_vstring(s,e,sv);
12746 where s and e are the start and end of the string.
12747 The sv should already be large enough to store the vstring
12748 passed in, for performance reasons.
12753 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12756 const char *pos = s;
12757 const char *start = s;
12758 if (*pos == 'v') pos++; /* get past 'v' */
12759 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12761 if ( *pos != '.') {
12762 /* this may not be a v-string if followed by => */
12763 const char *next = pos;
12764 while (next < e && isSPACE(*next))
12766 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12767 /* return string not v-string */
12768 sv_setpvn(sv,(char *)s,pos-s);
12769 return (char *)pos;
12773 if (!isALPHA(*pos)) {
12774 U8 tmpbuf[UTF8_MAXBYTES+1];
12777 s++; /* get past 'v' */
12779 sv_setpvn(sv, "", 0);
12782 /* this is atoi() that tolerates underscores */
12785 const char *end = pos;
12787 while (--end >= s) {
12789 const UV orev = rev;
12790 rev += (*end - '0') * mult;
12792 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12793 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12794 "Integer overflow in decimal number");
12798 if (rev > 0x7FFFFFFF)
12799 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12801 /* Append native character for the rev point */
12802 tmpend = uvchr_to_utf8(tmpbuf, rev);
12803 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12804 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12806 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12812 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12816 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12824 * c-indentation-style: bsd
12825 * c-basic-offset: 4
12826 * indent-tabs-mode: t
12829 * ex: set ts=8 sts=4 sw=4 noet: