3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 static const char ident_too_long[] = "Identifier too long";
29 static const char commaless_variable_list[] = "comma-less variable list";
31 static void restore_rsfp(pTHX_ void *f);
32 #ifndef PERL_NO_UTF16_FILTER
33 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
34 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
39 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
41 # define CURMAD(slot,sv)
42 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
45 #define XFAKEBRACK 128
48 #ifdef USE_UTF8_SCRIPTS
49 # define UTF (!IN_BYTES)
51 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
54 /* In variables named $^X, these are the legal values for X.
55 * 1999-02-27 mjd-perl-patch@plover.com */
56 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58 /* On MacOS, respect nonbreaking spaces */
59 #ifdef MACOS_TRADITIONAL
60 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
65 /* LEX_* are values for PL_lex_state, the state of the lexer.
66 * They are arranged oddly so that the guard on the switch statement
67 * can get by with a single comparison (if the compiler is smart enough).
70 /* #define LEX_NOTPARSING 11 is done in perl.h. */
72 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
73 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
74 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
75 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
76 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
78 /* at end of code, eg "$x" followed by: */
79 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
80 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
83 string or after \E, $foo, etc */
84 #define LEX_INTERPCONST 2 /* NOT USED */
85 #define LEX_FORMLINE 1 /* expecting a format line */
86 #define LEX_KNOWNEXT 0 /* next token known; just return it */
90 static const char* const lex_state_names[] = {
109 #include "keywords.h"
111 /* CLINE is a macro that ensures PL_copline has a sane value */
116 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
119 # define SKIPSPACE0(s) skipspace0(s)
120 # define SKIPSPACE1(s) skipspace1(s)
121 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
122 # define PEEKSPACE(s) skipspace2(s,0)
124 # define SKIPSPACE0(s) skipspace(s)
125 # define SKIPSPACE1(s) skipspace(s)
126 # define SKIPSPACE2(s,tsv) skipspace(s)
127 # define PEEKSPACE(s) skipspace(s)
131 * Convenience functions to return different tokens and prime the
132 * lexer for the next token. They all take an argument.
134 * TOKEN : generic token (used for '(', DOLSHARP, etc)
135 * OPERATOR : generic operator
136 * AOPERATOR : assignment operator
137 * PREBLOCK : beginning the block after an if, while, foreach, ...
138 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
139 * PREREF : *EXPR where EXPR is not a simple identifier
140 * TERM : expression term
141 * LOOPX : loop exiting command (goto, last, dump, etc)
142 * FTST : file test operator
143 * FUN0 : zero-argument function
144 * FUN1 : not used, except for not, which isn't a UNIOP
145 * BOop : bitwise or or xor
147 * SHop : shift operator
148 * PWop : power operator
149 * PMop : pattern-matching operator
150 * Aop : addition-level operator
151 * Mop : multiplication-level operator
152 * Eop : equality-testing operator
153 * Rop : relational operator <= != gt
155 * Also see LOP and lop() below.
158 #ifdef DEBUGGING /* Serve -DT. */
159 # define REPORT(retval) tokereport((I32)retval)
161 # define REPORT(retval) (retval)
164 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
165 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
166 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
167 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
168 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
169 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
170 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
171 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
172 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
173 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
174 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
175 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
176 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
177 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
178 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
179 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
180 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
181 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
182 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
183 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
185 /* This bit of chicanery makes a unary function followed by
186 * a parenthesis into a function with one argument, highest precedence.
187 * The UNIDOR macro is for unary functions that can be followed by the //
188 * operator (such as C<shift // 0>).
190 #define UNI2(f,x) { \
194 PL_last_uni = PL_oldbufptr; \
195 PL_last_lop_op = f; \
197 return REPORT( (int)FUNC1 ); \
199 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 #define UNI(f) UNI2(f,XTERM)
202 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
204 #define UNIBRACK(f) { \
207 PL_last_uni = PL_oldbufptr; \
209 return REPORT( (int)FUNC1 ); \
211 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
214 /* grandfather return to old style */
215 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
219 /* how to interpret the yylval associated with the token */
223 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
229 static struct debug_tokens {
231 enum token_type type;
233 } const debug_tokens[] =
235 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
236 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
237 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
238 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
239 { ARROW, TOKENTYPE_NONE, "ARROW" },
240 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
241 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
242 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
243 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
244 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
245 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
246 { DO, TOKENTYPE_NONE, "DO" },
247 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
248 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
249 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
250 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
251 { ELSE, TOKENTYPE_NONE, "ELSE" },
252 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
253 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
254 { FOR, TOKENTYPE_IVAL, "FOR" },
255 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
256 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
257 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
258 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
259 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
260 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
261 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
262 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
263 { IF, TOKENTYPE_IVAL, "IF" },
264 { LABEL, TOKENTYPE_PVAL, "LABEL" },
265 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
266 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
267 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
268 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
269 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
270 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
271 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
272 { MY, TOKENTYPE_IVAL, "MY" },
273 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
274 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
275 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
276 { OROP, TOKENTYPE_IVAL, "OROP" },
277 { OROR, TOKENTYPE_NONE, "OROR" },
278 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
279 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
280 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
281 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
282 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
283 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
284 { PREINC, TOKENTYPE_NONE, "PREINC" },
285 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
286 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
287 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
288 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
289 { SUB, TOKENTYPE_NONE, "SUB" },
290 { THING, TOKENTYPE_OPVAL, "THING" },
291 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
292 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
293 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
294 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
295 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
296 { USE, TOKENTYPE_IVAL, "USE" },
297 { WHEN, TOKENTYPE_IVAL, "WHEN" },
298 { WHILE, TOKENTYPE_IVAL, "WHILE" },
299 { WORD, TOKENTYPE_OPVAL, "WORD" },
300 { 0, TOKENTYPE_NONE, NULL }
303 /* dump the returned token in rv, plus any optional arg in yylval */
306 S_tokereport(pTHX_ I32 rv)
310 const char *name = NULL;
311 enum token_type type = TOKENTYPE_NONE;
312 const struct debug_tokens *p;
313 SV* const report = newSVpvs("<== ");
315 for (p = debug_tokens; p->token; p++) {
316 if (p->token == (int)rv) {
323 Perl_sv_catpv(aTHX_ report, name);
324 else if ((char)rv > ' ' && (char)rv < '~')
325 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 sv_catpvs(report, "EOF");
329 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
332 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
335 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
337 case TOKENTYPE_OPNUM:
338 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
339 PL_op_name[yylval.ival]);
342 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 case TOKENTYPE_OPVAL:
346 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
347 PL_op_name[yylval.opval->op_type]);
348 if (yylval.opval->op_type == OP_CONST) {
349 Perl_sv_catpvf(aTHX_ report, " %s",
350 SvPEEK(cSVOPx_sv(yylval.opval)));
355 sv_catpvs(report, "(opval=null)");
358 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
364 /* print the buffer with suitable escapes */
367 S_printbuf(pTHX_ const char* fmt, const char* s)
369 SV* const tmp = newSVpvs("");
370 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
379 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
380 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
384 S_ao(pTHX_ int toketype)
387 if (*PL_bufptr == '=') {
389 if (toketype == ANDAND)
390 yylval.ival = OP_ANDASSIGN;
391 else if (toketype == OROR)
392 yylval.ival = OP_ORASSIGN;
393 else if (toketype == DORDOR)
394 yylval.ival = OP_DORASSIGN;
402 * When Perl expects an operator and finds something else, no_op
403 * prints the warning. It always prints "<something> found where
404 * operator expected. It prints "Missing semicolon on previous line?"
405 * if the surprise occurs at the start of the line. "do you need to
406 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
407 * where the compiler doesn't know if foo is a method call or a function.
408 * It prints "Missing operator before end of line" if there's nothing
409 * after the missing operator, or "... before <...>" if there is something
410 * after the missing operator.
414 S_no_op(pTHX_ const char *what, char *s)
417 char * const oldbp = PL_bufptr;
418 const bool is_first = (PL_oldbufptr == PL_linestart);
424 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
425 if (ckWARN_d(WARN_SYNTAX)) {
427 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
428 "\t(Missing semicolon on previous line?)\n");
429 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
431 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 if (t < PL_bufptr && isSPACE(*t))
434 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
435 "\t(Do you need to predeclare %.*s?)\n",
436 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
440 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
441 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
449 * Complain about missing quote/regexp/heredoc terminator.
450 * If it's called with NULL then it cauterizes the line buffer.
451 * If we're in a delimited string and the delimiter is a control
452 * character, it's reformatted into a two-char sequence like ^C.
457 S_missingterm(pTHX_ char *s)
463 char * const nl = strrchr(s,'\n');
469 iscntrl(PL_multi_close)
471 PL_multi_close < 32 || PL_multi_close == 127
475 tmpbuf[1] = (char)toCTRL(PL_multi_close);
480 *tmpbuf = (char)PL_multi_close;
484 q = strchr(s,'"') ? '\'' : '"';
485 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
488 #define FEATURE_IS_ENABLED(name) \
489 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
490 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
492 * S_feature_is_enabled
493 * Check whether the named feature is enabled.
496 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
499 HV * const hinthv = GvHV(PL_hintgv);
500 char he_name[32] = "feature_";
501 (void) my_strlcpy(&he_name[8], name, 24);
503 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
511 Perl_deprecate(pTHX_ const char *s)
513 if (ckWARN(WARN_DEPRECATED))
514 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
518 Perl_deprecate_old(pTHX_ const char *s)
520 /* This function should NOT be called for any new deprecated warnings */
521 /* Use Perl_deprecate instead */
523 /* It is here to maintain backward compatibility with the pre-5.8 */
524 /* warnings category hierarchy. The "deprecated" category used to */
525 /* live under the "syntax" category. It is now a top-level category */
526 /* in its own right. */
528 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
529 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
530 "Use of %s is deprecated", s);
534 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
535 * utf16-to-utf8-reversed.
538 #ifdef PERL_CR_FILTER
542 register const char *s = SvPVX_const(sv);
543 register const char * const e = s + SvCUR(sv);
544 /* outer loop optimized to do nothing if there are no CR-LFs */
546 if (*s++ == '\r' && *s == '\n') {
547 /* hit a CR-LF, need to copy the rest */
548 register char *d = s - 1;
551 if (*s == '\r' && s[1] == '\n')
562 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
564 const I32 count = FILTER_READ(idx+1, sv, maxlen);
565 if (count > 0 && !maxlen)
573 * Initialize variables. Uses the Perl save_stack to save its state (for
574 * recursive calls to the parser).
578 Perl_lex_start(pTHX_ SV *line)
584 SAVEI32(PL_lex_dojoin);
585 SAVEI32(PL_lex_brackets);
586 SAVEI32(PL_lex_casemods);
587 SAVEI32(PL_lex_starts);
588 SAVEI32(PL_lex_state);
589 SAVEVPTR(PL_lex_inpat);
590 SAVEI32(PL_lex_inwhat);
592 if (PL_lex_state == LEX_KNOWNEXT) {
593 I32 toke = PL_lasttoke;
594 while (--toke >= 0) {
595 SAVEI32(PL_nexttoke[toke].next_type);
596 SAVEVPTR(PL_nexttoke[toke].next_val);
598 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 SAVEI32(PL_lasttoke);
603 SAVESPTR(PL_thistoken);
604 SAVESPTR(PL_thiswhite);
605 SAVESPTR(PL_nextwhite);
606 SAVESPTR(PL_thisopen);
607 SAVESPTR(PL_thisclose);
608 SAVESPTR(PL_thisstuff);
609 SAVEVPTR(PL_thismad);
610 SAVEI32(PL_realtokenstart);
611 SAVEI32(PL_faketokens);
613 SAVEI32(PL_curforce);
615 if (PL_lex_state == LEX_KNOWNEXT) {
616 I32 toke = PL_nexttoke;
617 while (--toke >= 0) {
618 SAVEI32(PL_nexttype[toke]);
619 SAVEVPTR(PL_nextval[toke]);
621 SAVEI32(PL_nexttoke);
624 SAVECOPLINE(PL_curcop);
627 SAVEPPTR(PL_oldbufptr);
628 SAVEPPTR(PL_oldoldbufptr);
629 SAVEPPTR(PL_last_lop);
630 SAVEPPTR(PL_last_uni);
631 SAVEPPTR(PL_linestart);
632 SAVESPTR(PL_linestr);
633 SAVEGENERICPV(PL_lex_brackstack);
634 SAVEGENERICPV(PL_lex_casestack);
635 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
636 SAVESPTR(PL_lex_stuff);
637 SAVEI32(PL_lex_defer);
638 SAVEI32(PL_sublex_info.sub_inwhat);
639 SAVESPTR(PL_lex_repl);
641 SAVEINT(PL_lex_expect);
643 PL_lex_state = LEX_NORMAL;
647 Newx(PL_lex_brackstack, 120, char);
648 Newx(PL_lex_casestack, 12, char);
650 *PL_lex_casestack = '\0';
662 PL_sublex_info.sub_inwhat = 0;
664 s = SvPV_const(PL_linestr, len);
665 if (SvREADONLY(PL_linestr) || !len || s[len-1] != ';') {
666 PL_linestr = sv_2mortal(len ? newSVsv(PL_linestr) : newSVpvn(s, 0));
667 if (!len || s[len-1] != ';')
668 sv_catpvs(PL_linestr, "\n;");
670 SvTEMP_off(PL_linestr);
671 /* PL_linestr needs to survive until end of scope, not just the next
672 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
673 SvREFCNT_inc_simple_void_NN(PL_linestr);
674 SAVEFREESV(PL_linestr);
675 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
676 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
677 PL_last_lop = PL_last_uni = NULL;
683 * Finalizer for lexing operations. Must be called when the parser is
684 * done with the lexer.
691 PL_doextract = FALSE;
696 * This subroutine has nothing to do with tilting, whether at windmills
697 * or pinball tables. Its name is short for "increment line". It
698 * increments the current line number in CopLINE(PL_curcop) and checks
699 * to see whether the line starts with a comment of the form
700 * # line 500 "foo.pm"
701 * If so, it sets the current line number and file to the values in the comment.
705 S_incline(pTHX_ char *s)
713 CopLINE_inc(PL_curcop);
716 while (SPACE_OR_TAB(*s))
718 if (strnEQ(s, "line", 4))
722 if (SPACE_OR_TAB(*s))
726 while (SPACE_OR_TAB(*s))
734 while (SPACE_OR_TAB(*s))
736 if (*s == '"' && (t = strchr(s+1, '"'))) {
746 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
748 if (*e != '\n' && *e != '\0')
749 return; /* false alarm */
755 const char * const cf = CopFILE(PL_curcop);
756 STRLEN tmplen = cf ? strlen(cf) : 0;
757 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
758 /* must copy *{"::_<(eval N)[oldfilename:L]"}
759 * to *{"::_<newfilename"} */
760 char smallbuf[256], smallbuf2[256];
761 char *tmpbuf, *tmpbuf2;
763 STRLEN tmplen2 = strlen(s);
764 if (tmplen + 3 < sizeof smallbuf)
767 Newx(tmpbuf, tmplen + 3, char);
768 if (tmplen2 + 3 < sizeof smallbuf2)
771 Newx(tmpbuf2, tmplen2 + 3, char);
772 tmpbuf[0] = tmpbuf2[0] = '_';
773 tmpbuf[1] = tmpbuf2[1] = '<';
774 memcpy(tmpbuf + 2, cf, ++tmplen);
775 memcpy(tmpbuf2 + 2, s, ++tmplen2);
777 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
779 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
781 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
782 /* adjust ${"::_<newfilename"} to store the new file name */
783 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
784 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
785 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
788 if (tmpbuf != smallbuf) Safefree(tmpbuf);
789 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
792 CopFILE_free(PL_curcop);
793 CopFILE_set(PL_curcop, s);
796 CopLINE_set(PL_curcop, atoi(n)-1);
800 /* skip space before PL_thistoken */
803 S_skipspace0(pTHX_ register char *s)
810 PL_thiswhite = newSVpvs("");
811 sv_catsv(PL_thiswhite, PL_skipwhite);
812 sv_free(PL_skipwhite);
815 PL_realtokenstart = s - SvPVX(PL_linestr);
819 /* skip space after PL_thistoken */
822 S_skipspace1(pTHX_ register char *s)
824 const char *start = s;
825 I32 startoff = start - SvPVX(PL_linestr);
830 start = SvPVX(PL_linestr) + startoff;
831 if (!PL_thistoken && PL_realtokenstart >= 0) {
832 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
833 PL_thistoken = newSVpvn(tstart, start - tstart);
835 PL_realtokenstart = -1;
838 PL_nextwhite = newSVpvs("");
839 sv_catsv(PL_nextwhite, PL_skipwhite);
840 sv_free(PL_skipwhite);
847 S_skipspace2(pTHX_ register char *s, SV **svp)
850 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
851 const I32 startoff = s - SvPVX(PL_linestr);
854 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
855 if (!PL_madskills || !svp)
857 start = SvPVX(PL_linestr) + startoff;
858 if (!PL_thistoken && PL_realtokenstart >= 0) {
859 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
860 PL_thistoken = newSVpvn(tstart, start - tstart);
861 PL_realtokenstart = -1;
866 sv_setsv(*svp, PL_skipwhite);
867 sv_free(PL_skipwhite);
876 S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
878 AV *av = CopFILEAVx(PL_curcop);
880 SV * const sv = newSV(0);
881 sv_upgrade(sv, SVt_PVMG);
882 sv_setpvn(sv, buf, len);
885 av_store(av, (I32)CopLINE(PL_curcop), sv);
890 S_update_debugger_info_sv(pTHX_ SV *orig_sv)
892 AV *av = CopFILEAVx(PL_curcop);
894 SV * const sv = newSV(0);
895 sv_upgrade(sv, SVt_PVMG);
896 sv_setsv(sv, orig_sv);
899 av_store(av, (I32)CopLINE(PL_curcop), sv);
905 * Called to gobble the appropriate amount and type of whitespace.
906 * Skips comments as well.
910 S_skipspace(pTHX_ register char *s)
915 int startoff = s - SvPVX(PL_linestr);
918 sv_free(PL_skipwhite);
923 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
924 while (s < PL_bufend && SPACE_OR_TAB(*s))
934 SSize_t oldprevlen, oldoldprevlen;
935 SSize_t oldloplen = 0, oldunilen = 0;
936 while (s < PL_bufend && isSPACE(*s)) {
937 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
942 if (s < PL_bufend && *s == '#') {
943 while (s < PL_bufend && *s != '\n')
947 if (PL_in_eval && !PL_rsfp) {
954 /* only continue to recharge the buffer if we're at the end
955 * of the buffer, we're not reading from a source filter, and
956 * we're in normal lexing mode
958 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
959 PL_lex_state == LEX_FORMLINE)
966 /* try to recharge the buffer */
968 curoff = s - SvPVX(PL_linestr);
971 if ((s = filter_gets(PL_linestr, PL_rsfp,
972 (prevlen = SvCUR(PL_linestr)))) == NULL)
975 if (PL_madskills && curoff != startoff) {
977 PL_skipwhite = newSVpvs("");
978 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
982 /* mustn't throw out old stuff yet if madpropping */
983 SvCUR(PL_linestr) = curoff;
984 s = SvPVX(PL_linestr) + curoff;
986 if (curoff && s[-1] == '\n')
990 /* end of file. Add on the -p or -n magic */
991 /* XXX these shouldn't really be added here, can't set PL_faketokens */
995 ";}continue{print or die qq(-p destination: $!\\n);}");
998 ";}continue{print or die qq(-p destination: $!\\n);}");
1000 PL_minus_n = PL_minus_p = 0;
1002 else if (PL_minus_n) {
1004 sv_catpvn(PL_linestr, ";}", 2);
1006 sv_setpvn(PL_linestr, ";}", 2);
1012 sv_catpvn(PL_linestr,";", 1);
1014 sv_setpvn(PL_linestr,";", 1);
1017 /* reset variables for next time we lex */
1018 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1024 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1025 PL_last_lop = PL_last_uni = NULL;
1027 /* Close the filehandle. Could be from -P preprocessor,
1028 * STDIN, or a regular file. If we were reading code from
1029 * STDIN (because the commandline held no -e or filename)
1030 * then we don't close it, we reset it so the code can
1031 * read from STDIN too.
1034 if (PL_preprocess && !PL_in_eval)
1035 (void)PerlProc_pclose(PL_rsfp);
1036 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1037 PerlIO_clearerr(PL_rsfp);
1039 (void)PerlIO_close(PL_rsfp);
1044 /* not at end of file, so we only read another line */
1045 /* make corresponding updates to old pointers, for yyerror() */
1046 oldprevlen = PL_oldbufptr - PL_bufend;
1047 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1049 oldunilen = PL_last_uni - PL_bufend;
1051 oldloplen = PL_last_lop - PL_bufend;
1052 PL_linestart = PL_bufptr = s + prevlen;
1053 PL_bufend = s + SvCUR(PL_linestr);
1055 PL_oldbufptr = s + oldprevlen;
1056 PL_oldoldbufptr = s + oldoldprevlen;
1058 PL_last_uni = s + oldunilen;
1060 PL_last_lop = s + oldloplen;
1063 /* debugger active and we're not compiling the debugger code,
1064 * so store the line into the debugger's array of lines
1066 if (PERLDB_LINE && PL_curstash != PL_debstash)
1067 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
1074 PL_skipwhite = newSVpvs("");
1075 curoff = s - SvPVX(PL_linestr);
1076 if (curoff - startoff)
1077 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1086 * Check the unary operators to ensure there's no ambiguity in how they're
1087 * used. An ambiguous piece of code would be:
1089 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1090 * the +5 is its argument.
1100 if (PL_oldoldbufptr != PL_last_uni)
1102 while (isSPACE(*PL_last_uni))
1105 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1107 if ((t = strchr(s, '(')) && t < PL_bufptr)
1110 if (ckWARN_d(WARN_AMBIGUOUS)){
1111 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1112 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1113 (int)(s - PL_last_uni), PL_last_uni);
1118 * LOP : macro to build a list operator. Its behaviour has been replaced
1119 * with a subroutine, S_lop() for which LOP is just another name.
1122 #define LOP(f,x) return lop(f,x,s)
1126 * Build a list operator (or something that might be one). The rules:
1127 * - if we have a next token, then it's a list operator [why?]
1128 * - if the next thing is an opening paren, then it's a function
1129 * - else it's a list operator
1133 S_lop(pTHX_ I32 f, int x, char *s)
1140 PL_last_lop = PL_oldbufptr;
1141 PL_last_lop_op = (OPCODE)f;
1144 return REPORT(LSTOP);
1147 return REPORT(LSTOP);
1150 return REPORT(FUNC);
1153 return REPORT(FUNC);
1155 return REPORT(LSTOP);
1161 * Sets up for an eventual force_next(). start_force(0) basically does
1162 * an unshift, while start_force(-1) does a push. yylex removes items
1167 S_start_force(pTHX_ int where)
1171 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1172 where = PL_lasttoke;
1173 assert(PL_curforce < 0 || PL_curforce == where);
1174 if (PL_curforce != where) {
1175 for (i = PL_lasttoke; i > where; --i) {
1176 PL_nexttoke[i] = PL_nexttoke[i-1];
1180 if (PL_curforce < 0) /* in case of duplicate start_force() */
1181 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1182 PL_curforce = where;
1185 curmad('^', newSVpvs(""));
1186 CURMAD('_', PL_nextwhite);
1191 S_curmad(pTHX_ char slot, SV *sv)
1197 if (PL_curforce < 0)
1198 where = &PL_thismad;
1200 where = &PL_nexttoke[PL_curforce].next_mad;
1203 sv_setpvn(sv, "", 0);
1206 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1208 else if (PL_encoding) {
1209 sv_recode_to_utf8(sv, PL_encoding);
1214 /* keep a slot open for the head of the list? */
1215 if (slot != '_' && *where && (*where)->mad_key == '^') {
1216 (*where)->mad_key = slot;
1217 sv_free((*where)->mad_val);
1218 (*where)->mad_val = (void*)sv;
1221 addmad(newMADsv(slot, sv), where, 0);
1224 # define start_force(where) NOOP
1225 # define curmad(slot, sv) NOOP
1230 * When the lexer realizes it knows the next token (for instance,
1231 * it is reordering tokens for the parser) then it can call S_force_next
1232 * to know what token to return the next time the lexer is called. Caller
1233 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1234 * and possibly PL_expect to ensure the lexer handles the token correctly.
1238 S_force_next(pTHX_ I32 type)
1242 if (PL_curforce < 0)
1243 start_force(PL_lasttoke);
1244 PL_nexttoke[PL_curforce].next_type = type;
1245 if (PL_lex_state != LEX_KNOWNEXT)
1246 PL_lex_defer = PL_lex_state;
1247 PL_lex_state = LEX_KNOWNEXT;
1248 PL_lex_expect = PL_expect;
1251 PL_nexttype[PL_nexttoke] = type;
1253 if (PL_lex_state != LEX_KNOWNEXT) {
1254 PL_lex_defer = PL_lex_state;
1255 PL_lex_expect = PL_expect;
1256 PL_lex_state = LEX_KNOWNEXT;
1262 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1265 SV * const sv = newSVpvn(start,len);
1266 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1273 * When the lexer knows the next thing is a word (for instance, it has
1274 * just seen -> and it knows that the next char is a word char, then
1275 * it calls S_force_word to stick the next word into the PL_next lookahead.
1278 * char *start : buffer position (must be within PL_linestr)
1279 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1280 * int check_keyword : if true, Perl checks to make sure the word isn't
1281 * a keyword (do this if the word is a label, e.g. goto FOO)
1282 * int allow_pack : if true, : characters will also be allowed (require,
1283 * use, etc. do this)
1284 * int allow_initial_tick : used by the "sub" lexer only.
1288 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1294 start = SKIPSPACE1(start);
1296 if (isIDFIRST_lazy_if(s,UTF) ||
1297 (allow_pack && *s == ':') ||
1298 (allow_initial_tick && *s == '\'') )
1300 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1301 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1303 start_force(PL_curforce);
1305 curmad('X', newSVpvn(start,s-start));
1306 if (token == METHOD) {
1311 PL_expect = XOPERATOR;
1314 NEXTVAL_NEXTTOKE.opval
1315 = (OP*)newSVOP(OP_CONST,0,
1316 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1317 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1325 * Called when the lexer wants $foo *foo &foo etc, but the program
1326 * text only contains the "foo" portion. The first argument is a pointer
1327 * to the "foo", and the second argument is the type symbol to prefix.
1328 * Forces the next token to be a "WORD".
1329 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1333 S_force_ident(pTHX_ register const char *s, int kind)
1337 const STRLEN len = strlen(s);
1338 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1339 start_force(PL_curforce);
1340 NEXTVAL_NEXTTOKE.opval = o;
1343 o->op_private = OPpCONST_ENTERED;
1344 /* XXX see note in pp_entereval() for why we forgo typo
1345 warnings if the symbol must be introduced in an eval.
1347 gv_fetchpvn_flags(s, len,
1348 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1350 kind == '$' ? SVt_PV :
1351 kind == '@' ? SVt_PVAV :
1352 kind == '%' ? SVt_PVHV :
1360 Perl_str_to_version(pTHX_ SV *sv)
1365 const char *start = SvPV_const(sv,len);
1366 const char * const end = start + len;
1367 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1368 while (start < end) {
1372 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1377 retval += ((NV)n)/nshift;
1386 * Forces the next token to be a version number.
1387 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1388 * and if "guessing" is TRUE, then no new token is created (and the caller
1389 * must use an alternative parsing method).
1393 S_force_version(pTHX_ char *s, int guessing)
1399 I32 startoff = s - SvPVX(PL_linestr);
1408 while (isDIGIT(*d) || *d == '_' || *d == '.')
1412 start_force(PL_curforce);
1413 curmad('X', newSVpvn(s,d-s));
1416 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1418 s = scan_num(s, &yylval);
1419 version = yylval.opval;
1420 ver = cSVOPx(version)->op_sv;
1421 if (SvPOK(ver) && !SvNIOK(ver)) {
1422 SvUPGRADE(ver, SVt_PVNV);
1423 SvNV_set(ver, str_to_version(ver));
1424 SvNOK_on(ver); /* hint that it is a version */
1427 else if (guessing) {
1430 sv_free(PL_nextwhite); /* let next token collect whitespace */
1432 s = SvPVX(PL_linestr) + startoff;
1440 if (PL_madskills && !version) {
1441 sv_free(PL_nextwhite); /* let next token collect whitespace */
1443 s = SvPVX(PL_linestr) + startoff;
1446 /* NOTE: The parser sees the package name and the VERSION swapped */
1447 start_force(PL_curforce);
1448 NEXTVAL_NEXTTOKE.opval = version;
1456 * Tokenize a quoted string passed in as an SV. It finds the next
1457 * chunk, up to end of string or a backslash. It may make a new
1458 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1463 S_tokeq(pTHX_ SV *sv)
1467 register char *send;
1475 s = SvPV_force(sv, len);
1476 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1479 while (s < send && *s != '\\')
1484 if ( PL_hints & HINT_NEW_STRING ) {
1485 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1491 if (s + 1 < send && (s[1] == '\\'))
1492 s++; /* all that, just for this */
1497 SvCUR_set(sv, d - SvPVX_const(sv));
1499 if ( PL_hints & HINT_NEW_STRING )
1500 return new_constant(NULL, 0, "q", sv, pv, "q");
1505 * Now come three functions related to double-quote context,
1506 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1507 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1508 * interact with PL_lex_state, and create fake ( ... ) argument lists
1509 * to handle functions and concatenation.
1510 * They assume that whoever calls them will be setting up a fake
1511 * join call, because each subthing puts a ',' after it. This lets
1514 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1516 * (I'm not sure whether the spurious commas at the end of lcfirst's
1517 * arguments and join's arguments are created or not).
1522 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1524 * Pattern matching will set PL_lex_op to the pattern-matching op to
1525 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1527 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1529 * Everything else becomes a FUNC.
1531 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1532 * had an OP_CONST or OP_READLINE). This just sets us up for a
1533 * call to S_sublex_push().
1537 S_sublex_start(pTHX)
1540 register const I32 op_type = yylval.ival;
1542 if (op_type == OP_NULL) {
1543 yylval.opval = PL_lex_op;
1547 if (op_type == OP_CONST || op_type == OP_READLINE) {
1548 SV *sv = tokeq(PL_lex_stuff);
1550 if (SvTYPE(sv) == SVt_PVIV) {
1551 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1553 const char * const p = SvPV_const(sv, len);
1554 SV * const nsv = newSVpvn(p, len);
1560 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1561 PL_lex_stuff = NULL;
1562 /* Allow <FH> // "foo" */
1563 if (op_type == OP_READLINE)
1564 PL_expect = XTERMORDORDOR;
1567 else if (op_type == OP_BACKTICK && PL_lex_op) {
1568 /* readpipe() vas overriden */
1569 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1570 yylval.opval = PL_lex_op;
1572 PL_lex_stuff = NULL;
1576 PL_sublex_info.super_state = PL_lex_state;
1577 PL_sublex_info.sub_inwhat = op_type;
1578 PL_sublex_info.sub_op = PL_lex_op;
1579 PL_lex_state = LEX_INTERPPUSH;
1583 yylval.opval = PL_lex_op;
1593 * Create a new scope to save the lexing state. The scope will be
1594 * ended in S_sublex_done. Returns a '(', starting the function arguments
1595 * to the uc, lc, etc. found before.
1596 * Sets PL_lex_state to LEX_INTERPCONCAT.
1605 PL_lex_state = PL_sublex_info.super_state;
1606 SAVEI32(PL_lex_dojoin);
1607 SAVEI32(PL_lex_brackets);
1608 SAVEI32(PL_lex_casemods);
1609 SAVEI32(PL_lex_starts);
1610 SAVEI32(PL_lex_state);
1611 SAVEVPTR(PL_lex_inpat);
1612 SAVEI32(PL_lex_inwhat);
1613 SAVECOPLINE(PL_curcop);
1614 SAVEPPTR(PL_bufptr);
1615 SAVEPPTR(PL_bufend);
1616 SAVEPPTR(PL_oldbufptr);
1617 SAVEPPTR(PL_oldoldbufptr);
1618 SAVEPPTR(PL_last_lop);
1619 SAVEPPTR(PL_last_uni);
1620 SAVEPPTR(PL_linestart);
1621 SAVESPTR(PL_linestr);
1622 SAVEGENERICPV(PL_lex_brackstack);
1623 SAVEGENERICPV(PL_lex_casestack);
1625 PL_linestr = PL_lex_stuff;
1626 PL_lex_stuff = NULL;
1628 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1629 = SvPVX(PL_linestr);
1630 PL_bufend += SvCUR(PL_linestr);
1631 PL_last_lop = PL_last_uni = NULL;
1632 SAVEFREESV(PL_linestr);
1634 PL_lex_dojoin = FALSE;
1635 PL_lex_brackets = 0;
1636 Newx(PL_lex_brackstack, 120, char);
1637 Newx(PL_lex_casestack, 12, char);
1638 PL_lex_casemods = 0;
1639 *PL_lex_casestack = '\0';
1641 PL_lex_state = LEX_INTERPCONCAT;
1642 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1644 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1645 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1646 PL_lex_inpat = PL_sublex_info.sub_op;
1648 PL_lex_inpat = NULL;
1655 * Restores lexer state after a S_sublex_push.
1662 if (!PL_lex_starts++) {
1663 SV * const sv = newSVpvs("");
1664 if (SvUTF8(PL_linestr))
1666 PL_expect = XOPERATOR;
1667 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1671 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1672 PL_lex_state = LEX_INTERPCASEMOD;
1676 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1677 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1678 PL_linestr = PL_lex_repl;
1680 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1681 PL_bufend += SvCUR(PL_linestr);
1682 PL_last_lop = PL_last_uni = NULL;
1683 SAVEFREESV(PL_linestr);
1684 PL_lex_dojoin = FALSE;
1685 PL_lex_brackets = 0;
1686 PL_lex_casemods = 0;
1687 *PL_lex_casestack = '\0';
1689 if (SvEVALED(PL_lex_repl)) {
1690 PL_lex_state = LEX_INTERPNORMAL;
1692 /* we don't clear PL_lex_repl here, so that we can check later
1693 whether this is an evalled subst; that means we rely on the
1694 logic to ensure sublex_done() is called again only via the
1695 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1698 PL_lex_state = LEX_INTERPCONCAT;
1708 PL_endwhite = newSVpvs("");
1709 sv_catsv(PL_endwhite, PL_thiswhite);
1713 sv_setpvn(PL_thistoken,"",0);
1715 PL_realtokenstart = -1;
1719 PL_bufend = SvPVX(PL_linestr);
1720 PL_bufend += SvCUR(PL_linestr);
1721 PL_expect = XOPERATOR;
1722 PL_sublex_info.sub_inwhat = 0;
1730 Extracts a pattern, double-quoted string, or transliteration. This
1733 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1734 processing a pattern (PL_lex_inpat is true), a transliteration
1735 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1737 Returns a pointer to the character scanned up to. If this is
1738 advanced from the start pointer supplied (i.e. if anything was
1739 successfully parsed), will leave an OP for the substring scanned
1740 in yylval. Caller must intuit reason for not parsing further
1741 by looking at the next characters herself.
1745 double-quoted style: \r and \n
1746 regexp special ones: \D \s
1749 case and quoting: \U \Q \E
1750 stops on @ and $, but not for $ as tail anchor
1752 In transliterations:
1753 characters are VERY literal, except for - not at the start or end
1754 of the string, which indicates a range. If the range is in bytes,
1755 scan_const expands the range to the full set of intermediate
1756 characters. If the range is in utf8, the hyphen is replaced with
1757 a certain range mark which will be handled by pmtrans() in op.c.
1759 In double-quoted strings:
1761 double-quoted style: \r and \n
1763 deprecated backrefs: \1 (in substitution replacements)
1764 case and quoting: \U \Q \E
1767 scan_const does *not* construct ops to handle interpolated strings.
1768 It stops processing as soon as it finds an embedded $ or @ variable
1769 and leaves it to the caller to work out what's going on.
1771 embedded arrays (whether in pattern or not) could be:
1772 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1774 $ in double-quoted strings must be the symbol of an embedded scalar.
1776 $ in pattern could be $foo or could be tail anchor. Assumption:
1777 it's a tail anchor if $ is the last thing in the string, or if it's
1778 followed by one of "()| \r\n\t"
1780 \1 (backreferences) are turned into $1
1782 The structure of the code is
1783 while (there's a character to process) {
1784 handle transliteration ranges
1785 skip regexp comments /(?#comment)/ and codes /(?{code})/
1786 skip #-initiated comments in //x patterns
1787 check for embedded arrays
1788 check for embedded scalars
1790 leave intact backslashes from leaveit (below)
1791 deprecate \1 in substitution replacements
1792 handle string-changing backslashes \l \U \Q \E, etc.
1793 switch (what was escaped) {
1794 handle \- in a transliteration (becomes a literal -)
1795 handle \132 (octal characters)
1796 handle \x15 and \x{1234} (hex characters)
1797 handle \N{name} (named characters)
1798 handle \cV (control characters)
1799 handle printf-style backslashes (\f, \r, \n, etc)
1801 } (end if backslash)
1802 } (end while character to read)
1807 S_scan_const(pTHX_ char *start)
1810 register char *send = PL_bufend; /* end of the constant */
1811 SV *sv = newSV(send - start); /* sv for the constant */
1812 register char *s = start; /* start of the constant */
1813 register char *d = SvPVX(sv); /* destination for copies */
1814 bool dorange = FALSE; /* are we in a translit range? */
1815 bool didrange = FALSE; /* did we just finish a range? */
1816 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1817 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1820 UV literal_endpoint = 0;
1821 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1824 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1825 /* If we are doing a trans and we know we want UTF8 set expectation */
1826 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1827 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1831 while (s < send || dorange) {
1832 /* get transliterations out of the way (they're most literal) */
1833 if (PL_lex_inwhat == OP_TRANS) {
1834 /* expand a range A-Z to the full set of characters. AIE! */
1836 I32 i; /* current expanded character */
1837 I32 min; /* first character in range */
1838 I32 max; /* last character in range */
1849 char * const c = (char*)utf8_hop((U8*)d, -1);
1853 *c = (char)UTF_TO_NATIVE(0xff);
1854 /* mark the range as done, and continue */
1860 i = d - SvPVX_const(sv); /* remember current offset */
1863 SvLEN(sv) + (has_utf8 ?
1864 (512 - UTF_CONTINUATION_MARK +
1867 /* How many two-byte within 0..255: 128 in UTF-8,
1868 * 96 in UTF-8-mod. */
1870 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1872 d = SvPVX(sv) + i; /* refresh d after realloc */
1876 for (j = 0; j <= 1; j++) {
1877 char * const c = (char*)utf8_hop((U8*)d, -1);
1878 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1884 max = (U8)0xff; /* only to \xff */
1885 uvmax = uv; /* \x{100} to uvmax */
1887 d = c; /* eat endpoint chars */
1892 d -= 2; /* eat the first char and the - */
1893 min = (U8)*d; /* first char in range */
1894 max = (U8)d[1]; /* last char in range */
1901 "Invalid range \"%c-%c\" in transliteration operator",
1902 (char)min, (char)max);
1906 if (literal_endpoint == 2 &&
1907 ((isLOWER(min) && isLOWER(max)) ||
1908 (isUPPER(min) && isUPPER(max)))) {
1910 for (i = min; i <= max; i++)
1912 *d++ = NATIVE_TO_NEED(has_utf8,i);
1914 for (i = min; i <= max; i++)
1916 *d++ = NATIVE_TO_NEED(has_utf8,i);
1921 for (i = min; i <= max; i++)
1924 const U8 ch = (U8)NATIVE_TO_UTF(i);
1925 if (UNI_IS_INVARIANT(ch))
1928 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1929 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1938 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1940 *d++ = (char)UTF_TO_NATIVE(0xff);
1942 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1946 /* mark the range as done, and continue */
1950 literal_endpoint = 0;
1955 /* range begins (ignore - as first or last char) */
1956 else if (*s == '-' && s+1 < send && s != start) {
1958 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1965 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1975 literal_endpoint = 0;
1976 native_range = TRUE;
1981 /* if we get here, we're not doing a transliteration */
1983 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1984 except for the last char, which will be done separately. */
1985 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1987 while (s+1 < send && *s != ')')
1988 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1990 else if (s[2] == '{' /* This should match regcomp.c */
1991 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1994 char *regparse = s + (s[2] == '{' ? 3 : 4);
1997 while (count && (c = *regparse)) {
1998 if (c == '\\' && regparse[1])
2006 if (*regparse != ')')
2007 regparse--; /* Leave one char for continuation. */
2008 while (s < regparse)
2009 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2013 /* likewise skip #-initiated comments in //x patterns */
2014 else if (*s == '#' && PL_lex_inpat &&
2015 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2016 while (s+1 < send && *s != '\n')
2017 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2020 /* check for embedded arrays
2021 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2023 else if (*s == '@' && s[1]) {
2024 if (isALNUM_lazy_if(s+1,UTF))
2026 if (strchr(":'{$", s[1]))
2028 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2029 break; /* in regexp, neither @+ nor @- are interpolated */
2032 /* check for embedded scalars. only stop if we're sure it's a
2035 else if (*s == '$') {
2036 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2038 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2039 break; /* in regexp, $ might be tail anchor */
2042 /* End of else if chain - OP_TRANS rejoin rest */
2045 if (*s == '\\' && s+1 < send) {
2048 /* deprecate \1 in strings and substitution replacements */
2049 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2050 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2052 if (ckWARN(WARN_SYNTAX))
2053 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2058 /* string-change backslash escapes */
2059 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2063 /* skip any other backslash escapes in a pattern */
2064 else if (PL_lex_inpat) {
2065 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2066 goto default_action;
2069 /* if we get here, it's either a quoted -, or a digit */
2072 /* quoted - in transliterations */
2074 if (PL_lex_inwhat == OP_TRANS) {
2081 if ((isALPHA(*s) || isDIGIT(*s)) &&
2083 Perl_warner(aTHX_ packWARN(WARN_MISC),
2084 "Unrecognized escape \\%c passed through",
2086 /* default action is to copy the quoted character */
2087 goto default_action;
2090 /* \132 indicates an octal constant */
2091 case '0': case '1': case '2': case '3':
2092 case '4': case '5': case '6': case '7':
2096 uv = grok_oct(s, &len, &flags, NULL);
2099 goto NUM_ESCAPE_INSERT;
2101 /* \x24 indicates a hex constant */
2105 char* const e = strchr(s, '}');
2106 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2107 PERL_SCAN_DISALLOW_PREFIX;
2112 yyerror("Missing right brace on \\x{}");
2116 uv = grok_hex(s, &len, &flags, NULL);
2122 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2123 uv = grok_hex(s, &len, &flags, NULL);
2129 /* Insert oct or hex escaped character.
2130 * There will always enough room in sv since such
2131 * escapes will be longer than any UTF-8 sequence
2132 * they can end up as. */
2134 /* We need to map to chars to ASCII before doing the tests
2137 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2138 if (!has_utf8 && uv > 255) {
2139 /* Might need to recode whatever we have
2140 * accumulated so far if it contains any
2143 * (Can't we keep track of that and avoid
2144 * this rescan? --jhi)
2148 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2149 if (!NATIVE_IS_INVARIANT(*c)) {
2154 const STRLEN offset = d - SvPVX_const(sv);
2156 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2160 while (src >= (const U8 *)SvPVX_const(sv)) {
2161 if (!NATIVE_IS_INVARIANT(*src)) {
2162 const U8 ch = NATIVE_TO_ASCII(*src);
2163 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2164 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2174 if (has_utf8 || uv > 255) {
2175 d = (char*)uvchr_to_utf8((U8*)d, uv);
2177 if (PL_lex_inwhat == OP_TRANS &&
2178 PL_sublex_info.sub_op) {
2179 PL_sublex_info.sub_op->op_private |=
2180 (PL_lex_repl ? OPpTRANS_FROM_UTF
2184 if (uv > 255 && !dorange)
2185 native_range = FALSE;
2197 /* \N{LATIN SMALL LETTER A} is a named character */
2201 char* e = strchr(s, '}');
2208 yyerror("Missing right brace on \\N{}");
2212 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2214 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2215 PERL_SCAN_DISALLOW_PREFIX;
2218 uv = grok_hex(s, &len, &flags, NULL);
2219 if ( e > s && len != (STRLEN)(e - s) ) {
2223 goto NUM_ESCAPE_INSERT;
2225 res = newSVpvn(s + 1, e - s - 1);
2226 type = newSVpvn(s - 2,e - s + 3);
2227 res = new_constant( NULL, 0, "charnames",
2228 res, NULL, SvPVX(type) );
2231 sv_utf8_upgrade(res);
2232 str = SvPV_const(res,len);
2233 #ifdef EBCDIC_NEVER_MIND
2234 /* charnames uses pack U and that has been
2235 * recently changed to do the below uni->native
2236 * mapping, so this would be redundant (and wrong,
2237 * the code point would be doubly converted).
2238 * But leave this in just in case the pack U change
2239 * gets revoked, but the semantics is still
2240 * desireable for charnames. --jhi */
2242 UV uv = utf8_to_uvchr((const U8*)str, 0);
2245 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2247 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2248 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2249 str = SvPV_const(res, len);
2253 if (!has_utf8 && SvUTF8(res)) {
2254 const char * const ostart = SvPVX_const(sv);
2255 SvCUR_set(sv, d - ostart);
2258 sv_utf8_upgrade(sv);
2259 /* this just broke our allocation above... */
2260 SvGROW(sv, (STRLEN)(send - start));
2261 d = SvPVX(sv) + SvCUR(sv);
2264 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2265 const char * const odest = SvPVX_const(sv);
2267 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2268 d = SvPVX(sv) + (d - odest);
2272 native_range = FALSE; /* \N{} is guessed to be Unicode */
2274 Copy(str, d, len, char);
2281 yyerror("Missing braces on \\N{}");
2284 /* \c is a control character */
2293 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2296 yyerror("Missing control char name in \\c");
2300 /* printf-style backslashes, formfeeds, newlines, etc */
2302 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2305 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2308 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2311 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2314 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2317 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2320 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2326 } /* end if (backslash) */
2333 /* If we started with encoded form, or already know we want it
2334 and then encode the next character */
2335 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2337 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2338 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2341 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2342 const STRLEN off = d - SvPVX_const(sv);
2343 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2345 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2348 if (uv > 255 && !dorange)
2349 native_range = FALSE;
2353 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2355 } /* while loop to process each character */
2357 /* terminate the string and set up the sv */
2359 SvCUR_set(sv, d - SvPVX_const(sv));
2360 if (SvCUR(sv) >= SvLEN(sv))
2361 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2364 if (PL_encoding && !has_utf8) {
2365 sv_recode_to_utf8(sv, PL_encoding);
2371 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2372 PL_sublex_info.sub_op->op_private |=
2373 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2377 /* shrink the sv if we allocated more than we used */
2378 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2379 SvPV_shrink_to_cur(sv);
2382 /* return the substring (via yylval) only if we parsed anything */
2383 if (s > PL_bufptr) {
2384 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2385 sv = new_constant(start, s - start,
2386 (const char *)(PL_lex_inpat ? "qr" : "q"),
2389 (( PL_lex_inwhat == OP_TRANS
2391 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2394 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2401 * Returns TRUE if there's more to the expression (e.g., a subscript),
2404 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2406 * ->[ and ->{ return TRUE
2407 * { and [ outside a pattern are always subscripts, so return TRUE
2408 * if we're outside a pattern and it's not { or [, then return FALSE
2409 * if we're in a pattern and the first char is a {
2410 * {4,5} (any digits around the comma) returns FALSE
2411 * if we're in a pattern and the first char is a [
2413 * [SOMETHING] has a funky algorithm to decide whether it's a
2414 * character class or not. It has to deal with things like
2415 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2416 * anything else returns TRUE
2419 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2422 S_intuit_more(pTHX_ register char *s)
2425 if (PL_lex_brackets)
2427 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2429 if (*s != '{' && *s != '[')
2434 /* In a pattern, so maybe we have {n,m}. */
2451 /* On the other hand, maybe we have a character class */
2454 if (*s == ']' || *s == '^')
2457 /* this is terrifying, and it works */
2458 int weight = 2; /* let's weigh the evidence */
2460 unsigned char un_char = 255, last_un_char;
2461 const char * const send = strchr(s,']');
2462 char tmpbuf[sizeof PL_tokenbuf * 4];
2464 if (!send) /* has to be an expression */
2467 Zero(seen,256,char);
2470 else if (isDIGIT(*s)) {
2472 if (isDIGIT(s[1]) && s[2] == ']')
2478 for (; s < send; s++) {
2479 last_un_char = un_char;
2480 un_char = (unsigned char)*s;
2485 weight -= seen[un_char] * 10;
2486 if (isALNUM_lazy_if(s+1,UTF)) {
2488 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2489 len = (int)strlen(tmpbuf);
2490 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2495 else if (*s == '$' && s[1] &&
2496 strchr("[#!%*<>()-=",s[1])) {
2497 if (/*{*/ strchr("])} =",s[2]))
2506 if (strchr("wds]",s[1]))
2508 else if (seen[(U8)'\''] || seen[(U8)'"'])
2510 else if (strchr("rnftbxcav",s[1]))
2512 else if (isDIGIT(s[1])) {
2514 while (s[1] && isDIGIT(s[1]))
2524 if (strchr("aA01! ",last_un_char))
2526 if (strchr("zZ79~",s[1]))
2528 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2529 weight -= 5; /* cope with negative subscript */
2532 if (!isALNUM(last_un_char)
2533 && !(last_un_char == '$' || last_un_char == '@'
2534 || last_un_char == '&')
2535 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2540 if (keyword(tmpbuf, d - tmpbuf, 0))
2543 if (un_char == last_un_char + 1)
2545 weight -= seen[un_char];
2550 if (weight >= 0) /* probably a character class */
2560 * Does all the checking to disambiguate
2562 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2563 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2565 * First argument is the stuff after the first token, e.g. "bar".
2567 * Not a method if bar is a filehandle.
2568 * Not a method if foo is a subroutine prototyped to take a filehandle.
2569 * Not a method if it's really "Foo $bar"
2570 * Method if it's "foo $bar"
2571 * Not a method if it's really "print foo $bar"
2572 * Method if it's really "foo package::" (interpreted as package->foo)
2573 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2574 * Not a method if bar is a filehandle or package, but is quoted with
2579 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2582 char *s = start + (*start == '$');
2583 char tmpbuf[sizeof PL_tokenbuf];
2591 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2595 const char *proto = SvPVX_const(cv);
2606 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2607 /* start is the beginning of the possible filehandle/object,
2608 * and s is the end of it
2609 * tmpbuf is a copy of it
2612 if (*start == '$') {
2613 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2616 len = start - SvPVX(PL_linestr);
2620 start = SvPVX(PL_linestr) + len;
2624 return *s == '(' ? FUNCMETH : METHOD;
2626 if (!keyword(tmpbuf, len, 0)) {
2627 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2631 soff = s - SvPVX(PL_linestr);
2635 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2636 if (indirgv && GvCVu(indirgv))
2638 /* filehandle or package name makes it a method */
2639 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2641 soff = s - SvPVX(PL_linestr);
2644 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2645 return 0; /* no assumptions -- "=>" quotes bearword */
2647 start_force(PL_curforce);
2648 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2649 newSVpvn(tmpbuf,len));
2650 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2652 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2657 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2659 return *s == '(' ? FUNCMETH : METHOD;
2667 * Return a string of Perl code to load the debugger. If PERL5DB
2668 * is set, it will return the contents of that, otherwise a
2669 * compile-time require of perl5db.pl.
2677 const char * const pdb = PerlEnv_getenv("PERL5DB");
2681 SETERRNO(0,SS_NORMAL);
2682 return "BEGIN { require 'perl5db.pl' }";
2688 /* Encoded script support. filter_add() effectively inserts a
2689 * 'pre-processing' function into the current source input stream.
2690 * Note that the filter function only applies to the current source file
2691 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2693 * The datasv parameter (which may be NULL) can be used to pass
2694 * private data to this instance of the filter. The filter function
2695 * can recover the SV using the FILTER_DATA macro and use it to
2696 * store private buffers and state information.
2698 * The supplied datasv parameter is upgraded to a PVIO type
2699 * and the IoDIRP/IoANY field is used to store the function pointer,
2700 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2701 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2702 * private use must be set using malloc'd pointers.
2706 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2712 if (!PL_rsfp_filters)
2713 PL_rsfp_filters = newAV();
2716 SvUPGRADE(datasv, SVt_PVIO);
2717 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2718 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2719 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2720 FPTR2DPTR(void *, IoANY(datasv)),
2721 SvPV_nolen(datasv)));
2722 av_unshift(PL_rsfp_filters, 1);
2723 av_store(PL_rsfp_filters, 0, datasv) ;
2728 /* Delete most recently added instance of this filter function. */
2730 Perl_filter_del(pTHX_ filter_t funcp)
2736 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2737 FPTR2DPTR(void*, funcp)));
2739 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2741 /* if filter is on top of stack (usual case) just pop it off */
2742 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2743 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2744 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2745 IoANY(datasv) = (void *)NULL;
2746 sv_free(av_pop(PL_rsfp_filters));
2750 /* we need to search for the correct entry and clear it */
2751 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2755 /* Invoke the idxth filter function for the current rsfp. */
2756 /* maxlen 0 = read one text line */
2758 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2763 /* This API is bad. It should have been using unsigned int for maxlen.
2764 Not sure if we want to change the API, but if not we should sanity
2765 check the value here. */
2766 const unsigned int correct_length
2775 if (!PL_rsfp_filters)
2777 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2778 /* Provide a default input filter to make life easy. */
2779 /* Note that we append to the line. This is handy. */
2780 DEBUG_P(PerlIO_printf(Perl_debug_log,
2781 "filter_read %d: from rsfp\n", idx));
2782 if (correct_length) {
2785 const int old_len = SvCUR(buf_sv);
2787 /* ensure buf_sv is large enough */
2788 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2789 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2790 correct_length)) <= 0) {
2791 if (PerlIO_error(PL_rsfp))
2792 return -1; /* error */
2794 return 0 ; /* end of file */
2796 SvCUR_set(buf_sv, old_len + len) ;
2799 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2800 if (PerlIO_error(PL_rsfp))
2801 return -1; /* error */
2803 return 0 ; /* end of file */
2806 return SvCUR(buf_sv);
2808 /* Skip this filter slot if filter has been deleted */
2809 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2810 DEBUG_P(PerlIO_printf(Perl_debug_log,
2811 "filter_read %d: skipped (filter deleted)\n",
2813 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2815 /* Get function pointer hidden within datasv */
2816 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2817 DEBUG_P(PerlIO_printf(Perl_debug_log,
2818 "filter_read %d: via function %p (%s)\n",
2819 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2820 /* Call function. The function is expected to */
2821 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2822 /* Return: <0:error, =0:eof, >0:not eof */
2823 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2827 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2830 #ifdef PERL_CR_FILTER
2831 if (!PL_rsfp_filters) {
2832 filter_add(S_cr_textfilter,NULL);
2835 if (PL_rsfp_filters) {
2837 SvCUR_set(sv, 0); /* start with empty line */
2838 if (FILTER_READ(0, sv, 0) > 0)
2839 return ( SvPVX(sv) ) ;
2844 return (sv_gets(sv, fp, append));
2848 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2853 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2857 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2858 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2860 return GvHV(gv); /* Foo:: */
2863 /* use constant CLASS => 'MyClass' */
2864 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2865 if (gv && GvCV(gv)) {
2866 SV * const sv = cv_const_sv(GvCV(gv));
2868 pkgname = SvPV_nolen_const(sv);
2871 return gv_stashpv(pkgname, FALSE);
2875 * S_readpipe_override
2876 * Check whether readpipe() is overriden, and generates the appropriate
2877 * optree, provided sublex_start() is called afterwards.
2880 S_readpipe_override(pTHX)
2883 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2884 yylval.ival = OP_BACKTICK;
2886 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2888 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2889 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2890 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2892 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2893 append_elem(OP_LIST,
2894 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2895 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2905 * The intent of this yylex wrapper is to minimize the changes to the
2906 * tokener when we aren't interested in collecting madprops. It remains
2907 * to be seen how successful this strategy will be...
2914 char *s = PL_bufptr;
2916 /* make sure PL_thiswhite is initialized */
2920 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2921 if (PL_pending_ident)
2922 return S_pending_ident(aTHX);
2924 /* previous token ate up our whitespace? */
2925 if (!PL_lasttoke && PL_nextwhite) {
2926 PL_thiswhite = PL_nextwhite;
2930 /* isolate the token, and figure out where it is without whitespace */
2931 PL_realtokenstart = -1;
2935 assert(PL_curforce < 0);
2937 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2938 if (!PL_thistoken) {
2939 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2940 PL_thistoken = newSVpvs("");
2942 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2943 PL_thistoken = newSVpvn(tstart, s - tstart);
2946 if (PL_thismad) /* install head */
2947 CURMAD('X', PL_thistoken);
2950 /* last whitespace of a sublex? */
2951 if (optype == ')' && PL_endwhite) {
2952 CURMAD('X', PL_endwhite);
2957 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2958 if (!PL_thiswhite && !PL_endwhite && !optype) {
2959 sv_free(PL_thistoken);
2964 /* put off final whitespace till peg */
2965 if (optype == ';' && !PL_rsfp) {
2966 PL_nextwhite = PL_thiswhite;
2969 else if (PL_thisopen) {
2970 CURMAD('q', PL_thisopen);
2972 sv_free(PL_thistoken);
2976 /* Store actual token text as madprop X */
2977 CURMAD('X', PL_thistoken);
2981 /* add preceding whitespace as madprop _ */
2982 CURMAD('_', PL_thiswhite);
2986 /* add quoted material as madprop = */
2987 CURMAD('=', PL_thisstuff);
2991 /* add terminating quote as madprop Q */
2992 CURMAD('Q', PL_thisclose);
2996 /* special processing based on optype */
3000 /* opval doesn't need a TOKEN since it can already store mp */
3011 append_madprops(PL_thismad, yylval.opval, 0);
3019 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3028 /* remember any fake bracket that lexer is about to discard */
3029 if (PL_lex_brackets == 1 &&
3030 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3033 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3036 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3037 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3040 break; /* don't bother looking for trailing comment */
3049 /* attach a trailing comment to its statement instead of next token */
3053 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3055 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3057 if (*s == '\n' || *s == '#') {
3058 while (s < PL_bufend && *s != '\n')
3062 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3063 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3080 /* Create new token struct. Note: opvals return early above. */
3081 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3088 S_tokenize_use(pTHX_ int is_use, char *s) {
3090 if (PL_expect != XSTATE)
3091 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3092 is_use ? "use" : "no"));
3094 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3095 s = force_version(s, TRUE);
3096 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3097 start_force(PL_curforce);
3098 NEXTVAL_NEXTTOKE.opval = NULL;
3101 else if (*s == 'v') {
3102 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3103 s = force_version(s, FALSE);
3107 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3108 s = force_version(s, FALSE);
3110 yylval.ival = is_use;
3114 static const char* const exp_name[] =
3115 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3116 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3123 Works out what to call the token just pulled out of the input
3124 stream. The yacc parser takes care of taking the ops we return and
3125 stitching them into a tree.
3131 if read an identifier
3132 if we're in a my declaration
3133 croak if they tried to say my($foo::bar)
3134 build the ops for a my() declaration
3135 if it's an access to a my() variable
3136 are we in a sort block?
3137 croak if my($a); $a <=> $b
3138 build ops for access to a my() variable
3139 if in a dq string, and they've said @foo and we can't find @foo
3141 build ops for a bareword
3142 if we already built the token before, use it.
3147 #pragma segment Perl_yylex
3153 register char *s = PL_bufptr;
3158 /* orig_keyword, gvp, and gv are initialized here because
3159 * jump to the label just_a_word_zero can bypass their
3160 * initialization later. */
3161 I32 orig_keyword = 0;
3166 SV* tmp = newSVpvs("");
3167 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3168 (IV)CopLINE(PL_curcop),
3169 lex_state_names[PL_lex_state],
3170 exp_name[PL_expect],
3171 pv_display(tmp, s, strlen(s), 0, 60));
3174 /* check if there's an identifier for us to look at */
3175 if (PL_pending_ident)
3176 return REPORT(S_pending_ident(aTHX));
3178 /* no identifier pending identification */
3180 switch (PL_lex_state) {
3182 case LEX_NORMAL: /* Some compilers will produce faster */
3183 case LEX_INTERPNORMAL: /* code if we comment these out. */
3187 /* when we've already built the next token, just pull it out of the queue */
3191 yylval = PL_nexttoke[PL_lasttoke].next_val;
3193 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3194 PL_nexttoke[PL_lasttoke].next_mad = 0;
3195 if (PL_thismad && PL_thismad->mad_key == '_') {
3196 PL_thiswhite = (SV*)PL_thismad->mad_val;
3197 PL_thismad->mad_val = 0;
3198 mad_free(PL_thismad);
3203 PL_lex_state = PL_lex_defer;
3204 PL_expect = PL_lex_expect;
3205 PL_lex_defer = LEX_NORMAL;
3206 if (!PL_nexttoke[PL_lasttoke].next_type)
3211 yylval = PL_nextval[PL_nexttoke];
3213 PL_lex_state = PL_lex_defer;
3214 PL_expect = PL_lex_expect;
3215 PL_lex_defer = LEX_NORMAL;
3219 /* FIXME - can these be merged? */
3220 return(PL_nexttoke[PL_lasttoke].next_type);
3222 return REPORT(PL_nexttype[PL_nexttoke]);
3225 /* interpolated case modifiers like \L \U, including \Q and \E.
3226 when we get here, PL_bufptr is at the \
3228 case LEX_INTERPCASEMOD:
3230 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3231 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3233 /* handle \E or end of string */
3234 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3236 if (PL_lex_casemods) {
3237 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3238 PL_lex_casestack[PL_lex_casemods] = '\0';
3240 if (PL_bufptr != PL_bufend
3241 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3243 PL_lex_state = LEX_INTERPCONCAT;
3246 PL_thistoken = newSVpvs("\\E");
3252 while (PL_bufptr != PL_bufend &&
3253 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3255 PL_thiswhite = newSVpvs("");
3256 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3260 if (PL_bufptr != PL_bufend)
3263 PL_lex_state = LEX_INTERPCONCAT;
3267 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3268 "### Saw case modifier\n"); });
3270 if (s[1] == '\\' && s[2] == 'E') {
3273 PL_thiswhite = newSVpvs("");
3274 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3277 PL_lex_state = LEX_INTERPCONCAT;
3282 if (!PL_madskills) /* when just compiling don't need correct */
3283 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3284 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3285 if ((*s == 'L' || *s == 'U') &&
3286 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3287 PL_lex_casestack[--PL_lex_casemods] = '\0';
3290 if (PL_lex_casemods > 10)
3291 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3292 PL_lex_casestack[PL_lex_casemods++] = *s;
3293 PL_lex_casestack[PL_lex_casemods] = '\0';
3294 PL_lex_state = LEX_INTERPCONCAT;
3295 start_force(PL_curforce);
3296 NEXTVAL_NEXTTOKE.ival = 0;
3298 start_force(PL_curforce);
3300 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3302 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3304 NEXTVAL_NEXTTOKE.ival = OP_LC;
3306 NEXTVAL_NEXTTOKE.ival = OP_UC;
3308 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3310 Perl_croak(aTHX_ "panic: yylex");
3312 SV* const tmpsv = newSVpvs("");
3313 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3319 if (PL_lex_starts) {
3325 sv_free(PL_thistoken);
3326 PL_thistoken = newSVpvs("");
3329 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3330 if (PL_lex_casemods == 1 && PL_lex_inpat)
3339 case LEX_INTERPPUSH:
3340 return REPORT(sublex_push());
3342 case LEX_INTERPSTART:
3343 if (PL_bufptr == PL_bufend)
3344 return REPORT(sublex_done());
3345 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3346 "### Interpolated variable\n"); });
3348 PL_lex_dojoin = (*PL_bufptr == '@');
3349 PL_lex_state = LEX_INTERPNORMAL;
3350 if (PL_lex_dojoin) {
3351 start_force(PL_curforce);
3352 NEXTVAL_NEXTTOKE.ival = 0;
3354 start_force(PL_curforce);
3355 force_ident("\"", '$');
3356 start_force(PL_curforce);
3357 NEXTVAL_NEXTTOKE.ival = 0;
3359 start_force(PL_curforce);
3360 NEXTVAL_NEXTTOKE.ival = 0;
3362 start_force(PL_curforce);
3363 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3366 if (PL_lex_starts++) {
3371 sv_free(PL_thistoken);
3372 PL_thistoken = newSVpvs("");
3375 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3376 if (!PL_lex_casemods && PL_lex_inpat)
3383 case LEX_INTERPENDMAYBE:
3384 if (intuit_more(PL_bufptr)) {
3385 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3391 if (PL_lex_dojoin) {
3392 PL_lex_dojoin = FALSE;
3393 PL_lex_state = LEX_INTERPCONCAT;
3397 sv_free(PL_thistoken);
3398 PL_thistoken = newSVpvs("");
3403 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3404 && SvEVALED(PL_lex_repl))
3406 if (PL_bufptr != PL_bufend)
3407 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3411 case LEX_INTERPCONCAT:
3413 if (PL_lex_brackets)
3414 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3416 if (PL_bufptr == PL_bufend)
3417 return REPORT(sublex_done());
3419 if (SvIVX(PL_linestr) == '\'') {
3420 SV *sv = newSVsv(PL_linestr);
3423 else if ( PL_hints & HINT_NEW_RE )
3424 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3425 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3429 s = scan_const(PL_bufptr);
3431 PL_lex_state = LEX_INTERPCASEMOD;
3433 PL_lex_state = LEX_INTERPSTART;
3436 if (s != PL_bufptr) {
3437 start_force(PL_curforce);
3439 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3441 NEXTVAL_NEXTTOKE = yylval;
3444 if (PL_lex_starts++) {
3448 sv_free(PL_thistoken);
3449 PL_thistoken = newSVpvs("");
3452 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3453 if (!PL_lex_casemods && PL_lex_inpat)
3466 PL_lex_state = LEX_NORMAL;
3467 s = scan_formline(PL_bufptr);
3468 if (!PL_lex_formbrack)
3474 PL_oldoldbufptr = PL_oldbufptr;
3480 sv_free(PL_thistoken);
3483 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3487 if (isIDFIRST_lazy_if(s,UTF))
3489 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3492 goto fake_eof; /* emulate EOF on ^D or ^Z */
3501 if (PL_lex_brackets) {
3502 yyerror((const char *)
3504 ? "Format not terminated"
3505 : "Missing right curly or square bracket"));
3507 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3508 "### Tokener got EOF\n");
3512 if (s++ < PL_bufend)
3513 goto retry; /* ignore stray nulls */
3516 if (!PL_in_eval && !PL_preambled) {
3517 PL_preambled = TRUE;
3522 sv_setpv(PL_linestr,incl_perldb());
3523 if (SvCUR(PL_linestr))
3524 sv_catpvs(PL_linestr,";");
3526 while(AvFILLp(PL_preambleav) >= 0) {
3527 SV *tmpsv = av_shift(PL_preambleav);
3528 sv_catsv(PL_linestr, tmpsv);
3529 sv_catpvs(PL_linestr, ";");
3532 sv_free((SV*)PL_preambleav);
3533 PL_preambleav = NULL;
3535 if (PL_minus_n || PL_minus_p) {
3536 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3538 sv_catpvs(PL_linestr,"chomp;");
3541 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3542 || *PL_splitstr == '"')
3543 && strchr(PL_splitstr + 1, *PL_splitstr))
3544 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3546 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3547 bytes can be used as quoting characters. :-) */
3548 const char *splits = PL_splitstr;
3549 sv_catpvs(PL_linestr, "our @F=split(q\0");
3552 if (*splits == '\\')
3553 sv_catpvn(PL_linestr, splits, 1);
3554 sv_catpvn(PL_linestr, splits, 1);
3555 } while (*splits++);
3556 /* This loop will embed the trailing NUL of
3557 PL_linestr as the last thing it does before
3559 sv_catpvs(PL_linestr, ");");
3563 sv_catpvs(PL_linestr,"our @F=split(' ');");
3567 sv_catpvs(PL_linestr,"use feature ':5.10';");
3568 sv_catpvs(PL_linestr, "\n");
3569 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3570 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3571 PL_last_lop = PL_last_uni = NULL;
3572 if (PERLDB_LINE && PL_curstash != PL_debstash)
3573 update_debugger_info_sv(PL_linestr);
3577 bof = PL_rsfp ? TRUE : FALSE;
3578 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3581 PL_realtokenstart = -1;
3584 if (PL_preprocess && !PL_in_eval)
3585 (void)PerlProc_pclose(PL_rsfp);
3586 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3587 PerlIO_clearerr(PL_rsfp);
3589 (void)PerlIO_close(PL_rsfp);
3591 PL_doextract = FALSE;
3593 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3598 sv_setpv(PL_linestr,
3601 ? ";}continue{print;}" : ";}"));
3602 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3603 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3604 PL_last_lop = PL_last_uni = NULL;
3605 PL_minus_n = PL_minus_p = 0;
3608 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3609 PL_last_lop = PL_last_uni = NULL;
3610 sv_setpvn(PL_linestr,"",0);
3611 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3613 /* If it looks like the start of a BOM or raw UTF-16,
3614 * check if it in fact is. */
3620 #ifdef PERLIO_IS_STDIO
3621 # ifdef __GNU_LIBRARY__
3622 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3623 # define FTELL_FOR_PIPE_IS_BROKEN
3627 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3628 # define FTELL_FOR_PIPE_IS_BROKEN
3633 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3634 /* This loses the possibility to detect the bof
3635 * situation on perl -P when the libc5 is being used.
3636 * Workaround? Maybe attach some extra state to PL_rsfp?
3639 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3641 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3644 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3645 s = swallow_bom((U8*)s);
3649 /* Incest with pod. */
3652 sv_catsv(PL_thiswhite, PL_linestr);
3654 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3655 sv_setpvn(PL_linestr, "", 0);
3656 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3658 PL_last_lop = PL_last_uni = NULL;
3659 PL_doextract = FALSE;
3663 } while (PL_doextract);
3664 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3665 if (PERLDB_LINE && PL_curstash != PL_debstash)
3666 update_debugger_info_sv(PL_linestr);
3667 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3668 PL_last_lop = PL_last_uni = NULL;
3669 if (CopLINE(PL_curcop) == 1) {
3670 while (s < PL_bufend && isSPACE(*s))
3672 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3676 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3680 if (*s == '#' && *(s+1) == '!')
3682 #ifdef ALTERNATE_SHEBANG
3684 static char const as[] = ALTERNATE_SHEBANG;
3685 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3686 d = s + (sizeof(as) - 1);
3688 #endif /* ALTERNATE_SHEBANG */
3697 while (*d && !isSPACE(*d))
3701 #ifdef ARG_ZERO_IS_SCRIPT
3702 if (ipathend > ipath) {
3704 * HP-UX (at least) sets argv[0] to the script name,
3705 * which makes $^X incorrect. And Digital UNIX and Linux,
3706 * at least, set argv[0] to the basename of the Perl
3707 * interpreter. So, having found "#!", we'll set it right.
3709 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3711 assert(SvPOK(x) || SvGMAGICAL(x));
3712 if (sv_eq(x, CopFILESV(PL_curcop))) {
3713 sv_setpvn(x, ipath, ipathend - ipath);
3719 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3720 const char * const lstart = SvPV_const(x,llen);
3722 bstart += blen - llen;
3723 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3724 sv_setpvn(x, ipath, ipathend - ipath);
3729 TAINT_NOT; /* $^X is always tainted, but that's OK */
3731 #endif /* ARG_ZERO_IS_SCRIPT */
3736 d = instr(s,"perl -");
3738 d = instr(s,"perl");
3740 /* avoid getting into infinite loops when shebang
3741 * line contains "Perl" rather than "perl" */
3743 for (d = ipathend-4; d >= ipath; --d) {
3744 if ((*d == 'p' || *d == 'P')
3745 && !ibcmp(d, "perl", 4))
3755 #ifdef ALTERNATE_SHEBANG
3757 * If the ALTERNATE_SHEBANG on this system starts with a
3758 * character that can be part of a Perl expression, then if
3759 * we see it but not "perl", we're probably looking at the
3760 * start of Perl code, not a request to hand off to some
3761 * other interpreter. Similarly, if "perl" is there, but
3762 * not in the first 'word' of the line, we assume the line
3763 * contains the start of the Perl program.
3765 if (d && *s != '#') {
3766 const char *c = ipath;
3767 while (*c && !strchr("; \t\r\n\f\v#", *c))
3770 d = NULL; /* "perl" not in first word; ignore */
3772 *s = '#'; /* Don't try to parse shebang line */
3774 #endif /* ALTERNATE_SHEBANG */
3775 #ifndef MACOS_TRADITIONAL
3780 !instr(s,"indir") &&
3781 instr(PL_origargv[0],"perl"))
3788 while (s < PL_bufend && isSPACE(*s))
3790 if (s < PL_bufend) {
3791 Newxz(newargv,PL_origargc+3,char*);
3793 while (s < PL_bufend && !isSPACE(*s))
3796 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3799 newargv = PL_origargv;
3802 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3804 Perl_croak(aTHX_ "Can't exec %s", ipath);
3808 while (*d && !isSPACE(*d))
3810 while (SPACE_OR_TAB(*d))
3814 const bool switches_done = PL_doswitches;
3815 const U32 oldpdb = PL_perldb;
3816 const bool oldn = PL_minus_n;
3817 const bool oldp = PL_minus_p;
3820 if (*d == 'M' || *d == 'm' || *d == 'C') {
3821 const char * const m = d;
3822 while (*d && !isSPACE(*d))
3824 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3827 d = moreswitches(d);
3829 if (PL_doswitches && !switches_done) {
3830 int argc = PL_origargc;
3831 char **argv = PL_origargv;
3834 } while (argc && argv[0][0] == '-' && argv[0][1]);
3835 init_argv_symbols(argc,argv);
3837 if ((PERLDB_LINE && !oldpdb) ||
3838 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3839 /* if we have already added "LINE: while (<>) {",
3840 we must not do it again */
3842 sv_setpvn(PL_linestr, "", 0);
3843 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3844 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3845 PL_last_lop = PL_last_uni = NULL;
3846 PL_preambled = FALSE;
3848 (void)gv_fetchfile(PL_origfilename);
3855 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3857 PL_lex_state = LEX_FORMLINE;
3862 #ifdef PERL_STRICT_CR
3863 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3865 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3867 case ' ': case '\t': case '\f': case 013:
3868 #ifdef MACOS_TRADITIONAL
3872 PL_realtokenstart = -1;
3881 PL_realtokenstart = -1;
3885 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3886 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3887 /* handle eval qq[#line 1 "foo"\n ...] */
3888 CopLINE_dec(PL_curcop);
3891 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3893 if (!PL_in_eval || PL_rsfp)
3898 while (d < PL_bufend && *d != '\n')
3902 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3903 Perl_croak(aTHX_ "panic: input overflow");
3906 PL_thiswhite = newSVpvn(s, d - s);
3911 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3913 PL_lex_state = LEX_FORMLINE;
3919 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3920 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3923 TOKEN(PEG); /* make sure any #! line is accessible */
3928 /* if (PL_madskills && PL_lex_formbrack) { */
3930 while (d < PL_bufend && *d != '\n')
3934 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3935 Perl_croak(aTHX_ "panic: input overflow");
3936 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3938 PL_thiswhite = newSVpvs("");
3939 if (CopLINE(PL_curcop) == 1) {
3940 sv_setpvn(PL_thiswhite, "", 0);
3943 sv_catpvn(PL_thiswhite, s, d - s);
3957 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3965 while (s < PL_bufend && SPACE_OR_TAB(*s))
3968 if (strnEQ(s,"=>",2)) {
3969 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3970 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
3971 OPERATOR('-'); /* unary minus */
3973 PL_last_uni = PL_oldbufptr;
3975 case 'r': ftst = OP_FTEREAD; break;
3976 case 'w': ftst = OP_FTEWRITE; break;
3977 case 'x': ftst = OP_FTEEXEC; break;
3978 case 'o': ftst = OP_FTEOWNED; break;
3979 case 'R': ftst = OP_FTRREAD; break;
3980 case 'W': ftst = OP_FTRWRITE; break;
3981 case 'X': ftst = OP_FTREXEC; break;
3982 case 'O': ftst = OP_FTROWNED; break;
3983 case 'e': ftst = OP_FTIS; break;
3984 case 'z': ftst = OP_FTZERO; break;
3985 case 's': ftst = OP_FTSIZE; break;
3986 case 'f': ftst = OP_FTFILE; break;
3987 case 'd': ftst = OP_FTDIR; break;
3988 case 'l': ftst = OP_FTLINK; break;
3989 case 'p': ftst = OP_FTPIPE; break;
3990 case 'S': ftst = OP_FTSOCK; break;
3991 case 'u': ftst = OP_FTSUID; break;
3992 case 'g': ftst = OP_FTSGID; break;
3993 case 'k': ftst = OP_FTSVTX; break;
3994 case 'b': ftst = OP_FTBLK; break;
3995 case 'c': ftst = OP_FTCHR; break;
3996 case 't': ftst = OP_FTTTY; break;
3997 case 'T': ftst = OP_FTTEXT; break;
3998 case 'B': ftst = OP_FTBINARY; break;
3999 case 'M': case 'A': case 'C':
4000 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4002 case 'M': ftst = OP_FTMTIME; break;
4003 case 'A': ftst = OP_FTATIME; break;
4004 case 'C': ftst = OP_FTCTIME; break;
4012 PL_last_lop_op = (OPCODE)ftst;
4013 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4014 "### Saw file test %c\n", (int)tmp);
4019 /* Assume it was a minus followed by a one-letter named
4020 * subroutine call (or a -bareword), then. */
4021 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4022 "### '-%c' looked like a file test but was not\n",
4029 const char tmp = *s++;
4032 if (PL_expect == XOPERATOR)
4037 else if (*s == '>') {
4040 if (isIDFIRST_lazy_if(s,UTF)) {
4041 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4049 if (PL_expect == XOPERATOR)
4052 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4054 OPERATOR('-'); /* unary minus */
4060 const char tmp = *s++;
4063 if (PL_expect == XOPERATOR)
4068 if (PL_expect == XOPERATOR)
4071 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4078 if (PL_expect != XOPERATOR) {
4079 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4080 PL_expect = XOPERATOR;
4081 force_ident(PL_tokenbuf, '*');
4094 if (PL_expect == XOPERATOR) {
4098 PL_tokenbuf[0] = '%';
4099 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
4100 if (!PL_tokenbuf[1]) {
4103 PL_pending_ident = '%';
4114 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
4115 && FEATURE_IS_ENABLED("~~"))
4122 const char tmp = *s++;
4128 goto just_a_word_zero_gv;
4131 switch (PL_expect) {
4137 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4139 PL_bufptr = s; /* update in case we back off */
4145 PL_expect = XTERMBLOCK;
4148 stuffstart = s - SvPVX(PL_linestr) - 1;
4152 while (isIDFIRST_lazy_if(s,UTF)) {
4155 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4156 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4157 if (tmp < 0) tmp = -tmp;
4172 sv = newSVpvn(s, len);
4174 d = scan_str(d,TRUE,TRUE);
4176 /* MUST advance bufptr here to avoid bogus
4177 "at end of line" context messages from yyerror().
4179 PL_bufptr = s + len;
4180 yyerror("Unterminated attribute parameter in attribute list");
4184 return REPORT(0); /* EOF indicator */
4188 sv_catsv(sv, PL_lex_stuff);
4189 attrs = append_elem(OP_LIST, attrs,
4190 newSVOP(OP_CONST, 0, sv));
4191 SvREFCNT_dec(PL_lex_stuff);
4192 PL_lex_stuff = NULL;
4195 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4197 if (PL_in_my == KEY_our) {
4199 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4201 /* skip to avoid loading attributes.pm */
4203 deprecate(":unique");
4206 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4209 /* NOTE: any CV attrs applied here need to be part of
4210 the CVf_BUILTIN_ATTRS define in cv.h! */
4211 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4213 CvLVALUE_on(PL_compcv);
4215 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4217 CvLOCKED_on(PL_compcv);
4219 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4221 CvMETHOD_on(PL_compcv);
4223 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4225 CvASSERTION_on(PL_compcv);
4227 /* After we've set the flags, it could be argued that
4228 we don't need to do the attributes.pm-based setting
4229 process, and shouldn't bother appending recognized
4230 flags. To experiment with that, uncomment the
4231 following "else". (Note that's already been
4232 uncommented. That keeps the above-applied built-in
4233 attributes from being intercepted (and possibly
4234 rejected) by a package's attribute routines, but is
4235 justified by the performance win for the common case
4236 of applying only built-in attributes.) */
4238 attrs = append_elem(OP_LIST, attrs,
4239 newSVOP(OP_CONST, 0,
4243 if (*s == ':' && s[1] != ':')
4246 break; /* require real whitespace or :'s */
4247 /* XXX losing whitespace on sequential attributes here */
4251 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4252 if (*s != ';' && *s != '}' && *s != tmp
4253 && (tmp != '=' || *s != ')')) {
4254 const char q = ((*s == '\'') ? '"' : '\'');
4255 /* If here for an expression, and parsed no attrs, back
4257 if (tmp == '=' && !attrs) {
4261 /* MUST advance bufptr here to avoid bogus "at end of line"
4262 context messages from yyerror().
4265 yyerror( (const char *)
4267 ? Perl_form(aTHX_ "Invalid separator character "
4268 "%c%c%c in attribute list", q, *s, q)
4269 : "Unterminated attribute list" ) );
4277 start_force(PL_curforce);
4278 NEXTVAL_NEXTTOKE.opval = attrs;
4279 CURMAD('_', PL_nextwhite);
4284 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4285 (s - SvPVX(PL_linestr)) - stuffstart);
4293 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4294 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4302 const char tmp = *s++;
4307 const char tmp = *s++;
4315 if (PL_lex_brackets <= 0)
4316 yyerror("Unmatched right square bracket");
4319 if (PL_lex_state == LEX_INTERPNORMAL) {
4320 if (PL_lex_brackets == 0) {
4321 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4322 PL_lex_state = LEX_INTERPEND;
4329 if (PL_lex_brackets > 100) {
4330 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4332 switch (PL_expect) {
4334 if (PL_lex_formbrack) {
4338 if (PL_oldoldbufptr == PL_last_lop)
4339 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4341 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4342 OPERATOR(HASHBRACK);
4344 while (s < PL_bufend && SPACE_OR_TAB(*s))
4347 PL_tokenbuf[0] = '\0';
4348 if (d < PL_bufend && *d == '-') {
4349 PL_tokenbuf[0] = '-';
4351 while (d < PL_bufend && SPACE_OR_TAB(*d))
4354 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4355 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4357 while (d < PL_bufend && SPACE_OR_TAB(*d))
4360 const char minus = (PL_tokenbuf[0] == '-');
4361 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4369 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4374 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4379 if (PL_oldoldbufptr == PL_last_lop)
4380 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4382 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4385 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4387 /* This hack is to get the ${} in the message. */
4389 yyerror("syntax error");
4392 OPERATOR(HASHBRACK);
4394 /* This hack serves to disambiguate a pair of curlies
4395 * as being a block or an anon hash. Normally, expectation
4396 * determines that, but in cases where we're not in a
4397 * position to expect anything in particular (like inside
4398 * eval"") we have to resolve the ambiguity. This code
4399 * covers the case where the first term in the curlies is a
4400 * quoted string. Most other cases need to be explicitly
4401 * disambiguated by prepending a "+" before the opening
4402 * curly in order to force resolution as an anon hash.
4404 * XXX should probably propagate the outer expectation
4405 * into eval"" to rely less on this hack, but that could
4406 * potentially break current behavior of eval"".
4410 if (*s == '\'' || *s == '"' || *s == '`') {
4411 /* common case: get past first string, handling escapes */
4412 for (t++; t < PL_bufend && *t != *s;)
4413 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4417 else if (*s == 'q') {
4420 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4423 /* skip q//-like construct */
4425 char open, close, term;
4428 while (t < PL_bufend && isSPACE(*t))
4430 /* check for q => */
4431 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4432 OPERATOR(HASHBRACK);
4436 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4440 for (t++; t < PL_bufend; t++) {
4441 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4443 else if (*t == open)
4447 for (t++; t < PL_bufend; t++) {
4448 if (*t == '\\' && t+1 < PL_bufend)
4450 else if (*t == close && --brackets <= 0)
4452 else if (*t == open)
4459 /* skip plain q word */
4460 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4463 else if (isALNUM_lazy_if(t,UTF)) {
4465 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4468 while (t < PL_bufend && isSPACE(*t))
4470 /* if comma follows first term, call it an anon hash */
4471 /* XXX it could be a comma expression with loop modifiers */
4472 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4473 || (*t == '=' && t[1] == '>')))
4474 OPERATOR(HASHBRACK);
4475 if (PL_expect == XREF)
4478 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4484 yylval.ival = CopLINE(PL_curcop);
4485 if (isSPACE(*s) || *s == '#')
4486 PL_copline = NOLINE; /* invalidate current command line number */
4491 if (PL_lex_brackets <= 0)
4492 yyerror("Unmatched right curly bracket");
4494 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4495 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4496 PL_lex_formbrack = 0;
4497 if (PL_lex_state == LEX_INTERPNORMAL) {
4498 if (PL_lex_brackets == 0) {
4499 if (PL_expect & XFAKEBRACK) {
4500 PL_expect &= XENUMMASK;
4501 PL_lex_state = LEX_INTERPEND;
4506 PL_thiswhite = newSVpvs("");
4507 sv_catpvn(PL_thiswhite,"}",1);
4510 return yylex(); /* ignore fake brackets */
4512 if (*s == '-' && s[1] == '>')
4513 PL_lex_state = LEX_INTERPENDMAYBE;
4514 else if (*s != '[' && *s != '{')
4515 PL_lex_state = LEX_INTERPEND;
4518 if (PL_expect & XFAKEBRACK) {
4519 PL_expect &= XENUMMASK;
4521 return yylex(); /* ignore fake brackets */
4523 start_force(PL_curforce);
4525 curmad('X', newSVpvn(s-1,1));
4526 CURMAD('_', PL_thiswhite);
4531 PL_thistoken = newSVpvs("");
4539 if (PL_expect == XOPERATOR) {
4540 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4541 && isIDFIRST_lazy_if(s,UTF))
4543 CopLINE_dec(PL_curcop);
4544 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4545 CopLINE_inc(PL_curcop);
4550 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4552 PL_expect = XOPERATOR;
4553 force_ident(PL_tokenbuf, '&');
4557 yylval.ival = (OPpENTERSUB_AMPER<<8);
4569 const char tmp = *s++;
4576 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4577 && strchr("+-*/%.^&|<",tmp))
4578 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4579 "Reversed %c= operator",(int)tmp);
4581 if (PL_expect == XSTATE && isALPHA(tmp) &&
4582 (s == PL_linestart+1 || s[-2] == '\n') )
4584 if (PL_in_eval && !PL_rsfp) {
4589 if (strnEQ(s,"=cut",4)) {
4605 PL_thiswhite = newSVpvs("");
4606 sv_catpvn(PL_thiswhite, PL_linestart,
4607 PL_bufend - PL_linestart);
4611 PL_doextract = TRUE;
4615 if (PL_lex_brackets < PL_lex_formbrack) {
4617 #ifdef PERL_STRICT_CR
4618 while (SPACE_OR_TAB(*t))
4620 while (SPACE_OR_TAB(*t) || *t == '\r')
4623 if (*t == '\n' || *t == '#') {
4634 const char tmp = *s++;
4636 /* was this !=~ where !~ was meant?
4637 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4639 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4640 const char *t = s+1;
4642 while (t < PL_bufend && isSPACE(*t))
4645 if (*t == '/' || *t == '?' ||
4646 ((*t == 'm' || *t == 's' || *t == 'y')
4647 && !isALNUM(t[1])) ||
4648 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4649 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4650 "!=~ should be !~");
4660 if (PL_expect != XOPERATOR) {
4661 if (s[1] != '<' && !strchr(s,'>'))
4664 s = scan_heredoc(s);
4666 s = scan_inputsymbol(s);
4667 TERM(sublex_start());
4673 SHop(OP_LEFT_SHIFT);
4687 const char tmp = *s++;
4689 SHop(OP_RIGHT_SHIFT);
4690 else if (tmp == '=')
4699 if (PL_expect == XOPERATOR) {
4700 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4702 deprecate_old(commaless_variable_list);
4703 return REPORT(','); /* grandfather non-comma-format format */
4707 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4708 PL_tokenbuf[0] = '@';
4709 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4710 sizeof PL_tokenbuf - 1, FALSE);
4711 if (PL_expect == XOPERATOR)
4712 no_op("Array length", s);
4713 if (!PL_tokenbuf[1])
4715 PL_expect = XOPERATOR;
4716 PL_pending_ident = '#';
4720 PL_tokenbuf[0] = '$';
4721 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4722 sizeof PL_tokenbuf - 1, FALSE);
4723 if (PL_expect == XOPERATOR)
4725 if (!PL_tokenbuf[1]) {
4727 yyerror("Final $ should be \\$ or $name");
4731 /* This kludge not intended to be bulletproof. */
4732 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4733 yylval.opval = newSVOP(OP_CONST, 0,
4734 newSViv(CopARYBASE_get(&PL_compiling)));
4735 yylval.opval->op_private = OPpCONST_ARYBASE;
4741 const char tmp = *s;
4742 if (PL_lex_state == LEX_NORMAL)
4745 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4746 && intuit_more(s)) {
4748 PL_tokenbuf[0] = '@';
4749 if (ckWARN(WARN_SYNTAX)) {
4752 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4755 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4756 while (t < PL_bufend && *t != ']')
4758 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4759 "Multidimensional syntax %.*s not supported",
4760 (int)((t - PL_bufptr) + 1), PL_bufptr);
4764 else if (*s == '{') {
4766 PL_tokenbuf[0] = '%';
4767 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4768 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4770 char tmpbuf[sizeof PL_tokenbuf];
4773 } while (isSPACE(*t));
4774 if (isIDFIRST_lazy_if(t,UTF)) {
4776 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4780 if (*t == ';' && get_cv(tmpbuf, FALSE))
4781 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4782 "You need to quote \"%s\"",
4789 PL_expect = XOPERATOR;
4790 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4791 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4792 if (!islop || PL_last_lop_op == OP_GREPSTART)
4793 PL_expect = XOPERATOR;
4794 else if (strchr("$@\"'`q", *s))
4795 PL_expect = XTERM; /* e.g. print $fh "foo" */
4796 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4797 PL_expect = XTERM; /* e.g. print $fh &sub */
4798 else if (isIDFIRST_lazy_if(s,UTF)) {
4799 char tmpbuf[sizeof PL_tokenbuf];
4801 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4802 if ((t2 = keyword(tmpbuf, len, 0))) {
4803 /* binary operators exclude handle interpretations */
4815 PL_expect = XTERM; /* e.g. print $fh length() */
4820 PL_expect = XTERM; /* e.g. print $fh subr() */
4823 else if (isDIGIT(*s))
4824 PL_expect = XTERM; /* e.g. print $fh 3 */
4825 else if (*s == '.' && isDIGIT(s[1]))
4826 PL_expect = XTERM; /* e.g. print $fh .3 */
4827 else if ((*s == '?' || *s == '-' || *s == '+')
4828 && !isSPACE(s[1]) && s[1] != '=')
4829 PL_expect = XTERM; /* e.g. print $fh -1 */
4830 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4832 PL_expect = XTERM; /* e.g. print $fh /.../
4833 XXX except DORDOR operator
4835 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4837 PL_expect = XTERM; /* print $fh <<"EOF" */
4840 PL_pending_ident = '$';
4844 if (PL_expect == XOPERATOR)
4846 PL_tokenbuf[0] = '@';
4847 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4848 if (!PL_tokenbuf[1]) {
4851 if (PL_lex_state == LEX_NORMAL)
4853 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4855 PL_tokenbuf[0] = '%';
4857 /* Warn about @ where they meant $. */
4858 if (*s == '[' || *s == '{') {
4859 if (ckWARN(WARN_SYNTAX)) {
4860 const char *t = s + 1;
4861 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4863 if (*t == '}' || *t == ']') {
4865 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4866 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4867 "Scalar value %.*s better written as $%.*s",
4868 (int)(t-PL_bufptr), PL_bufptr,
4869 (int)(t-PL_bufptr-1), PL_bufptr+1);
4874 PL_pending_ident = '@';
4877 case '/': /* may be division, defined-or, or pattern */
4878 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4882 case '?': /* may either be conditional or pattern */
4883 if(PL_expect == XOPERATOR) {
4891 /* A // operator. */
4901 /* Disable warning on "study /blah/" */
4902 if (PL_oldoldbufptr == PL_last_uni
4903 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4904 || memNE(PL_last_uni, "study", 5)
4905 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4908 s = scan_pat(s,OP_MATCH);
4909 TERM(sublex_start());
4913 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4914 #ifdef PERL_STRICT_CR
4917 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4919 && (s == PL_linestart || s[-1] == '\n') )
4921 PL_lex_formbrack = 0;
4925 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4931 yylval.ival = OPf_SPECIAL;
4937 if (PL_expect != XOPERATOR)
4942 case '0': case '1': case '2': case '3': case '4':
4943 case '5': case '6': case '7': case '8': case '9':
4944 s = scan_num(s, &yylval);
4945 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
4946 if (PL_expect == XOPERATOR)
4951 s = scan_str(s,!!PL_madskills,FALSE);
4952 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4953 if (PL_expect == XOPERATOR) {
4954 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4956 deprecate_old(commaless_variable_list);
4957 return REPORT(','); /* grandfather non-comma-format format */
4964 yylval.ival = OP_CONST;
4965 TERM(sublex_start());
4968 s = scan_str(s,!!PL_madskills,FALSE);
4969 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4970 if (PL_expect == XOPERATOR) {
4971 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4973 deprecate_old(commaless_variable_list);
4974 return REPORT(','); /* grandfather non-comma-format format */
4981 yylval.ival = OP_CONST;
4982 /* FIXME. I think that this can be const if char *d is replaced by
4983 more localised variables. */
4984 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4985 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4986 yylval.ival = OP_STRINGIFY;
4990 TERM(sublex_start());
4993 s = scan_str(s,!!PL_madskills,FALSE);
4994 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
4995 if (PL_expect == XOPERATOR)
4996 no_op("Backticks",s);
4999 readpipe_override();
5000 TERM(sublex_start());
5004 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5005 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5007 if (PL_expect == XOPERATOR)
5008 no_op("Backslash",s);
5012 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5013 char *start = s + 2;
5014 while (isDIGIT(*start) || *start == '_')
5016 if (*start == '.' && isDIGIT(start[1])) {
5017 s = scan_num(s, &yylval);
5020 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5021 else if (!isALPHA(*start) && (PL_expect == XTERM
5022 || PL_expect == XREF || PL_expect == XSTATE
5023 || PL_expect == XTERMORDORDOR)) {
5024 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5025 const char c = *start;
5028 gv = gv_fetchpv(s, 0, SVt_PVCV);
5031 s = scan_num(s, &yylval);
5038 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5080 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5082 /* Some keywords can be followed by any delimiter, including ':' */
5083 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5084 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5085 (PL_tokenbuf[0] == 'q' &&
5086 strchr("qwxr", PL_tokenbuf[1])))));
5088 /* x::* is just a word, unless x is "CORE" */
5089 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5093 while (d < PL_bufend && isSPACE(*d))
5094 d++; /* no comments skipped here, or s### is misparsed */
5096 /* Is this a label? */
5097 if (!tmp && PL_expect == XSTATE
5098 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5100 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5105 /* Check for keywords */
5106 tmp = keyword(PL_tokenbuf, len, 0);
5108 /* Is this a word before a => operator? */
5109 if (*d == '=' && d[1] == '>') {
5112 = (OP*)newSVOP(OP_CONST, 0,
5113 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5114 yylval.opval->op_private = OPpCONST_BARE;
5118 if (tmp < 0) { /* second-class keyword? */
5119 GV *ogv = NULL; /* override (winner) */
5120 GV *hgv = NULL; /* hidden (loser) */
5121 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5123 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5126 if (GvIMPORTED_CV(gv))
5128 else if (! CvMETHOD(cv))
5132 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5133 (gv = *gvp) != (GV*)&PL_sv_undef &&
5134 GvCVu(gv) && GvIMPORTED_CV(gv))
5141 tmp = 0; /* overridden by import or by GLOBAL */
5144 && -tmp==KEY_lock /* XXX generalizable kludge */
5146 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5148 tmp = 0; /* any sub overrides "weak" keyword */
5150 else { /* no override */
5152 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5153 Perl_warner(aTHX_ packWARN(WARN_MISC),
5154 "dump() better written as CORE::dump()");
5158 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5159 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5160 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5161 "Ambiguous call resolved as CORE::%s(), %s",
5162 GvENAME(hgv), "qualify as such or use &");
5169 default: /* not a keyword */
5170 /* Trade off - by using this evil construction we can pull the
5171 variable gv into the block labelled keylookup. If not, then
5172 we have to give it function scope so that the goto from the
5173 earlier ':' case doesn't bypass the initialisation. */
5175 just_a_word_zero_gv:
5183 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5186 SV *nextPL_nextwhite = 0;
5190 /* Get the rest if it looks like a package qualifier */
5192 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5194 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5197 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5198 *s == '\'' ? "'" : "::");
5203 if (PL_expect == XOPERATOR) {
5204 if (PL_bufptr == PL_linestart) {
5205 CopLINE_dec(PL_curcop);
5206 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5207 CopLINE_inc(PL_curcop);
5210 no_op("Bareword",s);
5213 /* Look for a subroutine with this name in current package,
5214 unless name is "Foo::", in which case Foo is a bearword
5215 (and a package name). */
5217 if (len > 2 && !PL_madskills &&
5218 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5220 if (ckWARN(WARN_BAREWORD)
5221 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5222 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5223 "Bareword \"%s\" refers to nonexistent package",
5226 PL_tokenbuf[len] = '\0';
5232 /* Mustn't actually add anything to a symbol table.
5233 But also don't want to "initialise" any placeholder
5234 constants that might already be there into full
5235 blown PVGVs with attached PVCV. */
5236 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5237 GV_NOADD_NOINIT, SVt_PVCV);
5242 /* if we saw a global override before, get the right name */
5245 sv = newSVpvs("CORE::GLOBAL::");
5246 sv_catpv(sv,PL_tokenbuf);
5249 /* If len is 0, newSVpv does strlen(), which is correct.
5250 If len is non-zero, then it will be the true length,
5251 and so the scalar will be created correctly. */
5252 sv = newSVpv(PL_tokenbuf,len);
5255 if (PL_madskills && !PL_thistoken) {
5256 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5257 PL_thistoken = newSVpv(start,s - start);
5258 PL_realtokenstart = s - SvPVX(PL_linestr);
5262 /* Presume this is going to be a bareword of some sort. */
5265 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5266 yylval.opval->op_private = OPpCONST_BARE;
5267 /* UTF-8 package name? */
5268 if (UTF && !IN_BYTES &&
5269 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5272 /* And if "Foo::", then that's what it certainly is. */
5277 /* Do the explicit type check so that we don't need to force
5278 the initialisation of the symbol table to have a real GV.
5279 Beware - gv may not really be a PVGV, cv may not really be
5280 a PVCV, (because of the space optimisations that gv_init
5281 understands) But they're true if for this symbol there is
5282 respectively a typeglob and a subroutine.
5284 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5285 /* Real typeglob, so get the real subroutine: */
5287 /* A proxy for a subroutine in this package? */
5288 : SvOK(gv) ? (CV *) gv : NULL)
5291 /* See if it's the indirect object for a list operator. */
5293 if (PL_oldoldbufptr &&
5294 PL_oldoldbufptr < PL_bufptr &&
5295 (PL_oldoldbufptr == PL_last_lop
5296 || PL_oldoldbufptr == PL_last_uni) &&
5297 /* NO SKIPSPACE BEFORE HERE! */
5298 (PL_expect == XREF ||
5299 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5301 bool immediate_paren = *s == '(';
5303 /* (Now we can afford to cross potential line boundary.) */
5304 s = SKIPSPACE2(s,nextPL_nextwhite);
5306 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5309 /* Two barewords in a row may indicate method call. */
5311 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5312 (tmp = intuit_method(s, gv, cv)))
5315 /* If not a declared subroutine, it's an indirect object. */
5316 /* (But it's an indir obj regardless for sort.) */
5317 /* Also, if "_" follows a filetest operator, it's a bareword */
5320 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5322 (PL_last_lop_op != OP_MAPSTART &&
5323 PL_last_lop_op != OP_GREPSTART))))
5324 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5325 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5328 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5333 PL_expect = XOPERATOR;
5336 s = SKIPSPACE2(s,nextPL_nextwhite);
5337 PL_nextwhite = nextPL_nextwhite;
5342 /* Is this a word before a => operator? */
5343 if (*s == '=' && s[1] == '>' && !pkgname) {
5345 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5346 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5347 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5351 /* If followed by a paren, it's certainly a subroutine. */
5356 while (SPACE_OR_TAB(*d))
5358 if (*d == ')' && (sv = gv_const_sv(gv))) {
5362 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5363 sv_catpvn(PL_thistoken, par, s - par);
5365 sv_free(PL_nextwhite);
5375 PL_nextwhite = PL_thiswhite;
5378 start_force(PL_curforce);
5380 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5381 PL_expect = XOPERATOR;
5384 PL_nextwhite = nextPL_nextwhite;
5385 curmad('X', PL_thistoken);
5386 PL_thistoken = newSVpvs("");
5394 /* If followed by var or block, call it a method (unless sub) */
5396 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5397 PL_last_lop = PL_oldbufptr;
5398 PL_last_lop_op = OP_METHOD;
5402 /* If followed by a bareword, see if it looks like indir obj. */
5405 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5406 && (tmp = intuit_method(s, gv, cv)))
5409 /* Not a method, so call it a subroutine (if defined) */
5412 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5413 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5414 "Ambiguous use of -%s resolved as -&%s()",
5415 PL_tokenbuf, PL_tokenbuf);
5416 /* Check for a constant sub */
5417 if ((sv = gv_const_sv(gv))) {
5419 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5420 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5421 yylval.opval->op_private = 0;
5425 /* Resolve to GV now. */
5426 if (SvTYPE(gv) != SVt_PVGV) {
5427 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5428 assert (SvTYPE(gv) == SVt_PVGV);
5429 /* cv must have been some sort of placeholder, so
5430 now needs replacing with a real code reference. */
5434 op_free(yylval.opval);
5435 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5436 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5437 PL_last_lop = PL_oldbufptr;
5438 PL_last_lop_op = OP_ENTERSUB;
5439 /* Is there a prototype? */
5447 const char *proto = SvPV_const((SV*)cv, protolen);
5450 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5452 while (*proto == ';')
5454 if (*proto == '&' && *s == '{') {
5455 sv_setpv(PL_subname,
5458 "__ANON__" : "__ANON__::__ANON__"));
5465 PL_nextwhite = PL_thiswhite;
5468 start_force(PL_curforce);
5469 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5472 PL_nextwhite = nextPL_nextwhite;
5473 curmad('X', PL_thistoken);
5474 PL_thistoken = newSVpvs("");
5481 /* Guess harder when madskills require "best effort". */
5482 if (PL_madskills && (!gv || !GvCVu(gv))) {
5483 int probable_sub = 0;
5484 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5486 else if (isALPHA(*s)) {
5490 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5491 if (!keyword(tmpbuf, tmplen, 0))
5494 while (d < PL_bufend && isSPACE(*d))
5496 if (*d == '=' && d[1] == '>')
5501 gv = gv_fetchpv(PL_tokenbuf, TRUE, SVt_PVCV);
5502 op_free(yylval.opval);
5503 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5504 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5505 PL_last_lop = PL_oldbufptr;
5506 PL_last_lop_op = OP_ENTERSUB;
5507 PL_nextwhite = PL_thiswhite;
5509 start_force(PL_curforce);
5510 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5512 PL_nextwhite = nextPL_nextwhite;
5513 curmad('X', PL_thistoken);
5514 PL_thistoken = newSVpvs("");
5519 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5526 /* Call it a bare word */
5528 if (PL_hints & HINT_STRICT_SUBS)
5529 yylval.opval->op_private |= OPpCONST_STRICT;
5532 if (lastchar != '-') {
5533 if (ckWARN(WARN_RESERVED)) {
5537 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
5538 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5545 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5546 && ckWARN_d(WARN_AMBIGUOUS)) {
5547 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5548 "Operator or semicolon missing before %c%s",
5549 lastchar, PL_tokenbuf);
5550 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5551 "Ambiguous use of %c resolved as operator %c",
5552 lastchar, lastchar);
5558 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5559 newSVpv(CopFILE(PL_curcop),0));
5563 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5564 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5567 case KEY___PACKAGE__:
5568 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5570 ? newSVhek(HvNAME_HEK(PL_curstash))
5577 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5578 const char *pname = "main";
5579 if (PL_tokenbuf[2] == 'D')
5580 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5581 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5585 GvIOp(gv) = newIO();
5586 IoIFP(GvIOp(gv)) = PL_rsfp;
5587 #if defined(HAS_FCNTL) && defined(F_SETFD)
5589 const int fd = PerlIO_fileno(PL_rsfp);
5590 fcntl(fd,F_SETFD,fd >= 3);
5593 /* Mark this internal pseudo-handle as clean */
5594 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5596 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5597 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5598 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5600 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5601 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5602 /* if the script was opened in binmode, we need to revert
5603 * it to text mode for compatibility; but only iff it has CRs
5604 * XXX this is a questionable hack at best. */
5605 if (PL_bufend-PL_bufptr > 2
5606 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5609 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5610 loc = PerlIO_tell(PL_rsfp);
5611 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5614 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5616 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5617 #endif /* NETWARE */
5618 #ifdef PERLIO_IS_STDIO /* really? */
5619 # if defined(__BORLANDC__)
5620 /* XXX see note in do_binmode() */
5621 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5625 PerlIO_seek(PL_rsfp, loc, 0);
5629 #ifdef PERLIO_LAYERS
5632 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5633 else if (PL_encoding) {
5640 XPUSHs(PL_encoding);
5642 call_method("name", G_SCALAR);
5646 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5647 Perl_form(aTHX_ ":encoding(%"SVf")",
5656 if (PL_realtokenstart >= 0) {
5657 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5659 PL_endwhite = newSVpvs("");
5660 sv_catsv(PL_endwhite, PL_thiswhite);
5662 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5663 PL_realtokenstart = -1;
5665 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5666 SvCUR(PL_endwhite))) != Nullch) ;
5681 if (PL_expect == XSTATE) {
5688 if (*s == ':' && s[1] == ':') {
5691 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5692 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5693 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5696 else if (tmp == KEY_require || tmp == KEY_do)
5697 /* that's a way to remember we saw "CORE::" */
5710 LOP(OP_ACCEPT,XTERM);
5716 LOP(OP_ATAN2,XTERM);
5722 LOP(OP_BINMODE,XTERM);
5725 LOP(OP_BLESS,XTERM);
5734 /* When 'use switch' is in effect, continue has a dual
5735 life as a control operator. */
5737 if (!FEATURE_IS_ENABLED("switch"))
5740 /* We have to disambiguate the two senses of
5741 "continue". If the next token is a '{' then
5742 treat it as the start of a continue block;
5743 otherwise treat it as a control operator.
5755 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5772 if (!PL_cryptseen) {
5773 PL_cryptseen = TRUE;
5777 LOP(OP_CRYPT,XTERM);
5780 LOP(OP_CHMOD,XTERM);
5783 LOP(OP_CHOWN,XTERM);
5786 LOP(OP_CONNECT,XTERM);
5805 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5806 if (orig_keyword == KEY_do) {
5815 PL_hints |= HINT_BLOCK_SCOPE;
5825 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5826 LOP(OP_DBMOPEN,XTERM);
5832 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5839 yylval.ival = CopLINE(PL_curcop);
5855 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5856 UNIBRACK(OP_ENTEREVAL);
5874 case KEY_endhostent:
5880 case KEY_endservent:
5883 case KEY_endprotoent:
5894 yylval.ival = CopLINE(PL_curcop);
5896 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5899 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5902 if ((PL_bufend - p) >= 3 &&
5903 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5905 else if ((PL_bufend - p) >= 4 &&
5906 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5909 if (isIDFIRST_lazy_if(p,UTF)) {
5910 p = scan_ident(p, PL_bufend,
5911 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5915 Perl_croak(aTHX_ "Missing $ on loop variable");
5917 s = SvPVX(PL_linestr) + soff;
5923 LOP(OP_FORMLINE,XTERM);
5929 LOP(OP_FCNTL,XTERM);
5935 LOP(OP_FLOCK,XTERM);
5944 LOP(OP_GREPSTART, XREF);
5947 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5962 case KEY_getpriority:
5963 LOP(OP_GETPRIORITY,XTERM);
5965 case KEY_getprotobyname:
5968 case KEY_getprotobynumber:
5969 LOP(OP_GPBYNUMBER,XTERM);
5971 case KEY_getprotoent:
5983 case KEY_getpeername:
5984 UNI(OP_GETPEERNAME);
5986 case KEY_gethostbyname:
5989 case KEY_gethostbyaddr:
5990 LOP(OP_GHBYADDR,XTERM);
5992 case KEY_gethostent:
5995 case KEY_getnetbyname:
5998 case KEY_getnetbyaddr:
5999 LOP(OP_GNBYADDR,XTERM);
6004 case KEY_getservbyname:
6005 LOP(OP_GSBYNAME,XTERM);
6007 case KEY_getservbyport:
6008 LOP(OP_GSBYPORT,XTERM);
6010 case KEY_getservent:
6013 case KEY_getsockname:
6014 UNI(OP_GETSOCKNAME);
6016 case KEY_getsockopt:
6017 LOP(OP_GSOCKOPT,XTERM);
6032 yylval.ival = CopLINE(PL_curcop);
6043 yylval.ival = CopLINE(PL_curcop);
6047 LOP(OP_INDEX,XTERM);
6053 LOP(OP_IOCTL,XTERM);
6065 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6097 LOP(OP_LISTEN,XTERM);
6106 s = scan_pat(s,OP_MATCH);
6107 TERM(sublex_start());
6110 LOP(OP_MAPSTART, XREF);
6113 LOP(OP_MKDIR,XTERM);
6116 LOP(OP_MSGCTL,XTERM);
6119 LOP(OP_MSGGET,XTERM);
6122 LOP(OP_MSGRCV,XTERM);
6125 LOP(OP_MSGSND,XTERM);
6132 if (isIDFIRST_lazy_if(s,UTF)) {
6136 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6137 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6139 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6140 if (!PL_in_my_stash) {
6143 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6147 if (PL_madskills) { /* just add type to declarator token */
6148 sv_catsv(PL_thistoken, PL_nextwhite);
6150 sv_catpvn(PL_thistoken, start, s - start);
6158 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6165 s = tokenize_use(0, s);
6169 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6176 if (isIDFIRST_lazy_if(s,UTF)) {
6178 for (d = s; isALNUM_lazy_if(d,UTF);)
6180 for (t=d; isSPACE(*t);)
6182 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6184 && !(t[0] == '=' && t[1] == '>')
6186 int parms_len = (int)(d-s);
6187 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6188 "Precedence problem: open %.*s should be open(%.*s)",
6189 parms_len, s, parms_len, s);
6195 yylval.ival = OP_OR;
6205 LOP(OP_OPEN_DIR,XTERM);
6208 checkcomma(s,PL_tokenbuf,"filehandle");
6212 checkcomma(s,PL_tokenbuf,"filehandle");
6231 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6235 LOP(OP_PIPE_OP,XTERM);
6238 s = scan_str(s,!!PL_madskills,FALSE);
6241 yylval.ival = OP_CONST;
6242 TERM(sublex_start());
6248 s = scan_str(s,!!PL_madskills,FALSE);
6251 PL_expect = XOPERATOR;
6253 if (SvCUR(PL_lex_stuff)) {
6256 d = SvPV_force(PL_lex_stuff, len);
6258 for (; isSPACE(*d) && len; --len, ++d)
6263 if (!warned && ckWARN(WARN_QW)) {
6264 for (; !isSPACE(*d) && len; --len, ++d) {
6266 Perl_warner(aTHX_ packWARN(WARN_QW),
6267 "Possible attempt to separate words with commas");
6270 else if (*d == '#') {
6271 Perl_warner(aTHX_ packWARN(WARN_QW),
6272 "Possible attempt to put comments in qw() list");
6278 for (; !isSPACE(*d) && len; --len, ++d)
6281 sv = newSVpvn(b, d-b);
6282 if (DO_UTF8(PL_lex_stuff))
6284 words = append_elem(OP_LIST, words,
6285 newSVOP(OP_CONST, 0, tokeq(sv)));
6289 start_force(PL_curforce);
6290 NEXTVAL_NEXTTOKE.opval = words;
6295 SvREFCNT_dec(PL_lex_stuff);
6296 PL_lex_stuff = NULL;
6302 s = scan_str(s,!!PL_madskills,FALSE);
6305 yylval.ival = OP_STRINGIFY;
6306 if (SvIVX(PL_lex_stuff) == '\'')
6307 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6308 TERM(sublex_start());
6311 s = scan_pat(s,OP_QR);
6312 TERM(sublex_start());
6315 s = scan_str(s,!!PL_madskills,FALSE);
6318 readpipe_override();
6319 TERM(sublex_start());
6327 s = force_version(s, FALSE);
6329 else if (*s != 'v' || !isDIGIT(s[1])
6330 || (s = force_version(s, TRUE), *s == 'v'))
6332 *PL_tokenbuf = '\0';
6333 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6334 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6335 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
6337 yyerror("<> should be quotes");
6339 if (orig_keyword == KEY_require) {
6347 PL_last_uni = PL_oldbufptr;
6348 PL_last_lop_op = OP_REQUIRE;
6350 return REPORT( (int)REQUIRE );
6356 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6360 LOP(OP_RENAME,XTERM);
6369 LOP(OP_RINDEX,XTERM);
6379 UNIDOR(OP_READLINE);
6392 LOP(OP_REVERSE,XTERM);
6395 UNIDOR(OP_READLINK);
6403 TERM(sublex_start());
6405 TOKEN(1); /* force error */
6408 checkcomma(s,PL_tokenbuf,"filehandle");
6418 LOP(OP_SELECT,XTERM);
6424 LOP(OP_SEMCTL,XTERM);
6427 LOP(OP_SEMGET,XTERM);
6430 LOP(OP_SEMOP,XTERM);
6436 LOP(OP_SETPGRP,XTERM);
6438 case KEY_setpriority:
6439 LOP(OP_SETPRIORITY,XTERM);
6441 case KEY_sethostent:
6447 case KEY_setservent:
6450 case KEY_setprotoent:
6460 LOP(OP_SEEKDIR,XTERM);
6462 case KEY_setsockopt:
6463 LOP(OP_SSOCKOPT,XTERM);
6469 LOP(OP_SHMCTL,XTERM);
6472 LOP(OP_SHMGET,XTERM);
6475 LOP(OP_SHMREAD,XTERM);
6478 LOP(OP_SHMWRITE,XTERM);
6481 LOP(OP_SHUTDOWN,XTERM);
6490 LOP(OP_SOCKET,XTERM);
6492 case KEY_socketpair:
6493 LOP(OP_SOCKPAIR,XTERM);
6496 checkcomma(s,PL_tokenbuf,"subroutine name");
6498 if (*s == ';' || *s == ')') /* probably a close */
6499 Perl_croak(aTHX_ "sort is now a reserved word");
6501 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6505 LOP(OP_SPLIT,XTERM);
6508 LOP(OP_SPRINTF,XTERM);
6511 LOP(OP_SPLICE,XTERM);
6526 LOP(OP_SUBSTR,XTERM);
6532 char tmpbuf[sizeof PL_tokenbuf];
6533 SSize_t tboffset = 0;
6534 expectation attrful;
6535 bool have_name, have_proto;
6536 const int key = tmp;
6541 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6542 SV *subtoken = newSVpvn(tstart, s - tstart);
6546 s = SKIPSPACE2(s,tmpwhite);
6551 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6552 (*s == ':' && s[1] == ':'))
6559 attrful = XATTRBLOCK;
6560 /* remember buffer pos'n for later force_word */
6561 tboffset = s - PL_oldbufptr;
6562 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6565 nametoke = newSVpvn(s, d - s);
6567 if (strchr(tmpbuf, ':'))
6568 sv_setpv(PL_subname, tmpbuf);
6570 sv_setsv(PL_subname,PL_curstname);
6571 sv_catpvs(PL_subname,"::");
6572 sv_catpvn(PL_subname,tmpbuf,len);
6579 CURMAD('X', nametoke);
6580 CURMAD('_', tmpwhite);
6581 (void) force_word(PL_oldbufptr + tboffset, WORD,
6584 s = SKIPSPACE2(d,tmpwhite);
6591 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6592 PL_expect = XTERMBLOCK;
6593 attrful = XATTRTERM;
6594 sv_setpvn(PL_subname,"?",1);
6598 if (key == KEY_format) {
6600 PL_lex_formbrack = PL_lex_brackets + 1;
6602 PL_thistoken = subtoken;
6606 (void) force_word(PL_oldbufptr + tboffset, WORD,
6612 /* Look for a prototype */
6615 bool bad_proto = FALSE;
6616 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6618 s = scan_str(s,!!PL_madskills,FALSE);
6620 Perl_croak(aTHX_ "Prototype not terminated");
6621 /* strip spaces and check for bad characters */
6622 d = SvPVX(PL_lex_stuff);
6624 for (p = d; *p; ++p) {
6627 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6633 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6634 "Illegal character in prototype for %"SVf" : %s",
6635 (void*)PL_subname, d);
6636 SvCUR_set(PL_lex_stuff, tmp);
6641 CURMAD('q', PL_thisopen);
6642 CURMAD('_', tmpwhite);
6643 CURMAD('=', PL_thisstuff);
6644 CURMAD('Q', PL_thisclose);
6645 NEXTVAL_NEXTTOKE.opval =
6646 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6647 PL_lex_stuff = Nullsv;
6650 s = SKIPSPACE2(s,tmpwhite);
6658 if (*s == ':' && s[1] != ':')
6659 PL_expect = attrful;
6660 else if (*s != '{' && key == KEY_sub) {
6662 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6664 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, (void*)PL_subname);
6671 curmad('^', newSVpvs(""));
6672 CURMAD('_', tmpwhite);
6676 PL_thistoken = subtoken;
6679 NEXTVAL_NEXTTOKE.opval =
6680 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6681 PL_lex_stuff = NULL;
6686 sv_setpv(PL_subname,
6688 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6692 (void) force_word(PL_oldbufptr + tboffset, WORD,
6702 LOP(OP_SYSTEM,XREF);
6705 LOP(OP_SYMLINK,XTERM);
6708 LOP(OP_SYSCALL,XTERM);
6711 LOP(OP_SYSOPEN,XTERM);
6714 LOP(OP_SYSSEEK,XTERM);
6717 LOP(OP_SYSREAD,XTERM);
6720 LOP(OP_SYSWRITE,XTERM);
6724 TERM(sublex_start());
6745 LOP(OP_TRUNCATE,XTERM);
6757 yylval.ival = CopLINE(PL_curcop);
6761 yylval.ival = CopLINE(PL_curcop);
6765 LOP(OP_UNLINK,XTERM);
6771 LOP(OP_UNPACK,XTERM);
6774 LOP(OP_UTIME,XTERM);
6780 LOP(OP_UNSHIFT,XTERM);
6783 s = tokenize_use(1, s);
6793 yylval.ival = CopLINE(PL_curcop);
6797 yylval.ival = CopLINE(PL_curcop);
6801 PL_hints |= HINT_BLOCK_SCOPE;
6808 LOP(OP_WAITPID,XTERM);
6817 ctl_l[0] = toCTRL('L');
6819 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6822 /* Make sure $^L is defined */
6823 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6828 if (PL_expect == XOPERATOR)
6834 yylval.ival = OP_XOR;
6839 TERM(sublex_start());
6844 #pragma segment Main
6848 S_pending_ident(pTHX)
6853 /* pit holds the identifier we read and pending_ident is reset */
6854 char pit = PL_pending_ident;
6855 PL_pending_ident = 0;
6857 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6858 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6859 "### Pending identifier '%s'\n", PL_tokenbuf); });
6861 /* if we're in a my(), we can't allow dynamics here.
6862 $foo'bar has already been turned into $foo::bar, so
6863 just check for colons.
6865 if it's a legal name, the OP is a PADANY.
6868 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6869 if (strchr(PL_tokenbuf,':'))
6870 yyerror(Perl_form(aTHX_ "No package name allowed for "
6871 "variable %s in \"our\"",
6873 tmp = allocmy(PL_tokenbuf);
6876 if (strchr(PL_tokenbuf,':'))
6877 yyerror(Perl_form(aTHX_ PL_no_myglob,
6878 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6880 yylval.opval = newOP(OP_PADANY, 0);
6881 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6887 build the ops for accesses to a my() variable.
6889 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6890 then used in a comparison. This catches most, but not
6891 all cases. For instance, it catches
6892 sort { my($a); $a <=> $b }
6894 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6895 (although why you'd do that is anyone's guess).
6898 if (!strchr(PL_tokenbuf,':')) {
6900 tmp = pad_findmy(PL_tokenbuf);
6901 if (tmp != NOT_IN_PAD) {
6902 /* might be an "our" variable" */
6903 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6904 /* build ops for a bareword */
6905 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6906 HEK * const stashname = HvNAME_HEK(stash);
6907 SV * const sym = newSVhek(stashname);
6908 sv_catpvs(sym, "::");
6909 sv_catpv(sym, PL_tokenbuf+1);
6910 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6911 yylval.opval->op_private = OPpCONST_ENTERED;
6914 ? (GV_ADDMULTI | GV_ADDINEVAL)
6917 ((PL_tokenbuf[0] == '$') ? SVt_PV
6918 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6923 /* if it's a sort block and they're naming $a or $b */
6924 if (PL_last_lop_op == OP_SORT &&
6925 PL_tokenbuf[0] == '$' &&
6926 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6929 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6930 d < PL_bufend && *d != '\n';
6933 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6934 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6940 yylval.opval = newOP(OP_PADANY, 0);
6941 yylval.opval->op_targ = tmp;
6947 Whine if they've said @foo in a doublequoted string,
6948 and @foo isn't a variable we can find in the symbol
6951 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
6952 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
6953 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
6954 && ckWARN(WARN_AMBIGUOUS))
6956 /* Downgraded from fatal to warning 20000522 mjd */
6957 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6958 "Possible unintended interpolation of %s in string",
6963 /* build ops for a bareword */
6964 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
6965 yylval.opval->op_private = OPpCONST_ENTERED;
6968 /* If the identifier refers to a stash, don't autovivify it.
6969 * Change 24660 had the side effect of causing symbol table
6970 * hashes to always be defined, even if they were freshly
6971 * created and the only reference in the entire program was
6972 * the single statement with the defined %foo::bar:: test.
6973 * It appears that all code in the wild doing this actually
6974 * wants to know whether sub-packages have been loaded, so
6975 * by avoiding auto-vivifying symbol tables, we ensure that
6976 * defined %foo::bar:: continues to be false, and the existing
6977 * tests still give the expected answers, even though what
6978 * they're actually testing has now changed subtly.
6980 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
6982 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
6983 ((PL_tokenbuf[0] == '$') ? SVt_PV
6984 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6990 * The following code was generated by perl_keyword.pl.
6994 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
6999 case 1: /* 5 tokens of length 1 */
7031 case 2: /* 18 tokens of length 2 */
7177 case 3: /* 29 tokens of length 3 */
7181 if (name[1] == 'N' &&
7244 if (name[1] == 'i' &&
7266 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7284 if (name[1] == 'o' &&
7293 if (name[1] == 'e' &&
7302 if (name[1] == 'n' &&
7311 if (name[1] == 'o' &&
7320 if (name[1] == 'a' &&
7329 if (name[1] == 'o' &&
7391 if (name[1] == 'e' &&
7405 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7431 if (name[1] == 'i' &&
7440 if (name[1] == 's' &&
7449 if (name[1] == 'e' &&
7458 if (name[1] == 'o' &&
7470 case 4: /* 41 tokens of length 4 */
7474 if (name[1] == 'O' &&
7484 if (name[1] == 'N' &&
7494 if (name[1] == 'i' &&
7504 if (name[1] == 'h' &&
7514 if (name[1] == 'u' &&
7527 if (name[2] == 'c' &&
7536 if (name[2] == 's' &&
7545 if (name[2] == 'a' &&
7581 if (name[1] == 'o' &&
7594 if (name[2] == 't' &&
7603 if (name[2] == 'o' &&
7612 if (name[2] == 't' &&
7621 if (name[2] == 'e' &&
7634 if (name[1] == 'o' &&
7647 if (name[2] == 'y' &&
7656 if (name[2] == 'l' &&
7672 if (name[2] == 's' &&
7681 if (name[2] == 'n' &&
7690 if (name[2] == 'c' &&
7703 if (name[1] == 'e' &&
7713 if (name[1] == 'p' &&
7726 if (name[2] == 'c' &&
7735 if (name[2] == 'p' &&
7744 if (name[2] == 's' &&
7760 if (name[2] == 'n' &&
7830 if (name[2] == 'r' &&
7839 if (name[2] == 'r' &&
7848 if (name[2] == 'a' &&
7864 if (name[2] == 'l' &&
7926 if (name[2] == 'e' &&
7929 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7942 case 5: /* 39 tokens of length 5 */
7946 if (name[1] == 'E' &&
7957 if (name[1] == 'H' &&
7971 if (name[2] == 'a' &&
7981 if (name[2] == 'a' &&
7998 if (name[2] == 'e' &&
8008 if (name[2] == 'e' &&
8012 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8028 if (name[3] == 'i' &&
8037 if (name[3] == 'o' &&
8073 if (name[2] == 'o' &&
8083 if (name[2] == 'y' &&
8097 if (name[1] == 'l' &&
8111 if (name[2] == 'n' &&
8121 if (name[2] == 'o' &&
8135 if (name[1] == 'i' &&
8140 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8149 if (name[2] == 'd' &&
8159 if (name[2] == 'c' &&
8176 if (name[2] == 'c' &&
8186 if (name[2] == 't' &&
8200 if (name[1] == 'k' &&
8211 if (name[1] == 'r' &&
8225 if (name[2] == 's' &&
8235 if (name[2] == 'd' &&
8252 if (name[2] == 'm' &&
8262 if (name[2] == 'i' &&
8272 if (name[2] == 'e' &&
8282 if (name[2] == 'l' &&
8292 if (name[2] == 'a' &&
8305 if (name[3] == 't' &&
8308 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8314 if (name[3] == 'd' &&
8331 if (name[1] == 'i' &&
8345 if (name[2] == 'a' &&
8358 if (name[3] == 'e' &&
8393 if (name[2] == 'i' &&
8410 if (name[2] == 'i' &&
8420 if (name[2] == 'i' &&
8437 case 6: /* 33 tokens of length 6 */
8441 if (name[1] == 'c' &&
8456 if (name[2] == 'l' &&
8467 if (name[2] == 'r' &&
8482 if (name[1] == 'e' &&
8497 if (name[2] == 's' &&
8502 if(ckWARN_d(WARN_SYNTAX))
8503 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8509 if (name[2] == 'i' &&
8527 if (name[2] == 'l' &&
8538 if (name[2] == 'r' &&
8553 if (name[1] == 'm' &&
8568 if (name[2] == 'n' &&
8579 if (name[2] == 's' &&
8594 if (name[1] == 's' &&
8600 if (name[4] == 't' &&
8609 if (name[4] == 'e' &&
8618 if (name[4] == 'c' &&
8627 if (name[4] == 'n' &&
8643 if (name[1] == 'r' &&
8661 if (name[3] == 'a' &&
8671 if (name[3] == 'u' &&
8685 if (name[2] == 'n' &&
8703 if (name[2] == 'a' &&
8717 if (name[3] == 'e' &&
8730 if (name[4] == 't' &&
8739 if (name[4] == 'e' &&
8761 if (name[4] == 't' &&
8770 if (name[4] == 'e' &&
8786 if (name[2] == 'c' &&
8797 if (name[2] == 'l' &&
8808 if (name[2] == 'b' &&
8819 if (name[2] == 's' &&
8842 if (name[4] == 's' &&
8851 if (name[4] == 'n' &&
8864 if (name[3] == 'a' &&
8881 if (name[1] == 'a' &&
8896 case 7: /* 29 tokens of length 7 */
8900 if (name[1] == 'E' &&
8913 if (name[1] == '_' &&
8926 if (name[1] == 'i' &&
8933 return -KEY_binmode;
8939 if (name[1] == 'o' &&
8946 return -KEY_connect;
8955 if (name[2] == 'm' &&
8961 return -KEY_dbmopen;
8972 if (name[4] == 'u' &&
8976 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
8982 if (name[4] == 'n' &&
9003 if (name[1] == 'o' &&
9016 if (name[1] == 'e' &&
9023 if (name[5] == 'r' &&
9026 return -KEY_getpgrp;
9032 if (name[5] == 'i' &&
9035 return -KEY_getppid;
9048 if (name[1] == 'c' &&
9055 return -KEY_lcfirst;
9061 if (name[1] == 'p' &&
9068 return -KEY_opendir;
9074 if (name[1] == 'a' &&
9092 if (name[3] == 'd' &&
9097 return -KEY_readdir;
9103 if (name[3] == 'u' &&
9114 if (name[3] == 'e' &&
9119 return -KEY_reverse;
9138 if (name[3] == 'k' &&
9143 return -KEY_seekdir;
9149 if (name[3] == 'p' &&
9154 return -KEY_setpgrp;
9164 if (name[2] == 'm' &&
9170 return -KEY_shmread;
9176 if (name[2] == 'r' &&
9182 return -KEY_sprintf;
9191 if (name[3] == 'l' &&
9196 return -KEY_symlink;
9205 if (name[4] == 'a' &&
9209 return -KEY_syscall;
9215 if (name[4] == 'p' &&
9219 return -KEY_sysopen;
9225 if (name[4] == 'e' &&
9229 return -KEY_sysread;
9235 if (name[4] == 'e' &&
9239 return -KEY_sysseek;
9257 if (name[1] == 'e' &&
9264 return -KEY_telldir;
9273 if (name[2] == 'f' &&
9279 return -KEY_ucfirst;
9285 if (name[2] == 's' &&
9291 return -KEY_unshift;
9301 if (name[1] == 'a' &&
9308 return -KEY_waitpid;
9317 case 8: /* 26 tokens of length 8 */
9321 if (name[1] == 'U' &&
9329 return KEY_AUTOLOAD;
9340 if (name[3] == 'A' &&
9346 return KEY___DATA__;
9352 if (name[3] == 'I' &&
9358 return -KEY___FILE__;
9364 if (name[3] == 'I' &&
9370 return -KEY___LINE__;
9386 if (name[2] == 'o' &&
9393 return -KEY_closedir;
9399 if (name[2] == 'n' &&
9406 return -KEY_continue;
9416 if (name[1] == 'b' &&
9424 return -KEY_dbmclose;
9430 if (name[1] == 'n' &&
9436 if (name[4] == 'r' &&
9441 return -KEY_endgrent;
9447 if (name[4] == 'w' &&
9452 return -KEY_endpwent;
9465 if (name[1] == 'o' &&
9473 return -KEY_formline;
9479 if (name[1] == 'e' &&
9490 if (name[6] == 'n' &&
9493 return -KEY_getgrent;
9499 if (name[6] == 'i' &&
9502 return -KEY_getgrgid;
9508 if (name[6] == 'a' &&
9511 return -KEY_getgrnam;
9524 if (name[4] == 'o' &&
9529 return -KEY_getlogin;
9540 if (name[6] == 'n' &&
9543 return -KEY_getpwent;
9549 if (name[6] == 'a' &&
9552 return -KEY_getpwnam;
9558 if (name[6] == 'i' &&
9561 return -KEY_getpwuid;
9581 if (name[1] == 'e' &&
9588 if (name[5] == 'i' &&
9595 return -KEY_readline;
9600 return -KEY_readlink;
9611 if (name[5] == 'i' &&
9615 return -KEY_readpipe;
9636 if (name[4] == 'r' &&
9641 return -KEY_setgrent;
9647 if (name[4] == 'w' &&
9652 return -KEY_setpwent;
9668 if (name[3] == 'w' &&
9674 return -KEY_shmwrite;
9680 if (name[3] == 't' &&
9686 return -KEY_shutdown;
9696 if (name[2] == 's' &&
9703 return -KEY_syswrite;
9713 if (name[1] == 'r' &&
9721 return -KEY_truncate;
9730 case 9: /* 9 tokens of length 9 */
9734 if (name[1] == 'N' &&
9743 return KEY_UNITCHECK;
9749 if (name[1] == 'n' &&
9758 return -KEY_endnetent;
9764 if (name[1] == 'e' &&
9773 return -KEY_getnetent;
9779 if (name[1] == 'o' &&
9788 return -KEY_localtime;
9794 if (name[1] == 'r' &&
9803 return KEY_prototype;
9809 if (name[1] == 'u' &&
9818 return -KEY_quotemeta;
9824 if (name[1] == 'e' &&
9833 return -KEY_rewinddir;
9839 if (name[1] == 'e' &&
9848 return -KEY_setnetent;
9854 if (name[1] == 'a' &&
9863 return -KEY_wantarray;
9872 case 10: /* 9 tokens of length 10 */
9876 if (name[1] == 'n' &&
9882 if (name[4] == 'o' &&
9889 return -KEY_endhostent;
9895 if (name[4] == 'e' &&
9902 return -KEY_endservent;
9915 if (name[1] == 'e' &&
9921 if (name[4] == 'o' &&
9928 return -KEY_gethostent;
9937 if (name[5] == 'r' &&
9943 return -KEY_getservent;
9949 if (name[5] == 'c' &&
9955 return -KEY_getsockopt;
9980 if (name[4] == 'o' &&
9987 return -KEY_sethostent;
9996 if (name[5] == 'r' &&
10002 return -KEY_setservent;
10008 if (name[5] == 'c' &&
10014 return -KEY_setsockopt;
10031 if (name[2] == 'c' &&
10040 return -KEY_socketpair;
10053 case 11: /* 8 tokens of length 11 */
10057 if (name[1] == '_' &&
10067 { /* __PACKAGE__ */
10068 return -KEY___PACKAGE__;
10074 if (name[1] == 'n' &&
10084 { /* endprotoent */
10085 return -KEY_endprotoent;
10091 if (name[1] == 'e' &&
10100 if (name[5] == 'e' &&
10106 { /* getpeername */
10107 return -KEY_getpeername;
10116 if (name[6] == 'o' &&
10121 { /* getpriority */
10122 return -KEY_getpriority;
10128 if (name[6] == 't' &&
10133 { /* getprotoent */
10134 return -KEY_getprotoent;
10148 if (name[4] == 'o' &&
10155 { /* getsockname */
10156 return -KEY_getsockname;
10169 if (name[1] == 'e' &&
10177 if (name[6] == 'o' &&
10182 { /* setpriority */
10183 return -KEY_setpriority;
10189 if (name[6] == 't' &&
10194 { /* setprotoent */
10195 return -KEY_setprotoent;
10211 case 12: /* 2 tokens of length 12 */
10212 if (name[0] == 'g' &&
10224 if (name[9] == 'd' &&
10227 { /* getnetbyaddr */
10228 return -KEY_getnetbyaddr;
10234 if (name[9] == 'a' &&
10237 { /* getnetbyname */
10238 return -KEY_getnetbyname;
10250 case 13: /* 4 tokens of length 13 */
10251 if (name[0] == 'g' &&
10258 if (name[4] == 'o' &&
10267 if (name[10] == 'd' &&
10270 { /* gethostbyaddr */
10271 return -KEY_gethostbyaddr;
10277 if (name[10] == 'a' &&
10280 { /* gethostbyname */
10281 return -KEY_gethostbyname;
10294 if (name[4] == 'e' &&
10303 if (name[10] == 'a' &&
10306 { /* getservbyname */
10307 return -KEY_getservbyname;
10313 if (name[10] == 'o' &&
10316 { /* getservbyport */
10317 return -KEY_getservbyport;
10336 case 14: /* 1 tokens of length 14 */
10337 if (name[0] == 'g' &&
10351 { /* getprotobyname */
10352 return -KEY_getprotobyname;
10357 case 16: /* 1 tokens of length 16 */
10358 if (name[0] == 'g' &&
10374 { /* getprotobynumber */
10375 return -KEY_getprotobynumber;
10389 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10393 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10394 if (ckWARN(WARN_SYNTAX)) {
10397 for (w = s+2; *w && level; w++) {
10400 else if (*w == ')')
10403 while (isSPACE(*w))
10405 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
10406 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10407 "%s (...) interpreted as function",name);
10410 while (s < PL_bufend && isSPACE(*s))
10414 while (s < PL_bufend && isSPACE(*s))
10416 if (isIDFIRST_lazy_if(s,UTF)) {
10417 const char * const w = s++;
10418 while (isALNUM_lazy_if(s,UTF))
10420 while (s < PL_bufend && isSPACE(*s))
10424 if (keyword(w, s - w, 0))
10427 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10428 if (gv && GvCVu(gv))
10430 Perl_croak(aTHX_ "No comma allowed after %s", what);
10435 /* Either returns sv, or mortalizes sv and returns a new SV*.
10436 Best used as sv=new_constant(..., sv, ...).
10437 If s, pv are NULL, calls subroutine with one argument,
10438 and type is used with error messages only. */
10441 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10445 HV * const table = GvHV(PL_hintgv); /* ^H */
10449 const char *why1 = "", *why2 = "", *why3 = "";
10451 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10454 why2 = (const char *)
10455 (strEQ(key,"charnames")
10456 ? "(possibly a missing \"use charnames ...\")"
10458 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10459 (type ? type: "undef"), why2);
10461 /* This is convoluted and evil ("goto considered harmful")
10462 * but I do not understand the intricacies of all the different
10463 * failure modes of %^H in here. The goal here is to make
10464 * the most probable error message user-friendly. --jhi */
10469 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10470 (type ? type: "undef"), why1, why2, why3);
10472 yyerror(SvPVX_const(msg));
10476 cvp = hv_fetch(table, key, strlen(key), FALSE);
10477 if (!cvp || !SvOK(*cvp)) {
10480 why3 = "} is not defined";
10483 sv_2mortal(sv); /* Parent created it permanently */
10486 pv = sv_2mortal(newSVpvn(s, len));
10488 typesv = sv_2mortal(newSVpv(type, 0));
10490 typesv = &PL_sv_undef;
10492 PUSHSTACKi(PERLSI_OVERLOAD);
10504 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10508 /* Check the eval first */
10509 if (!PL_in_eval && SvTRUE(ERRSV)) {
10510 sv_catpvs(ERRSV, "Propagated");
10511 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10513 res = SvREFCNT_inc_simple(sv);
10517 SvREFCNT_inc_simple_void(res);
10526 why1 = "Call to &{$^H{";
10528 why3 = "}} did not return a defined value";
10536 /* Returns a NUL terminated string, with the length of the string written to
10540 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10543 register char *d = dest;
10544 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10547 Perl_croak(aTHX_ ident_too_long);
10548 if (isALNUM(*s)) /* UTF handled below */
10550 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10555 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10559 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10560 char *t = s + UTF8SKIP(s);
10562 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10566 Perl_croak(aTHX_ ident_too_long);
10567 Copy(s, d, len, char);
10580 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10583 char *bracket = NULL;
10585 register char *d = dest;
10586 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10591 while (isDIGIT(*s)) {
10593 Perl_croak(aTHX_ ident_too_long);
10600 Perl_croak(aTHX_ ident_too_long);
10601 if (isALNUM(*s)) /* UTF handled below */
10603 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10608 else if (*s == ':' && s[1] == ':') {
10612 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10613 char *t = s + UTF8SKIP(s);
10614 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10616 if (d + (t - s) > e)
10617 Perl_croak(aTHX_ ident_too_long);
10618 Copy(s, d, t - s, char);
10629 if (PL_lex_state != LEX_NORMAL)
10630 PL_lex_state = LEX_INTERPENDMAYBE;
10633 if (*s == '$' && s[1] &&
10634 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10647 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10652 if (isSPACE(s[-1])) {
10654 const char ch = *s++;
10655 if (!SPACE_OR_TAB(ch)) {
10661 if (isIDFIRST_lazy_if(d,UTF)) {
10665 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10666 end += UTF8SKIP(end);
10667 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10668 end += UTF8SKIP(end);
10670 Copy(s, d, end - s, char);
10675 while ((isALNUM(*s) || *s == ':') && d < e)
10678 Perl_croak(aTHX_ ident_too_long);
10681 while (s < send && SPACE_OR_TAB(*s))
10683 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10684 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10685 const char * const brack =
10687 ((*s == '[') ? "[...]" : "{...}");
10688 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10689 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10690 funny, dest, brack, funny, dest, brack);
10693 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10697 /* Handle extended ${^Foo} variables
10698 * 1999-02-27 mjd-perl-patch@plover.com */
10699 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10703 while (isALNUM(*s) && d < e) {
10707 Perl_croak(aTHX_ ident_too_long);
10712 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10713 PL_lex_state = LEX_INTERPEND;
10716 if (PL_lex_state == LEX_NORMAL) {
10717 if (ckWARN(WARN_AMBIGUOUS) &&
10718 (keyword(dest, d - dest, 0) || get_cv(dest, FALSE)))
10722 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10723 "Ambiguous use of %c{%s} resolved to %c%s",
10724 funny, dest, funny, dest);
10729 s = bracket; /* let the parser handle it */
10733 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10734 PL_lex_state = LEX_INTERPEND;
10739 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10741 PERL_UNUSED_CONTEXT;
10744 else if (ch == 'g')
10745 *pmfl |= PMf_GLOBAL;
10746 else if (ch == 'c')
10747 *pmfl |= PMf_CONTINUE;
10748 else if (ch == 'o')
10750 else if (ch == 'm')
10751 *pmfl |= PMf_MULTILINE;
10752 else if (ch == 's')
10753 *pmfl |= PMf_SINGLELINE;
10754 else if (ch == 'x')
10755 *pmfl |= PMf_EXTENDED;
10759 S_scan_pat(pTHX_ char *start, I32 type)
10763 char *s = scan_str(start,!!PL_madskills,FALSE);
10764 const char * const valid_flags =
10765 (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
10772 const char * const delimiter = skipspace(start);
10776 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10777 : "Search pattern not terminated" ));
10780 pm = (PMOP*)newPMOP(type, 0);
10781 if (PL_multi_open == '?')
10782 pm->op_pmflags |= PMf_ONCE;
10786 while (*s && strchr(valid_flags, *s))
10787 pmflag(&pm->op_pmflags,*s++);
10789 if (PL_madskills && modstart != s) {
10790 SV* tmptoken = newSVpvn(modstart, s - modstart);
10791 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10794 /* issue a warning if /c is specified,but /g is not */
10795 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10796 && ckWARN(WARN_REGEXP))
10798 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
10801 pm->op_pmpermflags = pm->op_pmflags;
10803 PL_lex_op = (OP*)pm;
10804 yylval.ival = OP_MATCH;
10809 S_scan_subst(pTHX_ char *start)
10820 yylval.ival = OP_NULL;
10822 s = scan_str(start,!!PL_madskills,FALSE);
10825 Perl_croak(aTHX_ "Substitution pattern not terminated");
10827 if (s[-1] == PL_multi_open)
10830 if (PL_madskills) {
10831 CURMAD('q', PL_thisopen);
10832 CURMAD('_', PL_thiswhite);
10833 CURMAD('E', PL_thisstuff);
10834 CURMAD('Q', PL_thisclose);
10835 PL_realtokenstart = s - SvPVX(PL_linestr);
10839 first_start = PL_multi_start;
10840 s = scan_str(s,!!PL_madskills,FALSE);
10842 if (PL_lex_stuff) {
10843 SvREFCNT_dec(PL_lex_stuff);
10844 PL_lex_stuff = NULL;
10846 Perl_croak(aTHX_ "Substitution replacement not terminated");
10848 PL_multi_start = first_start; /* so whole substitution is taken together */
10850 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10853 if (PL_madskills) {
10854 CURMAD('z', PL_thisopen);
10855 CURMAD('R', PL_thisstuff);
10856 CURMAD('Z', PL_thisclose);
10866 else if (strchr("iogcmsx", *s))
10867 pmflag(&pm->op_pmflags,*s++);
10873 if (PL_madskills) {
10875 curmad('m', newSVpvn(modstart, s - modstart));
10876 append_madprops(PL_thismad, (OP*)pm, 0);
10880 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10881 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10885 SV * const repl = newSVpvs("");
10887 PL_sublex_info.super_bufptr = s;
10888 PL_sublex_info.super_bufend = PL_bufend;
10890 pm->op_pmflags |= PMf_EVAL;
10892 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10893 sv_catpvs(repl, "{");
10894 sv_catsv(repl, PL_lex_repl);
10895 if (strchr(SvPVX(PL_lex_repl), '#'))
10896 sv_catpvs(repl, "\n");
10897 sv_catpvs(repl, "}");
10899 SvREFCNT_dec(PL_lex_repl);
10900 PL_lex_repl = repl;
10903 pm->op_pmpermflags = pm->op_pmflags;
10904 PL_lex_op = (OP*)pm;
10905 yylval.ival = OP_SUBST;
10910 S_scan_trans(pTHX_ char *start)
10923 yylval.ival = OP_NULL;
10925 s = scan_str(start,!!PL_madskills,FALSE);
10927 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10929 if (s[-1] == PL_multi_open)
10932 if (PL_madskills) {
10933 CURMAD('q', PL_thisopen);
10934 CURMAD('_', PL_thiswhite);
10935 CURMAD('E', PL_thisstuff);
10936 CURMAD('Q', PL_thisclose);
10937 PL_realtokenstart = s - SvPVX(PL_linestr);
10941 s = scan_str(s,!!PL_madskills,FALSE);
10943 if (PL_lex_stuff) {
10944 SvREFCNT_dec(PL_lex_stuff);
10945 PL_lex_stuff = NULL;
10947 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10949 if (PL_madskills) {
10950 CURMAD('z', PL_thisopen);
10951 CURMAD('R', PL_thisstuff);
10952 CURMAD('Z', PL_thisclose);
10955 complement = del = squash = 0;
10962 complement = OPpTRANS_COMPLEMENT;
10965 del = OPpTRANS_DELETE;
10968 squash = OPpTRANS_SQUASH;
10977 Newx(tbl, complement&&!del?258:256, short);
10978 o = newPVOP(OP_TRANS, 0, (char*)tbl);
10979 o->op_private &= ~OPpTRANS_ALL;
10980 o->op_private |= del|squash|complement|
10981 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
10982 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
10985 yylval.ival = OP_TRANS;
10988 if (PL_madskills) {
10990 curmad('m', newSVpvn(modstart, s - modstart));
10991 append_madprops(PL_thismad, o, 0);
11000 S_scan_heredoc(pTHX_ register char *s)
11004 I32 op_type = OP_SCALAR;
11008 const char *found_newline;
11012 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11014 I32 stuffstart = s - SvPVX(PL_linestr);
11017 PL_realtokenstart = -1;
11022 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11026 while (SPACE_OR_TAB(*peek))
11028 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11031 s = delimcpy(d, e, s, PL_bufend, term, &len);
11041 if (!isALNUM_lazy_if(s,UTF))
11042 deprecate_old("bare << to mean <<\"\"");
11043 for (; isALNUM_lazy_if(s,UTF); s++) {
11048 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11049 Perl_croak(aTHX_ "Delimiter for here document is too long");
11052 len = d - PL_tokenbuf;
11055 if (PL_madskills) {
11056 tstart = PL_tokenbuf + !outer;
11057 PL_thisclose = newSVpvn(tstart, len - !outer);
11058 tstart = SvPVX(PL_linestr) + stuffstart;
11059 PL_thisopen = newSVpvn(tstart, s - tstart);
11060 stuffstart = s - SvPVX(PL_linestr);
11063 #ifndef PERL_STRICT_CR
11064 d = strchr(s, '\r');
11066 char * const olds = s;
11068 while (s < PL_bufend) {
11074 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11083 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11090 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11091 herewas = newSVpvn(s,PL_bufend-s);
11095 herewas = newSVpvn(s-1,found_newline-s+1);
11098 herewas = newSVpvn(s,found_newline-s);
11102 if (PL_madskills) {
11103 tstart = SvPVX(PL_linestr) + stuffstart;
11105 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11107 PL_thisstuff = newSVpvn(tstart, s - tstart);
11110 s += SvCUR(herewas);
11113 stuffstart = s - SvPVX(PL_linestr);
11119 tmpstr = newSV(79);
11120 sv_upgrade(tmpstr, SVt_PVIV);
11121 if (term == '\'') {
11122 op_type = OP_CONST;
11123 SvIV_set(tmpstr, -1);
11125 else if (term == '`') {
11126 op_type = OP_BACKTICK;
11127 SvIV_set(tmpstr, '\\');
11131 PL_multi_start = CopLINE(PL_curcop);
11132 PL_multi_open = PL_multi_close = '<';
11133 term = *PL_tokenbuf;
11134 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11135 char * const bufptr = PL_sublex_info.super_bufptr;
11136 char * const bufend = PL_sublex_info.super_bufend;
11137 char * const olds = s - SvCUR(herewas);
11138 s = strchr(bufptr, '\n');
11142 while (s < bufend &&
11143 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11145 CopLINE_inc(PL_curcop);
11148 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11149 missingterm(PL_tokenbuf);
11151 sv_setpvn(herewas,bufptr,d-bufptr+1);
11152 sv_setpvn(tmpstr,d+1,s-d);
11154 sv_catpvn(herewas,s,bufend-s);
11155 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11162 while (s < PL_bufend &&
11163 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11165 CopLINE_inc(PL_curcop);
11167 if (s >= PL_bufend) {
11168 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11169 missingterm(PL_tokenbuf);
11171 sv_setpvn(tmpstr,d+1,s-d);
11173 if (PL_madskills) {
11175 sv_catpvn(PL_thisstuff, d + 1, s - d);
11177 PL_thisstuff = newSVpvn(d + 1, s - d);
11178 stuffstart = s - SvPVX(PL_linestr);
11182 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11184 sv_catpvn(herewas,s,PL_bufend-s);
11185 sv_setsv(PL_linestr,herewas);
11186 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11187 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11188 PL_last_lop = PL_last_uni = NULL;
11191 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11192 while (s >= PL_bufend) { /* multiple line string? */
11194 if (PL_madskills) {
11195 tstart = SvPVX(PL_linestr) + stuffstart;
11197 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11199 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11203 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11204 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11205 missingterm(PL_tokenbuf);
11208 stuffstart = s - SvPVX(PL_linestr);
11210 CopLINE_inc(PL_curcop);
11211 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11212 PL_last_lop = PL_last_uni = NULL;
11213 #ifndef PERL_STRICT_CR
11214 if (PL_bufend - PL_linestart >= 2) {
11215 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11216 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11218 PL_bufend[-2] = '\n';
11220 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11222 else if (PL_bufend[-1] == '\r')
11223 PL_bufend[-1] = '\n';
11225 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11226 PL_bufend[-1] = '\n';
11228 if (PERLDB_LINE && PL_curstash != PL_debstash)
11229 update_debugger_info_sv(PL_linestr);
11230 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11231 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11232 *(SvPVX(PL_linestr) + off ) = ' ';
11233 sv_catsv(PL_linestr,herewas);
11234 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11235 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11239 sv_catsv(tmpstr,PL_linestr);
11244 PL_multi_end = CopLINE(PL_curcop);
11245 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11246 SvPV_shrink_to_cur(tmpstr);
11248 SvREFCNT_dec(herewas);
11250 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11252 else if (PL_encoding)
11253 sv_recode_to_utf8(tmpstr, PL_encoding);
11255 PL_lex_stuff = tmpstr;
11256 yylval.ival = op_type;
11260 /* scan_inputsymbol
11261 takes: current position in input buffer
11262 returns: new position in input buffer
11263 side-effects: yylval and lex_op are set.
11268 <FH> read from filehandle
11269 <pkg::FH> read from package qualified filehandle
11270 <pkg'FH> read from package qualified filehandle
11271 <$fh> read from filehandle in $fh
11272 <*.h> filename glob
11277 S_scan_inputsymbol(pTHX_ char *start)
11280 register char *s = start; /* current position in buffer */
11284 char *d = PL_tokenbuf; /* start of temp holding space */
11285 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11287 end = strchr(s, '\n');
11290 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11292 /* die if we didn't have space for the contents of the <>,
11293 or if it didn't end, or if we see a newline
11296 if (len >= (I32)sizeof PL_tokenbuf)
11297 Perl_croak(aTHX_ "Excessively long <> operator");
11299 Perl_croak(aTHX_ "Unterminated <> operator");
11304 Remember, only scalar variables are interpreted as filehandles by
11305 this code. Anything more complex (e.g., <$fh{$num}>) will be
11306 treated as a glob() call.
11307 This code makes use of the fact that except for the $ at the front,
11308 a scalar variable and a filehandle look the same.
11310 if (*d == '$' && d[1]) d++;
11312 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11313 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11316 /* If we've tried to read what we allow filehandles to look like, and
11317 there's still text left, then it must be a glob() and not a getline.
11318 Use scan_str to pull out the stuff between the <> and treat it
11319 as nothing more than a string.
11322 if (d - PL_tokenbuf != len) {
11323 yylval.ival = OP_GLOB;
11325 s = scan_str(start,!!PL_madskills,FALSE);
11327 Perl_croak(aTHX_ "Glob not terminated");
11331 bool readline_overriden = FALSE;
11334 /* we're in a filehandle read situation */
11337 /* turn <> into <ARGV> */
11339 Copy("ARGV",d,5,char);
11341 /* Check whether readline() is overriden */
11342 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11344 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11346 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11347 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
11348 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11349 readline_overriden = TRUE;
11351 /* if <$fh>, create the ops to turn the variable into a
11355 /* try to find it in the pad for this block, otherwise find
11356 add symbol table ops
11358 const PADOFFSET tmp = pad_findmy(d);
11359 if (tmp != NOT_IN_PAD) {
11360 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11361 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11362 HEK * const stashname = HvNAME_HEK(stash);
11363 SV * const sym = sv_2mortal(newSVhek(stashname));
11364 sv_catpvs(sym, "::");
11365 sv_catpv(sym, d+1);
11370 OP * const o = newOP(OP_PADSV, 0);
11372 PL_lex_op = readline_overriden
11373 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11374 append_elem(OP_LIST, o,
11375 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11376 : (OP*)newUNOP(OP_READLINE, 0, o);
11385 ? (GV_ADDMULTI | GV_ADDINEVAL)
11388 PL_lex_op = readline_overriden
11389 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11390 append_elem(OP_LIST,
11391 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11392 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11393 : (OP*)newUNOP(OP_READLINE, 0,
11394 newUNOP(OP_RV2SV, 0,
11395 newGVOP(OP_GV, 0, gv)));
11397 if (!readline_overriden)
11398 PL_lex_op->op_flags |= OPf_SPECIAL;
11399 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11400 yylval.ival = OP_NULL;
11403 /* If it's none of the above, it must be a literal filehandle
11404 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11406 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11407 PL_lex_op = readline_overriden
11408 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11409 append_elem(OP_LIST,
11410 newGVOP(OP_GV, 0, gv),
11411 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11412 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11413 yylval.ival = OP_NULL;
11422 takes: start position in buffer
11423 keep_quoted preserve \ on the embedded delimiter(s)
11424 keep_delims preserve the delimiters around the string
11425 returns: position to continue reading from buffer
11426 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11427 updates the read buffer.
11429 This subroutine pulls a string out of the input. It is called for:
11430 q single quotes q(literal text)
11431 ' single quotes 'literal text'
11432 qq double quotes qq(interpolate $here please)
11433 " double quotes "interpolate $here please"
11434 qx backticks qx(/bin/ls -l)
11435 ` backticks `/bin/ls -l`
11436 qw quote words @EXPORT_OK = qw( func() $spam )
11437 m// regexp match m/this/
11438 s/// regexp substitute s/this/that/
11439 tr/// string transliterate tr/this/that/
11440 y/// string transliterate y/this/that/
11441 ($*@) sub prototypes sub foo ($)
11442 (stuff) sub attr parameters sub foo : attr(stuff)
11443 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11445 In most of these cases (all but <>, patterns and transliterate)
11446 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11447 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11448 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11451 It skips whitespace before the string starts, and treats the first
11452 character as the delimiter. If the delimiter is one of ([{< then
11453 the corresponding "close" character )]}> is used as the closing
11454 delimiter. It allows quoting of delimiters, and if the string has
11455 balanced delimiters ([{<>}]) it allows nesting.
11457 On success, the SV with the resulting string is put into lex_stuff or,
11458 if that is already non-NULL, into lex_repl. The second case occurs only
11459 when parsing the RHS of the special constructs s/// and tr/// (y///).
11460 For convenience, the terminating delimiter character is stuffed into
11465 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11468 SV *sv; /* scalar value: string */
11469 const char *tmps; /* temp string, used for delimiter matching */
11470 register char *s = start; /* current position in the buffer */
11471 register char term; /* terminating character */
11472 register char *to; /* current position in the sv's data */
11473 I32 brackets = 1; /* bracket nesting level */
11474 bool has_utf8 = FALSE; /* is there any utf8 content? */
11475 I32 termcode; /* terminating char. code */
11476 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11477 STRLEN termlen; /* length of terminating string */
11478 int last_off = 0; /* last position for nesting bracket */
11484 /* skip space before the delimiter */
11490 if (PL_realtokenstart >= 0) {
11491 stuffstart = PL_realtokenstart;
11492 PL_realtokenstart = -1;
11495 stuffstart = start - SvPVX(PL_linestr);
11497 /* mark where we are, in case we need to report errors */
11500 /* after skipping whitespace, the next character is the terminator */
11503 termcode = termstr[0] = term;
11507 termcode = utf8_to_uvchr((U8*)s, &termlen);
11508 Copy(s, termstr, termlen, U8);
11509 if (!UTF8_IS_INVARIANT(term))
11513 /* mark where we are */
11514 PL_multi_start = CopLINE(PL_curcop);
11515 PL_multi_open = term;
11517 /* find corresponding closing delimiter */
11518 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11519 termcode = termstr[0] = term = tmps[5];
11521 PL_multi_close = term;
11523 /* create a new SV to hold the contents. 79 is the SV's initial length.
11524 What a random number. */
11526 sv_upgrade(sv, SVt_PVIV);
11527 SvIV_set(sv, termcode);
11528 (void)SvPOK_only(sv); /* validate pointer */
11530 /* move past delimiter and try to read a complete string */
11532 sv_catpvn(sv, s, termlen);
11535 tstart = SvPVX(PL_linestr) + stuffstart;
11536 if (!PL_thisopen && !keep_delims) {
11537 PL_thisopen = newSVpvn(tstart, s - tstart);
11538 stuffstart = s - SvPVX(PL_linestr);
11542 if (PL_encoding && !UTF) {
11546 int offset = s - SvPVX_const(PL_linestr);
11547 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11548 &offset, (char*)termstr, termlen);
11549 const char * const ns = SvPVX_const(PL_linestr) + offset;
11550 char * const svlast = SvEND(sv) - 1;
11552 for (; s < ns; s++) {
11553 if (*s == '\n' && !PL_rsfp)
11554 CopLINE_inc(PL_curcop);
11557 goto read_more_line;
11559 /* handle quoted delimiters */
11560 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11562 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11564 if ((svlast-1 - t) % 2) {
11565 if (!keep_quoted) {
11566 *(svlast-1) = term;
11568 SvCUR_set(sv, SvCUR(sv) - 1);
11573 if (PL_multi_open == PL_multi_close) {
11579 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11580 /* At here, all closes are "was quoted" one,
11581 so we don't check PL_multi_close. */
11583 if (!keep_quoted && *(t+1) == PL_multi_open)
11588 else if (*t == PL_multi_open)
11596 SvCUR_set(sv, w - SvPVX_const(sv));
11598 last_off = w - SvPVX(sv);
11599 if (--brackets <= 0)
11604 if (!keep_delims) {
11605 SvCUR_set(sv, SvCUR(sv) - 1);
11611 /* extend sv if need be */
11612 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11613 /* set 'to' to the next character in the sv's string */
11614 to = SvPVX(sv)+SvCUR(sv);
11616 /* if open delimiter is the close delimiter read unbridle */
11617 if (PL_multi_open == PL_multi_close) {
11618 for (; s < PL_bufend; s++,to++) {
11619 /* embedded newlines increment the current line number */
11620 if (*s == '\n' && !PL_rsfp)
11621 CopLINE_inc(PL_curcop);
11622 /* handle quoted delimiters */
11623 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11624 if (!keep_quoted && s[1] == term)
11626 /* any other quotes are simply copied straight through */
11630 /* terminate when run out of buffer (the for() condition), or
11631 have found the terminator */
11632 else if (*s == term) {
11635 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11638 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11644 /* if the terminator isn't the same as the start character (e.g.,
11645 matched brackets), we have to allow more in the quoting, and
11646 be prepared for nested brackets.
11649 /* read until we run out of string, or we find the terminator */
11650 for (; s < PL_bufend; s++,to++) {
11651 /* embedded newlines increment the line count */
11652 if (*s == '\n' && !PL_rsfp)
11653 CopLINE_inc(PL_curcop);
11654 /* backslashes can escape the open or closing characters */
11655 if (*s == '\\' && s+1 < PL_bufend) {
11656 if (!keep_quoted &&
11657 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11662 /* allow nested opens and closes */
11663 else if (*s == PL_multi_close && --brackets <= 0)
11665 else if (*s == PL_multi_open)
11667 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11672 /* terminate the copied string and update the sv's end-of-string */
11674 SvCUR_set(sv, to - SvPVX_const(sv));
11677 * this next chunk reads more into the buffer if we're not done yet
11681 break; /* handle case where we are done yet :-) */
11683 #ifndef PERL_STRICT_CR
11684 if (to - SvPVX_const(sv) >= 2) {
11685 if ((to[-2] == '\r' && to[-1] == '\n') ||
11686 (to[-2] == '\n' && to[-1] == '\r'))
11690 SvCUR_set(sv, to - SvPVX_const(sv));
11692 else if (to[-1] == '\r')
11695 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11700 /* if we're out of file, or a read fails, bail and reset the current
11701 line marker so we can report where the unterminated string began
11704 if (PL_madskills) {
11705 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11707 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11709 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11713 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11715 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11721 /* we read a line, so increment our line counter */
11722 CopLINE_inc(PL_curcop);
11724 /* update debugger info */
11725 if (PERLDB_LINE && PL_curstash != PL_debstash)
11726 update_debugger_info_sv(PL_linestr);
11728 /* having changed the buffer, we must update PL_bufend */
11729 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11730 PL_last_lop = PL_last_uni = NULL;
11733 /* at this point, we have successfully read the delimited string */
11735 if (!PL_encoding || UTF) {
11737 if (PL_madskills) {
11738 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11739 const int len = s - tstart;
11741 sv_catpvn(PL_thisstuff, tstart, len);
11743 PL_thisstuff = newSVpvn(tstart, len);
11744 if (!PL_thisclose && !keep_delims)
11745 PL_thisclose = newSVpvn(s,termlen);
11750 sv_catpvn(sv, s, termlen);
11755 if (PL_madskills) {
11756 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11757 const int len = s - tstart - termlen;
11759 sv_catpvn(PL_thisstuff, tstart, len);
11761 PL_thisstuff = newSVpvn(tstart, len);
11762 if (!PL_thisclose && !keep_delims)
11763 PL_thisclose = newSVpvn(s - termlen,termlen);
11767 if (has_utf8 || PL_encoding)
11770 PL_multi_end = CopLINE(PL_curcop);
11772 /* if we allocated too much space, give some back */
11773 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11774 SvLEN_set(sv, SvCUR(sv) + 1);
11775 SvPV_renew(sv, SvLEN(sv));
11778 /* decide whether this is the first or second quoted string we've read
11791 takes: pointer to position in buffer
11792 returns: pointer to new position in buffer
11793 side-effects: builds ops for the constant in yylval.op
11795 Read a number in any of the formats that Perl accepts:
11797 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11798 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11801 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11803 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11806 If it reads a number without a decimal point or an exponent, it will
11807 try converting the number to an integer and see if it can do so
11808 without loss of precision.
11812 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11815 register const char *s = start; /* current position in buffer */
11816 register char *d; /* destination in temp buffer */
11817 register char *e; /* end of temp buffer */
11818 NV nv; /* number read, as a double */
11819 SV *sv = NULL; /* place to put the converted number */
11820 bool floatit; /* boolean: int or float? */
11821 const char *lastub = NULL; /* position of last underbar */
11822 static char const number_too_long[] = "Number too long";
11824 /* We use the first character to decide what type of number this is */
11828 Perl_croak(aTHX_ "panic: scan_num");
11830 /* if it starts with a 0, it could be an octal number, a decimal in
11831 0.13 disguise, or a hexadecimal number, or a binary number. */
11835 u holds the "number so far"
11836 shift the power of 2 of the base
11837 (hex == 4, octal == 3, binary == 1)
11838 overflowed was the number more than we can hold?
11840 Shift is used when we add a digit. It also serves as an "are
11841 we in octal/hex/binary?" indicator to disallow hex characters
11842 when in octal mode.
11847 bool overflowed = FALSE;
11848 bool just_zero = TRUE; /* just plain 0 or binary number? */
11849 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11850 static const char* const bases[5] =
11851 { "", "binary", "", "octal", "hexadecimal" };
11852 static const char* const Bases[5] =
11853 { "", "Binary", "", "Octal", "Hexadecimal" };
11854 static const char* const maxima[5] =
11856 "0b11111111111111111111111111111111",
11860 const char *base, *Base, *max;
11862 /* check for hex */
11867 } else if (s[1] == 'b') {
11872 /* check for a decimal in disguise */
11873 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11875 /* so it must be octal */
11882 if (ckWARN(WARN_SYNTAX))
11883 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11884 "Misplaced _ in number");
11888 base = bases[shift];
11889 Base = Bases[shift];
11890 max = maxima[shift];
11892 /* read the rest of the number */
11894 /* x is used in the overflow test,
11895 b is the digit we're adding on. */
11900 /* if we don't mention it, we're done */
11904 /* _ are ignored -- but warned about if consecutive */
11906 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11907 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11908 "Misplaced _ in number");
11912 /* 8 and 9 are not octal */
11913 case '8': case '9':
11915 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11919 case '2': case '3': case '4':
11920 case '5': case '6': case '7':
11922 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11925 case '0': case '1':
11926 b = *s++ & 15; /* ASCII digit -> value of digit */
11930 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11931 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11932 /* make sure they said 0x */
11935 b = (*s++ & 7) + 9;
11937 /* Prepare to put the digit we have onto the end
11938 of the number so far. We check for overflows.
11944 x = u << shift; /* make room for the digit */
11946 if ((x >> shift) != u
11947 && !(PL_hints & HINT_NEW_BINARY)) {
11950 if (ckWARN_d(WARN_OVERFLOW))
11951 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11952 "Integer overflow in %s number",
11955 u = x | b; /* add the digit to the end */
11958 n *= nvshift[shift];
11959 /* If an NV has not enough bits in its
11960 * mantissa to represent an UV this summing of
11961 * small low-order numbers is a waste of time
11962 * (because the NV cannot preserve the
11963 * low-order bits anyway): we could just
11964 * remember when did we overflow and in the
11965 * end just multiply n by the right
11973 /* if we get here, we had success: make a scalar value from
11978 /* final misplaced underbar check */
11979 if (s[-1] == '_') {
11980 if (ckWARN(WARN_SYNTAX))
11981 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11986 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
11987 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
11988 "%s number > %s non-portable",
11994 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
11995 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
11996 "%s number > %s non-portable",
12001 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12002 sv = new_constant(start, s - start, "integer",
12004 else if (PL_hints & HINT_NEW_BINARY)
12005 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12010 handle decimal numbers.
12011 we're also sent here when we read a 0 as the first digit
12013 case '1': case '2': case '3': case '4': case '5':
12014 case '6': case '7': case '8': case '9': case '.':
12017 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12020 /* read next group of digits and _ and copy into d */
12021 while (isDIGIT(*s) || *s == '_') {
12022 /* skip underscores, checking for misplaced ones
12026 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12027 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12028 "Misplaced _ in number");
12032 /* check for end of fixed-length buffer */
12034 Perl_croak(aTHX_ number_too_long);
12035 /* if we're ok, copy the character */
12040 /* final misplaced underbar check */
12041 if (lastub && s == lastub + 1) {
12042 if (ckWARN(WARN_SYNTAX))
12043 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12046 /* read a decimal portion if there is one. avoid
12047 3..5 being interpreted as the number 3. followed
12050 if (*s == '.' && s[1] != '.') {
12055 if (ckWARN(WARN_SYNTAX))
12056 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12057 "Misplaced _ in number");
12061 /* copy, ignoring underbars, until we run out of digits.
12063 for (; isDIGIT(*s) || *s == '_'; s++) {
12064 /* fixed length buffer check */
12066 Perl_croak(aTHX_ number_too_long);
12068 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12069 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12070 "Misplaced _ in number");
12076 /* fractional part ending in underbar? */
12077 if (s[-1] == '_') {
12078 if (ckWARN(WARN_SYNTAX))
12079 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12080 "Misplaced _ in number");
12082 if (*s == '.' && isDIGIT(s[1])) {
12083 /* oops, it's really a v-string, but without the "v" */
12089 /* read exponent part, if present */
12090 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12094 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12095 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12097 /* stray preinitial _ */
12099 if (ckWARN(WARN_SYNTAX))
12100 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12101 "Misplaced _ in number");
12105 /* allow positive or negative exponent */
12106 if (*s == '+' || *s == '-')
12109 /* stray initial _ */
12111 if (ckWARN(WARN_SYNTAX))
12112 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12113 "Misplaced _ in number");
12117 /* read digits of exponent */
12118 while (isDIGIT(*s) || *s == '_') {
12121 Perl_croak(aTHX_ number_too_long);
12125 if (((lastub && s == lastub + 1) ||
12126 (!isDIGIT(s[1]) && s[1] != '_'))
12127 && ckWARN(WARN_SYNTAX))
12128 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12129 "Misplaced _ in number");
12136 /* make an sv from the string */
12140 We try to do an integer conversion first if no characters
12141 indicating "float" have been found.
12146 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12148 if (flags == IS_NUMBER_IN_UV) {
12150 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12153 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12154 if (uv <= (UV) IV_MIN)
12155 sv_setiv(sv, -(IV)uv);
12162 /* terminate the string */
12164 nv = Atof(PL_tokenbuf);
12168 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12169 (PL_hints & HINT_NEW_INTEGER) )
12170 sv = new_constant(PL_tokenbuf,
12173 (floatit ? "float" : "integer"),
12177 /* if it starts with a v, it could be a v-string */
12180 sv = newSV(5); /* preallocate storage space */
12181 s = scan_vstring(s,sv);
12185 /* make the op for the constant and return */
12188 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12190 lvalp->opval = NULL;
12196 S_scan_formline(pTHX_ register char *s)
12199 register char *eol;
12201 SV * const stuff = newSVpvs("");
12202 bool needargs = FALSE;
12203 bool eofmt = FALSE;
12205 char *tokenstart = s;
12208 if (PL_madskills) {
12209 savewhite = PL_thiswhite;
12214 while (!needargs) {
12217 #ifdef PERL_STRICT_CR
12218 while (SPACE_OR_TAB(*t))
12221 while (SPACE_OR_TAB(*t) || *t == '\r')
12224 if (*t == '\n' || t == PL_bufend) {
12229 if (PL_in_eval && !PL_rsfp) {
12230 eol = (char *) memchr(s,'\n',PL_bufend-s);
12235 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12237 for (t = s; t < eol; t++) {
12238 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12240 goto enough; /* ~~ must be first line in formline */
12242 if (*t == '@' || *t == '^')
12246 sv_catpvn(stuff, s, eol-s);
12247 #ifndef PERL_STRICT_CR
12248 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12249 char *end = SvPVX(stuff) + SvCUR(stuff);
12252 SvCUR_set(stuff, SvCUR(stuff) - 1);
12262 if (PL_madskills) {
12264 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12266 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12269 s = filter_gets(PL_linestr, PL_rsfp, 0);
12271 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12273 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12275 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12276 PL_last_lop = PL_last_uni = NULL;
12285 if (SvCUR(stuff)) {
12288 PL_lex_state = LEX_NORMAL;
12289 start_force(PL_curforce);
12290 NEXTVAL_NEXTTOKE.ival = 0;
12294 PL_lex_state = LEX_FORMLINE;
12296 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12298 else if (PL_encoding)
12299 sv_recode_to_utf8(stuff, PL_encoding);
12301 start_force(PL_curforce);
12302 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12304 start_force(PL_curforce);
12305 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12309 SvREFCNT_dec(stuff);
12311 PL_lex_formbrack = 0;
12315 if (PL_madskills) {
12317 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12319 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12320 PL_thiswhite = savewhite;
12332 PL_cshlen = strlen(PL_cshname);
12334 #if defined(USE_ITHREADS)
12335 PERL_UNUSED_CONTEXT;
12341 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12344 const I32 oldsavestack_ix = PL_savestack_ix;
12345 CV* const outsidecv = PL_compcv;
12348 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12350 SAVEI32(PL_subline);
12351 save_item(PL_subname);
12352 SAVESPTR(PL_compcv);
12354 PL_compcv = (CV*)newSV(0);
12355 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
12356 CvFLAGS(PL_compcv) |= flags;
12358 PL_subline = CopLINE(PL_curcop);
12359 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12360 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12361 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12363 return oldsavestack_ix;
12367 #pragma segment Perl_yylex
12370 Perl_yywarn(pTHX_ const char *s)
12373 PL_in_eval |= EVAL_WARNONLY;
12375 PL_in_eval &= ~EVAL_WARNONLY;
12380 Perl_yyerror(pTHX_ const char *s)
12383 const char *where = NULL;
12384 const char *context = NULL;
12387 int yychar = PL_parser->yychar;
12389 if (!yychar || (yychar == ';' && !PL_rsfp))
12391 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12392 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12393 PL_oldbufptr != PL_bufptr) {
12396 The code below is removed for NetWare because it abends/crashes on NetWare
12397 when the script has error such as not having the closing quotes like:
12398 if ($var eq "value)
12399 Checking of white spaces is anyway done in NetWare code.
12402 while (isSPACE(*PL_oldoldbufptr))
12405 context = PL_oldoldbufptr;
12406 contlen = PL_bufptr - PL_oldoldbufptr;
12408 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12409 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12412 The code below is removed for NetWare because it abends/crashes on NetWare
12413 when the script has error such as not having the closing quotes like:
12414 if ($var eq "value)
12415 Checking of white spaces is anyway done in NetWare code.
12418 while (isSPACE(*PL_oldbufptr))
12421 context = PL_oldbufptr;
12422 contlen = PL_bufptr - PL_oldbufptr;
12424 else if (yychar > 255)
12425 where = "next token ???";
12426 else if (yychar == -2) { /* YYEMPTY */
12427 if (PL_lex_state == LEX_NORMAL ||
12428 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12429 where = "at end of line";
12430 else if (PL_lex_inpat)
12431 where = "within pattern";
12433 where = "within string";
12436 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12438 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12439 else if (isPRINT_LC(yychar))
12440 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12442 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12443 where = SvPVX_const(where_sv);
12445 msg = sv_2mortal(newSVpv(s, 0));
12446 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12447 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12449 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12451 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12452 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12453 Perl_sv_catpvf(aTHX_ msg,
12454 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12455 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12458 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12459 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, (void*)msg);
12462 if (PL_error_count >= 10) {
12463 if (PL_in_eval && SvCUR(ERRSV))
12464 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12465 (void*)ERRSV, OutCopFILE(PL_curcop));
12467 Perl_croak(aTHX_ "%s has too many errors.\n",
12468 OutCopFILE(PL_curcop));
12471 PL_in_my_stash = NULL;
12475 #pragma segment Main
12479 S_swallow_bom(pTHX_ U8 *s)
12482 const STRLEN slen = SvCUR(PL_linestr);
12485 if (s[1] == 0xFE) {
12486 /* UTF-16 little-endian? (or UTF32-LE?) */
12487 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12488 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12489 #ifndef PERL_NO_UTF16_FILTER
12490 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12493 if (PL_bufend > (char*)s) {
12497 filter_add(utf16rev_textfilter, NULL);
12498 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12499 utf16_to_utf8_reversed(s, news,
12500 PL_bufend - (char*)s - 1,
12502 sv_setpvn(PL_linestr, (const char*)news, newlen);
12504 s = (U8*)SvPVX(PL_linestr);
12505 Copy(news, s, newlen, U8);
12509 SvUTF8_on(PL_linestr);
12510 s = (U8*)SvPVX(PL_linestr);
12512 /* FIXME - is this a general bug fix? */
12515 PL_bufend = SvPVX(PL_linestr) + newlen;
12518 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12523 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12524 #ifndef PERL_NO_UTF16_FILTER
12525 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12528 if (PL_bufend > (char *)s) {
12532 filter_add(utf16_textfilter, NULL);
12533 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12534 utf16_to_utf8(s, news,
12535 PL_bufend - (char*)s,
12537 sv_setpvn(PL_linestr, (const char*)news, newlen);
12539 SvUTF8_on(PL_linestr);
12540 s = (U8*)SvPVX(PL_linestr);
12541 PL_bufend = SvPVX(PL_linestr) + newlen;
12544 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12549 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12550 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12551 s += 3; /* UTF-8 */
12557 if (s[2] == 0xFE && s[3] == 0xFF) {
12558 /* UTF-32 big-endian */
12559 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12562 else if (s[2] == 0 && s[3] != 0) {
12565 * are a good indicator of UTF-16BE. */
12566 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12572 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12573 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12574 s += 4; /* UTF-8 */
12580 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12583 * are a good indicator of UTF-16LE. */
12584 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12593 * Restore a source filter.
12597 restore_rsfp(pTHX_ void *f)
12600 PerlIO * const fp = (PerlIO*)f;
12602 if (PL_rsfp == PerlIO_stdin())
12603 PerlIO_clearerr(PL_rsfp);
12604 else if (PL_rsfp && (PL_rsfp != fp))
12605 PerlIO_close(PL_rsfp);
12609 #ifndef PERL_NO_UTF16_FILTER
12611 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12614 const STRLEN old = SvCUR(sv);
12615 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12616 DEBUG_P(PerlIO_printf(Perl_debug_log,
12617 "utf16_textfilter(%p): %d %d (%d)\n",
12618 FPTR2DPTR(void *, utf16_textfilter),
12619 idx, maxlen, (int) count));
12623 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12624 Copy(SvPVX_const(sv), tmps, old, char);
12625 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12626 SvCUR(sv) - old, &newlen);
12627 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12629 DEBUG_P({sv_dump(sv);});
12634 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12637 const STRLEN old = SvCUR(sv);
12638 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12639 DEBUG_P(PerlIO_printf(Perl_debug_log,
12640 "utf16rev_textfilter(%p): %d %d (%d)\n",
12641 FPTR2DPTR(void *, utf16rev_textfilter),
12642 idx, maxlen, (int) count));
12646 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12647 Copy(SvPVX_const(sv), tmps, old, char);
12648 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12649 SvCUR(sv) - old, &newlen);
12650 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12652 DEBUG_P({ sv_dump(sv); });
12658 Returns a pointer to the next character after the parsed
12659 vstring, as well as updating the passed in sv.
12661 Function must be called like
12664 s = scan_vstring(s,sv);
12666 The sv should already be large enough to store the vstring
12667 passed in, for performance reasons.
12672 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
12675 const char *pos = s;
12676 const char *start = s;
12677 if (*pos == 'v') pos++; /* get past 'v' */
12678 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12680 if ( *pos != '.') {
12681 /* this may not be a v-string if followed by => */
12682 const char *next = pos;
12683 while (next < PL_bufend && isSPACE(*next))
12685 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
12686 /* return string not v-string */
12687 sv_setpvn(sv,(char *)s,pos-s);
12688 return (char *)pos;
12692 if (!isALPHA(*pos)) {
12693 U8 tmpbuf[UTF8_MAXBYTES+1];
12696 s++; /* get past 'v' */
12698 sv_setpvn(sv, "", 0);
12701 /* this is atoi() that tolerates underscores */
12704 const char *end = pos;
12706 while (--end >= s) {
12708 const UV orev = rev;
12709 rev += (*end - '0') * mult;
12711 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12712 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12713 "Integer overflow in decimal number");
12717 if (rev > 0x7FFFFFFF)
12718 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12720 /* Append native character for the rev point */
12721 tmpend = uvchr_to_utf8(tmpbuf, rev);
12722 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12723 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12725 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
12731 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12735 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12743 * c-indentation-style: bsd
12744 * c-basic-offset: 4
12745 * indent-tabs-mode: t
12748 * ex: set ts=8 sts=4 sw=4 noet: