3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 static const char ident_too_long[] = "Identifier too long";
32 static const char commaless_variable_list[] = "comma-less variable list";
34 static void restore_rsfp(pTHX_ void *f);
35 #ifndef PERL_NO_UTF16_FILTER
36 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
37 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
41 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
42 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
44 # define CURMAD(slot,sv)
45 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
48 #define XFAKEBRACK 128
51 #ifdef USE_UTF8_SCRIPTS
52 # define UTF (!IN_BYTES)
54 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
57 /* In variables named $^X, these are the legal values for X.
58 * 1999-02-27 mjd-perl-patch@plover.com */
59 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
61 /* On MacOS, respect nonbreaking spaces */
62 #ifdef MACOS_TRADITIONAL
63 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
65 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
68 /* LEX_* are values for PL_lex_state, the state of the lexer.
69 * They are arranged oddly so that the guard on the switch statement
70 * can get by with a single comparison (if the compiler is smart enough).
73 /* #define LEX_NOTPARSING 11 is done in perl.h. */
75 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
76 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
77 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
78 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
79 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
81 /* at end of code, eg "$x" followed by: */
82 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
83 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
85 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
86 string or after \E, $foo, etc */
87 #define LEX_INTERPCONST 2 /* NOT USED */
88 #define LEX_FORMLINE 1 /* expecting a format line */
89 #define LEX_KNOWNEXT 0 /* next token known; just return it */
93 static const char* const lex_state_names[] = {
112 #include "keywords.h"
114 /* CLINE is a macro that ensures PL_copline has a sane value */
119 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
122 # define SKIPSPACE0(s) skipspace0(s)
123 # define SKIPSPACE1(s) skipspace1(s)
124 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
125 # define PEEKSPACE(s) skipspace2(s,0)
127 # define SKIPSPACE0(s) skipspace(s)
128 # define SKIPSPACE1(s) skipspace(s)
129 # define SKIPSPACE2(s,tsv) skipspace(s)
130 # define PEEKSPACE(s) skipspace(s)
134 * Convenience functions to return different tokens and prime the
135 * lexer for the next token. They all take an argument.
137 * TOKEN : generic token (used for '(', DOLSHARP, etc)
138 * OPERATOR : generic operator
139 * AOPERATOR : assignment operator
140 * PREBLOCK : beginning the block after an if, while, foreach, ...
141 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
142 * PREREF : *EXPR where EXPR is not a simple identifier
143 * TERM : expression term
144 * LOOPX : loop exiting command (goto, last, dump, etc)
145 * FTST : file test operator
146 * FUN0 : zero-argument function
147 * FUN1 : not used, except for not, which isn't a UNIOP
148 * BOop : bitwise or or xor
150 * SHop : shift operator
151 * PWop : power operator
152 * PMop : pattern-matching operator
153 * Aop : addition-level operator
154 * Mop : multiplication-level operator
155 * Eop : equality-testing operator
156 * Rop : relational operator <= != gt
158 * Also see LOP and lop() below.
161 #ifdef DEBUGGING /* Serve -DT. */
162 # define REPORT(retval) tokereport((I32)retval)
164 # define REPORT(retval) (retval)
167 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
168 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
169 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
170 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
171 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
172 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
173 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
174 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
175 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
176 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
177 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
178 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
179 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
180 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
181 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
182 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
183 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
184 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
185 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
186 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
188 /* This bit of chicanery makes a unary function followed by
189 * a parenthesis into a function with one argument, highest precedence.
190 * The UNIDOR macro is for unary functions that can be followed by the //
191 * operator (such as C<shift // 0>).
193 #define UNI2(f,x) { \
197 PL_last_uni = PL_oldbufptr; \
198 PL_last_lop_op = f; \
200 return REPORT( (int)FUNC1 ); \
202 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
204 #define UNI(f) UNI2(f,XTERM)
205 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
207 #define UNIBRACK(f) { \
210 PL_last_uni = PL_oldbufptr; \
212 return REPORT( (int)FUNC1 ); \
214 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
217 /* grandfather return to old style */
218 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
222 /* how to interpret the yylval associated with the token */
226 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
232 static struct debug_tokens {
234 enum token_type type;
236 } const debug_tokens[] =
238 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
239 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
240 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
241 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
242 { ARROW, TOKENTYPE_NONE, "ARROW" },
243 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
244 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
245 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
246 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
247 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
248 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
249 { DO, TOKENTYPE_NONE, "DO" },
250 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
251 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
252 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
253 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
254 { ELSE, TOKENTYPE_NONE, "ELSE" },
255 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
256 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
257 { FOR, TOKENTYPE_IVAL, "FOR" },
258 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
259 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
260 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
261 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
262 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
263 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
264 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
265 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
266 { IF, TOKENTYPE_IVAL, "IF" },
267 { LABEL, TOKENTYPE_PVAL, "LABEL" },
268 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
269 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
270 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
271 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
272 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
273 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
274 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
275 { MY, TOKENTYPE_IVAL, "MY" },
276 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
277 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
278 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
279 { OROP, TOKENTYPE_IVAL, "OROP" },
280 { OROR, TOKENTYPE_NONE, "OROR" },
281 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
282 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
283 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
284 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
285 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
286 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
287 { PREINC, TOKENTYPE_NONE, "PREINC" },
288 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
289 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
290 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
291 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
292 { SUB, TOKENTYPE_NONE, "SUB" },
293 { THING, TOKENTYPE_OPVAL, "THING" },
294 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
295 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
296 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
297 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
298 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
299 { USE, TOKENTYPE_IVAL, "USE" },
300 { WHEN, TOKENTYPE_IVAL, "WHEN" },
301 { WHILE, TOKENTYPE_IVAL, "WHILE" },
302 { WORD, TOKENTYPE_OPVAL, "WORD" },
303 { 0, TOKENTYPE_NONE, NULL }
306 /* dump the returned token in rv, plus any optional arg in yylval */
309 S_tokereport(pTHX_ I32 rv)
313 const char *name = NULL;
314 enum token_type type = TOKENTYPE_NONE;
315 const struct debug_tokens *p;
316 SV* const report = newSVpvs("<== ");
318 for (p = debug_tokens; p->token; p++) {
319 if (p->token == (int)rv) {
326 Perl_sv_catpv(aTHX_ report, name);
327 else if ((char)rv > ' ' && (char)rv < '~')
328 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
330 sv_catpvs(report, "EOF");
332 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
335 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
338 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
340 case TOKENTYPE_OPNUM:
341 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
342 PL_op_name[yylval.ival]);
345 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
347 case TOKENTYPE_OPVAL:
349 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
350 PL_op_name[yylval.opval->op_type]);
351 if (yylval.opval->op_type == OP_CONST) {
352 Perl_sv_catpvf(aTHX_ report, " %s",
353 SvPEEK(cSVOPx_sv(yylval.opval)));
358 sv_catpvs(report, "(opval=null)");
361 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
367 /* print the buffer with suitable escapes */
370 S_printbuf(pTHX_ const char* fmt, const char* s)
372 SV* const tmp = newSVpvs("");
373 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
382 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
383 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
387 S_ao(pTHX_ int toketype)
390 if (*PL_bufptr == '=') {
392 if (toketype == ANDAND)
393 yylval.ival = OP_ANDASSIGN;
394 else if (toketype == OROR)
395 yylval.ival = OP_ORASSIGN;
396 else if (toketype == DORDOR)
397 yylval.ival = OP_DORASSIGN;
405 * When Perl expects an operator and finds something else, no_op
406 * prints the warning. It always prints "<something> found where
407 * operator expected. It prints "Missing semicolon on previous line?"
408 * if the surprise occurs at the start of the line. "do you need to
409 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
410 * where the compiler doesn't know if foo is a method call or a function.
411 * It prints "Missing operator before end of line" if there's nothing
412 * after the missing operator, or "... before <...>" if there is something
413 * after the missing operator.
417 S_no_op(pTHX_ const char *what, char *s)
420 char * const oldbp = PL_bufptr;
421 const bool is_first = (PL_oldbufptr == PL_linestart);
427 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
428 if (ckWARN_d(WARN_SYNTAX)) {
430 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
431 "\t(Missing semicolon on previous line?)\n");
432 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
434 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
436 if (t < PL_bufptr && isSPACE(*t))
437 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
438 "\t(Do you need to predeclare %.*s?)\n",
439 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
443 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
444 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
452 * Complain about missing quote/regexp/heredoc terminator.
453 * If it's called with NULL then it cauterizes the line buffer.
454 * If we're in a delimited string and the delimiter is a control
455 * character, it's reformatted into a two-char sequence like ^C.
460 S_missingterm(pTHX_ char *s)
466 char * const nl = strrchr(s,'\n');
472 iscntrl(PL_multi_close)
474 PL_multi_close < 32 || PL_multi_close == 127
478 tmpbuf[1] = (char)toCTRL(PL_multi_close);
483 *tmpbuf = (char)PL_multi_close;
487 q = strchr(s,'"') ? '\'' : '"';
488 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
491 #define FEATURE_IS_ENABLED(name) \
492 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
493 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
495 * S_feature_is_enabled
496 * Check whether the named feature is enabled.
499 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
502 HV * const hinthv = GvHV(PL_hintgv);
503 char he_name[32] = "feature_";
504 (void) my_strlcpy(&he_name[8], name, 24);
506 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
514 Perl_deprecate(pTHX_ const char *s)
516 if (ckWARN(WARN_DEPRECATED))
517 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
521 Perl_deprecate_old(pTHX_ const char *s)
523 /* This function should NOT be called for any new deprecated warnings */
524 /* Use Perl_deprecate instead */
526 /* It is here to maintain backward compatibility with the pre-5.8 */
527 /* warnings category hierarchy. The "deprecated" category used to */
528 /* live under the "syntax" category. It is now a top-level category */
529 /* in its own right. */
531 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
532 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
533 "Use of %s is deprecated", s);
537 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
538 * utf16-to-utf8-reversed.
541 #ifdef PERL_CR_FILTER
545 register const char *s = SvPVX_const(sv);
546 register const char * const e = s + SvCUR(sv);
547 /* outer loop optimized to do nothing if there are no CR-LFs */
549 if (*s++ == '\r' && *s == '\n') {
550 /* hit a CR-LF, need to copy the rest */
551 register char *d = s - 1;
554 if (*s == '\r' && s[1] == '\n')
565 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
567 const I32 count = FILTER_READ(idx+1, sv, maxlen);
568 if (count > 0 && !maxlen)
576 * Initialize variables. Uses the Perl save_stack to save its state (for
577 * recursive calls to the parser).
581 Perl_lex_start(pTHX_ SV *line)
588 /* create and initialise a parser */
590 Newx(parser, 1, yy_parser);
591 parser->old_parser = PL_parser;
594 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
595 parser->ps = parser->stack;
596 parser->stack_size = YYINITDEPTH;
598 parser->stack->state = 0;
599 parser->yyerrstatus = 0;
600 parser->yychar = YYEMPTY; /* Cause a token to be read. */
602 /* initialise lexer state */
604 SAVEI32(PL_lex_dojoin);
605 SAVEI32(PL_lex_brackets);
606 SAVEI32(PL_lex_casemods);
607 SAVEI32(PL_lex_starts);
608 SAVEI32(PL_lex_state);
609 SAVEVPTR(PL_lex_inpat);
610 SAVEI32(PL_lex_inwhat);
612 if (PL_lex_state == LEX_KNOWNEXT) {
613 I32 toke = PL_lasttoke;
614 while (--toke >= 0) {
615 SAVEI32(PL_nexttoke[toke].next_type);
616 SAVEVPTR(PL_nexttoke[toke].next_val);
618 SAVEVPTR(PL_nexttoke[toke].next_mad);
620 SAVEI32(PL_lasttoke);
622 SAVESPTR(PL_endwhite);
623 SAVESPTR(PL_thistoken);
624 SAVESPTR(PL_thiswhite);
625 SAVESPTR(PL_nextwhite);
626 SAVESPTR(PL_thisopen);
627 SAVESPTR(PL_thisclose);
628 SAVESPTR(PL_thisstuff);
629 SAVEVPTR(PL_thismad);
630 SAVEI32(PL_realtokenstart);
631 SAVEI32(PL_faketokens);
632 SAVESPTR(PL_skipwhite);
633 SAVEI32(PL_curforce);
635 if (PL_lex_state == LEX_KNOWNEXT) {
636 I32 toke = PL_nexttoke;
637 while (--toke >= 0) {
638 SAVEI32(PL_nexttype[toke]);
639 SAVEVPTR(PL_nextval[toke]);
641 SAVEI32(PL_nexttoke);
644 SAVECOPLINE(PL_curcop);
647 SAVEPPTR(PL_oldbufptr);
648 SAVEPPTR(PL_oldoldbufptr);
649 SAVEPPTR(PL_last_lop);
650 SAVEPPTR(PL_last_uni);
651 SAVEPPTR(PL_linestart);
652 SAVESPTR(PL_linestr);
653 SAVEGENERICPV(PL_lex_brackstack);
654 SAVEGENERICPV(PL_lex_casestack);
655 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
656 SAVESPTR(PL_lex_stuff);
657 SAVEI32(PL_lex_defer);
658 SAVEI32(PL_sublex_info.sub_inwhat);
659 SAVEI32(PL_sublex_info.super_state);
660 SAVEVPTR(PL_sublex_info.sub_op);
661 SAVEPPTR(PL_sublex_info.super_bufptr);
662 SAVEPPTR(PL_sublex_info.super_bufend);
663 SAVESPTR(PL_lex_repl);
665 SAVEINT(PL_lex_expect);
666 SAVEI32(PL_lex_formbrack);
668 SAVEI32(PL_multi_close);
669 SAVEI32(PL_multi_open);
670 SAVEI32(PL_multi_start);
671 SAVEI8(PL_pending_ident);
672 SAVEBOOL(PL_preambled);
674 PL_lex_state = LEX_NORMAL;
678 Newx(PL_lex_brackstack, 120, char);
679 Newx(PL_lex_casestack, 12, char);
681 *PL_lex_casestack = '\0';
692 PL_realtokenstart = 0;
704 PL_sublex_info.sub_inwhat = 0;
705 PL_sublex_info.super_state = 0;
706 PL_sublex_info.sub_op = NULL;
707 PL_sublex_info.super_bufptr = NULL;
708 PL_sublex_info.super_bufend = NULL;
710 PL_lex_formbrack = 0;
715 PL_pending_ident = '\0';
716 PL_preambled = FALSE;
719 s = SvPV_const(line, len);
724 PL_linestr = newSVpvs("\n;");
725 } else if (SvREADONLY(line) || s[len-1] != ';') {
726 PL_linestr = newSVsv(line);
728 sv_catpvs(PL_linestr, "\n;");
731 SvREFCNT_inc_simple_void_NN(line);
734 /* PL_linestr needs to survive until end of scope, not just the next
735 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
736 SAVEFREESV(PL_linestr);
737 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
738 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
739 PL_last_lop = PL_last_uni = NULL;
745 * Finalizer for lexing operations. Must be called when the parser is
746 * done with the lexer.
753 PL_doextract = FALSE;
758 * This subroutine has nothing to do with tilting, whether at windmills
759 * or pinball tables. Its name is short for "increment line". It
760 * increments the current line number in CopLINE(PL_curcop) and checks
761 * to see whether the line starts with a comment of the form
762 * # line 500 "foo.pm"
763 * If so, it sets the current line number and file to the values in the comment.
767 S_incline(pTHX_ char *s)
775 CopLINE_inc(PL_curcop);
778 while (SPACE_OR_TAB(*s))
780 if (strnEQ(s, "line", 4))
784 if (SPACE_OR_TAB(*s))
788 while (SPACE_OR_TAB(*s))
796 while (SPACE_OR_TAB(*s))
798 if (*s == '"' && (t = strchr(s+1, '"'))) {
808 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
810 if (*e != '\n' && *e != '\0')
811 return; /* false alarm */
817 const char * const cf = CopFILE(PL_curcop);
818 STRLEN tmplen = cf ? strlen(cf) : 0;
819 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
820 /* must copy *{"::_<(eval N)[oldfilename:L]"}
821 * to *{"::_<newfilename"} */
822 char smallbuf[256], smallbuf2[256];
823 char *tmpbuf, *tmpbuf2;
825 STRLEN tmplen2 = strlen(s);
826 if (tmplen + 3 < sizeof smallbuf)
829 Newx(tmpbuf, tmplen + 3, char);
830 if (tmplen2 + 3 < sizeof smallbuf2)
833 Newx(tmpbuf2, tmplen2 + 3, char);
834 tmpbuf[0] = tmpbuf2[0] = '_';
835 tmpbuf[1] = tmpbuf2[1] = '<';
836 memcpy(tmpbuf + 2, cf, ++tmplen);
837 memcpy(tmpbuf2 + 2, s, ++tmplen2);
839 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
841 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
843 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
844 /* adjust ${"::_<newfilename"} to store the new file name */
845 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
846 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
847 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
850 if (tmpbuf != smallbuf) Safefree(tmpbuf);
851 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
854 CopFILE_free(PL_curcop);
855 CopFILE_set(PL_curcop, s);
858 CopLINE_set(PL_curcop, atoi(n)-1);
862 /* skip space before PL_thistoken */
865 S_skipspace0(pTHX_ register char *s)
872 PL_thiswhite = newSVpvs("");
873 sv_catsv(PL_thiswhite, PL_skipwhite);
874 sv_free(PL_skipwhite);
877 PL_realtokenstart = s - SvPVX(PL_linestr);
881 /* skip space after PL_thistoken */
884 S_skipspace1(pTHX_ register char *s)
886 const char *start = s;
887 I32 startoff = start - SvPVX(PL_linestr);
892 start = SvPVX(PL_linestr) + startoff;
893 if (!PL_thistoken && PL_realtokenstart >= 0) {
894 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
895 PL_thistoken = newSVpvn(tstart, start - tstart);
897 PL_realtokenstart = -1;
900 PL_nextwhite = newSVpvs("");
901 sv_catsv(PL_nextwhite, PL_skipwhite);
902 sv_free(PL_skipwhite);
909 S_skipspace2(pTHX_ register char *s, SV **svp)
912 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
913 const I32 startoff = s - SvPVX(PL_linestr);
916 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
917 if (!PL_madskills || !svp)
919 start = SvPVX(PL_linestr) + startoff;
920 if (!PL_thistoken && PL_realtokenstart >= 0) {
921 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
922 PL_thistoken = newSVpvn(tstart, start - tstart);
923 PL_realtokenstart = -1;
928 sv_setsv(*svp, PL_skipwhite);
929 sv_free(PL_skipwhite);
938 S_update_debugger_info_pv(pTHX_ const char *buf, STRLEN len)
940 AV *av = CopFILEAVx(PL_curcop);
942 SV * const sv = newSV(0);
943 sv_upgrade(sv, SVt_PVMG);
944 sv_setpvn(sv, buf, len);
947 av_store(av, (I32)CopLINE(PL_curcop), sv);
952 S_update_debugger_info_sv(pTHX_ SV *orig_sv)
954 AV *av = CopFILEAVx(PL_curcop);
956 SV * const sv = newSV(0);
957 sv_upgrade(sv, SVt_PVMG);
958 sv_setsv(sv, orig_sv);
961 av_store(av, (I32)CopLINE(PL_curcop), sv);
967 * Called to gobble the appropriate amount and type of whitespace.
968 * Skips comments as well.
972 S_skipspace(pTHX_ register char *s)
977 int startoff = s - SvPVX(PL_linestr);
980 sv_free(PL_skipwhite);
985 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
986 while (s < PL_bufend && SPACE_OR_TAB(*s))
996 SSize_t oldprevlen, oldoldprevlen;
997 SSize_t oldloplen = 0, oldunilen = 0;
998 while (s < PL_bufend && isSPACE(*s)) {
999 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1004 if (s < PL_bufend && *s == '#') {
1005 while (s < PL_bufend && *s != '\n')
1007 if (s < PL_bufend) {
1009 if (PL_in_eval && !PL_rsfp) {
1016 /* only continue to recharge the buffer if we're at the end
1017 * of the buffer, we're not reading from a source filter, and
1018 * we're in normal lexing mode
1020 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1021 PL_lex_state == LEX_FORMLINE)
1028 /* try to recharge the buffer */
1030 curoff = s - SvPVX(PL_linestr);
1033 if ((s = filter_gets(PL_linestr, PL_rsfp,
1034 (prevlen = SvCUR(PL_linestr)))) == NULL)
1037 if (PL_madskills && curoff != startoff) {
1039 PL_skipwhite = newSVpvs("");
1040 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1044 /* mustn't throw out old stuff yet if madpropping */
1045 SvCUR(PL_linestr) = curoff;
1046 s = SvPVX(PL_linestr) + curoff;
1048 if (curoff && s[-1] == '\n')
1052 /* end of file. Add on the -p or -n magic */
1053 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1056 sv_catpv(PL_linestr,
1057 ";}continue{print or die qq(-p destination: $!\\n);}");
1059 sv_setpv(PL_linestr,
1060 ";}continue{print or die qq(-p destination: $!\\n);}");
1062 PL_minus_n = PL_minus_p = 0;
1064 else if (PL_minus_n) {
1066 sv_catpvn(PL_linestr, ";}", 2);
1068 sv_setpvn(PL_linestr, ";}", 2);
1074 sv_catpvn(PL_linestr,";", 1);
1076 sv_setpvn(PL_linestr,";", 1);
1079 /* reset variables for next time we lex */
1080 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1086 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1087 PL_last_lop = PL_last_uni = NULL;
1089 /* Close the filehandle. Could be from -P preprocessor,
1090 * STDIN, or a regular file. If we were reading code from
1091 * STDIN (because the commandline held no -e or filename)
1092 * then we don't close it, we reset it so the code can
1093 * read from STDIN too.
1096 if (PL_preprocess && !PL_in_eval)
1097 (void)PerlProc_pclose(PL_rsfp);
1098 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1099 PerlIO_clearerr(PL_rsfp);
1101 (void)PerlIO_close(PL_rsfp);
1106 /* not at end of file, so we only read another line */
1107 /* make corresponding updates to old pointers, for yyerror() */
1108 oldprevlen = PL_oldbufptr - PL_bufend;
1109 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1111 oldunilen = PL_last_uni - PL_bufend;
1113 oldloplen = PL_last_lop - PL_bufend;
1114 PL_linestart = PL_bufptr = s + prevlen;
1115 PL_bufend = s + SvCUR(PL_linestr);
1117 PL_oldbufptr = s + oldprevlen;
1118 PL_oldoldbufptr = s + oldoldprevlen;
1120 PL_last_uni = s + oldunilen;
1122 PL_last_lop = s + oldloplen;
1125 /* debugger active and we're not compiling the debugger code,
1126 * so store the line into the debugger's array of lines
1128 if (PERLDB_LINE && PL_curstash != PL_debstash)
1129 update_debugger_info_pv(PL_bufptr, PL_bufend - PL_bufptr);
1136 PL_skipwhite = newSVpvs("");
1137 curoff = s - SvPVX(PL_linestr);
1138 if (curoff - startoff)
1139 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1148 * Check the unary operators to ensure there's no ambiguity in how they're
1149 * used. An ambiguous piece of code would be:
1151 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1152 * the +5 is its argument.
1162 if (PL_oldoldbufptr != PL_last_uni)
1164 while (isSPACE(*PL_last_uni))
1167 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1169 if ((t = strchr(s, '(')) && t < PL_bufptr)
1172 if (ckWARN_d(WARN_AMBIGUOUS)){
1173 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1174 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1175 (int)(s - PL_last_uni), PL_last_uni);
1180 * LOP : macro to build a list operator. Its behaviour has been replaced
1181 * with a subroutine, S_lop() for which LOP is just another name.
1184 #define LOP(f,x) return lop(f,x,s)
1188 * Build a list operator (or something that might be one). The rules:
1189 * - if we have a next token, then it's a list operator [why?]
1190 * - if the next thing is an opening paren, then it's a function
1191 * - else it's a list operator
1195 S_lop(pTHX_ I32 f, int x, char *s)
1202 PL_last_lop = PL_oldbufptr;
1203 PL_last_lop_op = (OPCODE)f;
1206 return REPORT(LSTOP);
1209 return REPORT(LSTOP);
1212 return REPORT(FUNC);
1215 return REPORT(FUNC);
1217 return REPORT(LSTOP);
1223 * Sets up for an eventual force_next(). start_force(0) basically does
1224 * an unshift, while start_force(-1) does a push. yylex removes items
1229 S_start_force(pTHX_ int where)
1233 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1234 where = PL_lasttoke;
1235 assert(PL_curforce < 0 || PL_curforce == where);
1236 if (PL_curforce != where) {
1237 for (i = PL_lasttoke; i > where; --i) {
1238 PL_nexttoke[i] = PL_nexttoke[i-1];
1242 if (PL_curforce < 0) /* in case of duplicate start_force() */
1243 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1244 PL_curforce = where;
1247 curmad('^', newSVpvs(""));
1248 CURMAD('_', PL_nextwhite);
1253 S_curmad(pTHX_ char slot, SV *sv)
1259 if (PL_curforce < 0)
1260 where = &PL_thismad;
1262 where = &PL_nexttoke[PL_curforce].next_mad;
1265 sv_setpvn(sv, "", 0);
1268 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1270 else if (PL_encoding) {
1271 sv_recode_to_utf8(sv, PL_encoding);
1276 /* keep a slot open for the head of the list? */
1277 if (slot != '_' && *where && (*where)->mad_key == '^') {
1278 (*where)->mad_key = slot;
1279 sv_free((*where)->mad_val);
1280 (*where)->mad_val = (void*)sv;
1283 addmad(newMADsv(slot, sv), where, 0);
1286 # define start_force(where) NOOP
1287 # define curmad(slot, sv) NOOP
1292 * When the lexer realizes it knows the next token (for instance,
1293 * it is reordering tokens for the parser) then it can call S_force_next
1294 * to know what token to return the next time the lexer is called. Caller
1295 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1296 * and possibly PL_expect to ensure the lexer handles the token correctly.
1300 S_force_next(pTHX_ I32 type)
1304 if (PL_curforce < 0)
1305 start_force(PL_lasttoke);
1306 PL_nexttoke[PL_curforce].next_type = type;
1307 if (PL_lex_state != LEX_KNOWNEXT)
1308 PL_lex_defer = PL_lex_state;
1309 PL_lex_state = LEX_KNOWNEXT;
1310 PL_lex_expect = PL_expect;
1313 PL_nexttype[PL_nexttoke] = type;
1315 if (PL_lex_state != LEX_KNOWNEXT) {
1316 PL_lex_defer = PL_lex_state;
1317 PL_lex_expect = PL_expect;
1318 PL_lex_state = LEX_KNOWNEXT;
1324 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1327 SV * const sv = newSVpvn(start,len);
1328 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1335 * When the lexer knows the next thing is a word (for instance, it has
1336 * just seen -> and it knows that the next char is a word char, then
1337 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1341 * char *start : buffer position (must be within PL_linestr)
1342 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1343 * int check_keyword : if true, Perl checks to make sure the word isn't
1344 * a keyword (do this if the word is a label, e.g. goto FOO)
1345 * int allow_pack : if true, : characters will also be allowed (require,
1346 * use, etc. do this)
1347 * int allow_initial_tick : used by the "sub" lexer only.
1351 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1357 start = SKIPSPACE1(start);
1359 if (isIDFIRST_lazy_if(s,UTF) ||
1360 (allow_pack && *s == ':') ||
1361 (allow_initial_tick && *s == '\'') )
1363 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1364 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1366 start_force(PL_curforce);
1368 curmad('X', newSVpvn(start,s-start));
1369 if (token == METHOD) {
1374 PL_expect = XOPERATOR;
1377 NEXTVAL_NEXTTOKE.opval
1378 = (OP*)newSVOP(OP_CONST,0,
1379 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1380 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1388 * Called when the lexer wants $foo *foo &foo etc, but the program
1389 * text only contains the "foo" portion. The first argument is a pointer
1390 * to the "foo", and the second argument is the type symbol to prefix.
1391 * Forces the next token to be a "WORD".
1392 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1396 S_force_ident(pTHX_ register const char *s, int kind)
1400 const STRLEN len = strlen(s);
1401 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1402 start_force(PL_curforce);
1403 NEXTVAL_NEXTTOKE.opval = o;
1406 o->op_private = OPpCONST_ENTERED;
1407 /* XXX see note in pp_entereval() for why we forgo typo
1408 warnings if the symbol must be introduced in an eval.
1410 gv_fetchpvn_flags(s, len,
1411 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1413 kind == '$' ? SVt_PV :
1414 kind == '@' ? SVt_PVAV :
1415 kind == '%' ? SVt_PVHV :
1423 Perl_str_to_version(pTHX_ SV *sv)
1428 const char *start = SvPV_const(sv,len);
1429 const char * const end = start + len;
1430 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1431 while (start < end) {
1435 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1440 retval += ((NV)n)/nshift;
1449 * Forces the next token to be a version number.
1450 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1451 * and if "guessing" is TRUE, then no new token is created (and the caller
1452 * must use an alternative parsing method).
1456 S_force_version(pTHX_ char *s, int guessing)
1462 I32 startoff = s - SvPVX(PL_linestr);
1471 while (isDIGIT(*d) || *d == '_' || *d == '.')
1475 start_force(PL_curforce);
1476 curmad('X', newSVpvn(s,d-s));
1479 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1481 s = scan_num(s, &yylval);
1482 version = yylval.opval;
1483 ver = cSVOPx(version)->op_sv;
1484 if (SvPOK(ver) && !SvNIOK(ver)) {
1485 SvUPGRADE(ver, SVt_PVNV);
1486 SvNV_set(ver, str_to_version(ver));
1487 SvNOK_on(ver); /* hint that it is a version */
1490 else if (guessing) {
1493 sv_free(PL_nextwhite); /* let next token collect whitespace */
1495 s = SvPVX(PL_linestr) + startoff;
1503 if (PL_madskills && !version) {
1504 sv_free(PL_nextwhite); /* let next token collect whitespace */
1506 s = SvPVX(PL_linestr) + startoff;
1509 /* NOTE: The parser sees the package name and the VERSION swapped */
1510 start_force(PL_curforce);
1511 NEXTVAL_NEXTTOKE.opval = version;
1519 * Tokenize a quoted string passed in as an SV. It finds the next
1520 * chunk, up to end of string or a backslash. It may make a new
1521 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1526 S_tokeq(pTHX_ SV *sv)
1530 register char *send;
1538 s = SvPV_force(sv, len);
1539 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1542 while (s < send && *s != '\\')
1547 if ( PL_hints & HINT_NEW_STRING ) {
1548 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1554 if (s + 1 < send && (s[1] == '\\'))
1555 s++; /* all that, just for this */
1560 SvCUR_set(sv, d - SvPVX_const(sv));
1562 if ( PL_hints & HINT_NEW_STRING )
1563 return new_constant(NULL, 0, "q", sv, pv, "q");
1568 * Now come three functions related to double-quote context,
1569 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1570 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1571 * interact with PL_lex_state, and create fake ( ... ) argument lists
1572 * to handle functions and concatenation.
1573 * They assume that whoever calls them will be setting up a fake
1574 * join call, because each subthing puts a ',' after it. This lets
1577 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1579 * (I'm not sure whether the spurious commas at the end of lcfirst's
1580 * arguments and join's arguments are created or not).
1585 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1587 * Pattern matching will set PL_lex_op to the pattern-matching op to
1588 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1590 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1592 * Everything else becomes a FUNC.
1594 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1595 * had an OP_CONST or OP_READLINE). This just sets us up for a
1596 * call to S_sublex_push().
1600 S_sublex_start(pTHX)
1603 register const I32 op_type = yylval.ival;
1605 if (op_type == OP_NULL) {
1606 yylval.opval = PL_lex_op;
1610 if (op_type == OP_CONST || op_type == OP_READLINE) {
1611 SV *sv = tokeq(PL_lex_stuff);
1613 if (SvTYPE(sv) == SVt_PVIV) {
1614 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1616 const char * const p = SvPV_const(sv, len);
1617 SV * const nsv = newSVpvn(p, len);
1623 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1624 PL_lex_stuff = NULL;
1625 /* Allow <FH> // "foo" */
1626 if (op_type == OP_READLINE)
1627 PL_expect = XTERMORDORDOR;
1630 else if (op_type == OP_BACKTICK && PL_lex_op) {
1631 /* readpipe() vas overriden */
1632 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1633 yylval.opval = PL_lex_op;
1635 PL_lex_stuff = NULL;
1639 PL_sublex_info.super_state = PL_lex_state;
1640 PL_sublex_info.sub_inwhat = op_type;
1641 PL_sublex_info.sub_op = PL_lex_op;
1642 PL_lex_state = LEX_INTERPPUSH;
1646 yylval.opval = PL_lex_op;
1656 * Create a new scope to save the lexing state. The scope will be
1657 * ended in S_sublex_done. Returns a '(', starting the function arguments
1658 * to the uc, lc, etc. found before.
1659 * Sets PL_lex_state to LEX_INTERPCONCAT.
1668 PL_lex_state = PL_sublex_info.super_state;
1669 SAVEI32(PL_lex_dojoin);
1670 SAVEI32(PL_lex_brackets);
1671 SAVEI32(PL_lex_casemods);
1672 SAVEI32(PL_lex_starts);
1673 SAVEI32(PL_lex_state);
1674 SAVEVPTR(PL_lex_inpat);
1675 SAVEI32(PL_lex_inwhat);
1676 SAVECOPLINE(PL_curcop);
1677 SAVEPPTR(PL_bufptr);
1678 SAVEPPTR(PL_bufend);
1679 SAVEPPTR(PL_oldbufptr);
1680 SAVEPPTR(PL_oldoldbufptr);
1681 SAVEPPTR(PL_last_lop);
1682 SAVEPPTR(PL_last_uni);
1683 SAVEPPTR(PL_linestart);
1684 SAVESPTR(PL_linestr);
1685 SAVEGENERICPV(PL_lex_brackstack);
1686 SAVEGENERICPV(PL_lex_casestack);
1688 PL_linestr = PL_lex_stuff;
1689 PL_lex_stuff = NULL;
1691 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1692 = SvPVX(PL_linestr);
1693 PL_bufend += SvCUR(PL_linestr);
1694 PL_last_lop = PL_last_uni = NULL;
1695 SAVEFREESV(PL_linestr);
1697 PL_lex_dojoin = FALSE;
1698 PL_lex_brackets = 0;
1699 Newx(PL_lex_brackstack, 120, char);
1700 Newx(PL_lex_casestack, 12, char);
1701 PL_lex_casemods = 0;
1702 *PL_lex_casestack = '\0';
1704 PL_lex_state = LEX_INTERPCONCAT;
1705 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1707 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1708 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1709 PL_lex_inpat = PL_sublex_info.sub_op;
1711 PL_lex_inpat = NULL;
1718 * Restores lexer state after a S_sublex_push.
1725 if (!PL_lex_starts++) {
1726 SV * const sv = newSVpvs("");
1727 if (SvUTF8(PL_linestr))
1729 PL_expect = XOPERATOR;
1730 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1734 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1735 PL_lex_state = LEX_INTERPCASEMOD;
1739 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1740 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1741 PL_linestr = PL_lex_repl;
1743 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1744 PL_bufend += SvCUR(PL_linestr);
1745 PL_last_lop = PL_last_uni = NULL;
1746 SAVEFREESV(PL_linestr);
1747 PL_lex_dojoin = FALSE;
1748 PL_lex_brackets = 0;
1749 PL_lex_casemods = 0;
1750 *PL_lex_casestack = '\0';
1752 if (SvEVALED(PL_lex_repl)) {
1753 PL_lex_state = LEX_INTERPNORMAL;
1755 /* we don't clear PL_lex_repl here, so that we can check later
1756 whether this is an evalled subst; that means we rely on the
1757 logic to ensure sublex_done() is called again only via the
1758 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1761 PL_lex_state = LEX_INTERPCONCAT;
1771 PL_endwhite = newSVpvs("");
1772 sv_catsv(PL_endwhite, PL_thiswhite);
1776 sv_setpvn(PL_thistoken,"",0);
1778 PL_realtokenstart = -1;
1782 PL_bufend = SvPVX(PL_linestr);
1783 PL_bufend += SvCUR(PL_linestr);
1784 PL_expect = XOPERATOR;
1785 PL_sublex_info.sub_inwhat = 0;
1793 Extracts a pattern, double-quoted string, or transliteration. This
1796 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1797 processing a pattern (PL_lex_inpat is true), a transliteration
1798 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1800 Returns a pointer to the character scanned up to. If this is
1801 advanced from the start pointer supplied (i.e. if anything was
1802 successfully parsed), will leave an OP for the substring scanned
1803 in yylval. Caller must intuit reason for not parsing further
1804 by looking at the next characters herself.
1808 double-quoted style: \r and \n
1809 regexp special ones: \D \s
1812 case and quoting: \U \Q \E
1813 stops on @ and $, but not for $ as tail anchor
1815 In transliterations:
1816 characters are VERY literal, except for - not at the start or end
1817 of the string, which indicates a range. If the range is in bytes,
1818 scan_const expands the range to the full set of intermediate
1819 characters. If the range is in utf8, the hyphen is replaced with
1820 a certain range mark which will be handled by pmtrans() in op.c.
1822 In double-quoted strings:
1824 double-quoted style: \r and \n
1826 deprecated backrefs: \1 (in substitution replacements)
1827 case and quoting: \U \Q \E
1830 scan_const does *not* construct ops to handle interpolated strings.
1831 It stops processing as soon as it finds an embedded $ or @ variable
1832 and leaves it to the caller to work out what's going on.
1834 embedded arrays (whether in pattern or not) could be:
1835 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1837 $ in double-quoted strings must be the symbol of an embedded scalar.
1839 $ in pattern could be $foo or could be tail anchor. Assumption:
1840 it's a tail anchor if $ is the last thing in the string, or if it's
1841 followed by one of "()| \r\n\t"
1843 \1 (backreferences) are turned into $1
1845 The structure of the code is
1846 while (there's a character to process) {
1847 handle transliteration ranges
1848 skip regexp comments /(?#comment)/ and codes /(?{code})/
1849 skip #-initiated comments in //x patterns
1850 check for embedded arrays
1851 check for embedded scalars
1853 leave intact backslashes from leaveit (below)
1854 deprecate \1 in substitution replacements
1855 handle string-changing backslashes \l \U \Q \E, etc.
1856 switch (what was escaped) {
1857 handle \- in a transliteration (becomes a literal -)
1858 handle \132 (octal characters)
1859 handle \x15 and \x{1234} (hex characters)
1860 handle \N{name} (named characters)
1861 handle \cV (control characters)
1862 handle printf-style backslashes (\f, \r, \n, etc)
1864 } (end if backslash)
1865 } (end while character to read)
1870 S_scan_const(pTHX_ char *start)
1873 register char *send = PL_bufend; /* end of the constant */
1874 SV *sv = newSV(send - start); /* sv for the constant */
1875 register char *s = start; /* start of the constant */
1876 register char *d = SvPVX(sv); /* destination for copies */
1877 bool dorange = FALSE; /* are we in a translit range? */
1878 bool didrange = FALSE; /* did we just finish a range? */
1879 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1880 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1883 UV literal_endpoint = 0;
1884 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1887 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1888 /* If we are doing a trans and we know we want UTF8 set expectation */
1889 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1890 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1894 while (s < send || dorange) {
1895 /* get transliterations out of the way (they're most literal) */
1896 if (PL_lex_inwhat == OP_TRANS) {
1897 /* expand a range A-Z to the full set of characters. AIE! */
1899 I32 i; /* current expanded character */
1900 I32 min; /* first character in range */
1901 I32 max; /* last character in range */
1912 char * const c = (char*)utf8_hop((U8*)d, -1);
1916 *c = (char)UTF_TO_NATIVE(0xff);
1917 /* mark the range as done, and continue */
1923 i = d - SvPVX_const(sv); /* remember current offset */
1926 SvLEN(sv) + (has_utf8 ?
1927 (512 - UTF_CONTINUATION_MARK +
1930 /* How many two-byte within 0..255: 128 in UTF-8,
1931 * 96 in UTF-8-mod. */
1933 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1935 d = SvPVX(sv) + i; /* refresh d after realloc */
1939 for (j = 0; j <= 1; j++) {
1940 char * const c = (char*)utf8_hop((U8*)d, -1);
1941 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1947 max = (U8)0xff; /* only to \xff */
1948 uvmax = uv; /* \x{100} to uvmax */
1950 d = c; /* eat endpoint chars */
1955 d -= 2; /* eat the first char and the - */
1956 min = (U8)*d; /* first char in range */
1957 max = (U8)d[1]; /* last char in range */
1964 "Invalid range \"%c-%c\" in transliteration operator",
1965 (char)min, (char)max);
1969 if (literal_endpoint == 2 &&
1970 ((isLOWER(min) && isLOWER(max)) ||
1971 (isUPPER(min) && isUPPER(max)))) {
1973 for (i = min; i <= max; i++)
1975 *d++ = NATIVE_TO_NEED(has_utf8,i);
1977 for (i = min; i <= max; i++)
1979 *d++ = NATIVE_TO_NEED(has_utf8,i);
1984 for (i = min; i <= max; i++)
1987 const U8 ch = (U8)NATIVE_TO_UTF(i);
1988 if (UNI_IS_INVARIANT(ch))
1991 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1992 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2001 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2003 *d++ = (char)UTF_TO_NATIVE(0xff);
2005 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2009 /* mark the range as done, and continue */
2013 literal_endpoint = 0;
2018 /* range begins (ignore - as first or last char) */
2019 else if (*s == '-' && s+1 < send && s != start) {
2021 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2028 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2038 literal_endpoint = 0;
2039 native_range = TRUE;
2044 /* if we get here, we're not doing a transliteration */
2046 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2047 except for the last char, which will be done separately. */
2048 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2050 while (s+1 < send && *s != ')')
2051 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2053 else if (s[2] == '{' /* This should match regcomp.c */
2054 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
2057 char *regparse = s + (s[2] == '{' ? 3 : 4);
2060 while (count && (c = *regparse)) {
2061 if (c == '\\' && regparse[1])
2069 if (*regparse != ')')
2070 regparse--; /* Leave one char for continuation. */
2071 while (s < regparse)
2072 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2076 /* likewise skip #-initiated comments in //x patterns */
2077 else if (*s == '#' && PL_lex_inpat &&
2078 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2079 while (s+1 < send && *s != '\n')
2080 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2083 /* check for embedded arrays
2084 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2086 else if (*s == '@' && s[1]) {
2087 if (isALNUM_lazy_if(s+1,UTF))
2089 if (strchr(":'{$", s[1]))
2091 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2092 break; /* in regexp, neither @+ nor @- are interpolated */
2095 /* check for embedded scalars. only stop if we're sure it's a
2098 else if (*s == '$') {
2099 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2101 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2102 break; /* in regexp, $ might be tail anchor */
2105 /* End of else if chain - OP_TRANS rejoin rest */
2108 if (*s == '\\' && s+1 < send) {
2111 /* deprecate \1 in strings and substitution replacements */
2112 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2113 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2115 if (ckWARN(WARN_SYNTAX))
2116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2121 /* string-change backslash escapes */
2122 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2126 /* skip any other backslash escapes in a pattern */
2127 else if (PL_lex_inpat) {
2128 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2129 goto default_action;
2132 /* if we get here, it's either a quoted -, or a digit */
2135 /* quoted - in transliterations */
2137 if (PL_lex_inwhat == OP_TRANS) {
2144 if ((isALPHA(*s) || isDIGIT(*s)) &&
2146 Perl_warner(aTHX_ packWARN(WARN_MISC),
2147 "Unrecognized escape \\%c passed through",
2149 /* default action is to copy the quoted character */
2150 goto default_action;
2153 /* \132 indicates an octal constant */
2154 case '0': case '1': case '2': case '3':
2155 case '4': case '5': case '6': case '7':
2159 uv = grok_oct(s, &len, &flags, NULL);
2162 goto NUM_ESCAPE_INSERT;
2164 /* \x24 indicates a hex constant */
2168 char* const e = strchr(s, '}');
2169 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2170 PERL_SCAN_DISALLOW_PREFIX;
2175 yyerror("Missing right brace on \\x{}");
2179 uv = grok_hex(s, &len, &flags, NULL);
2185 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2186 uv = grok_hex(s, &len, &flags, NULL);
2192 /* Insert oct or hex escaped character.
2193 * There will always enough room in sv since such
2194 * escapes will be longer than any UTF-8 sequence
2195 * they can end up as. */
2197 /* We need to map to chars to ASCII before doing the tests
2200 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2201 if (!has_utf8 && uv > 255) {
2202 /* Might need to recode whatever we have
2203 * accumulated so far if it contains any
2206 * (Can't we keep track of that and avoid
2207 * this rescan? --jhi)
2211 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2212 if (!NATIVE_IS_INVARIANT(*c)) {
2217 const STRLEN offset = d - SvPVX_const(sv);
2219 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2223 while (src >= (const U8 *)SvPVX_const(sv)) {
2224 if (!NATIVE_IS_INVARIANT(*src)) {
2225 const U8 ch = NATIVE_TO_ASCII(*src);
2226 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2227 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2237 if (has_utf8 || uv > 255) {
2238 d = (char*)uvchr_to_utf8((U8*)d, uv);
2240 if (PL_lex_inwhat == OP_TRANS &&
2241 PL_sublex_info.sub_op) {
2242 PL_sublex_info.sub_op->op_private |=
2243 (PL_lex_repl ? OPpTRANS_FROM_UTF
2247 if (uv > 255 && !dorange)
2248 native_range = FALSE;
2260 /* \N{LATIN SMALL LETTER A} is a named character */
2264 char* e = strchr(s, '}');
2271 yyerror("Missing right brace on \\N{}");
2275 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2277 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2278 PERL_SCAN_DISALLOW_PREFIX;
2281 uv = grok_hex(s, &len, &flags, NULL);
2282 if ( e > s && len != (STRLEN)(e - s) ) {
2286 goto NUM_ESCAPE_INSERT;
2288 res = newSVpvn(s + 1, e - s - 1);
2289 type = newSVpvn(s - 2,e - s + 3);
2290 res = new_constant( NULL, 0, "charnames",
2291 res, NULL, SvPVX(type) );
2294 sv_utf8_upgrade(res);
2295 str = SvPV_const(res,len);
2296 #ifdef EBCDIC_NEVER_MIND
2297 /* charnames uses pack U and that has been
2298 * recently changed to do the below uni->native
2299 * mapping, so this would be redundant (and wrong,
2300 * the code point would be doubly converted).
2301 * But leave this in just in case the pack U change
2302 * gets revoked, but the semantics is still
2303 * desireable for charnames. --jhi */
2305 UV uv = utf8_to_uvchr((const U8*)str, 0);
2308 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2310 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2311 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2312 str = SvPV_const(res, len);
2316 if (!has_utf8 && SvUTF8(res)) {
2317 const char * const ostart = SvPVX_const(sv);
2318 SvCUR_set(sv, d - ostart);
2321 sv_utf8_upgrade(sv);
2322 /* this just broke our allocation above... */
2323 SvGROW(sv, (STRLEN)(send - start));
2324 d = SvPVX(sv) + SvCUR(sv);
2327 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2328 const char * const odest = SvPVX_const(sv);
2330 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2331 d = SvPVX(sv) + (d - odest);
2335 native_range = FALSE; /* \N{} is guessed to be Unicode */
2337 Copy(str, d, len, char);
2344 yyerror("Missing braces on \\N{}");
2347 /* \c is a control character */
2356 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2359 yyerror("Missing control char name in \\c");
2363 /* printf-style backslashes, formfeeds, newlines, etc */
2365 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2368 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2371 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2374 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2377 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2380 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2383 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2389 } /* end if (backslash) */
2396 /* If we started with encoded form, or already know we want it
2397 and then encode the next character */
2398 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2400 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2401 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2404 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2405 const STRLEN off = d - SvPVX_const(sv);
2406 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2408 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2411 if (uv > 255 && !dorange)
2412 native_range = FALSE;
2416 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2418 } /* while loop to process each character */
2420 /* terminate the string and set up the sv */
2422 SvCUR_set(sv, d - SvPVX_const(sv));
2423 if (SvCUR(sv) >= SvLEN(sv))
2424 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2427 if (PL_encoding && !has_utf8) {
2428 sv_recode_to_utf8(sv, PL_encoding);
2434 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2435 PL_sublex_info.sub_op->op_private |=
2436 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2440 /* shrink the sv if we allocated more than we used */
2441 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2442 SvPV_shrink_to_cur(sv);
2445 /* return the substring (via yylval) only if we parsed anything */
2446 if (s > PL_bufptr) {
2447 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2448 sv = new_constant(start, s - start,
2449 (const char *)(PL_lex_inpat ? "qr" : "q"),
2452 (( PL_lex_inwhat == OP_TRANS
2454 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2457 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2464 * Returns TRUE if there's more to the expression (e.g., a subscript),
2467 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2469 * ->[ and ->{ return TRUE
2470 * { and [ outside a pattern are always subscripts, so return TRUE
2471 * if we're outside a pattern and it's not { or [, then return FALSE
2472 * if we're in a pattern and the first char is a {
2473 * {4,5} (any digits around the comma) returns FALSE
2474 * if we're in a pattern and the first char is a [
2476 * [SOMETHING] has a funky algorithm to decide whether it's a
2477 * character class or not. It has to deal with things like
2478 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2479 * anything else returns TRUE
2482 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2485 S_intuit_more(pTHX_ register char *s)
2488 if (PL_lex_brackets)
2490 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2492 if (*s != '{' && *s != '[')
2497 /* In a pattern, so maybe we have {n,m}. */
2514 /* On the other hand, maybe we have a character class */
2517 if (*s == ']' || *s == '^')
2520 /* this is terrifying, and it works */
2521 int weight = 2; /* let's weigh the evidence */
2523 unsigned char un_char = 255, last_un_char;
2524 const char * const send = strchr(s,']');
2525 char tmpbuf[sizeof PL_tokenbuf * 4];
2527 if (!send) /* has to be an expression */
2530 Zero(seen,256,char);
2533 else if (isDIGIT(*s)) {
2535 if (isDIGIT(s[1]) && s[2] == ']')
2541 for (; s < send; s++) {
2542 last_un_char = un_char;
2543 un_char = (unsigned char)*s;
2548 weight -= seen[un_char] * 10;
2549 if (isALNUM_lazy_if(s+1,UTF)) {
2551 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2552 len = (int)strlen(tmpbuf);
2553 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2558 else if (*s == '$' && s[1] &&
2559 strchr("[#!%*<>()-=",s[1])) {
2560 if (/*{*/ strchr("])} =",s[2]))
2569 if (strchr("wds]",s[1]))
2571 else if (seen[(U8)'\''] || seen[(U8)'"'])
2573 else if (strchr("rnftbxcav",s[1]))
2575 else if (isDIGIT(s[1])) {
2577 while (s[1] && isDIGIT(s[1]))
2587 if (strchr("aA01! ",last_un_char))
2589 if (strchr("zZ79~",s[1]))
2591 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2592 weight -= 5; /* cope with negative subscript */
2595 if (!isALNUM(last_un_char)
2596 && !(last_un_char == '$' || last_un_char == '@'
2597 || last_un_char == '&')
2598 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2603 if (keyword(tmpbuf, d - tmpbuf, 0))
2606 if (un_char == last_un_char + 1)
2608 weight -= seen[un_char];
2613 if (weight >= 0) /* probably a character class */
2623 * Does all the checking to disambiguate
2625 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2626 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2628 * First argument is the stuff after the first token, e.g. "bar".
2630 * Not a method if bar is a filehandle.
2631 * Not a method if foo is a subroutine prototyped to take a filehandle.
2632 * Not a method if it's really "Foo $bar"
2633 * Method if it's "foo $bar"
2634 * Not a method if it's really "print foo $bar"
2635 * Method if it's really "foo package::" (interpreted as package->foo)
2636 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2637 * Not a method if bar is a filehandle or package, but is quoted with
2642 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2645 char *s = start + (*start == '$');
2646 char tmpbuf[sizeof PL_tokenbuf];
2654 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2658 const char *proto = SvPVX_const(cv);
2669 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2670 /* start is the beginning of the possible filehandle/object,
2671 * and s is the end of it
2672 * tmpbuf is a copy of it
2675 if (*start == '$') {
2676 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2679 len = start - SvPVX(PL_linestr);
2683 start = SvPVX(PL_linestr) + len;
2687 return *s == '(' ? FUNCMETH : METHOD;
2689 if (!keyword(tmpbuf, len, 0)) {
2690 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2694 soff = s - SvPVX(PL_linestr);
2698 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2699 if (indirgv && GvCVu(indirgv))
2701 /* filehandle or package name makes it a method */
2702 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2704 soff = s - SvPVX(PL_linestr);
2707 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2708 return 0; /* no assumptions -- "=>" quotes bearword */
2710 start_force(PL_curforce);
2711 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2712 newSVpvn(tmpbuf,len));
2713 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2715 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2720 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2722 return *s == '(' ? FUNCMETH : METHOD;
2730 * Return a string of Perl code to load the debugger. If PERL5DB
2731 * is set, it will return the contents of that, otherwise a
2732 * compile-time require of perl5db.pl.
2740 const char * const pdb = PerlEnv_getenv("PERL5DB");
2744 SETERRNO(0,SS_NORMAL);
2745 return "BEGIN { require 'perl5db.pl' }";
2751 /* Encoded script support. filter_add() effectively inserts a
2752 * 'pre-processing' function into the current source input stream.
2753 * Note that the filter function only applies to the current source file
2754 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2756 * The datasv parameter (which may be NULL) can be used to pass
2757 * private data to this instance of the filter. The filter function
2758 * can recover the SV using the FILTER_DATA macro and use it to
2759 * store private buffers and state information.
2761 * The supplied datasv parameter is upgraded to a PVIO type
2762 * and the IoDIRP/IoANY field is used to store the function pointer,
2763 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2764 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2765 * private use must be set using malloc'd pointers.
2769 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2775 if (!PL_rsfp_filters)
2776 PL_rsfp_filters = newAV();
2779 SvUPGRADE(datasv, SVt_PVIO);
2780 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2781 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2782 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2783 FPTR2DPTR(void *, IoANY(datasv)),
2784 SvPV_nolen(datasv)));
2785 av_unshift(PL_rsfp_filters, 1);
2786 av_store(PL_rsfp_filters, 0, datasv) ;
2791 /* Delete most recently added instance of this filter function. */
2793 Perl_filter_del(pTHX_ filter_t funcp)
2799 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2800 FPTR2DPTR(void*, funcp)));
2802 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2804 /* if filter is on top of stack (usual case) just pop it off */
2805 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2806 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2807 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2808 IoANY(datasv) = (void *)NULL;
2809 sv_free(av_pop(PL_rsfp_filters));
2813 /* we need to search for the correct entry and clear it */
2814 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2818 /* Invoke the idxth filter function for the current rsfp. */
2819 /* maxlen 0 = read one text line */
2821 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2826 /* This API is bad. It should have been using unsigned int for maxlen.
2827 Not sure if we want to change the API, but if not we should sanity
2828 check the value here. */
2829 const unsigned int correct_length
2838 if (!PL_rsfp_filters)
2840 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2841 /* Provide a default input filter to make life easy. */
2842 /* Note that we append to the line. This is handy. */
2843 DEBUG_P(PerlIO_printf(Perl_debug_log,
2844 "filter_read %d: from rsfp\n", idx));
2845 if (correct_length) {
2848 const int old_len = SvCUR(buf_sv);
2850 /* ensure buf_sv is large enough */
2851 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2852 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2853 correct_length)) <= 0) {
2854 if (PerlIO_error(PL_rsfp))
2855 return -1; /* error */
2857 return 0 ; /* end of file */
2859 SvCUR_set(buf_sv, old_len + len) ;
2862 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2863 if (PerlIO_error(PL_rsfp))
2864 return -1; /* error */
2866 return 0 ; /* end of file */
2869 return SvCUR(buf_sv);
2871 /* Skip this filter slot if filter has been deleted */
2872 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2873 DEBUG_P(PerlIO_printf(Perl_debug_log,
2874 "filter_read %d: skipped (filter deleted)\n",
2876 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2878 /* Get function pointer hidden within datasv */
2879 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2880 DEBUG_P(PerlIO_printf(Perl_debug_log,
2881 "filter_read %d: via function %p (%s)\n",
2882 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2883 /* Call function. The function is expected to */
2884 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2885 /* Return: <0:error, =0:eof, >0:not eof */
2886 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2890 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2893 #ifdef PERL_CR_FILTER
2894 if (!PL_rsfp_filters) {
2895 filter_add(S_cr_textfilter,NULL);
2898 if (PL_rsfp_filters) {
2900 SvCUR_set(sv, 0); /* start with empty line */
2901 if (FILTER_READ(0, sv, 0) > 0)
2902 return ( SvPVX(sv) ) ;
2907 return (sv_gets(sv, fp, append));
2911 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2916 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2920 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2921 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2923 return GvHV(gv); /* Foo:: */
2926 /* use constant CLASS => 'MyClass' */
2927 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2928 if (gv && GvCV(gv)) {
2929 SV * const sv = cv_const_sv(GvCV(gv));
2931 pkgname = SvPV_nolen_const(sv);
2934 return gv_stashpv(pkgname, FALSE);
2938 * S_readpipe_override
2939 * Check whether readpipe() is overriden, and generates the appropriate
2940 * optree, provided sublex_start() is called afterwards.
2943 S_readpipe_override(pTHX)
2946 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2947 yylval.ival = OP_BACKTICK;
2949 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2951 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2952 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2953 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2955 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2956 append_elem(OP_LIST,
2957 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2958 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2968 * The intent of this yylex wrapper is to minimize the changes to the
2969 * tokener when we aren't interested in collecting madprops. It remains
2970 * to be seen how successful this strategy will be...
2977 char *s = PL_bufptr;
2979 /* make sure PL_thiswhite is initialized */
2983 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2984 if (PL_pending_ident)
2985 return S_pending_ident(aTHX);
2987 /* previous token ate up our whitespace? */
2988 if (!PL_lasttoke && PL_nextwhite) {
2989 PL_thiswhite = PL_nextwhite;
2993 /* isolate the token, and figure out where it is without whitespace */
2994 PL_realtokenstart = -1;
2998 assert(PL_curforce < 0);
3000 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3001 if (!PL_thistoken) {
3002 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3003 PL_thistoken = newSVpvs("");
3005 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3006 PL_thistoken = newSVpvn(tstart, s - tstart);
3009 if (PL_thismad) /* install head */
3010 CURMAD('X', PL_thistoken);
3013 /* last whitespace of a sublex? */
3014 if (optype == ')' && PL_endwhite) {
3015 CURMAD('X', PL_endwhite);
3020 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3021 if (!PL_thiswhite && !PL_endwhite && !optype) {
3022 sv_free(PL_thistoken);
3027 /* put off final whitespace till peg */
3028 if (optype == ';' && !PL_rsfp) {
3029 PL_nextwhite = PL_thiswhite;
3032 else if (PL_thisopen) {
3033 CURMAD('q', PL_thisopen);
3035 sv_free(PL_thistoken);
3039 /* Store actual token text as madprop X */
3040 CURMAD('X', PL_thistoken);
3044 /* add preceding whitespace as madprop _ */
3045 CURMAD('_', PL_thiswhite);
3049 /* add quoted material as madprop = */
3050 CURMAD('=', PL_thisstuff);
3054 /* add terminating quote as madprop Q */
3055 CURMAD('Q', PL_thisclose);
3059 /* special processing based on optype */
3063 /* opval doesn't need a TOKEN since it can already store mp */
3074 append_madprops(PL_thismad, yylval.opval, 0);
3082 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3091 /* remember any fake bracket that lexer is about to discard */
3092 if (PL_lex_brackets == 1 &&
3093 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3096 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3099 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3100 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3103 break; /* don't bother looking for trailing comment */
3112 /* attach a trailing comment to its statement instead of next token */
3116 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3118 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3120 if (*s == '\n' || *s == '#') {
3121 while (s < PL_bufend && *s != '\n')
3125 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3126 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3143 /* Create new token struct. Note: opvals return early above. */
3144 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3151 S_tokenize_use(pTHX_ int is_use, char *s) {
3153 if (PL_expect != XSTATE)
3154 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3155 is_use ? "use" : "no"));
3157 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3158 s = force_version(s, TRUE);
3159 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3160 start_force(PL_curforce);
3161 NEXTVAL_NEXTTOKE.opval = NULL;
3164 else if (*s == 'v') {
3165 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3166 s = force_version(s, FALSE);
3170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3171 s = force_version(s, FALSE);
3173 yylval.ival = is_use;
3177 static const char* const exp_name[] =
3178 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3179 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3186 Works out what to call the token just pulled out of the input
3187 stream. The yacc parser takes care of taking the ops we return and
3188 stitching them into a tree.
3194 if read an identifier
3195 if we're in a my declaration
3196 croak if they tried to say my($foo::bar)
3197 build the ops for a my() declaration
3198 if it's an access to a my() variable
3199 are we in a sort block?
3200 croak if my($a); $a <=> $b
3201 build ops for access to a my() variable
3202 if in a dq string, and they've said @foo and we can't find @foo
3204 build ops for a bareword
3205 if we already built the token before, use it.
3210 #pragma segment Perl_yylex
3216 register char *s = PL_bufptr;
3221 /* orig_keyword, gvp, and gv are initialized here because
3222 * jump to the label just_a_word_zero can bypass their
3223 * initialization later. */
3224 I32 orig_keyword = 0;
3229 SV* tmp = newSVpvs("");
3230 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3231 (IV)CopLINE(PL_curcop),
3232 lex_state_names[PL_lex_state],
3233 exp_name[PL_expect],
3234 pv_display(tmp, s, strlen(s), 0, 60));
3237 /* check if there's an identifier for us to look at */
3238 if (PL_pending_ident)
3239 return REPORT(S_pending_ident(aTHX));
3241 /* no identifier pending identification */
3243 switch (PL_lex_state) {
3245 case LEX_NORMAL: /* Some compilers will produce faster */
3246 case LEX_INTERPNORMAL: /* code if we comment these out. */
3250 /* when we've already built the next token, just pull it out of the queue */
3254 yylval = PL_nexttoke[PL_lasttoke].next_val;
3256 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3257 PL_nexttoke[PL_lasttoke].next_mad = 0;
3258 if (PL_thismad && PL_thismad->mad_key == '_') {
3259 PL_thiswhite = (SV*)PL_thismad->mad_val;
3260 PL_thismad->mad_val = 0;
3261 mad_free(PL_thismad);
3266 PL_lex_state = PL_lex_defer;
3267 PL_expect = PL_lex_expect;
3268 PL_lex_defer = LEX_NORMAL;
3269 if (!PL_nexttoke[PL_lasttoke].next_type)
3274 yylval = PL_nextval[PL_nexttoke];
3276 PL_lex_state = PL_lex_defer;
3277 PL_expect = PL_lex_expect;
3278 PL_lex_defer = LEX_NORMAL;
3282 /* FIXME - can these be merged? */
3283 return(PL_nexttoke[PL_lasttoke].next_type);
3285 return REPORT(PL_nexttype[PL_nexttoke]);
3288 /* interpolated case modifiers like \L \U, including \Q and \E.
3289 when we get here, PL_bufptr is at the \
3291 case LEX_INTERPCASEMOD:
3293 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3294 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3296 /* handle \E or end of string */
3297 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3299 if (PL_lex_casemods) {
3300 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3301 PL_lex_casestack[PL_lex_casemods] = '\0';
3303 if (PL_bufptr != PL_bufend
3304 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3306 PL_lex_state = LEX_INTERPCONCAT;
3309 PL_thistoken = newSVpvs("\\E");
3315 while (PL_bufptr != PL_bufend &&
3316 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3318 PL_thiswhite = newSVpvs("");
3319 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3323 if (PL_bufptr != PL_bufend)
3326 PL_lex_state = LEX_INTERPCONCAT;
3330 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3331 "### Saw case modifier\n"); });
3333 if (s[1] == '\\' && s[2] == 'E') {
3336 PL_thiswhite = newSVpvs("");
3337 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3340 PL_lex_state = LEX_INTERPCONCAT;
3345 if (!PL_madskills) /* when just compiling don't need correct */
3346 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3347 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3348 if ((*s == 'L' || *s == 'U') &&
3349 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3350 PL_lex_casestack[--PL_lex_casemods] = '\0';
3353 if (PL_lex_casemods > 10)
3354 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3355 PL_lex_casestack[PL_lex_casemods++] = *s;
3356 PL_lex_casestack[PL_lex_casemods] = '\0';
3357 PL_lex_state = LEX_INTERPCONCAT;
3358 start_force(PL_curforce);
3359 NEXTVAL_NEXTTOKE.ival = 0;
3361 start_force(PL_curforce);
3363 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3365 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3367 NEXTVAL_NEXTTOKE.ival = OP_LC;
3369 NEXTVAL_NEXTTOKE.ival = OP_UC;
3371 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3373 Perl_croak(aTHX_ "panic: yylex");
3375 SV* const tmpsv = newSVpvs("");
3376 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3382 if (PL_lex_starts) {
3388 sv_free(PL_thistoken);
3389 PL_thistoken = newSVpvs("");
3392 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3393 if (PL_lex_casemods == 1 && PL_lex_inpat)
3402 case LEX_INTERPPUSH:
3403 return REPORT(sublex_push());
3405 case LEX_INTERPSTART:
3406 if (PL_bufptr == PL_bufend)
3407 return REPORT(sublex_done());
3408 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3409 "### Interpolated variable\n"); });
3411 PL_lex_dojoin = (*PL_bufptr == '@');
3412 PL_lex_state = LEX_INTERPNORMAL;
3413 if (PL_lex_dojoin) {
3414 start_force(PL_curforce);
3415 NEXTVAL_NEXTTOKE.ival = 0;
3417 start_force(PL_curforce);
3418 force_ident("\"", '$');
3419 start_force(PL_curforce);
3420 NEXTVAL_NEXTTOKE.ival = 0;
3422 start_force(PL_curforce);
3423 NEXTVAL_NEXTTOKE.ival = 0;
3425 start_force(PL_curforce);
3426 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3429 if (PL_lex_starts++) {
3434 sv_free(PL_thistoken);
3435 PL_thistoken = newSVpvs("");
3438 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3439 if (!PL_lex_casemods && PL_lex_inpat)
3446 case LEX_INTERPENDMAYBE:
3447 if (intuit_more(PL_bufptr)) {
3448 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3454 if (PL_lex_dojoin) {
3455 PL_lex_dojoin = FALSE;
3456 PL_lex_state = LEX_INTERPCONCAT;
3460 sv_free(PL_thistoken);
3461 PL_thistoken = newSVpvs("");
3466 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3467 && SvEVALED(PL_lex_repl))
3469 if (PL_bufptr != PL_bufend)
3470 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3474 case LEX_INTERPCONCAT:
3476 if (PL_lex_brackets)
3477 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3479 if (PL_bufptr == PL_bufend)
3480 return REPORT(sublex_done());
3482 if (SvIVX(PL_linestr) == '\'') {
3483 SV *sv = newSVsv(PL_linestr);
3486 else if ( PL_hints & HINT_NEW_RE )
3487 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3488 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3492 s = scan_const(PL_bufptr);
3494 PL_lex_state = LEX_INTERPCASEMOD;
3496 PL_lex_state = LEX_INTERPSTART;
3499 if (s != PL_bufptr) {
3500 start_force(PL_curforce);
3502 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3504 NEXTVAL_NEXTTOKE = yylval;
3507 if (PL_lex_starts++) {
3511 sv_free(PL_thistoken);
3512 PL_thistoken = newSVpvs("");
3515 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3516 if (!PL_lex_casemods && PL_lex_inpat)
3529 PL_lex_state = LEX_NORMAL;
3530 s = scan_formline(PL_bufptr);
3531 if (!PL_lex_formbrack)
3537 PL_oldoldbufptr = PL_oldbufptr;
3543 sv_free(PL_thistoken);
3546 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3550 if (isIDFIRST_lazy_if(s,UTF))
3552 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3555 goto fake_eof; /* emulate EOF on ^D or ^Z */
3564 if (PL_lex_brackets) {
3565 yyerror((const char *)
3567 ? "Format not terminated"
3568 : "Missing right curly or square bracket"));
3570 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3571 "### Tokener got EOF\n");
3575 if (s++ < PL_bufend)
3576 goto retry; /* ignore stray nulls */
3579 if (!PL_in_eval && !PL_preambled) {
3580 PL_preambled = TRUE;
3585 sv_setpv(PL_linestr,incl_perldb());
3586 if (SvCUR(PL_linestr))
3587 sv_catpvs(PL_linestr,";");
3589 while(AvFILLp(PL_preambleav) >= 0) {
3590 SV *tmpsv = av_shift(PL_preambleav);
3591 sv_catsv(PL_linestr, tmpsv);
3592 sv_catpvs(PL_linestr, ";");
3595 sv_free((SV*)PL_preambleav);
3596 PL_preambleav = NULL;
3598 if (PL_minus_n || PL_minus_p) {
3599 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3601 sv_catpvs(PL_linestr,"chomp;");
3604 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3605 || *PL_splitstr == '"')
3606 && strchr(PL_splitstr + 1, *PL_splitstr))
3607 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3609 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3610 bytes can be used as quoting characters. :-) */
3611 const char *splits = PL_splitstr;
3612 sv_catpvs(PL_linestr, "our @F=split(q\0");
3615 if (*splits == '\\')
3616 sv_catpvn(PL_linestr, splits, 1);
3617 sv_catpvn(PL_linestr, splits, 1);
3618 } while (*splits++);
3619 /* This loop will embed the trailing NUL of
3620 PL_linestr as the last thing it does before
3622 sv_catpvs(PL_linestr, ");");
3626 sv_catpvs(PL_linestr,"our @F=split(' ');");
3630 sv_catpvs(PL_linestr,"use feature ':5.10';");
3631 sv_catpvs(PL_linestr, "\n");
3632 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3633 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3634 PL_last_lop = PL_last_uni = NULL;
3635 if (PERLDB_LINE && PL_curstash != PL_debstash)
3636 update_debugger_info_sv(PL_linestr);
3640 bof = PL_rsfp ? TRUE : FALSE;
3641 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3644 PL_realtokenstart = -1;
3647 if (PL_preprocess && !PL_in_eval)
3648 (void)PerlProc_pclose(PL_rsfp);
3649 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3650 PerlIO_clearerr(PL_rsfp);
3652 (void)PerlIO_close(PL_rsfp);
3654 PL_doextract = FALSE;
3656 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3661 sv_setpv(PL_linestr,
3664 ? ";}continue{print;}" : ";}"));
3665 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3666 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3667 PL_last_lop = PL_last_uni = NULL;
3668 PL_minus_n = PL_minus_p = 0;
3671 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3672 PL_last_lop = PL_last_uni = NULL;
3673 sv_setpvn(PL_linestr,"",0);
3674 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3676 /* If it looks like the start of a BOM or raw UTF-16,
3677 * check if it in fact is. */
3683 #ifdef PERLIO_IS_STDIO
3684 # ifdef __GNU_LIBRARY__
3685 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3686 # define FTELL_FOR_PIPE_IS_BROKEN
3690 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3691 # define FTELL_FOR_PIPE_IS_BROKEN
3696 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3697 /* This loses the possibility to detect the bof
3698 * situation on perl -P when the libc5 is being used.
3699 * Workaround? Maybe attach some extra state to PL_rsfp?
3702 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3704 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3707 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3708 s = swallow_bom((U8*)s);
3712 /* Incest with pod. */
3715 sv_catsv(PL_thiswhite, PL_linestr);
3717 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3718 sv_setpvn(PL_linestr, "", 0);
3719 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3720 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3721 PL_last_lop = PL_last_uni = NULL;
3722 PL_doextract = FALSE;
3726 } while (PL_doextract);
3727 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3728 if (PERLDB_LINE && PL_curstash != PL_debstash)
3729 update_debugger_info_sv(PL_linestr);
3730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3731 PL_last_lop = PL_last_uni = NULL;
3732 if (CopLINE(PL_curcop) == 1) {
3733 while (s < PL_bufend && isSPACE(*s))
3735 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3739 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3743 if (*s == '#' && *(s+1) == '!')
3745 #ifdef ALTERNATE_SHEBANG
3747 static char const as[] = ALTERNATE_SHEBANG;
3748 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3749 d = s + (sizeof(as) - 1);
3751 #endif /* ALTERNATE_SHEBANG */
3760 while (*d && !isSPACE(*d))
3764 #ifdef ARG_ZERO_IS_SCRIPT
3765 if (ipathend > ipath) {
3767 * HP-UX (at least) sets argv[0] to the script name,
3768 * which makes $^X incorrect. And Digital UNIX and Linux,
3769 * at least, set argv[0] to the basename of the Perl
3770 * interpreter. So, having found "#!", we'll set it right.
3772 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3774 assert(SvPOK(x) || SvGMAGICAL(x));
3775 if (sv_eq(x, CopFILESV(PL_curcop))) {
3776 sv_setpvn(x, ipath, ipathend - ipath);
3782 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3783 const char * const lstart = SvPV_const(x,llen);
3785 bstart += blen - llen;
3786 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3787 sv_setpvn(x, ipath, ipathend - ipath);
3792 TAINT_NOT; /* $^X is always tainted, but that's OK */
3794 #endif /* ARG_ZERO_IS_SCRIPT */
3799 d = instr(s,"perl -");
3801 d = instr(s,"perl");
3803 /* avoid getting into infinite loops when shebang
3804 * line contains "Perl" rather than "perl" */
3806 for (d = ipathend-4; d >= ipath; --d) {
3807 if ((*d == 'p' || *d == 'P')
3808 && !ibcmp(d, "perl", 4))
3818 #ifdef ALTERNATE_SHEBANG
3820 * If the ALTERNATE_SHEBANG on this system starts with a
3821 * character that can be part of a Perl expression, then if
3822 * we see it but not "perl", we're probably looking at the
3823 * start of Perl code, not a request to hand off to some
3824 * other interpreter. Similarly, if "perl" is there, but
3825 * not in the first 'word' of the line, we assume the line
3826 * contains the start of the Perl program.
3828 if (d && *s != '#') {
3829 const char *c = ipath;
3830 while (*c && !strchr("; \t\r\n\f\v#", *c))
3833 d = NULL; /* "perl" not in first word; ignore */
3835 *s = '#'; /* Don't try to parse shebang line */
3837 #endif /* ALTERNATE_SHEBANG */
3838 #ifndef MACOS_TRADITIONAL
3843 !instr(s,"indir") &&
3844 instr(PL_origargv[0],"perl"))
3851 while (s < PL_bufend && isSPACE(*s))
3853 if (s < PL_bufend) {
3854 Newxz(newargv,PL_origargc+3,char*);
3856 while (s < PL_bufend && !isSPACE(*s))
3859 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3862 newargv = PL_origargv;
3865 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3867 Perl_croak(aTHX_ "Can't exec %s", ipath);
3871 while (*d && !isSPACE(*d))
3873 while (SPACE_OR_TAB(*d))
3877 const bool switches_done = PL_doswitches;
3878 const U32 oldpdb = PL_perldb;
3879 const bool oldn = PL_minus_n;
3880 const bool oldp = PL_minus_p;
3883 if (*d == 'M' || *d == 'm' || *d == 'C') {
3884 const char * const m = d;
3885 while (*d && !isSPACE(*d))
3887 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3890 d = moreswitches(d);
3892 if (PL_doswitches && !switches_done) {
3893 int argc = PL_origargc;
3894 char **argv = PL_origargv;
3897 } while (argc && argv[0][0] == '-' && argv[0][1]);
3898 init_argv_symbols(argc,argv);
3900 if ((PERLDB_LINE && !oldpdb) ||
3901 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3902 /* if we have already added "LINE: while (<>) {",
3903 we must not do it again */
3905 sv_setpvn(PL_linestr, "", 0);
3906 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3907 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3908 PL_last_lop = PL_last_uni = NULL;
3909 PL_preambled = FALSE;
3911 (void)gv_fetchfile(PL_origfilename);
3918 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3920 PL_lex_state = LEX_FORMLINE;
3925 #ifdef PERL_STRICT_CR
3926 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3928 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3930 case ' ': case '\t': case '\f': case 013:
3931 #ifdef MACOS_TRADITIONAL
3935 PL_realtokenstart = -1;
3944 PL_realtokenstart = -1;
3948 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3949 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3950 /* handle eval qq[#line 1 "foo"\n ...] */
3951 CopLINE_dec(PL_curcop);
3954 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3956 if (!PL_in_eval || PL_rsfp)
3961 while (d < PL_bufend && *d != '\n')
3965 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3966 Perl_croak(aTHX_ "panic: input overflow");
3969 PL_thiswhite = newSVpvn(s, d - s);
3974 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3976 PL_lex_state = LEX_FORMLINE;
3982 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3983 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3986 TOKEN(PEG); /* make sure any #! line is accessible */
3991 /* if (PL_madskills && PL_lex_formbrack) { */
3993 while (d < PL_bufend && *d != '\n')
3997 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3998 Perl_croak(aTHX_ "panic: input overflow");
3999 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4001 PL_thiswhite = newSVpvs("");
4002 if (CopLINE(PL_curcop) == 1) {
4003 sv_setpvn(PL_thiswhite, "", 0);
4006 sv_catpvn(PL_thiswhite, s, d - s);
4020 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4028 while (s < PL_bufend && SPACE_OR_TAB(*s))
4031 if (strnEQ(s,"=>",2)) {
4032 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4033 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4034 OPERATOR('-'); /* unary minus */
4036 PL_last_uni = PL_oldbufptr;
4038 case 'r': ftst = OP_FTEREAD; break;
4039 case 'w': ftst = OP_FTEWRITE; break;
4040 case 'x': ftst = OP_FTEEXEC; break;
4041 case 'o': ftst = OP_FTEOWNED; break;
4042 case 'R': ftst = OP_FTRREAD; break;
4043 case 'W': ftst = OP_FTRWRITE; break;
4044 case 'X': ftst = OP_FTREXEC; break;
4045 case 'O': ftst = OP_FTROWNED; break;
4046 case 'e': ftst = OP_FTIS; break;
4047 case 'z': ftst = OP_FTZERO; break;
4048 case 's': ftst = OP_FTSIZE; break;
4049 case 'f': ftst = OP_FTFILE; break;
4050 case 'd': ftst = OP_FTDIR; break;
4051 case 'l': ftst = OP_FTLINK; break;
4052 case 'p': ftst = OP_FTPIPE; break;
4053 case 'S': ftst = OP_FTSOCK; break;
4054 case 'u': ftst = OP_FTSUID; break;
4055 case 'g': ftst = OP_FTSGID; break;
4056 case 'k': ftst = OP_FTSVTX; break;
4057 case 'b': ftst = OP_FTBLK; break;
4058 case 'c': ftst = OP_FTCHR; break;
4059 case 't': ftst = OP_FTTTY; break;
4060 case 'T': ftst = OP_FTTEXT; break;
4061 case 'B': ftst = OP_FTBINARY; break;
4062 case 'M': case 'A': case 'C':
4063 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4065 case 'M': ftst = OP_FTMTIME; break;
4066 case 'A': ftst = OP_FTATIME; break;
4067 case 'C': ftst = OP_FTCTIME; break;
4075 PL_last_lop_op = (OPCODE)ftst;
4076 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4077 "### Saw file test %c\n", (int)tmp);
4082 /* Assume it was a minus followed by a one-letter named
4083 * subroutine call (or a -bareword), then. */
4084 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4085 "### '-%c' looked like a file test but was not\n",
4092 const char tmp = *s++;
4095 if (PL_expect == XOPERATOR)
4100 else if (*s == '>') {
4103 if (isIDFIRST_lazy_if(s,UTF)) {
4104 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4112 if (PL_expect == XOPERATOR)
4115 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4117 OPERATOR('-'); /* unary minus */
4123 const char tmp = *s++;
4126 if (PL_expect == XOPERATOR)
4131 if (PL_expect == XOPERATOR)
4134 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4141 if (PL_expect != XOPERATOR) {
4142 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4143 PL_expect = XOPERATOR;
4144 force_ident(PL_tokenbuf, '*');
4157 if (PL_expect == XOPERATOR) {
4161 PL_tokenbuf[0] = '%';
4162 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
4163 if (!PL_tokenbuf[1]) {
4166 PL_pending_ident = '%';
4177 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
4178 && FEATURE_IS_ENABLED("~~"))
4185 const char tmp = *s++;
4191 goto just_a_word_zero_gv;
4194 switch (PL_expect) {
4200 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4202 PL_bufptr = s; /* update in case we back off */
4208 PL_expect = XTERMBLOCK;
4211 stuffstart = s - SvPVX(PL_linestr) - 1;
4215 while (isIDFIRST_lazy_if(s,UTF)) {
4218 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4219 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4220 if (tmp < 0) tmp = -tmp;
4235 sv = newSVpvn(s, len);
4237 d = scan_str(d,TRUE,TRUE);
4239 /* MUST advance bufptr here to avoid bogus
4240 "at end of line" context messages from yyerror().
4242 PL_bufptr = s + len;
4243 yyerror("Unterminated attribute parameter in attribute list");
4247 return REPORT(0); /* EOF indicator */
4251 sv_catsv(sv, PL_lex_stuff);
4252 attrs = append_elem(OP_LIST, attrs,
4253 newSVOP(OP_CONST, 0, sv));
4254 SvREFCNT_dec(PL_lex_stuff);
4255 PL_lex_stuff = NULL;
4258 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4260 if (PL_in_my == KEY_our) {
4262 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4264 /* skip to avoid loading attributes.pm */
4266 deprecate(":unique");
4269 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4272 /* NOTE: any CV attrs applied here need to be part of
4273 the CVf_BUILTIN_ATTRS define in cv.h! */
4274 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4276 CvLVALUE_on(PL_compcv);
4278 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4280 CvLOCKED_on(PL_compcv);
4282 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4284 CvMETHOD_on(PL_compcv);
4286 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4288 CvASSERTION_on(PL_compcv);
4290 /* After we've set the flags, it could be argued that
4291 we don't need to do the attributes.pm-based setting
4292 process, and shouldn't bother appending recognized
4293 flags. To experiment with that, uncomment the
4294 following "else". (Note that's already been
4295 uncommented. That keeps the above-applied built-in
4296 attributes from being intercepted (and possibly
4297 rejected) by a package's attribute routines, but is
4298 justified by the performance win for the common case
4299 of applying only built-in attributes.) */
4301 attrs = append_elem(OP_LIST, attrs,
4302 newSVOP(OP_CONST, 0,
4306 if (*s == ':' && s[1] != ':')
4309 break; /* require real whitespace or :'s */
4310 /* XXX losing whitespace on sequential attributes here */
4314 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4315 if (*s != ';' && *s != '}' && *s != tmp
4316 && (tmp != '=' || *s != ')')) {
4317 const char q = ((*s == '\'') ? '"' : '\'');
4318 /* If here for an expression, and parsed no attrs, back
4320 if (tmp == '=' && !attrs) {
4324 /* MUST advance bufptr here to avoid bogus "at end of line"
4325 context messages from yyerror().
4328 yyerror( (const char *)
4330 ? Perl_form(aTHX_ "Invalid separator character "
4331 "%c%c%c in attribute list", q, *s, q)
4332 : "Unterminated attribute list" ) );
4340 start_force(PL_curforce);
4341 NEXTVAL_NEXTTOKE.opval = attrs;
4342 CURMAD('_', PL_nextwhite);
4347 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4348 (s - SvPVX(PL_linestr)) - stuffstart);
4356 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4357 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4365 const char tmp = *s++;
4370 const char tmp = *s++;
4378 if (PL_lex_brackets <= 0)
4379 yyerror("Unmatched right square bracket");
4382 if (PL_lex_state == LEX_INTERPNORMAL) {
4383 if (PL_lex_brackets == 0) {
4384 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4385 PL_lex_state = LEX_INTERPEND;
4392 if (PL_lex_brackets > 100) {
4393 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4395 switch (PL_expect) {
4397 if (PL_lex_formbrack) {
4401 if (PL_oldoldbufptr == PL_last_lop)
4402 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4404 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4405 OPERATOR(HASHBRACK);
4407 while (s < PL_bufend && SPACE_OR_TAB(*s))
4410 PL_tokenbuf[0] = '\0';
4411 if (d < PL_bufend && *d == '-') {
4412 PL_tokenbuf[0] = '-';
4414 while (d < PL_bufend && SPACE_OR_TAB(*d))
4417 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4418 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4420 while (d < PL_bufend && SPACE_OR_TAB(*d))
4423 const char minus = (PL_tokenbuf[0] == '-');
4424 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4432 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4437 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4442 if (PL_oldoldbufptr == PL_last_lop)
4443 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4445 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4448 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4450 /* This hack is to get the ${} in the message. */
4452 yyerror("syntax error");
4455 OPERATOR(HASHBRACK);
4457 /* This hack serves to disambiguate a pair of curlies
4458 * as being a block or an anon hash. Normally, expectation
4459 * determines that, but in cases where we're not in a
4460 * position to expect anything in particular (like inside
4461 * eval"") we have to resolve the ambiguity. This code
4462 * covers the case where the first term in the curlies is a
4463 * quoted string. Most other cases need to be explicitly
4464 * disambiguated by prepending a "+" before the opening
4465 * curly in order to force resolution as an anon hash.
4467 * XXX should probably propagate the outer expectation
4468 * into eval"" to rely less on this hack, but that could
4469 * potentially break current behavior of eval"".
4473 if (*s == '\'' || *s == '"' || *s == '`') {
4474 /* common case: get past first string, handling escapes */
4475 for (t++; t < PL_bufend && *t != *s;)
4476 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4480 else if (*s == 'q') {
4483 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4486 /* skip q//-like construct */
4488 char open, close, term;
4491 while (t < PL_bufend && isSPACE(*t))
4493 /* check for q => */
4494 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4495 OPERATOR(HASHBRACK);
4499 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4503 for (t++; t < PL_bufend; t++) {
4504 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4506 else if (*t == open)
4510 for (t++; t < PL_bufend; t++) {
4511 if (*t == '\\' && t+1 < PL_bufend)
4513 else if (*t == close && --brackets <= 0)
4515 else if (*t == open)
4522 /* skip plain q word */
4523 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4526 else if (isALNUM_lazy_if(t,UTF)) {
4528 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4531 while (t < PL_bufend && isSPACE(*t))
4533 /* if comma follows first term, call it an anon hash */
4534 /* XXX it could be a comma expression with loop modifiers */
4535 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4536 || (*t == '=' && t[1] == '>')))
4537 OPERATOR(HASHBRACK);
4538 if (PL_expect == XREF)
4541 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4547 yylval.ival = CopLINE(PL_curcop);
4548 if (isSPACE(*s) || *s == '#')
4549 PL_copline = NOLINE; /* invalidate current command line number */
4554 if (PL_lex_brackets <= 0)
4555 yyerror("Unmatched right curly bracket");
4557 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4558 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4559 PL_lex_formbrack = 0;
4560 if (PL_lex_state == LEX_INTERPNORMAL) {
4561 if (PL_lex_brackets == 0) {
4562 if (PL_expect & XFAKEBRACK) {
4563 PL_expect &= XENUMMASK;
4564 PL_lex_state = LEX_INTERPEND;
4569 PL_thiswhite = newSVpvs("");
4570 sv_catpvn(PL_thiswhite,"}",1);
4573 return yylex(); /* ignore fake brackets */
4575 if (*s == '-' && s[1] == '>')
4576 PL_lex_state = LEX_INTERPENDMAYBE;
4577 else if (*s != '[' && *s != '{')
4578 PL_lex_state = LEX_INTERPEND;
4581 if (PL_expect & XFAKEBRACK) {
4582 PL_expect &= XENUMMASK;
4584 return yylex(); /* ignore fake brackets */
4586 start_force(PL_curforce);
4588 curmad('X', newSVpvn(s-1,1));
4589 CURMAD('_', PL_thiswhite);
4594 PL_thistoken = newSVpvs("");
4602 if (PL_expect == XOPERATOR) {
4603 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4604 && isIDFIRST_lazy_if(s,UTF))
4606 CopLINE_dec(PL_curcop);
4607 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4608 CopLINE_inc(PL_curcop);
4613 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4615 PL_expect = XOPERATOR;
4616 force_ident(PL_tokenbuf, '&');
4620 yylval.ival = (OPpENTERSUB_AMPER<<8);
4632 const char tmp = *s++;
4639 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4640 && strchr("+-*/%.^&|<",tmp))
4641 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4642 "Reversed %c= operator",(int)tmp);
4644 if (PL_expect == XSTATE && isALPHA(tmp) &&
4645 (s == PL_linestart+1 || s[-2] == '\n') )
4647 if (PL_in_eval && !PL_rsfp) {
4652 if (strnEQ(s,"=cut",4)) {
4668 PL_thiswhite = newSVpvs("");
4669 sv_catpvn(PL_thiswhite, PL_linestart,
4670 PL_bufend - PL_linestart);
4674 PL_doextract = TRUE;
4678 if (PL_lex_brackets < PL_lex_formbrack) {
4680 #ifdef PERL_STRICT_CR
4681 while (SPACE_OR_TAB(*t))
4683 while (SPACE_OR_TAB(*t) || *t == '\r')
4686 if (*t == '\n' || *t == '#') {
4697 const char tmp = *s++;
4699 /* was this !=~ where !~ was meant?
4700 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4702 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4703 const char *t = s+1;
4705 while (t < PL_bufend && isSPACE(*t))
4708 if (*t == '/' || *t == '?' ||
4709 ((*t == 'm' || *t == 's' || *t == 'y')
4710 && !isALNUM(t[1])) ||
4711 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4712 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4713 "!=~ should be !~");
4723 if (PL_expect != XOPERATOR) {
4724 if (s[1] != '<' && !strchr(s,'>'))
4727 s = scan_heredoc(s);
4729 s = scan_inputsymbol(s);
4730 TERM(sublex_start());
4736 SHop(OP_LEFT_SHIFT);
4750 const char tmp = *s++;
4752 SHop(OP_RIGHT_SHIFT);
4753 else if (tmp == '=')
4762 if (PL_expect == XOPERATOR) {
4763 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4765 deprecate_old(commaless_variable_list);
4766 return REPORT(','); /* grandfather non-comma-format format */
4770 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4771 PL_tokenbuf[0] = '@';
4772 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4773 sizeof PL_tokenbuf - 1, FALSE);
4774 if (PL_expect == XOPERATOR)
4775 no_op("Array length", s);
4776 if (!PL_tokenbuf[1])
4778 PL_expect = XOPERATOR;
4779 PL_pending_ident = '#';
4783 PL_tokenbuf[0] = '$';
4784 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4785 sizeof PL_tokenbuf - 1, FALSE);
4786 if (PL_expect == XOPERATOR)
4788 if (!PL_tokenbuf[1]) {
4790 yyerror("Final $ should be \\$ or $name");
4794 /* This kludge not intended to be bulletproof. */
4795 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4796 yylval.opval = newSVOP(OP_CONST, 0,
4797 newSViv(CopARYBASE_get(&PL_compiling)));
4798 yylval.opval->op_private = OPpCONST_ARYBASE;
4804 const char tmp = *s;
4805 if (PL_lex_state == LEX_NORMAL)
4808 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4809 && intuit_more(s)) {
4811 PL_tokenbuf[0] = '@';
4812 if (ckWARN(WARN_SYNTAX)) {
4815 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4818 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4819 while (t < PL_bufend && *t != ']')
4821 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4822 "Multidimensional syntax %.*s not supported",
4823 (int)((t - PL_bufptr) + 1), PL_bufptr);
4827 else if (*s == '{') {
4829 PL_tokenbuf[0] = '%';
4830 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4831 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4833 char tmpbuf[sizeof PL_tokenbuf];
4836 } while (isSPACE(*t));
4837 if (isIDFIRST_lazy_if(t,UTF)) {
4839 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4843 if (*t == ';' && get_cv(tmpbuf, FALSE))
4844 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4845 "You need to quote \"%s\"",
4852 PL_expect = XOPERATOR;
4853 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4854 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4855 if (!islop || PL_last_lop_op == OP_GREPSTART)
4856 PL_expect = XOPERATOR;
4857 else if (strchr("$@\"'`q", *s))
4858 PL_expect = XTERM; /* e.g. print $fh "foo" */
4859 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4860 PL_expect = XTERM; /* e.g. print $fh &sub */
4861 else if (isIDFIRST_lazy_if(s,UTF)) {
4862 char tmpbuf[sizeof PL_tokenbuf];
4864 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4865 if ((t2 = keyword(tmpbuf, len, 0))) {
4866 /* binary operators exclude handle interpretations */
4878 PL_expect = XTERM; /* e.g. print $fh length() */
4883 PL_expect = XTERM; /* e.g. print $fh subr() */
4886 else if (isDIGIT(*s))
4887 PL_expect = XTERM; /* e.g. print $fh 3 */
4888 else if (*s == '.' && isDIGIT(s[1]))
4889 PL_expect = XTERM; /* e.g. print $fh .3 */
4890 else if ((*s == '?' || *s == '-' || *s == '+')
4891 && !isSPACE(s[1]) && s[1] != '=')
4892 PL_expect = XTERM; /* e.g. print $fh -1 */
4893 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4895 PL_expect = XTERM; /* e.g. print $fh /.../
4896 XXX except DORDOR operator
4898 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4900 PL_expect = XTERM; /* print $fh <<"EOF" */
4903 PL_pending_ident = '$';
4907 if (PL_expect == XOPERATOR)
4909 PL_tokenbuf[0] = '@';
4910 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4911 if (!PL_tokenbuf[1]) {
4914 if (PL_lex_state == LEX_NORMAL)
4916 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4918 PL_tokenbuf[0] = '%';
4920 /* Warn about @ where they meant $. */
4921 if (*s == '[' || *s == '{') {
4922 if (ckWARN(WARN_SYNTAX)) {
4923 const char *t = s + 1;
4924 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4926 if (*t == '}' || *t == ']') {
4928 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4929 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4930 "Scalar value %.*s better written as $%.*s",
4931 (int)(t-PL_bufptr), PL_bufptr,
4932 (int)(t-PL_bufptr-1), PL_bufptr+1);
4937 PL_pending_ident = '@';
4940 case '/': /* may be division, defined-or, or pattern */
4941 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4945 case '?': /* may either be conditional or pattern */
4946 if(PL_expect == XOPERATOR) {
4954 /* A // operator. */
4964 /* Disable warning on "study /blah/" */
4965 if (PL_oldoldbufptr == PL_last_uni
4966 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4967 || memNE(PL_last_uni, "study", 5)
4968 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4971 s = scan_pat(s,OP_MATCH);
4972 TERM(sublex_start());
4976 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4977 #ifdef PERL_STRICT_CR
4980 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4982 && (s == PL_linestart || s[-1] == '\n') )
4984 PL_lex_formbrack = 0;
4988 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4994 yylval.ival = OPf_SPECIAL;
5000 if (PL_expect != XOPERATOR)
5005 case '0': case '1': case '2': case '3': case '4':
5006 case '5': case '6': case '7': case '8': case '9':
5007 s = scan_num(s, &yylval);
5008 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5009 if (PL_expect == XOPERATOR)
5014 s = scan_str(s,!!PL_madskills,FALSE);
5015 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5016 if (PL_expect == XOPERATOR) {
5017 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5019 deprecate_old(commaless_variable_list);
5020 return REPORT(','); /* grandfather non-comma-format format */
5027 yylval.ival = OP_CONST;
5028 TERM(sublex_start());
5031 s = scan_str(s,!!PL_madskills,FALSE);
5032 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5033 if (PL_expect == XOPERATOR) {
5034 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5036 deprecate_old(commaless_variable_list);
5037 return REPORT(','); /* grandfather non-comma-format format */
5044 yylval.ival = OP_CONST;
5045 /* FIXME. I think that this can be const if char *d is replaced by
5046 more localised variables. */
5047 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5048 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5049 yylval.ival = OP_STRINGIFY;
5053 TERM(sublex_start());
5056 s = scan_str(s,!!PL_madskills,FALSE);
5057 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5058 if (PL_expect == XOPERATOR)
5059 no_op("Backticks",s);
5062 readpipe_override();
5063 TERM(sublex_start());
5067 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5068 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5070 if (PL_expect == XOPERATOR)
5071 no_op("Backslash",s);
5075 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5076 char *start = s + 2;
5077 while (isDIGIT(*start) || *start == '_')
5079 if (*start == '.' && isDIGIT(start[1])) {
5080 s = scan_num(s, &yylval);
5083 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5084 else if (!isALPHA(*start) && (PL_expect == XTERM
5085 || PL_expect == XREF || PL_expect == XSTATE
5086 || PL_expect == XTERMORDORDOR)) {
5087 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5088 const char c = *start;
5091 gv = gv_fetchpv(s, 0, SVt_PVCV);
5094 s = scan_num(s, &yylval);
5101 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5143 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5145 /* Some keywords can be followed by any delimiter, including ':' */
5146 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5147 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5148 (PL_tokenbuf[0] == 'q' &&
5149 strchr("qwxr", PL_tokenbuf[1])))));
5151 /* x::* is just a word, unless x is "CORE" */
5152 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5156 while (d < PL_bufend && isSPACE(*d))
5157 d++; /* no comments skipped here, or s### is misparsed */
5159 /* Is this a label? */
5160 if (!tmp && PL_expect == XSTATE
5161 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5163 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5168 /* Check for keywords */
5169 tmp = keyword(PL_tokenbuf, len, 0);
5171 /* Is this a word before a => operator? */
5172 if (*d == '=' && d[1] == '>') {
5175 = (OP*)newSVOP(OP_CONST, 0,
5176 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5177 yylval.opval->op_private = OPpCONST_BARE;
5181 if (tmp < 0) { /* second-class keyword? */
5182 GV *ogv = NULL; /* override (winner) */
5183 GV *hgv = NULL; /* hidden (loser) */
5184 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5186 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5189 if (GvIMPORTED_CV(gv))
5191 else if (! CvMETHOD(cv))
5195 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5196 (gv = *gvp) != (GV*)&PL_sv_undef &&
5197 GvCVu(gv) && GvIMPORTED_CV(gv))
5204 tmp = 0; /* overridden by import or by GLOBAL */
5207 && -tmp==KEY_lock /* XXX generalizable kludge */
5209 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5211 tmp = 0; /* any sub overrides "weak" keyword */
5213 else { /* no override */
5215 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5216 Perl_warner(aTHX_ packWARN(WARN_MISC),
5217 "dump() better written as CORE::dump()");
5221 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5222 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5223 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5224 "Ambiguous call resolved as CORE::%s(), %s",
5225 GvENAME(hgv), "qualify as such or use &");
5232 default: /* not a keyword */
5233 /* Trade off - by using this evil construction we can pull the
5234 variable gv into the block labelled keylookup. If not, then
5235 we have to give it function scope so that the goto from the
5236 earlier ':' case doesn't bypass the initialisation. */
5238 just_a_word_zero_gv:
5246 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5249 SV *nextPL_nextwhite = 0;
5253 /* Get the rest if it looks like a package qualifier */
5255 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5257 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5260 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5261 *s == '\'' ? "'" : "::");
5266 if (PL_expect == XOPERATOR) {
5267 if (PL_bufptr == PL_linestart) {
5268 CopLINE_dec(PL_curcop);
5269 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5270 CopLINE_inc(PL_curcop);
5273 no_op("Bareword",s);
5276 /* Look for a subroutine with this name in current package,
5277 unless name is "Foo::", in which case Foo is a bearword
5278 (and a package name). */
5280 if (len > 2 && !PL_madskills &&
5281 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5283 if (ckWARN(WARN_BAREWORD)
5284 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5285 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5286 "Bareword \"%s\" refers to nonexistent package",
5289 PL_tokenbuf[len] = '\0';
5295 /* Mustn't actually add anything to a symbol table.
5296 But also don't want to "initialise" any placeholder
5297 constants that might already be there into full
5298 blown PVGVs with attached PVCV. */
5299 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5300 GV_NOADD_NOINIT, SVt_PVCV);
5305 /* if we saw a global override before, get the right name */
5308 sv = newSVpvs("CORE::GLOBAL::");
5309 sv_catpv(sv,PL_tokenbuf);
5312 /* If len is 0, newSVpv does strlen(), which is correct.
5313 If len is non-zero, then it will be the true length,
5314 and so the scalar will be created correctly. */
5315 sv = newSVpv(PL_tokenbuf,len);
5318 if (PL_madskills && !PL_thistoken) {
5319 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5320 PL_thistoken = newSVpv(start,s - start);
5321 PL_realtokenstart = s - SvPVX(PL_linestr);
5325 /* Presume this is going to be a bareword of some sort. */
5328 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5329 yylval.opval->op_private = OPpCONST_BARE;
5330 /* UTF-8 package name? */
5331 if (UTF && !IN_BYTES &&
5332 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5335 /* And if "Foo::", then that's what it certainly is. */
5340 /* Do the explicit type check so that we don't need to force
5341 the initialisation of the symbol table to have a real GV.
5342 Beware - gv may not really be a PVGV, cv may not really be
5343 a PVCV, (because of the space optimisations that gv_init
5344 understands) But they're true if for this symbol there is
5345 respectively a typeglob and a subroutine.
5347 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5348 /* Real typeglob, so get the real subroutine: */
5350 /* A proxy for a subroutine in this package? */
5351 : SvOK(gv) ? (CV *) gv : NULL)
5354 /* See if it's the indirect object for a list operator. */
5356 if (PL_oldoldbufptr &&
5357 PL_oldoldbufptr < PL_bufptr &&
5358 (PL_oldoldbufptr == PL_last_lop
5359 || PL_oldoldbufptr == PL_last_uni) &&
5360 /* NO SKIPSPACE BEFORE HERE! */
5361 (PL_expect == XREF ||
5362 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5364 bool immediate_paren = *s == '(';
5366 /* (Now we can afford to cross potential line boundary.) */
5367 s = SKIPSPACE2(s,nextPL_nextwhite);
5369 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5372 /* Two barewords in a row may indicate method call. */
5374 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5375 (tmp = intuit_method(s, gv, cv)))
5378 /* If not a declared subroutine, it's an indirect object. */
5379 /* (But it's an indir obj regardless for sort.) */
5380 /* Also, if "_" follows a filetest operator, it's a bareword */
5383 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5385 (PL_last_lop_op != OP_MAPSTART &&
5386 PL_last_lop_op != OP_GREPSTART))))
5387 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5388 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5391 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5396 PL_expect = XOPERATOR;
5399 s = SKIPSPACE2(s,nextPL_nextwhite);
5400 PL_nextwhite = nextPL_nextwhite;
5405 /* Is this a word before a => operator? */
5406 if (*s == '=' && s[1] == '>' && !pkgname) {
5408 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5409 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5410 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5414 /* If followed by a paren, it's certainly a subroutine. */
5419 while (SPACE_OR_TAB(*d))
5421 if (*d == ')' && (sv = gv_const_sv(gv))) {
5425 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5426 sv_catpvn(PL_thistoken, par, s - par);
5428 sv_free(PL_nextwhite);
5438 PL_nextwhite = PL_thiswhite;
5441 start_force(PL_curforce);
5443 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5444 PL_expect = XOPERATOR;
5447 PL_nextwhite = nextPL_nextwhite;
5448 curmad('X', PL_thistoken);
5449 PL_thistoken = newSVpvs("");
5457 /* If followed by var or block, call it a method (unless sub) */
5459 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5460 PL_last_lop = PL_oldbufptr;
5461 PL_last_lop_op = OP_METHOD;
5465 /* If followed by a bareword, see if it looks like indir obj. */
5468 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5469 && (tmp = intuit_method(s, gv, cv)))
5472 /* Not a method, so call it a subroutine (if defined) */
5475 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5476 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5477 "Ambiguous use of -%s resolved as -&%s()",
5478 PL_tokenbuf, PL_tokenbuf);
5479 /* Check for a constant sub */
5480 if ((sv = gv_const_sv(gv))) {
5482 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5483 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5484 yylval.opval->op_private = 0;
5488 /* Resolve to GV now. */
5489 if (SvTYPE(gv) != SVt_PVGV) {
5490 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5491 assert (SvTYPE(gv) == SVt_PVGV);
5492 /* cv must have been some sort of placeholder, so
5493 now needs replacing with a real code reference. */
5497 op_free(yylval.opval);
5498 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5499 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5500 PL_last_lop = PL_oldbufptr;
5501 PL_last_lop_op = OP_ENTERSUB;
5502 /* Is there a prototype? */
5510 const char *proto = SvPV_const((SV*)cv, protolen);
5513 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5515 while (*proto == ';')
5517 if (*proto == '&' && *s == '{') {
5518 sv_setpv(PL_subname,
5521 "__ANON__" : "__ANON__::__ANON__"));
5528 PL_nextwhite = PL_thiswhite;
5531 start_force(PL_curforce);
5532 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5535 PL_nextwhite = nextPL_nextwhite;
5536 curmad('X', PL_thistoken);
5537 PL_thistoken = newSVpvs("");
5544 /* Guess harder when madskills require "best effort". */
5545 if (PL_madskills && (!gv || !GvCVu(gv))) {
5546 int probable_sub = 0;
5547 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5549 else if (isALPHA(*s)) {
5553 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5554 if (!keyword(tmpbuf, tmplen, 0))
5557 while (d < PL_bufend && isSPACE(*d))
5559 if (*d == '=' && d[1] == '>')
5564 gv = gv_fetchpv(PL_tokenbuf, TRUE, SVt_PVCV);
5565 op_free(yylval.opval);
5566 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5567 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5568 PL_last_lop = PL_oldbufptr;
5569 PL_last_lop_op = OP_ENTERSUB;
5570 PL_nextwhite = PL_thiswhite;
5572 start_force(PL_curforce);
5573 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5575 PL_nextwhite = nextPL_nextwhite;
5576 curmad('X', PL_thistoken);
5577 PL_thistoken = newSVpvs("");
5582 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5589 /* Call it a bare word */
5591 if (PL_hints & HINT_STRICT_SUBS)
5592 yylval.opval->op_private |= OPpCONST_STRICT;
5595 if (lastchar != '-') {
5596 if (ckWARN(WARN_RESERVED)) {
5600 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
5601 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5608 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5609 && ckWARN_d(WARN_AMBIGUOUS)) {
5610 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5611 "Operator or semicolon missing before %c%s",
5612 lastchar, PL_tokenbuf);
5613 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5614 "Ambiguous use of %c resolved as operator %c",
5615 lastchar, lastchar);
5621 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5622 newSVpv(CopFILE(PL_curcop),0));
5626 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5627 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5630 case KEY___PACKAGE__:
5631 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5633 ? newSVhek(HvNAME_HEK(PL_curstash))
5640 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5641 const char *pname = "main";
5642 if (PL_tokenbuf[2] == 'D')
5643 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5644 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5648 GvIOp(gv) = newIO();
5649 IoIFP(GvIOp(gv)) = PL_rsfp;
5650 #if defined(HAS_FCNTL) && defined(F_SETFD)
5652 const int fd = PerlIO_fileno(PL_rsfp);
5653 fcntl(fd,F_SETFD,fd >= 3);
5656 /* Mark this internal pseudo-handle as clean */
5657 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5659 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5660 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5661 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5663 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5664 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5665 /* if the script was opened in binmode, we need to revert
5666 * it to text mode for compatibility; but only iff it has CRs
5667 * XXX this is a questionable hack at best. */
5668 if (PL_bufend-PL_bufptr > 2
5669 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5672 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5673 loc = PerlIO_tell(PL_rsfp);
5674 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5677 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5679 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5680 #endif /* NETWARE */
5681 #ifdef PERLIO_IS_STDIO /* really? */
5682 # if defined(__BORLANDC__)
5683 /* XXX see note in do_binmode() */
5684 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5688 PerlIO_seek(PL_rsfp, loc, 0);
5692 #ifdef PERLIO_LAYERS
5695 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5696 else if (PL_encoding) {
5703 XPUSHs(PL_encoding);
5705 call_method("name", G_SCALAR);
5709 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5710 Perl_form(aTHX_ ":encoding(%"SVf")",
5719 if (PL_realtokenstart >= 0) {
5720 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5722 PL_endwhite = newSVpvs("");
5723 sv_catsv(PL_endwhite, PL_thiswhite);
5725 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5726 PL_realtokenstart = -1;
5728 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5729 SvCUR(PL_endwhite))) != Nullch) ;
5744 if (PL_expect == XSTATE) {
5751 if (*s == ':' && s[1] == ':') {
5754 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5755 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5756 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5759 else if (tmp == KEY_require || tmp == KEY_do)
5760 /* that's a way to remember we saw "CORE::" */
5773 LOP(OP_ACCEPT,XTERM);
5779 LOP(OP_ATAN2,XTERM);
5785 LOP(OP_BINMODE,XTERM);
5788 LOP(OP_BLESS,XTERM);
5797 /* When 'use switch' is in effect, continue has a dual
5798 life as a control operator. */
5800 if (!FEATURE_IS_ENABLED("switch"))
5803 /* We have to disambiguate the two senses of
5804 "continue". If the next token is a '{' then
5805 treat it as the start of a continue block;
5806 otherwise treat it as a control operator.
5818 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5835 if (!PL_cryptseen) {
5836 PL_cryptseen = TRUE;
5840 LOP(OP_CRYPT,XTERM);
5843 LOP(OP_CHMOD,XTERM);
5846 LOP(OP_CHOWN,XTERM);
5849 LOP(OP_CONNECT,XTERM);
5868 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5869 if (orig_keyword == KEY_do) {
5878 PL_hints |= HINT_BLOCK_SCOPE;
5888 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5889 LOP(OP_DBMOPEN,XTERM);
5895 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5902 yylval.ival = CopLINE(PL_curcop);
5918 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5919 UNIBRACK(OP_ENTEREVAL);
5937 case KEY_endhostent:
5943 case KEY_endservent:
5946 case KEY_endprotoent:
5957 yylval.ival = CopLINE(PL_curcop);
5959 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5962 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5965 if ((PL_bufend - p) >= 3 &&
5966 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5968 else if ((PL_bufend - p) >= 4 &&
5969 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5972 if (isIDFIRST_lazy_if(p,UTF)) {
5973 p = scan_ident(p, PL_bufend,
5974 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5978 Perl_croak(aTHX_ "Missing $ on loop variable");
5980 s = SvPVX(PL_linestr) + soff;
5986 LOP(OP_FORMLINE,XTERM);
5992 LOP(OP_FCNTL,XTERM);
5998 LOP(OP_FLOCK,XTERM);
6007 LOP(OP_GREPSTART, XREF);
6010 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6025 case KEY_getpriority:
6026 LOP(OP_GETPRIORITY,XTERM);
6028 case KEY_getprotobyname:
6031 case KEY_getprotobynumber:
6032 LOP(OP_GPBYNUMBER,XTERM);
6034 case KEY_getprotoent:
6046 case KEY_getpeername:
6047 UNI(OP_GETPEERNAME);
6049 case KEY_gethostbyname:
6052 case KEY_gethostbyaddr:
6053 LOP(OP_GHBYADDR,XTERM);
6055 case KEY_gethostent:
6058 case KEY_getnetbyname:
6061 case KEY_getnetbyaddr:
6062 LOP(OP_GNBYADDR,XTERM);
6067 case KEY_getservbyname:
6068 LOP(OP_GSBYNAME,XTERM);
6070 case KEY_getservbyport:
6071 LOP(OP_GSBYPORT,XTERM);
6073 case KEY_getservent:
6076 case KEY_getsockname:
6077 UNI(OP_GETSOCKNAME);
6079 case KEY_getsockopt:
6080 LOP(OP_GSOCKOPT,XTERM);
6095 yylval.ival = CopLINE(PL_curcop);
6106 yylval.ival = CopLINE(PL_curcop);
6110 LOP(OP_INDEX,XTERM);
6116 LOP(OP_IOCTL,XTERM);
6128 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6160 LOP(OP_LISTEN,XTERM);
6169 s = scan_pat(s,OP_MATCH);
6170 TERM(sublex_start());
6173 LOP(OP_MAPSTART, XREF);
6176 LOP(OP_MKDIR,XTERM);
6179 LOP(OP_MSGCTL,XTERM);
6182 LOP(OP_MSGGET,XTERM);
6185 LOP(OP_MSGRCV,XTERM);
6188 LOP(OP_MSGSND,XTERM);
6195 if (isIDFIRST_lazy_if(s,UTF)) {
6199 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6200 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6202 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6203 if (!PL_in_my_stash) {
6206 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6210 if (PL_madskills) { /* just add type to declarator token */
6211 sv_catsv(PL_thistoken, PL_nextwhite);
6213 sv_catpvn(PL_thistoken, start, s - start);
6221 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6228 s = tokenize_use(0, s);
6232 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6239 if (isIDFIRST_lazy_if(s,UTF)) {
6241 for (d = s; isALNUM_lazy_if(d,UTF);)
6243 for (t=d; isSPACE(*t);)
6245 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6247 && !(t[0] == '=' && t[1] == '>')
6249 int parms_len = (int)(d-s);
6250 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6251 "Precedence problem: open %.*s should be open(%.*s)",
6252 parms_len, s, parms_len, s);
6258 yylval.ival = OP_OR;
6268 LOP(OP_OPEN_DIR,XTERM);
6271 checkcomma(s,PL_tokenbuf,"filehandle");
6275 checkcomma(s,PL_tokenbuf,"filehandle");
6294 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6298 LOP(OP_PIPE_OP,XTERM);
6301 s = scan_str(s,!!PL_madskills,FALSE);
6304 yylval.ival = OP_CONST;
6305 TERM(sublex_start());
6311 s = scan_str(s,!!PL_madskills,FALSE);
6314 PL_expect = XOPERATOR;
6316 if (SvCUR(PL_lex_stuff)) {
6319 d = SvPV_force(PL_lex_stuff, len);
6321 for (; isSPACE(*d) && len; --len, ++d)
6326 if (!warned && ckWARN(WARN_QW)) {
6327 for (; !isSPACE(*d) && len; --len, ++d) {
6329 Perl_warner(aTHX_ packWARN(WARN_QW),
6330 "Possible attempt to separate words with commas");
6333 else if (*d == '#') {
6334 Perl_warner(aTHX_ packWARN(WARN_QW),
6335 "Possible attempt to put comments in qw() list");
6341 for (; !isSPACE(*d) && len; --len, ++d)
6344 sv = newSVpvn(b, d-b);
6345 if (DO_UTF8(PL_lex_stuff))
6347 words = append_elem(OP_LIST, words,
6348 newSVOP(OP_CONST, 0, tokeq(sv)));
6352 start_force(PL_curforce);
6353 NEXTVAL_NEXTTOKE.opval = words;
6358 SvREFCNT_dec(PL_lex_stuff);
6359 PL_lex_stuff = NULL;
6365 s = scan_str(s,!!PL_madskills,FALSE);
6368 yylval.ival = OP_STRINGIFY;
6369 if (SvIVX(PL_lex_stuff) == '\'')
6370 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6371 TERM(sublex_start());
6374 s = scan_pat(s,OP_QR);
6375 TERM(sublex_start());
6378 s = scan_str(s,!!PL_madskills,FALSE);
6381 readpipe_override();
6382 TERM(sublex_start());
6390 s = force_version(s, FALSE);
6392 else if (*s != 'v' || !isDIGIT(s[1])
6393 || (s = force_version(s, TRUE), *s == 'v'))
6395 *PL_tokenbuf = '\0';
6396 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6397 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6398 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
6400 yyerror("<> should be quotes");
6402 if (orig_keyword == KEY_require) {
6410 PL_last_uni = PL_oldbufptr;
6411 PL_last_lop_op = OP_REQUIRE;
6413 return REPORT( (int)REQUIRE );
6419 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6423 LOP(OP_RENAME,XTERM);
6432 LOP(OP_RINDEX,XTERM);
6442 UNIDOR(OP_READLINE);
6455 LOP(OP_REVERSE,XTERM);
6458 UNIDOR(OP_READLINK);
6466 TERM(sublex_start());
6468 TOKEN(1); /* force error */
6471 checkcomma(s,PL_tokenbuf,"filehandle");
6481 LOP(OP_SELECT,XTERM);
6487 LOP(OP_SEMCTL,XTERM);
6490 LOP(OP_SEMGET,XTERM);
6493 LOP(OP_SEMOP,XTERM);
6499 LOP(OP_SETPGRP,XTERM);
6501 case KEY_setpriority:
6502 LOP(OP_SETPRIORITY,XTERM);
6504 case KEY_sethostent:
6510 case KEY_setservent:
6513 case KEY_setprotoent:
6523 LOP(OP_SEEKDIR,XTERM);
6525 case KEY_setsockopt:
6526 LOP(OP_SSOCKOPT,XTERM);
6532 LOP(OP_SHMCTL,XTERM);
6535 LOP(OP_SHMGET,XTERM);
6538 LOP(OP_SHMREAD,XTERM);
6541 LOP(OP_SHMWRITE,XTERM);
6544 LOP(OP_SHUTDOWN,XTERM);
6553 LOP(OP_SOCKET,XTERM);
6555 case KEY_socketpair:
6556 LOP(OP_SOCKPAIR,XTERM);
6559 checkcomma(s,PL_tokenbuf,"subroutine name");
6561 if (*s == ';' || *s == ')') /* probably a close */
6562 Perl_croak(aTHX_ "sort is now a reserved word");
6564 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6568 LOP(OP_SPLIT,XTERM);
6571 LOP(OP_SPRINTF,XTERM);
6574 LOP(OP_SPLICE,XTERM);
6589 LOP(OP_SUBSTR,XTERM);
6595 char tmpbuf[sizeof PL_tokenbuf];
6596 SSize_t tboffset = 0;
6597 expectation attrful;
6598 bool have_name, have_proto;
6599 const int key = tmp;
6604 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6605 SV *subtoken = newSVpvn(tstart, s - tstart);
6609 s = SKIPSPACE2(s,tmpwhite);
6614 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6615 (*s == ':' && s[1] == ':'))
6622 attrful = XATTRBLOCK;
6623 /* remember buffer pos'n for later force_word */
6624 tboffset = s - PL_oldbufptr;
6625 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6628 nametoke = newSVpvn(s, d - s);
6630 if (strchr(tmpbuf, ':'))
6631 sv_setpv(PL_subname, tmpbuf);
6633 sv_setsv(PL_subname,PL_curstname);
6634 sv_catpvs(PL_subname,"::");
6635 sv_catpvn(PL_subname,tmpbuf,len);
6642 CURMAD('X', nametoke);
6643 CURMAD('_', tmpwhite);
6644 (void) force_word(PL_oldbufptr + tboffset, WORD,
6647 s = SKIPSPACE2(d,tmpwhite);
6654 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6655 PL_expect = XTERMBLOCK;
6656 attrful = XATTRTERM;
6657 sv_setpvn(PL_subname,"?",1);
6661 if (key == KEY_format) {
6663 PL_lex_formbrack = PL_lex_brackets + 1;
6665 PL_thistoken = subtoken;
6669 (void) force_word(PL_oldbufptr + tboffset, WORD,
6675 /* Look for a prototype */
6678 bool bad_proto = FALSE;
6679 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6681 s = scan_str(s,!!PL_madskills,FALSE);
6683 Perl_croak(aTHX_ "Prototype not terminated");
6684 /* strip spaces and check for bad characters */
6685 d = SvPVX(PL_lex_stuff);
6687 for (p = d; *p; ++p) {
6690 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6696 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6697 "Illegal character in prototype for %"SVf" : %s",
6698 (void*)PL_subname, d);
6699 SvCUR_set(PL_lex_stuff, tmp);
6704 CURMAD('q', PL_thisopen);
6705 CURMAD('_', tmpwhite);
6706 CURMAD('=', PL_thisstuff);
6707 CURMAD('Q', PL_thisclose);
6708 NEXTVAL_NEXTTOKE.opval =
6709 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6710 PL_lex_stuff = Nullsv;
6713 s = SKIPSPACE2(s,tmpwhite);
6721 if (*s == ':' && s[1] != ':')
6722 PL_expect = attrful;
6723 else if (*s != '{' && key == KEY_sub) {
6725 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6727 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, (void*)PL_subname);
6734 curmad('^', newSVpvs(""));
6735 CURMAD('_', tmpwhite);
6739 PL_thistoken = subtoken;
6742 NEXTVAL_NEXTTOKE.opval =
6743 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6744 PL_lex_stuff = NULL;
6749 sv_setpv(PL_subname,
6751 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6755 (void) force_word(PL_oldbufptr + tboffset, WORD,
6765 LOP(OP_SYSTEM,XREF);
6768 LOP(OP_SYMLINK,XTERM);
6771 LOP(OP_SYSCALL,XTERM);
6774 LOP(OP_SYSOPEN,XTERM);
6777 LOP(OP_SYSSEEK,XTERM);
6780 LOP(OP_SYSREAD,XTERM);
6783 LOP(OP_SYSWRITE,XTERM);
6787 TERM(sublex_start());
6808 LOP(OP_TRUNCATE,XTERM);
6820 yylval.ival = CopLINE(PL_curcop);
6824 yylval.ival = CopLINE(PL_curcop);
6828 LOP(OP_UNLINK,XTERM);
6834 LOP(OP_UNPACK,XTERM);
6837 LOP(OP_UTIME,XTERM);
6843 LOP(OP_UNSHIFT,XTERM);
6846 s = tokenize_use(1, s);
6856 yylval.ival = CopLINE(PL_curcop);
6860 yylval.ival = CopLINE(PL_curcop);
6864 PL_hints |= HINT_BLOCK_SCOPE;
6871 LOP(OP_WAITPID,XTERM);
6880 ctl_l[0] = toCTRL('L');
6882 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6885 /* Make sure $^L is defined */
6886 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6891 if (PL_expect == XOPERATOR)
6897 yylval.ival = OP_XOR;
6902 TERM(sublex_start());
6907 #pragma segment Main
6911 S_pending_ident(pTHX)
6916 /* pit holds the identifier we read and pending_ident is reset */
6917 char pit = PL_pending_ident;
6918 PL_pending_ident = 0;
6920 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6921 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6922 "### Pending identifier '%s'\n", PL_tokenbuf); });
6924 /* if we're in a my(), we can't allow dynamics here.
6925 $foo'bar has already been turned into $foo::bar, so
6926 just check for colons.
6928 if it's a legal name, the OP is a PADANY.
6931 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6932 if (strchr(PL_tokenbuf,':'))
6933 yyerror(Perl_form(aTHX_ "No package name allowed for "
6934 "variable %s in \"our\"",
6936 tmp = allocmy(PL_tokenbuf);
6939 if (strchr(PL_tokenbuf,':'))
6940 yyerror(Perl_form(aTHX_ PL_no_myglob,
6941 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6943 yylval.opval = newOP(OP_PADANY, 0);
6944 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6950 build the ops for accesses to a my() variable.
6952 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6953 then used in a comparison. This catches most, but not
6954 all cases. For instance, it catches
6955 sort { my($a); $a <=> $b }
6957 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6958 (although why you'd do that is anyone's guess).
6961 if (!strchr(PL_tokenbuf,':')) {
6963 tmp = pad_findmy(PL_tokenbuf);
6964 if (tmp != NOT_IN_PAD) {
6965 /* might be an "our" variable" */
6966 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6967 /* build ops for a bareword */
6968 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6969 HEK * const stashname = HvNAME_HEK(stash);
6970 SV * const sym = newSVhek(stashname);
6971 sv_catpvs(sym, "::");
6972 sv_catpv(sym, PL_tokenbuf+1);
6973 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6974 yylval.opval->op_private = OPpCONST_ENTERED;
6977 ? (GV_ADDMULTI | GV_ADDINEVAL)
6980 ((PL_tokenbuf[0] == '$') ? SVt_PV
6981 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6986 /* if it's a sort block and they're naming $a or $b */
6987 if (PL_last_lop_op == OP_SORT &&
6988 PL_tokenbuf[0] == '$' &&
6989 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6992 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6993 d < PL_bufend && *d != '\n';
6996 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6997 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7003 yylval.opval = newOP(OP_PADANY, 0);
7004 yylval.opval->op_targ = tmp;
7010 Whine if they've said @foo in a doublequoted string,
7011 and @foo isn't a variable we can find in the symbol
7014 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7015 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
7016 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7017 && ckWARN(WARN_AMBIGUOUS))
7019 /* Downgraded from fatal to warning 20000522 mjd */
7020 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7021 "Possible unintended interpolation of %s in string",
7026 /* build ops for a bareword */
7027 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7028 yylval.opval->op_private = OPpCONST_ENTERED;
7031 /* If the identifier refers to a stash, don't autovivify it.
7032 * Change 24660 had the side effect of causing symbol table
7033 * hashes to always be defined, even if they were freshly
7034 * created and the only reference in the entire program was
7035 * the single statement with the defined %foo::bar:: test.
7036 * It appears that all code in the wild doing this actually
7037 * wants to know whether sub-packages have been loaded, so
7038 * by avoiding auto-vivifying symbol tables, we ensure that
7039 * defined %foo::bar:: continues to be false, and the existing
7040 * tests still give the expected answers, even though what
7041 * they're actually testing has now changed subtly.
7043 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7045 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7046 ((PL_tokenbuf[0] == '$') ? SVt_PV
7047 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7053 * The following code was generated by perl_keyword.pl.
7057 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7062 case 1: /* 5 tokens of length 1 */
7094 case 2: /* 18 tokens of length 2 */
7240 case 3: /* 29 tokens of length 3 */
7244 if (name[1] == 'N' &&
7307 if (name[1] == 'i' &&
7329 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7347 if (name[1] == 'o' &&
7356 if (name[1] == 'e' &&
7365 if (name[1] == 'n' &&
7374 if (name[1] == 'o' &&
7383 if (name[1] == 'a' &&
7392 if (name[1] == 'o' &&
7454 if (name[1] == 'e' &&
7468 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7494 if (name[1] == 'i' &&
7503 if (name[1] == 's' &&
7512 if (name[1] == 'e' &&
7521 if (name[1] == 'o' &&
7533 case 4: /* 41 tokens of length 4 */
7537 if (name[1] == 'O' &&
7547 if (name[1] == 'N' &&
7557 if (name[1] == 'i' &&
7567 if (name[1] == 'h' &&
7577 if (name[1] == 'u' &&
7590 if (name[2] == 'c' &&
7599 if (name[2] == 's' &&
7608 if (name[2] == 'a' &&
7644 if (name[1] == 'o' &&
7657 if (name[2] == 't' &&
7666 if (name[2] == 'o' &&
7675 if (name[2] == 't' &&
7684 if (name[2] == 'e' &&
7697 if (name[1] == 'o' &&
7710 if (name[2] == 'y' &&
7719 if (name[2] == 'l' &&
7735 if (name[2] == 's' &&
7744 if (name[2] == 'n' &&
7753 if (name[2] == 'c' &&
7766 if (name[1] == 'e' &&
7776 if (name[1] == 'p' &&
7789 if (name[2] == 'c' &&
7798 if (name[2] == 'p' &&
7807 if (name[2] == 's' &&
7823 if (name[2] == 'n' &&
7893 if (name[2] == 'r' &&
7902 if (name[2] == 'r' &&
7911 if (name[2] == 'a' &&
7927 if (name[2] == 'l' &&
7989 if (name[2] == 'e' &&
7992 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8005 case 5: /* 39 tokens of length 5 */
8009 if (name[1] == 'E' &&
8020 if (name[1] == 'H' &&
8034 if (name[2] == 'a' &&
8044 if (name[2] == 'a' &&
8061 if (name[2] == 'e' &&
8071 if (name[2] == 'e' &&
8075 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8091 if (name[3] == 'i' &&
8100 if (name[3] == 'o' &&
8136 if (name[2] == 'o' &&
8146 if (name[2] == 'y' &&
8160 if (name[1] == 'l' &&
8174 if (name[2] == 'n' &&
8184 if (name[2] == 'o' &&
8198 if (name[1] == 'i' &&
8203 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8212 if (name[2] == 'd' &&
8222 if (name[2] == 'c' &&
8239 if (name[2] == 'c' &&
8249 if (name[2] == 't' &&
8263 if (name[1] == 'k' &&
8274 if (name[1] == 'r' &&
8288 if (name[2] == 's' &&
8298 if (name[2] == 'd' &&
8315 if (name[2] == 'm' &&
8325 if (name[2] == 'i' &&
8335 if (name[2] == 'e' &&
8345 if (name[2] == 'l' &&
8355 if (name[2] == 'a' &&
8368 if (name[3] == 't' &&
8371 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8377 if (name[3] == 'd' &&
8394 if (name[1] == 'i' &&
8408 if (name[2] == 'a' &&
8421 if (name[3] == 'e' &&
8456 if (name[2] == 'i' &&
8473 if (name[2] == 'i' &&
8483 if (name[2] == 'i' &&
8500 case 6: /* 33 tokens of length 6 */
8504 if (name[1] == 'c' &&
8519 if (name[2] == 'l' &&
8530 if (name[2] == 'r' &&
8545 if (name[1] == 'e' &&
8560 if (name[2] == 's' &&
8565 if(ckWARN_d(WARN_SYNTAX))
8566 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8572 if (name[2] == 'i' &&
8590 if (name[2] == 'l' &&
8601 if (name[2] == 'r' &&
8616 if (name[1] == 'm' &&
8631 if (name[2] == 'n' &&
8642 if (name[2] == 's' &&
8657 if (name[1] == 's' &&
8663 if (name[4] == 't' &&
8672 if (name[4] == 'e' &&
8681 if (name[4] == 'c' &&
8690 if (name[4] == 'n' &&
8706 if (name[1] == 'r' &&
8724 if (name[3] == 'a' &&
8734 if (name[3] == 'u' &&
8748 if (name[2] == 'n' &&
8766 if (name[2] == 'a' &&
8780 if (name[3] == 'e' &&
8793 if (name[4] == 't' &&
8802 if (name[4] == 'e' &&
8824 if (name[4] == 't' &&
8833 if (name[4] == 'e' &&
8849 if (name[2] == 'c' &&
8860 if (name[2] == 'l' &&
8871 if (name[2] == 'b' &&
8882 if (name[2] == 's' &&
8905 if (name[4] == 's' &&
8914 if (name[4] == 'n' &&
8927 if (name[3] == 'a' &&
8944 if (name[1] == 'a' &&
8959 case 7: /* 29 tokens of length 7 */
8963 if (name[1] == 'E' &&
8976 if (name[1] == '_' &&
8989 if (name[1] == 'i' &&
8996 return -KEY_binmode;
9002 if (name[1] == 'o' &&
9009 return -KEY_connect;
9018 if (name[2] == 'm' &&
9024 return -KEY_dbmopen;
9035 if (name[4] == 'u' &&
9039 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9045 if (name[4] == 'n' &&
9066 if (name[1] == 'o' &&
9079 if (name[1] == 'e' &&
9086 if (name[5] == 'r' &&
9089 return -KEY_getpgrp;
9095 if (name[5] == 'i' &&
9098 return -KEY_getppid;
9111 if (name[1] == 'c' &&
9118 return -KEY_lcfirst;
9124 if (name[1] == 'p' &&
9131 return -KEY_opendir;
9137 if (name[1] == 'a' &&
9155 if (name[3] == 'd' &&
9160 return -KEY_readdir;
9166 if (name[3] == 'u' &&
9177 if (name[3] == 'e' &&
9182 return -KEY_reverse;
9201 if (name[3] == 'k' &&
9206 return -KEY_seekdir;
9212 if (name[3] == 'p' &&
9217 return -KEY_setpgrp;
9227 if (name[2] == 'm' &&
9233 return -KEY_shmread;
9239 if (name[2] == 'r' &&
9245 return -KEY_sprintf;
9254 if (name[3] == 'l' &&
9259 return -KEY_symlink;
9268 if (name[4] == 'a' &&
9272 return -KEY_syscall;
9278 if (name[4] == 'p' &&
9282 return -KEY_sysopen;
9288 if (name[4] == 'e' &&
9292 return -KEY_sysread;
9298 if (name[4] == 'e' &&
9302 return -KEY_sysseek;
9320 if (name[1] == 'e' &&
9327 return -KEY_telldir;
9336 if (name[2] == 'f' &&
9342 return -KEY_ucfirst;
9348 if (name[2] == 's' &&
9354 return -KEY_unshift;
9364 if (name[1] == 'a' &&
9371 return -KEY_waitpid;
9380 case 8: /* 26 tokens of length 8 */
9384 if (name[1] == 'U' &&
9392 return KEY_AUTOLOAD;
9403 if (name[3] == 'A' &&
9409 return KEY___DATA__;
9415 if (name[3] == 'I' &&
9421 return -KEY___FILE__;
9427 if (name[3] == 'I' &&
9433 return -KEY___LINE__;
9449 if (name[2] == 'o' &&
9456 return -KEY_closedir;
9462 if (name[2] == 'n' &&
9469 return -KEY_continue;
9479 if (name[1] == 'b' &&
9487 return -KEY_dbmclose;
9493 if (name[1] == 'n' &&
9499 if (name[4] == 'r' &&
9504 return -KEY_endgrent;
9510 if (name[4] == 'w' &&
9515 return -KEY_endpwent;
9528 if (name[1] == 'o' &&
9536 return -KEY_formline;
9542 if (name[1] == 'e' &&
9553 if (name[6] == 'n' &&
9556 return -KEY_getgrent;
9562 if (name[6] == 'i' &&
9565 return -KEY_getgrgid;
9571 if (name[6] == 'a' &&
9574 return -KEY_getgrnam;
9587 if (name[4] == 'o' &&
9592 return -KEY_getlogin;
9603 if (name[6] == 'n' &&
9606 return -KEY_getpwent;
9612 if (name[6] == 'a' &&
9615 return -KEY_getpwnam;
9621 if (name[6] == 'i' &&
9624 return -KEY_getpwuid;
9644 if (name[1] == 'e' &&
9651 if (name[5] == 'i' &&
9658 return -KEY_readline;
9663 return -KEY_readlink;
9674 if (name[5] == 'i' &&
9678 return -KEY_readpipe;
9699 if (name[4] == 'r' &&
9704 return -KEY_setgrent;
9710 if (name[4] == 'w' &&
9715 return -KEY_setpwent;
9731 if (name[3] == 'w' &&
9737 return -KEY_shmwrite;
9743 if (name[3] == 't' &&
9749 return -KEY_shutdown;
9759 if (name[2] == 's' &&
9766 return -KEY_syswrite;
9776 if (name[1] == 'r' &&
9784 return -KEY_truncate;
9793 case 9: /* 9 tokens of length 9 */
9797 if (name[1] == 'N' &&
9806 return KEY_UNITCHECK;
9812 if (name[1] == 'n' &&
9821 return -KEY_endnetent;
9827 if (name[1] == 'e' &&
9836 return -KEY_getnetent;
9842 if (name[1] == 'o' &&
9851 return -KEY_localtime;
9857 if (name[1] == 'r' &&
9866 return KEY_prototype;
9872 if (name[1] == 'u' &&
9881 return -KEY_quotemeta;
9887 if (name[1] == 'e' &&
9896 return -KEY_rewinddir;
9902 if (name[1] == 'e' &&
9911 return -KEY_setnetent;
9917 if (name[1] == 'a' &&
9926 return -KEY_wantarray;
9935 case 10: /* 9 tokens of length 10 */
9939 if (name[1] == 'n' &&
9945 if (name[4] == 'o' &&
9952 return -KEY_endhostent;
9958 if (name[4] == 'e' &&
9965 return -KEY_endservent;
9978 if (name[1] == 'e' &&
9984 if (name[4] == 'o' &&
9991 return -KEY_gethostent;
10000 if (name[5] == 'r' &&
10006 return -KEY_getservent;
10012 if (name[5] == 'c' &&
10018 return -KEY_getsockopt;
10038 if (name[2] == 't')
10043 if (name[4] == 'o' &&
10050 return -KEY_sethostent;
10059 if (name[5] == 'r' &&
10065 return -KEY_setservent;
10071 if (name[5] == 'c' &&
10077 return -KEY_setsockopt;
10094 if (name[2] == 'c' &&
10103 return -KEY_socketpair;
10116 case 11: /* 8 tokens of length 11 */
10120 if (name[1] == '_' &&
10130 { /* __PACKAGE__ */
10131 return -KEY___PACKAGE__;
10137 if (name[1] == 'n' &&
10147 { /* endprotoent */
10148 return -KEY_endprotoent;
10154 if (name[1] == 'e' &&
10163 if (name[5] == 'e' &&
10169 { /* getpeername */
10170 return -KEY_getpeername;
10179 if (name[6] == 'o' &&
10184 { /* getpriority */
10185 return -KEY_getpriority;
10191 if (name[6] == 't' &&
10196 { /* getprotoent */
10197 return -KEY_getprotoent;
10211 if (name[4] == 'o' &&
10218 { /* getsockname */
10219 return -KEY_getsockname;
10232 if (name[1] == 'e' &&
10240 if (name[6] == 'o' &&
10245 { /* setpriority */
10246 return -KEY_setpriority;
10252 if (name[6] == 't' &&
10257 { /* setprotoent */
10258 return -KEY_setprotoent;
10274 case 12: /* 2 tokens of length 12 */
10275 if (name[0] == 'g' &&
10287 if (name[9] == 'd' &&
10290 { /* getnetbyaddr */
10291 return -KEY_getnetbyaddr;
10297 if (name[9] == 'a' &&
10300 { /* getnetbyname */
10301 return -KEY_getnetbyname;
10313 case 13: /* 4 tokens of length 13 */
10314 if (name[0] == 'g' &&
10321 if (name[4] == 'o' &&
10330 if (name[10] == 'd' &&
10333 { /* gethostbyaddr */
10334 return -KEY_gethostbyaddr;
10340 if (name[10] == 'a' &&
10343 { /* gethostbyname */
10344 return -KEY_gethostbyname;
10357 if (name[4] == 'e' &&
10366 if (name[10] == 'a' &&
10369 { /* getservbyname */
10370 return -KEY_getservbyname;
10376 if (name[10] == 'o' &&
10379 { /* getservbyport */
10380 return -KEY_getservbyport;
10399 case 14: /* 1 tokens of length 14 */
10400 if (name[0] == 'g' &&
10414 { /* getprotobyname */
10415 return -KEY_getprotobyname;
10420 case 16: /* 1 tokens of length 16 */
10421 if (name[0] == 'g' &&
10437 { /* getprotobynumber */
10438 return -KEY_getprotobynumber;
10452 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10456 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10457 if (ckWARN(WARN_SYNTAX)) {
10460 for (w = s+2; *w && level; w++) {
10463 else if (*w == ')')
10466 while (isSPACE(*w))
10468 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
10469 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10470 "%s (...) interpreted as function",name);
10473 while (s < PL_bufend && isSPACE(*s))
10477 while (s < PL_bufend && isSPACE(*s))
10479 if (isIDFIRST_lazy_if(s,UTF)) {
10480 const char * const w = s++;
10481 while (isALNUM_lazy_if(s,UTF))
10483 while (s < PL_bufend && isSPACE(*s))
10487 if (keyword(w, s - w, 0))
10490 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10491 if (gv && GvCVu(gv))
10493 Perl_croak(aTHX_ "No comma allowed after %s", what);
10498 /* Either returns sv, or mortalizes sv and returns a new SV*.
10499 Best used as sv=new_constant(..., sv, ...).
10500 If s, pv are NULL, calls subroutine with one argument,
10501 and type is used with error messages only. */
10504 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10508 HV * const table = GvHV(PL_hintgv); /* ^H */
10512 const char *why1 = "", *why2 = "", *why3 = "";
10514 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10517 why2 = (const char *)
10518 (strEQ(key,"charnames")
10519 ? "(possibly a missing \"use charnames ...\")"
10521 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10522 (type ? type: "undef"), why2);
10524 /* This is convoluted and evil ("goto considered harmful")
10525 * but I do not understand the intricacies of all the different
10526 * failure modes of %^H in here. The goal here is to make
10527 * the most probable error message user-friendly. --jhi */
10532 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10533 (type ? type: "undef"), why1, why2, why3);
10535 yyerror(SvPVX_const(msg));
10539 cvp = hv_fetch(table, key, strlen(key), FALSE);
10540 if (!cvp || !SvOK(*cvp)) {
10543 why3 = "} is not defined";
10546 sv_2mortal(sv); /* Parent created it permanently */
10549 pv = sv_2mortal(newSVpvn(s, len));
10551 typesv = sv_2mortal(newSVpv(type, 0));
10553 typesv = &PL_sv_undef;
10555 PUSHSTACKi(PERLSI_OVERLOAD);
10567 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10571 /* Check the eval first */
10572 if (!PL_in_eval && SvTRUE(ERRSV)) {
10573 sv_catpvs(ERRSV, "Propagated");
10574 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10576 res = SvREFCNT_inc_simple(sv);
10580 SvREFCNT_inc_simple_void(res);
10589 why1 = "Call to &{$^H{";
10591 why3 = "}} did not return a defined value";
10599 /* Returns a NUL terminated string, with the length of the string written to
10603 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10606 register char *d = dest;
10607 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10610 Perl_croak(aTHX_ ident_too_long);
10611 if (isALNUM(*s)) /* UTF handled below */
10613 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10618 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10622 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10623 char *t = s + UTF8SKIP(s);
10625 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10629 Perl_croak(aTHX_ ident_too_long);
10630 Copy(s, d, len, char);
10643 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10646 char *bracket = NULL;
10648 register char *d = dest;
10649 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10654 while (isDIGIT(*s)) {
10656 Perl_croak(aTHX_ ident_too_long);
10663 Perl_croak(aTHX_ ident_too_long);
10664 if (isALNUM(*s)) /* UTF handled below */
10666 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10671 else if (*s == ':' && s[1] == ':') {
10675 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10676 char *t = s + UTF8SKIP(s);
10677 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10679 if (d + (t - s) > e)
10680 Perl_croak(aTHX_ ident_too_long);
10681 Copy(s, d, t - s, char);
10692 if (PL_lex_state != LEX_NORMAL)
10693 PL_lex_state = LEX_INTERPENDMAYBE;
10696 if (*s == '$' && s[1] &&
10697 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10710 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10715 if (isSPACE(s[-1])) {
10717 const char ch = *s++;
10718 if (!SPACE_OR_TAB(ch)) {
10724 if (isIDFIRST_lazy_if(d,UTF)) {
10728 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10729 end += UTF8SKIP(end);
10730 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10731 end += UTF8SKIP(end);
10733 Copy(s, d, end - s, char);
10738 while ((isALNUM(*s) || *s == ':') && d < e)
10741 Perl_croak(aTHX_ ident_too_long);
10744 while (s < send && SPACE_OR_TAB(*s))
10746 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10747 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10748 const char * const brack =
10750 ((*s == '[') ? "[...]" : "{...}");
10751 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10752 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10753 funny, dest, brack, funny, dest, brack);
10756 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10760 /* Handle extended ${^Foo} variables
10761 * 1999-02-27 mjd-perl-patch@plover.com */
10762 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10766 while (isALNUM(*s) && d < e) {
10770 Perl_croak(aTHX_ ident_too_long);
10775 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10776 PL_lex_state = LEX_INTERPEND;
10779 if (PL_lex_state == LEX_NORMAL) {
10780 if (ckWARN(WARN_AMBIGUOUS) &&
10781 (keyword(dest, d - dest, 0) || get_cv(dest, FALSE)))
10785 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10786 "Ambiguous use of %c{%s} resolved to %c%s",
10787 funny, dest, funny, dest);
10792 s = bracket; /* let the parser handle it */
10796 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10797 PL_lex_state = LEX_INTERPEND;
10802 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10804 PERL_UNUSED_CONTEXT;
10807 else if (ch == 'g')
10808 *pmfl |= PMf_GLOBAL;
10809 else if (ch == 'c')
10810 *pmfl |= PMf_CONTINUE;
10811 else if (ch == 'o')
10813 else if (ch == 'm')
10814 *pmfl |= PMf_MULTILINE;
10815 else if (ch == 's')
10816 *pmfl |= PMf_SINGLELINE;
10817 else if (ch == 'x')
10818 *pmfl |= PMf_EXTENDED;
10822 S_scan_pat(pTHX_ char *start, I32 type)
10826 char *s = scan_str(start,!!PL_madskills,FALSE);
10827 const char * const valid_flags =
10828 (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
10835 const char * const delimiter = skipspace(start);
10839 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10840 : "Search pattern not terminated" ));
10843 pm = (PMOP*)newPMOP(type, 0);
10844 if (PL_multi_open == '?')
10845 pm->op_pmflags |= PMf_ONCE;
10849 while (*s && strchr(valid_flags, *s))
10850 pmflag(&pm->op_pmflags,*s++);
10852 if (PL_madskills && modstart != s) {
10853 SV* tmptoken = newSVpvn(modstart, s - modstart);
10854 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10857 /* issue a warning if /c is specified,but /g is not */
10858 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10859 && ckWARN(WARN_REGEXP))
10861 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
10864 pm->op_pmpermflags = pm->op_pmflags;
10866 PL_lex_op = (OP*)pm;
10867 yylval.ival = OP_MATCH;
10872 S_scan_subst(pTHX_ char *start)
10883 yylval.ival = OP_NULL;
10885 s = scan_str(start,!!PL_madskills,FALSE);
10888 Perl_croak(aTHX_ "Substitution pattern not terminated");
10890 if (s[-1] == PL_multi_open)
10893 if (PL_madskills) {
10894 CURMAD('q', PL_thisopen);
10895 CURMAD('_', PL_thiswhite);
10896 CURMAD('E', PL_thisstuff);
10897 CURMAD('Q', PL_thisclose);
10898 PL_realtokenstart = s - SvPVX(PL_linestr);
10902 first_start = PL_multi_start;
10903 s = scan_str(s,!!PL_madskills,FALSE);
10905 if (PL_lex_stuff) {
10906 SvREFCNT_dec(PL_lex_stuff);
10907 PL_lex_stuff = NULL;
10909 Perl_croak(aTHX_ "Substitution replacement not terminated");
10911 PL_multi_start = first_start; /* so whole substitution is taken together */
10913 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10916 if (PL_madskills) {
10917 CURMAD('z', PL_thisopen);
10918 CURMAD('R', PL_thisstuff);
10919 CURMAD('Z', PL_thisclose);
10929 else if (strchr("iogcmsx", *s))
10930 pmflag(&pm->op_pmflags,*s++);
10936 if (PL_madskills) {
10938 curmad('m', newSVpvn(modstart, s - modstart));
10939 append_madprops(PL_thismad, (OP*)pm, 0);
10943 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10944 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10948 SV * const repl = newSVpvs("");
10950 PL_sublex_info.super_bufptr = s;
10951 PL_sublex_info.super_bufend = PL_bufend;
10953 pm->op_pmflags |= PMf_EVAL;
10955 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10956 sv_catpvs(repl, "{");
10957 sv_catsv(repl, PL_lex_repl);
10958 if (strchr(SvPVX(PL_lex_repl), '#'))
10959 sv_catpvs(repl, "\n");
10960 sv_catpvs(repl, "}");
10962 SvREFCNT_dec(PL_lex_repl);
10963 PL_lex_repl = repl;
10966 pm->op_pmpermflags = pm->op_pmflags;
10967 PL_lex_op = (OP*)pm;
10968 yylval.ival = OP_SUBST;
10973 S_scan_trans(pTHX_ char *start)
10986 yylval.ival = OP_NULL;
10988 s = scan_str(start,!!PL_madskills,FALSE);
10990 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10992 if (s[-1] == PL_multi_open)
10995 if (PL_madskills) {
10996 CURMAD('q', PL_thisopen);
10997 CURMAD('_', PL_thiswhite);
10998 CURMAD('E', PL_thisstuff);
10999 CURMAD('Q', PL_thisclose);
11000 PL_realtokenstart = s - SvPVX(PL_linestr);
11004 s = scan_str(s,!!PL_madskills,FALSE);
11006 if (PL_lex_stuff) {
11007 SvREFCNT_dec(PL_lex_stuff);
11008 PL_lex_stuff = NULL;
11010 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11012 if (PL_madskills) {
11013 CURMAD('z', PL_thisopen);
11014 CURMAD('R', PL_thisstuff);
11015 CURMAD('Z', PL_thisclose);
11018 complement = del = squash = 0;
11025 complement = OPpTRANS_COMPLEMENT;
11028 del = OPpTRANS_DELETE;
11031 squash = OPpTRANS_SQUASH;
11040 Newx(tbl, complement&&!del?258:256, short);
11041 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11042 o->op_private &= ~OPpTRANS_ALL;
11043 o->op_private |= del|squash|complement|
11044 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11045 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11048 yylval.ival = OP_TRANS;
11051 if (PL_madskills) {
11053 curmad('m', newSVpvn(modstart, s - modstart));
11054 append_madprops(PL_thismad, o, 0);
11063 S_scan_heredoc(pTHX_ register char *s)
11067 I32 op_type = OP_SCALAR;
11071 const char *found_newline;
11075 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11077 I32 stuffstart = s - SvPVX(PL_linestr);
11080 PL_realtokenstart = -1;
11085 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11089 while (SPACE_OR_TAB(*peek))
11091 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11094 s = delimcpy(d, e, s, PL_bufend, term, &len);
11104 if (!isALNUM_lazy_if(s,UTF))
11105 deprecate_old("bare << to mean <<\"\"");
11106 for (; isALNUM_lazy_if(s,UTF); s++) {
11111 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11112 Perl_croak(aTHX_ "Delimiter for here document is too long");
11115 len = d - PL_tokenbuf;
11118 if (PL_madskills) {
11119 tstart = PL_tokenbuf + !outer;
11120 PL_thisclose = newSVpvn(tstart, len - !outer);
11121 tstart = SvPVX(PL_linestr) + stuffstart;
11122 PL_thisopen = newSVpvn(tstart, s - tstart);
11123 stuffstart = s - SvPVX(PL_linestr);
11126 #ifndef PERL_STRICT_CR
11127 d = strchr(s, '\r');
11129 char * const olds = s;
11131 while (s < PL_bufend) {
11137 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11146 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11153 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11154 herewas = newSVpvn(s,PL_bufend-s);
11158 herewas = newSVpvn(s-1,found_newline-s+1);
11161 herewas = newSVpvn(s,found_newline-s);
11165 if (PL_madskills) {
11166 tstart = SvPVX(PL_linestr) + stuffstart;
11168 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11170 PL_thisstuff = newSVpvn(tstart, s - tstart);
11173 s += SvCUR(herewas);
11176 stuffstart = s - SvPVX(PL_linestr);
11182 tmpstr = newSV(79);
11183 sv_upgrade(tmpstr, SVt_PVIV);
11184 if (term == '\'') {
11185 op_type = OP_CONST;
11186 SvIV_set(tmpstr, -1);
11188 else if (term == '`') {
11189 op_type = OP_BACKTICK;
11190 SvIV_set(tmpstr, '\\');
11194 PL_multi_start = CopLINE(PL_curcop);
11195 PL_multi_open = PL_multi_close = '<';
11196 term = *PL_tokenbuf;
11197 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11198 char * const bufptr = PL_sublex_info.super_bufptr;
11199 char * const bufend = PL_sublex_info.super_bufend;
11200 char * const olds = s - SvCUR(herewas);
11201 s = strchr(bufptr, '\n');
11205 while (s < bufend &&
11206 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11208 CopLINE_inc(PL_curcop);
11211 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11212 missingterm(PL_tokenbuf);
11214 sv_setpvn(herewas,bufptr,d-bufptr+1);
11215 sv_setpvn(tmpstr,d+1,s-d);
11217 sv_catpvn(herewas,s,bufend-s);
11218 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11225 while (s < PL_bufend &&
11226 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11228 CopLINE_inc(PL_curcop);
11230 if (s >= PL_bufend) {
11231 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11232 missingterm(PL_tokenbuf);
11234 sv_setpvn(tmpstr,d+1,s-d);
11236 if (PL_madskills) {
11238 sv_catpvn(PL_thisstuff, d + 1, s - d);
11240 PL_thisstuff = newSVpvn(d + 1, s - d);
11241 stuffstart = s - SvPVX(PL_linestr);
11245 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11247 sv_catpvn(herewas,s,PL_bufend-s);
11248 sv_setsv(PL_linestr,herewas);
11249 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11250 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11251 PL_last_lop = PL_last_uni = NULL;
11254 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11255 while (s >= PL_bufend) { /* multiple line string? */
11257 if (PL_madskills) {
11258 tstart = SvPVX(PL_linestr) + stuffstart;
11260 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11262 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11266 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11267 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11268 missingterm(PL_tokenbuf);
11271 stuffstart = s - SvPVX(PL_linestr);
11273 CopLINE_inc(PL_curcop);
11274 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11275 PL_last_lop = PL_last_uni = NULL;
11276 #ifndef PERL_STRICT_CR
11277 if (PL_bufend - PL_linestart >= 2) {
11278 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11279 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11281 PL_bufend[-2] = '\n';
11283 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11285 else if (PL_bufend[-1] == '\r')
11286 PL_bufend[-1] = '\n';
11288 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11289 PL_bufend[-1] = '\n';
11291 if (PERLDB_LINE && PL_curstash != PL_debstash)
11292 update_debugger_info_sv(PL_linestr);
11293 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11294 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11295 *(SvPVX(PL_linestr) + off ) = ' ';
11296 sv_catsv(PL_linestr,herewas);
11297 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11298 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11302 sv_catsv(tmpstr,PL_linestr);
11307 PL_multi_end = CopLINE(PL_curcop);
11308 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11309 SvPV_shrink_to_cur(tmpstr);
11311 SvREFCNT_dec(herewas);
11313 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11315 else if (PL_encoding)
11316 sv_recode_to_utf8(tmpstr, PL_encoding);
11318 PL_lex_stuff = tmpstr;
11319 yylval.ival = op_type;
11323 /* scan_inputsymbol
11324 takes: current position in input buffer
11325 returns: new position in input buffer
11326 side-effects: yylval and lex_op are set.
11331 <FH> read from filehandle
11332 <pkg::FH> read from package qualified filehandle
11333 <pkg'FH> read from package qualified filehandle
11334 <$fh> read from filehandle in $fh
11335 <*.h> filename glob
11340 S_scan_inputsymbol(pTHX_ char *start)
11343 register char *s = start; /* current position in buffer */
11347 char *d = PL_tokenbuf; /* start of temp holding space */
11348 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11350 end = strchr(s, '\n');
11353 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11355 /* die if we didn't have space for the contents of the <>,
11356 or if it didn't end, or if we see a newline
11359 if (len >= (I32)sizeof PL_tokenbuf)
11360 Perl_croak(aTHX_ "Excessively long <> operator");
11362 Perl_croak(aTHX_ "Unterminated <> operator");
11367 Remember, only scalar variables are interpreted as filehandles by
11368 this code. Anything more complex (e.g., <$fh{$num}>) will be
11369 treated as a glob() call.
11370 This code makes use of the fact that except for the $ at the front,
11371 a scalar variable and a filehandle look the same.
11373 if (*d == '$' && d[1]) d++;
11375 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11376 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11379 /* If we've tried to read what we allow filehandles to look like, and
11380 there's still text left, then it must be a glob() and not a getline.
11381 Use scan_str to pull out the stuff between the <> and treat it
11382 as nothing more than a string.
11385 if (d - PL_tokenbuf != len) {
11386 yylval.ival = OP_GLOB;
11388 s = scan_str(start,!!PL_madskills,FALSE);
11390 Perl_croak(aTHX_ "Glob not terminated");
11394 bool readline_overriden = FALSE;
11397 /* we're in a filehandle read situation */
11400 /* turn <> into <ARGV> */
11402 Copy("ARGV",d,5,char);
11404 /* Check whether readline() is overriden */
11405 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11407 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11409 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11410 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
11411 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11412 readline_overriden = TRUE;
11414 /* if <$fh>, create the ops to turn the variable into a
11418 /* try to find it in the pad for this block, otherwise find
11419 add symbol table ops
11421 const PADOFFSET tmp = pad_findmy(d);
11422 if (tmp != NOT_IN_PAD) {
11423 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11424 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11425 HEK * const stashname = HvNAME_HEK(stash);
11426 SV * const sym = sv_2mortal(newSVhek(stashname));
11427 sv_catpvs(sym, "::");
11428 sv_catpv(sym, d+1);
11433 OP * const o = newOP(OP_PADSV, 0);
11435 PL_lex_op = readline_overriden
11436 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11437 append_elem(OP_LIST, o,
11438 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11439 : (OP*)newUNOP(OP_READLINE, 0, o);
11448 ? (GV_ADDMULTI | GV_ADDINEVAL)
11451 PL_lex_op = readline_overriden
11452 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11453 append_elem(OP_LIST,
11454 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11455 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11456 : (OP*)newUNOP(OP_READLINE, 0,
11457 newUNOP(OP_RV2SV, 0,
11458 newGVOP(OP_GV, 0, gv)));
11460 if (!readline_overriden)
11461 PL_lex_op->op_flags |= OPf_SPECIAL;
11462 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11463 yylval.ival = OP_NULL;
11466 /* If it's none of the above, it must be a literal filehandle
11467 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11469 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11470 PL_lex_op = readline_overriden
11471 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11472 append_elem(OP_LIST,
11473 newGVOP(OP_GV, 0, gv),
11474 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11475 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11476 yylval.ival = OP_NULL;
11485 takes: start position in buffer
11486 keep_quoted preserve \ on the embedded delimiter(s)
11487 keep_delims preserve the delimiters around the string
11488 returns: position to continue reading from buffer
11489 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11490 updates the read buffer.
11492 This subroutine pulls a string out of the input. It is called for:
11493 q single quotes q(literal text)
11494 ' single quotes 'literal text'
11495 qq double quotes qq(interpolate $here please)
11496 " double quotes "interpolate $here please"
11497 qx backticks qx(/bin/ls -l)
11498 ` backticks `/bin/ls -l`
11499 qw quote words @EXPORT_OK = qw( func() $spam )
11500 m// regexp match m/this/
11501 s/// regexp substitute s/this/that/
11502 tr/// string transliterate tr/this/that/
11503 y/// string transliterate y/this/that/
11504 ($*@) sub prototypes sub foo ($)
11505 (stuff) sub attr parameters sub foo : attr(stuff)
11506 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11508 In most of these cases (all but <>, patterns and transliterate)
11509 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11510 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11511 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11514 It skips whitespace before the string starts, and treats the first
11515 character as the delimiter. If the delimiter is one of ([{< then
11516 the corresponding "close" character )]}> is used as the closing
11517 delimiter. It allows quoting of delimiters, and if the string has
11518 balanced delimiters ([{<>}]) it allows nesting.
11520 On success, the SV with the resulting string is put into lex_stuff or,
11521 if that is already non-NULL, into lex_repl. The second case occurs only
11522 when parsing the RHS of the special constructs s/// and tr/// (y///).
11523 For convenience, the terminating delimiter character is stuffed into
11528 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11531 SV *sv; /* scalar value: string */
11532 const char *tmps; /* temp string, used for delimiter matching */
11533 register char *s = start; /* current position in the buffer */
11534 register char term; /* terminating character */
11535 register char *to; /* current position in the sv's data */
11536 I32 brackets = 1; /* bracket nesting level */
11537 bool has_utf8 = FALSE; /* is there any utf8 content? */
11538 I32 termcode; /* terminating char. code */
11539 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11540 STRLEN termlen; /* length of terminating string */
11541 int last_off = 0; /* last position for nesting bracket */
11547 /* skip space before the delimiter */
11553 if (PL_realtokenstart >= 0) {
11554 stuffstart = PL_realtokenstart;
11555 PL_realtokenstart = -1;
11558 stuffstart = start - SvPVX(PL_linestr);
11560 /* mark where we are, in case we need to report errors */
11563 /* after skipping whitespace, the next character is the terminator */
11566 termcode = termstr[0] = term;
11570 termcode = utf8_to_uvchr((U8*)s, &termlen);
11571 Copy(s, termstr, termlen, U8);
11572 if (!UTF8_IS_INVARIANT(term))
11576 /* mark where we are */
11577 PL_multi_start = CopLINE(PL_curcop);
11578 PL_multi_open = term;
11580 /* find corresponding closing delimiter */
11581 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11582 termcode = termstr[0] = term = tmps[5];
11584 PL_multi_close = term;
11586 /* create a new SV to hold the contents. 79 is the SV's initial length.
11587 What a random number. */
11589 sv_upgrade(sv, SVt_PVIV);
11590 SvIV_set(sv, termcode);
11591 (void)SvPOK_only(sv); /* validate pointer */
11593 /* move past delimiter and try to read a complete string */
11595 sv_catpvn(sv, s, termlen);
11598 tstart = SvPVX(PL_linestr) + stuffstart;
11599 if (!PL_thisopen && !keep_delims) {
11600 PL_thisopen = newSVpvn(tstart, s - tstart);
11601 stuffstart = s - SvPVX(PL_linestr);
11605 if (PL_encoding && !UTF) {
11609 int offset = s - SvPVX_const(PL_linestr);
11610 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11611 &offset, (char*)termstr, termlen);
11612 const char * const ns = SvPVX_const(PL_linestr) + offset;
11613 char * const svlast = SvEND(sv) - 1;
11615 for (; s < ns; s++) {
11616 if (*s == '\n' && !PL_rsfp)
11617 CopLINE_inc(PL_curcop);
11620 goto read_more_line;
11622 /* handle quoted delimiters */
11623 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11625 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11627 if ((svlast-1 - t) % 2) {
11628 if (!keep_quoted) {
11629 *(svlast-1) = term;
11631 SvCUR_set(sv, SvCUR(sv) - 1);
11636 if (PL_multi_open == PL_multi_close) {
11642 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11643 /* At here, all closes are "was quoted" one,
11644 so we don't check PL_multi_close. */
11646 if (!keep_quoted && *(t+1) == PL_multi_open)
11651 else if (*t == PL_multi_open)
11659 SvCUR_set(sv, w - SvPVX_const(sv));
11661 last_off = w - SvPVX(sv);
11662 if (--brackets <= 0)
11667 if (!keep_delims) {
11668 SvCUR_set(sv, SvCUR(sv) - 1);
11674 /* extend sv if need be */
11675 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11676 /* set 'to' to the next character in the sv's string */
11677 to = SvPVX(sv)+SvCUR(sv);
11679 /* if open delimiter is the close delimiter read unbridle */
11680 if (PL_multi_open == PL_multi_close) {
11681 for (; s < PL_bufend; s++,to++) {
11682 /* embedded newlines increment the current line number */
11683 if (*s == '\n' && !PL_rsfp)
11684 CopLINE_inc(PL_curcop);
11685 /* handle quoted delimiters */
11686 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11687 if (!keep_quoted && s[1] == term)
11689 /* any other quotes are simply copied straight through */
11693 /* terminate when run out of buffer (the for() condition), or
11694 have found the terminator */
11695 else if (*s == term) {
11698 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11701 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11707 /* if the terminator isn't the same as the start character (e.g.,
11708 matched brackets), we have to allow more in the quoting, and
11709 be prepared for nested brackets.
11712 /* read until we run out of string, or we find the terminator */
11713 for (; s < PL_bufend; s++,to++) {
11714 /* embedded newlines increment the line count */
11715 if (*s == '\n' && !PL_rsfp)
11716 CopLINE_inc(PL_curcop);
11717 /* backslashes can escape the open or closing characters */
11718 if (*s == '\\' && s+1 < PL_bufend) {
11719 if (!keep_quoted &&
11720 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11725 /* allow nested opens and closes */
11726 else if (*s == PL_multi_close && --brackets <= 0)
11728 else if (*s == PL_multi_open)
11730 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11735 /* terminate the copied string and update the sv's end-of-string */
11737 SvCUR_set(sv, to - SvPVX_const(sv));
11740 * this next chunk reads more into the buffer if we're not done yet
11744 break; /* handle case where we are done yet :-) */
11746 #ifndef PERL_STRICT_CR
11747 if (to - SvPVX_const(sv) >= 2) {
11748 if ((to[-2] == '\r' && to[-1] == '\n') ||
11749 (to[-2] == '\n' && to[-1] == '\r'))
11753 SvCUR_set(sv, to - SvPVX_const(sv));
11755 else if (to[-1] == '\r')
11758 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11763 /* if we're out of file, or a read fails, bail and reset the current
11764 line marker so we can report where the unterminated string began
11767 if (PL_madskills) {
11768 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11770 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11772 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11776 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11778 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11784 /* we read a line, so increment our line counter */
11785 CopLINE_inc(PL_curcop);
11787 /* update debugger info */
11788 if (PERLDB_LINE && PL_curstash != PL_debstash)
11789 update_debugger_info_sv(PL_linestr);
11791 /* having changed the buffer, we must update PL_bufend */
11792 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11793 PL_last_lop = PL_last_uni = NULL;
11796 /* at this point, we have successfully read the delimited string */
11798 if (!PL_encoding || UTF) {
11800 if (PL_madskills) {
11801 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11802 const int len = s - tstart;
11804 sv_catpvn(PL_thisstuff, tstart, len);
11806 PL_thisstuff = newSVpvn(tstart, len);
11807 if (!PL_thisclose && !keep_delims)
11808 PL_thisclose = newSVpvn(s,termlen);
11813 sv_catpvn(sv, s, termlen);
11818 if (PL_madskills) {
11819 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11820 const int len = s - tstart - termlen;
11822 sv_catpvn(PL_thisstuff, tstart, len);
11824 PL_thisstuff = newSVpvn(tstart, len);
11825 if (!PL_thisclose && !keep_delims)
11826 PL_thisclose = newSVpvn(s - termlen,termlen);
11830 if (has_utf8 || PL_encoding)
11833 PL_multi_end = CopLINE(PL_curcop);
11835 /* if we allocated too much space, give some back */
11836 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11837 SvLEN_set(sv, SvCUR(sv) + 1);
11838 SvPV_renew(sv, SvLEN(sv));
11841 /* decide whether this is the first or second quoted string we've read
11854 takes: pointer to position in buffer
11855 returns: pointer to new position in buffer
11856 side-effects: builds ops for the constant in yylval.op
11858 Read a number in any of the formats that Perl accepts:
11860 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11861 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11864 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11866 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11869 If it reads a number without a decimal point or an exponent, it will
11870 try converting the number to an integer and see if it can do so
11871 without loss of precision.
11875 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11878 register const char *s = start; /* current position in buffer */
11879 register char *d; /* destination in temp buffer */
11880 register char *e; /* end of temp buffer */
11881 NV nv; /* number read, as a double */
11882 SV *sv = NULL; /* place to put the converted number */
11883 bool floatit; /* boolean: int or float? */
11884 const char *lastub = NULL; /* position of last underbar */
11885 static char const number_too_long[] = "Number too long";
11887 /* We use the first character to decide what type of number this is */
11891 Perl_croak(aTHX_ "panic: scan_num");
11893 /* if it starts with a 0, it could be an octal number, a decimal in
11894 0.13 disguise, or a hexadecimal number, or a binary number. */
11898 u holds the "number so far"
11899 shift the power of 2 of the base
11900 (hex == 4, octal == 3, binary == 1)
11901 overflowed was the number more than we can hold?
11903 Shift is used when we add a digit. It also serves as an "are
11904 we in octal/hex/binary?" indicator to disallow hex characters
11905 when in octal mode.
11910 bool overflowed = FALSE;
11911 bool just_zero = TRUE; /* just plain 0 or binary number? */
11912 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11913 static const char* const bases[5] =
11914 { "", "binary", "", "octal", "hexadecimal" };
11915 static const char* const Bases[5] =
11916 { "", "Binary", "", "Octal", "Hexadecimal" };
11917 static const char* const maxima[5] =
11919 "0b11111111111111111111111111111111",
11923 const char *base, *Base, *max;
11925 /* check for hex */
11930 } else if (s[1] == 'b') {
11935 /* check for a decimal in disguise */
11936 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11938 /* so it must be octal */
11945 if (ckWARN(WARN_SYNTAX))
11946 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11947 "Misplaced _ in number");
11951 base = bases[shift];
11952 Base = Bases[shift];
11953 max = maxima[shift];
11955 /* read the rest of the number */
11957 /* x is used in the overflow test,
11958 b is the digit we're adding on. */
11963 /* if we don't mention it, we're done */
11967 /* _ are ignored -- but warned about if consecutive */
11969 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11970 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11971 "Misplaced _ in number");
11975 /* 8 and 9 are not octal */
11976 case '8': case '9':
11978 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11982 case '2': case '3': case '4':
11983 case '5': case '6': case '7':
11985 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11988 case '0': case '1':
11989 b = *s++ & 15; /* ASCII digit -> value of digit */
11993 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11994 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11995 /* make sure they said 0x */
11998 b = (*s++ & 7) + 9;
12000 /* Prepare to put the digit we have onto the end
12001 of the number so far. We check for overflows.
12007 x = u << shift; /* make room for the digit */
12009 if ((x >> shift) != u
12010 && !(PL_hints & HINT_NEW_BINARY)) {
12013 if (ckWARN_d(WARN_OVERFLOW))
12014 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12015 "Integer overflow in %s number",
12018 u = x | b; /* add the digit to the end */
12021 n *= nvshift[shift];
12022 /* If an NV has not enough bits in its
12023 * mantissa to represent an UV this summing of
12024 * small low-order numbers is a waste of time
12025 * (because the NV cannot preserve the
12026 * low-order bits anyway): we could just
12027 * remember when did we overflow and in the
12028 * end just multiply n by the right
12036 /* if we get here, we had success: make a scalar value from
12041 /* final misplaced underbar check */
12042 if (s[-1] == '_') {
12043 if (ckWARN(WARN_SYNTAX))
12044 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12049 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12050 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12051 "%s number > %s non-portable",
12057 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12058 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12059 "%s number > %s non-portable",
12064 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12065 sv = new_constant(start, s - start, "integer",
12067 else if (PL_hints & HINT_NEW_BINARY)
12068 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12073 handle decimal numbers.
12074 we're also sent here when we read a 0 as the first digit
12076 case '1': case '2': case '3': case '4': case '5':
12077 case '6': case '7': case '8': case '9': case '.':
12080 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12083 /* read next group of digits and _ and copy into d */
12084 while (isDIGIT(*s) || *s == '_') {
12085 /* skip underscores, checking for misplaced ones
12089 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12090 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12091 "Misplaced _ in number");
12095 /* check for end of fixed-length buffer */
12097 Perl_croak(aTHX_ number_too_long);
12098 /* if we're ok, copy the character */
12103 /* final misplaced underbar check */
12104 if (lastub && s == lastub + 1) {
12105 if (ckWARN(WARN_SYNTAX))
12106 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12109 /* read a decimal portion if there is one. avoid
12110 3..5 being interpreted as the number 3. followed
12113 if (*s == '.' && s[1] != '.') {
12118 if (ckWARN(WARN_SYNTAX))
12119 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12120 "Misplaced _ in number");
12124 /* copy, ignoring underbars, until we run out of digits.
12126 for (; isDIGIT(*s) || *s == '_'; s++) {
12127 /* fixed length buffer check */
12129 Perl_croak(aTHX_ number_too_long);
12131 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12132 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12133 "Misplaced _ in number");
12139 /* fractional part ending in underbar? */
12140 if (s[-1] == '_') {
12141 if (ckWARN(WARN_SYNTAX))
12142 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12143 "Misplaced _ in number");
12145 if (*s == '.' && isDIGIT(s[1])) {
12146 /* oops, it's really a v-string, but without the "v" */
12152 /* read exponent part, if present */
12153 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12157 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12158 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12160 /* stray preinitial _ */
12162 if (ckWARN(WARN_SYNTAX))
12163 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12164 "Misplaced _ in number");
12168 /* allow positive or negative exponent */
12169 if (*s == '+' || *s == '-')
12172 /* stray initial _ */
12174 if (ckWARN(WARN_SYNTAX))
12175 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12176 "Misplaced _ in number");
12180 /* read digits of exponent */
12181 while (isDIGIT(*s) || *s == '_') {
12184 Perl_croak(aTHX_ number_too_long);
12188 if (((lastub && s == lastub + 1) ||
12189 (!isDIGIT(s[1]) && s[1] != '_'))
12190 && ckWARN(WARN_SYNTAX))
12191 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12192 "Misplaced _ in number");
12199 /* make an sv from the string */
12203 We try to do an integer conversion first if no characters
12204 indicating "float" have been found.
12209 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12211 if (flags == IS_NUMBER_IN_UV) {
12213 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12216 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12217 if (uv <= (UV) IV_MIN)
12218 sv_setiv(sv, -(IV)uv);
12225 /* terminate the string */
12227 nv = Atof(PL_tokenbuf);
12231 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12232 (PL_hints & HINT_NEW_INTEGER) )
12233 sv = new_constant(PL_tokenbuf,
12236 (floatit ? "float" : "integer"),
12240 /* if it starts with a v, it could be a v-string */
12243 sv = newSV(5); /* preallocate storage space */
12244 s = scan_vstring(s,sv);
12248 /* make the op for the constant and return */
12251 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12253 lvalp->opval = NULL;
12259 S_scan_formline(pTHX_ register char *s)
12262 register char *eol;
12264 SV * const stuff = newSVpvs("");
12265 bool needargs = FALSE;
12266 bool eofmt = FALSE;
12268 char *tokenstart = s;
12271 if (PL_madskills) {
12272 savewhite = PL_thiswhite;
12277 while (!needargs) {
12280 #ifdef PERL_STRICT_CR
12281 while (SPACE_OR_TAB(*t))
12284 while (SPACE_OR_TAB(*t) || *t == '\r')
12287 if (*t == '\n' || t == PL_bufend) {
12292 if (PL_in_eval && !PL_rsfp) {
12293 eol = (char *) memchr(s,'\n',PL_bufend-s);
12298 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12300 for (t = s; t < eol; t++) {
12301 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12303 goto enough; /* ~~ must be first line in formline */
12305 if (*t == '@' || *t == '^')
12309 sv_catpvn(stuff, s, eol-s);
12310 #ifndef PERL_STRICT_CR
12311 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12312 char *end = SvPVX(stuff) + SvCUR(stuff);
12315 SvCUR_set(stuff, SvCUR(stuff) - 1);
12325 if (PL_madskills) {
12327 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12329 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12332 s = filter_gets(PL_linestr, PL_rsfp, 0);
12334 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12336 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12338 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12339 PL_last_lop = PL_last_uni = NULL;
12348 if (SvCUR(stuff)) {
12351 PL_lex_state = LEX_NORMAL;
12352 start_force(PL_curforce);
12353 NEXTVAL_NEXTTOKE.ival = 0;
12357 PL_lex_state = LEX_FORMLINE;
12359 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12361 else if (PL_encoding)
12362 sv_recode_to_utf8(stuff, PL_encoding);
12364 start_force(PL_curforce);
12365 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12367 start_force(PL_curforce);
12368 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12372 SvREFCNT_dec(stuff);
12374 PL_lex_formbrack = 0;
12378 if (PL_madskills) {
12380 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12382 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12383 PL_thiswhite = savewhite;
12395 PL_cshlen = strlen(PL_cshname);
12397 #if defined(USE_ITHREADS)
12398 PERL_UNUSED_CONTEXT;
12404 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12407 const I32 oldsavestack_ix = PL_savestack_ix;
12408 CV* const outsidecv = PL_compcv;
12411 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12413 SAVEI32(PL_subline);
12414 save_item(PL_subname);
12415 SAVESPTR(PL_compcv);
12417 PL_compcv = (CV*)newSV(0);
12418 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
12419 CvFLAGS(PL_compcv) |= flags;
12421 PL_subline = CopLINE(PL_curcop);
12422 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12423 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12424 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12426 return oldsavestack_ix;
12430 #pragma segment Perl_yylex
12433 Perl_yywarn(pTHX_ const char *s)
12436 PL_in_eval |= EVAL_WARNONLY;
12438 PL_in_eval &= ~EVAL_WARNONLY;
12443 Perl_yyerror(pTHX_ const char *s)
12446 const char *where = NULL;
12447 const char *context = NULL;
12450 int yychar = PL_parser->yychar;
12452 if (!yychar || (yychar == ';' && !PL_rsfp))
12454 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12455 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12456 PL_oldbufptr != PL_bufptr) {
12459 The code below is removed for NetWare because it abends/crashes on NetWare
12460 when the script has error such as not having the closing quotes like:
12461 if ($var eq "value)
12462 Checking of white spaces is anyway done in NetWare code.
12465 while (isSPACE(*PL_oldoldbufptr))
12468 context = PL_oldoldbufptr;
12469 contlen = PL_bufptr - PL_oldoldbufptr;
12471 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12472 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12475 The code below is removed for NetWare because it abends/crashes on NetWare
12476 when the script has error such as not having the closing quotes like:
12477 if ($var eq "value)
12478 Checking of white spaces is anyway done in NetWare code.
12481 while (isSPACE(*PL_oldbufptr))
12484 context = PL_oldbufptr;
12485 contlen = PL_bufptr - PL_oldbufptr;
12487 else if (yychar > 255)
12488 where = "next token ???";
12489 else if (yychar == -2) { /* YYEMPTY */
12490 if (PL_lex_state == LEX_NORMAL ||
12491 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12492 where = "at end of line";
12493 else if (PL_lex_inpat)
12494 where = "within pattern";
12496 where = "within string";
12499 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12501 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12502 else if (isPRINT_LC(yychar))
12503 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12505 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12506 where = SvPVX_const(where_sv);
12508 msg = sv_2mortal(newSVpv(s, 0));
12509 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12510 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12512 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12514 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12515 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12516 Perl_sv_catpvf(aTHX_ msg,
12517 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12518 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12521 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12522 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, (void*)msg);
12525 if (PL_error_count >= 10) {
12526 if (PL_in_eval && SvCUR(ERRSV))
12527 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12528 (void*)ERRSV, OutCopFILE(PL_curcop));
12530 Perl_croak(aTHX_ "%s has too many errors.\n",
12531 OutCopFILE(PL_curcop));
12534 PL_in_my_stash = NULL;
12538 #pragma segment Main
12542 S_swallow_bom(pTHX_ U8 *s)
12545 const STRLEN slen = SvCUR(PL_linestr);
12548 if (s[1] == 0xFE) {
12549 /* UTF-16 little-endian? (or UTF32-LE?) */
12550 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12551 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12552 #ifndef PERL_NO_UTF16_FILTER
12553 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12556 if (PL_bufend > (char*)s) {
12560 filter_add(utf16rev_textfilter, NULL);
12561 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12562 utf16_to_utf8_reversed(s, news,
12563 PL_bufend - (char*)s - 1,
12565 sv_setpvn(PL_linestr, (const char*)news, newlen);
12567 s = (U8*)SvPVX(PL_linestr);
12568 Copy(news, s, newlen, U8);
12572 SvUTF8_on(PL_linestr);
12573 s = (U8*)SvPVX(PL_linestr);
12575 /* FIXME - is this a general bug fix? */
12578 PL_bufend = SvPVX(PL_linestr) + newlen;
12581 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12586 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12587 #ifndef PERL_NO_UTF16_FILTER
12588 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12591 if (PL_bufend > (char *)s) {
12595 filter_add(utf16_textfilter, NULL);
12596 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12597 utf16_to_utf8(s, news,
12598 PL_bufend - (char*)s,
12600 sv_setpvn(PL_linestr, (const char*)news, newlen);
12602 SvUTF8_on(PL_linestr);
12603 s = (U8*)SvPVX(PL_linestr);
12604 PL_bufend = SvPVX(PL_linestr) + newlen;
12607 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12612 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12613 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12614 s += 3; /* UTF-8 */
12620 if (s[2] == 0xFE && s[3] == 0xFF) {
12621 /* UTF-32 big-endian */
12622 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12625 else if (s[2] == 0 && s[3] != 0) {
12628 * are a good indicator of UTF-16BE. */
12629 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12635 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12636 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12637 s += 4; /* UTF-8 */
12643 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12646 * are a good indicator of UTF-16LE. */
12647 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12656 * Restore a source filter.
12660 restore_rsfp(pTHX_ void *f)
12663 PerlIO * const fp = (PerlIO*)f;
12665 if (PL_rsfp == PerlIO_stdin())
12666 PerlIO_clearerr(PL_rsfp);
12667 else if (PL_rsfp && (PL_rsfp != fp))
12668 PerlIO_close(PL_rsfp);
12672 #ifndef PERL_NO_UTF16_FILTER
12674 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12677 const STRLEN old = SvCUR(sv);
12678 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12679 DEBUG_P(PerlIO_printf(Perl_debug_log,
12680 "utf16_textfilter(%p): %d %d (%d)\n",
12681 FPTR2DPTR(void *, utf16_textfilter),
12682 idx, maxlen, (int) count));
12686 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12687 Copy(SvPVX_const(sv), tmps, old, char);
12688 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12689 SvCUR(sv) - old, &newlen);
12690 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12692 DEBUG_P({sv_dump(sv);});
12697 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12700 const STRLEN old = SvCUR(sv);
12701 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12702 DEBUG_P(PerlIO_printf(Perl_debug_log,
12703 "utf16rev_textfilter(%p): %d %d (%d)\n",
12704 FPTR2DPTR(void *, utf16rev_textfilter),
12705 idx, maxlen, (int) count));
12709 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12710 Copy(SvPVX_const(sv), tmps, old, char);
12711 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12712 SvCUR(sv) - old, &newlen);
12713 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12715 DEBUG_P({ sv_dump(sv); });
12721 Returns a pointer to the next character after the parsed
12722 vstring, as well as updating the passed in sv.
12724 Function must be called like
12727 s = scan_vstring(s,sv);
12729 The sv should already be large enough to store the vstring
12730 passed in, for performance reasons.
12735 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
12738 const char *pos = s;
12739 const char *start = s;
12740 if (*pos == 'v') pos++; /* get past 'v' */
12741 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12743 if ( *pos != '.') {
12744 /* this may not be a v-string if followed by => */
12745 const char *next = pos;
12746 while (next < PL_bufend && isSPACE(*next))
12748 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
12749 /* return string not v-string */
12750 sv_setpvn(sv,(char *)s,pos-s);
12751 return (char *)pos;
12755 if (!isALPHA(*pos)) {
12756 U8 tmpbuf[UTF8_MAXBYTES+1];
12759 s++; /* get past 'v' */
12761 sv_setpvn(sv, "", 0);
12764 /* this is atoi() that tolerates underscores */
12767 const char *end = pos;
12769 while (--end >= s) {
12771 const UV orev = rev;
12772 rev += (*end - '0') * mult;
12774 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12775 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12776 "Integer overflow in decimal number");
12780 if (rev > 0x7FFFFFFF)
12781 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12783 /* Append native character for the rev point */
12784 tmpend = uvchr_to_utf8(tmpbuf, rev);
12785 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12786 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12788 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
12794 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12798 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12806 * c-indentation-style: bsd
12807 * c-basic-offset: 4
12808 * indent-tabs-mode: t
12811 * ex: set ts=8 sts=4 sw=4 noet: