3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yylval (PL_parser->yylval)
28 /* YYINITDEPTH -- initial size of the parser's stacks. */
29 #define YYINITDEPTH 200
31 /* XXX temporary backwards compatibility */
32 #define PL_lex_brackets (PL_parser->lex_brackets)
33 #define PL_lex_brackstack (PL_parser->lex_brackstack)
34 #define PL_lex_casemods (PL_parser->lex_casemods)
35 #define PL_lex_casestack (PL_parser->lex_casestack)
36 #define PL_lex_defer (PL_parser->lex_defer)
37 #define PL_lex_dojoin (PL_parser->lex_dojoin)
38 #define PL_lex_expect (PL_parser->lex_expect)
39 #define PL_lex_formbrack (PL_parser->lex_formbrack)
40 #define PL_lex_inpat (PL_parser->lex_inpat)
41 #define PL_lex_inwhat (PL_parser->lex_inwhat)
42 #define PL_lex_op (PL_parser->lex_op)
43 #define PL_lex_repl (PL_parser->lex_repl)
44 #define PL_lex_starts (PL_parser->lex_starts)
45 #define PL_lex_stuff (PL_parser->lex_stuff)
46 #define PL_multi_start (PL_parser->multi_start)
47 #define PL_multi_open (PL_parser->multi_open)
48 #define PL_multi_close (PL_parser->multi_close)
49 #define PL_pending_ident (PL_parser->pending_ident)
50 #define PL_preambled (PL_parser->preambled)
51 #define PL_sublex_info (PL_parser->sublex_info)
52 #define PL_linestr (PL_parser->linestr)
56 # define PL_endwhite (PL_parser->endwhite)
57 # define PL_faketokens (PL_parser->faketokens)
58 # define PL_lasttoke (PL_parser->lasttoke)
59 # define PL_nextwhite (PL_parser->nextwhite)
60 # define PL_realtokenstart (PL_parser->realtokenstart)
61 # define PL_skipwhite (PL_parser->skipwhite)
62 # define PL_thisclose (PL_parser->thisclose)
63 # define PL_thismad (PL_parser->thismad)
64 # define PL_thisopen (PL_parser->thisopen)
65 # define PL_thisstuff (PL_parser->thisstuff)
66 # define PL_thistoken (PL_parser->thistoken)
67 # define PL_thiswhite (PL_parser->thiswhite)
71 S_pending_ident(pTHX);
73 static const char ident_too_long[] = "Identifier too long";
74 static const char commaless_variable_list[] = "comma-less variable list";
76 static void restore_rsfp(pTHX_ void *f);
77 #ifndef PERL_NO_UTF16_FILTER
78 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
79 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
83 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
84 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
86 # define CURMAD(slot,sv)
87 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
90 #define XFAKEBRACK 128
93 #ifdef USE_UTF8_SCRIPTS
94 # define UTF (!IN_BYTES)
96 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
99 /* In variables named $^X, these are the legal values for X.
100 * 1999-02-27 mjd-perl-patch@plover.com */
101 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
103 /* On MacOS, respect nonbreaking spaces */
104 #ifdef MACOS_TRADITIONAL
105 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
107 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
110 /* LEX_* are values for PL_lex_state, the state of the lexer.
111 * They are arranged oddly so that the guard on the switch statement
112 * can get by with a single comparison (if the compiler is smart enough).
115 /* #define LEX_NOTPARSING 11 is done in perl.h. */
117 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
118 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
119 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
120 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
121 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
123 /* at end of code, eg "$x" followed by: */
124 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
125 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
127 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
128 string or after \E, $foo, etc */
129 #define LEX_INTERPCONST 2 /* NOT USED */
130 #define LEX_FORMLINE 1 /* expecting a format line */
131 #define LEX_KNOWNEXT 0 /* next token known; just return it */
135 static const char* const lex_state_names[] = {
154 #include "keywords.h"
156 /* CLINE is a macro that ensures PL_copline has a sane value */
161 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
164 # define SKIPSPACE0(s) skipspace0(s)
165 # define SKIPSPACE1(s) skipspace1(s)
166 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
167 # define PEEKSPACE(s) skipspace2(s,0)
169 # define SKIPSPACE0(s) skipspace(s)
170 # define SKIPSPACE1(s) skipspace(s)
171 # define SKIPSPACE2(s,tsv) skipspace(s)
172 # define PEEKSPACE(s) skipspace(s)
176 * Convenience functions to return different tokens and prime the
177 * lexer for the next token. They all take an argument.
179 * TOKEN : generic token (used for '(', DOLSHARP, etc)
180 * OPERATOR : generic operator
181 * AOPERATOR : assignment operator
182 * PREBLOCK : beginning the block after an if, while, foreach, ...
183 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
184 * PREREF : *EXPR where EXPR is not a simple identifier
185 * TERM : expression term
186 * LOOPX : loop exiting command (goto, last, dump, etc)
187 * FTST : file test operator
188 * FUN0 : zero-argument function
189 * FUN1 : not used, except for not, which isn't a UNIOP
190 * BOop : bitwise or or xor
192 * SHop : shift operator
193 * PWop : power operator
194 * PMop : pattern-matching operator
195 * Aop : addition-level operator
196 * Mop : multiplication-level operator
197 * Eop : equality-testing operator
198 * Rop : relational operator <= != gt
200 * Also see LOP and lop() below.
203 #ifdef DEBUGGING /* Serve -DT. */
204 # define REPORT(retval) tokereport((I32)retval)
206 # define REPORT(retval) (retval)
209 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
210 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
211 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
212 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
213 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
214 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
215 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
216 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
217 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
218 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
219 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
220 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
221 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
222 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
223 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
224 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
225 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
226 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
227 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
228 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
230 /* This bit of chicanery makes a unary function followed by
231 * a parenthesis into a function with one argument, highest precedence.
232 * The UNIDOR macro is for unary functions that can be followed by the //
233 * operator (such as C<shift // 0>).
235 #define UNI2(f,x) { \
239 PL_last_uni = PL_oldbufptr; \
240 PL_last_lop_op = f; \
242 return REPORT( (int)FUNC1 ); \
244 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
246 #define UNI(f) UNI2(f,XTERM)
247 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
249 #define UNIBRACK(f) { \
252 PL_last_uni = PL_oldbufptr; \
254 return REPORT( (int)FUNC1 ); \
256 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
259 /* grandfather return to old style */
260 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
264 /* how to interpret the yylval associated with the token */
268 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
274 static struct debug_tokens {
276 enum token_type type;
278 } const debug_tokens[] =
280 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
281 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
282 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
283 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
284 { ARROW, TOKENTYPE_NONE, "ARROW" },
285 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
286 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
287 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
288 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
289 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
290 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
291 { DO, TOKENTYPE_NONE, "DO" },
292 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
293 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
294 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
295 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
296 { ELSE, TOKENTYPE_NONE, "ELSE" },
297 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
298 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
299 { FOR, TOKENTYPE_IVAL, "FOR" },
300 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
301 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
302 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
303 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
304 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
305 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
306 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
307 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
308 { IF, TOKENTYPE_IVAL, "IF" },
309 { LABEL, TOKENTYPE_PVAL, "LABEL" },
310 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
311 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
312 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
313 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
314 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
315 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
316 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
317 { MY, TOKENTYPE_IVAL, "MY" },
318 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
319 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
320 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
321 { OROP, TOKENTYPE_IVAL, "OROP" },
322 { OROR, TOKENTYPE_NONE, "OROR" },
323 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
324 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
325 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
326 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
327 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
328 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
329 { PREINC, TOKENTYPE_NONE, "PREINC" },
330 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
331 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
332 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
333 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
334 { SUB, TOKENTYPE_NONE, "SUB" },
335 { THING, TOKENTYPE_OPVAL, "THING" },
336 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
337 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
338 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
339 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
340 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
341 { USE, TOKENTYPE_IVAL, "USE" },
342 { WHEN, TOKENTYPE_IVAL, "WHEN" },
343 { WHILE, TOKENTYPE_IVAL, "WHILE" },
344 { WORD, TOKENTYPE_OPVAL, "WORD" },
345 { 0, TOKENTYPE_NONE, NULL }
348 /* dump the returned token in rv, plus any optional arg in yylval */
351 S_tokereport(pTHX_ I32 rv)
355 const char *name = NULL;
356 enum token_type type = TOKENTYPE_NONE;
357 const struct debug_tokens *p;
358 SV* const report = newSVpvs("<== ");
360 for (p = debug_tokens; p->token; p++) {
361 if (p->token == (int)rv) {
368 Perl_sv_catpv(aTHX_ report, name);
369 else if ((char)rv > ' ' && (char)rv < '~')
370 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
372 sv_catpvs(report, "EOF");
374 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
377 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
380 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
382 case TOKENTYPE_OPNUM:
383 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
384 PL_op_name[yylval.ival]);
387 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
389 case TOKENTYPE_OPVAL:
391 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
392 PL_op_name[yylval.opval->op_type]);
393 if (yylval.opval->op_type == OP_CONST) {
394 Perl_sv_catpvf(aTHX_ report, " %s",
395 SvPEEK(cSVOPx_sv(yylval.opval)));
400 sv_catpvs(report, "(opval=null)");
403 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
409 /* print the buffer with suitable escapes */
412 S_printbuf(pTHX_ const char* fmt, const char* s)
414 SV* const tmp = newSVpvs("");
415 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
424 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
425 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
429 S_ao(pTHX_ int toketype)
432 if (*PL_bufptr == '=') {
434 if (toketype == ANDAND)
435 yylval.ival = OP_ANDASSIGN;
436 else if (toketype == OROR)
437 yylval.ival = OP_ORASSIGN;
438 else if (toketype == DORDOR)
439 yylval.ival = OP_DORASSIGN;
447 * When Perl expects an operator and finds something else, no_op
448 * prints the warning. It always prints "<something> found where
449 * operator expected. It prints "Missing semicolon on previous line?"
450 * if the surprise occurs at the start of the line. "do you need to
451 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
452 * where the compiler doesn't know if foo is a method call or a function.
453 * It prints "Missing operator before end of line" if there's nothing
454 * after the missing operator, or "... before <...>" if there is something
455 * after the missing operator.
459 S_no_op(pTHX_ const char *what, char *s)
462 char * const oldbp = PL_bufptr;
463 const bool is_first = (PL_oldbufptr == PL_linestart);
469 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
470 if (ckWARN_d(WARN_SYNTAX)) {
472 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
473 "\t(Missing semicolon on previous line?)\n");
474 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
476 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
478 if (t < PL_bufptr && isSPACE(*t))
479 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
480 "\t(Do you need to predeclare %.*s?)\n",
481 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
485 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
486 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
494 * Complain about missing quote/regexp/heredoc terminator.
495 * If it's called with NULL then it cauterizes the line buffer.
496 * If we're in a delimited string and the delimiter is a control
497 * character, it's reformatted into a two-char sequence like ^C.
502 S_missingterm(pTHX_ char *s)
508 char * const nl = strrchr(s,'\n');
514 iscntrl(PL_multi_close)
516 PL_multi_close < 32 || PL_multi_close == 127
520 tmpbuf[1] = (char)toCTRL(PL_multi_close);
525 *tmpbuf = (char)PL_multi_close;
529 q = strchr(s,'"') ? '\'' : '"';
530 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
533 #define FEATURE_IS_ENABLED(name) \
534 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
535 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
537 * S_feature_is_enabled
538 * Check whether the named feature is enabled.
541 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
544 HV * const hinthv = GvHV(PL_hintgv);
545 char he_name[32] = "feature_";
546 (void) my_strlcpy(&he_name[8], name, 24);
548 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
556 Perl_deprecate(pTHX_ const char *s)
558 if (ckWARN(WARN_DEPRECATED))
559 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
563 Perl_deprecate_old(pTHX_ const char *s)
565 /* This function should NOT be called for any new deprecated warnings */
566 /* Use Perl_deprecate instead */
568 /* It is here to maintain backward compatibility with the pre-5.8 */
569 /* warnings category hierarchy. The "deprecated" category used to */
570 /* live under the "syntax" category. It is now a top-level category */
571 /* in its own right. */
573 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
574 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
575 "Use of %s is deprecated", s);
579 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
580 * utf16-to-utf8-reversed.
583 #ifdef PERL_CR_FILTER
587 register const char *s = SvPVX_const(sv);
588 register const char * const e = s + SvCUR(sv);
589 /* outer loop optimized to do nothing if there are no CR-LFs */
591 if (*s++ == '\r' && *s == '\n') {
592 /* hit a CR-LF, need to copy the rest */
593 register char *d = s - 1;
596 if (*s == '\r' && s[1] == '\n')
607 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
609 const I32 count = FILTER_READ(idx+1, sv, maxlen);
610 if (count > 0 && !maxlen)
620 * Create a parser object and initialise its parser and lexer fields
624 Perl_lex_start(pTHX_ SV *line)
627 const char *s = NULL;
631 /* create and initialise a parser */
633 Newxz(parser, 1, yy_parser);
634 parser->old_parser = PL_parser;
637 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
638 parser->ps = parser->stack;
639 parser->stack_size = YYINITDEPTH;
641 parser->stack->state = 0;
642 parser->yyerrstatus = 0;
643 parser->yychar = YYEMPTY; /* Cause a token to be read. */
645 /* on scope exit, free this parser and restore any outer one */
648 /* initialise lexer state */
650 SAVEI8(PL_lex_state);
652 if (PL_lex_state == LEX_KNOWNEXT) {
653 I32 toke = parser->old_parser->lasttoke;
654 while (--toke >= 0) {
655 SAVEI32(PL_nexttoke[toke].next_type);
656 SAVEVPTR(PL_nexttoke[toke].next_val);
658 SAVEVPTR(PL_nexttoke[toke].next_mad);
661 SAVEI32(PL_curforce);
664 if (PL_lex_state == LEX_KNOWNEXT) {
665 I32 toke = PL_nexttoke;
666 while (--toke >= 0) {
667 SAVEI32(PL_nexttype[toke]);
668 SAVEVPTR(PL_nextval[toke]);
670 SAVEI32(PL_nexttoke);
673 SAVECOPLINE(PL_curcop);
676 SAVEPPTR(PL_oldbufptr);
677 SAVEPPTR(PL_oldoldbufptr);
678 SAVEPPTR(PL_last_lop);
679 SAVEPPTR(PL_last_uni);
680 SAVEPPTR(PL_linestart);
681 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
685 PL_lex_state = LEX_NORMAL;
687 Newx(parser->lex_brackstack, 120, char);
688 Newx(parser->lex_casestack, 12, char);
689 *parser->lex_casestack = '\0';
695 s = SvPV_const(line, len);
701 parser->linestr = newSVpvs("\n;");
702 } else if (SvREADONLY(line) || s[len-1] != ';') {
703 parser->linestr = newSVsv(line);
705 sv_catpvs(parser->linestr, "\n;");
708 SvREFCNT_inc_simple_void_NN(line);
709 parser->linestr = line;
711 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(parser->linestr);
712 PL_bufend = PL_bufptr + SvCUR(parser->linestr);
713 PL_last_lop = PL_last_uni = NULL;
718 /* delete a parser object */
721 Perl_parser_free(pTHX_ const yy_parser *parser)
723 SvREFCNT_dec(parser->linestr);
725 Safefree(parser->stack);
726 Safefree(parser->lex_brackstack);
727 Safefree(parser->lex_casestack);
728 PL_parser = parser->old_parser;
735 * Finalizer for lexing operations. Must be called when the parser is
736 * done with the lexer.
743 PL_doextract = FALSE;
748 * This subroutine has nothing to do with tilting, whether at windmills
749 * or pinball tables. Its name is short for "increment line". It
750 * increments the current line number in CopLINE(PL_curcop) and checks
751 * to see whether the line starts with a comment of the form
752 * # line 500 "foo.pm"
753 * If so, it sets the current line number and file to the values in the comment.
757 S_incline(pTHX_ const char *s)
764 CopLINE_inc(PL_curcop);
767 while (SPACE_OR_TAB(*s))
769 if (strnEQ(s, "line", 4))
773 if (SPACE_OR_TAB(*s))
777 while (SPACE_OR_TAB(*s))
785 while (SPACE_OR_TAB(*s))
787 if (*s == '"' && (t = strchr(s+1, '"'))) {
797 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
799 if (*e != '\n' && *e != '\0')
800 return; /* false alarm */
803 const STRLEN len = t - s;
805 const char * const cf = CopFILE(PL_curcop);
806 STRLEN tmplen = cf ? strlen(cf) : 0;
807 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
808 /* must copy *{"::_<(eval N)[oldfilename:L]"}
809 * to *{"::_<newfilename"} */
810 /* However, the long form of evals is only turned on by the
811 debugger - usually they're "(eval %lu)" */
815 STRLEN tmplen2 = len;
816 if (tmplen + 2 <= sizeof smallbuf)
819 Newx(tmpbuf, tmplen + 2, char);
822 memcpy(tmpbuf + 2, cf, tmplen);
824 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
829 if (tmplen2 + 2 <= sizeof smallbuf)
832 Newx(tmpbuf2, tmplen2 + 2, char);
834 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
835 /* Either they malloc'd it, or we malloc'd it,
836 so no prefix is present in ours. */
841 memcpy(tmpbuf2 + 2, s, tmplen2);
844 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
846 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
847 /* adjust ${"::_<newfilename"} to store the new file name */
848 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
849 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
850 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
853 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
855 if (tmpbuf != smallbuf) Safefree(tmpbuf);
858 CopFILE_free(PL_curcop);
859 CopFILE_setn(PL_curcop, s, len);
861 CopLINE_set(PL_curcop, atoi(n)-1);
865 /* skip space before PL_thistoken */
868 S_skipspace0(pTHX_ register char *s)
875 PL_thiswhite = newSVpvs("");
876 sv_catsv(PL_thiswhite, PL_skipwhite);
877 sv_free(PL_skipwhite);
880 PL_realtokenstart = s - SvPVX(PL_linestr);
884 /* skip space after PL_thistoken */
887 S_skipspace1(pTHX_ register char *s)
889 const char *start = s;
890 I32 startoff = start - SvPVX(PL_linestr);
895 start = SvPVX(PL_linestr) + startoff;
896 if (!PL_thistoken && PL_realtokenstart >= 0) {
897 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
898 PL_thistoken = newSVpvn(tstart, start - tstart);
900 PL_realtokenstart = -1;
903 PL_nextwhite = newSVpvs("");
904 sv_catsv(PL_nextwhite, PL_skipwhite);
905 sv_free(PL_skipwhite);
912 S_skipspace2(pTHX_ register char *s, SV **svp)
915 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
916 const I32 startoff = s - SvPVX(PL_linestr);
919 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
920 if (!PL_madskills || !svp)
922 start = SvPVX(PL_linestr) + startoff;
923 if (!PL_thistoken && PL_realtokenstart >= 0) {
924 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
925 PL_thistoken = newSVpvn(tstart, start - tstart);
926 PL_realtokenstart = -1;
931 sv_setsv(*svp, PL_skipwhite);
932 sv_free(PL_skipwhite);
941 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
943 AV *av = CopFILEAVx(PL_curcop);
945 SV * const sv = newSV_type(SVt_PVMG);
947 sv_setsv(sv, orig_sv);
949 sv_setpvn(sv, buf, len);
952 av_store(av, (I32)CopLINE(PL_curcop), sv);
958 * Called to gobble the appropriate amount and type of whitespace.
959 * Skips comments as well.
963 S_skipspace(pTHX_ register char *s)
968 int startoff = s - SvPVX(PL_linestr);
971 sv_free(PL_skipwhite);
976 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
977 while (s < PL_bufend && SPACE_OR_TAB(*s))
987 SSize_t oldprevlen, oldoldprevlen;
988 SSize_t oldloplen = 0, oldunilen = 0;
989 while (s < PL_bufend && isSPACE(*s)) {
990 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
995 if (s < PL_bufend && *s == '#') {
996 while (s < PL_bufend && *s != '\n')
1000 if (PL_in_eval && !PL_rsfp) {
1007 /* only continue to recharge the buffer if we're at the end
1008 * of the buffer, we're not reading from a source filter, and
1009 * we're in normal lexing mode
1011 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1012 PL_lex_state == LEX_FORMLINE)
1019 /* try to recharge the buffer */
1021 curoff = s - SvPVX(PL_linestr);
1024 if ((s = filter_gets(PL_linestr, PL_rsfp,
1025 (prevlen = SvCUR(PL_linestr)))) == NULL)
1028 if (PL_madskills && curoff != startoff) {
1030 PL_skipwhite = newSVpvs("");
1031 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1035 /* mustn't throw out old stuff yet if madpropping */
1036 SvCUR(PL_linestr) = curoff;
1037 s = SvPVX(PL_linestr) + curoff;
1039 if (curoff && s[-1] == '\n')
1043 /* end of file. Add on the -p or -n magic */
1044 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1047 sv_catpvs(PL_linestr,
1048 ";}continue{print or die qq(-p destination: $!\\n);}");
1050 sv_setpvs(PL_linestr,
1051 ";}continue{print or die qq(-p destination: $!\\n);}");
1053 PL_minus_n = PL_minus_p = 0;
1055 else if (PL_minus_n) {
1057 sv_catpvn(PL_linestr, ";}", 2);
1059 sv_setpvn(PL_linestr, ";}", 2);
1065 sv_catpvn(PL_linestr,";", 1);
1067 sv_setpvn(PL_linestr,";", 1);
1070 /* reset variables for next time we lex */
1071 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1077 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1078 PL_last_lop = PL_last_uni = NULL;
1080 /* Close the filehandle. Could be from -P preprocessor,
1081 * STDIN, or a regular file. If we were reading code from
1082 * STDIN (because the commandline held no -e or filename)
1083 * then we don't close it, we reset it so the code can
1084 * read from STDIN too.
1087 if (PL_preprocess && !PL_in_eval)
1088 (void)PerlProc_pclose(PL_rsfp);
1089 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1090 PerlIO_clearerr(PL_rsfp);
1092 (void)PerlIO_close(PL_rsfp);
1097 /* not at end of file, so we only read another line */
1098 /* make corresponding updates to old pointers, for yyerror() */
1099 oldprevlen = PL_oldbufptr - PL_bufend;
1100 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1102 oldunilen = PL_last_uni - PL_bufend;
1104 oldloplen = PL_last_lop - PL_bufend;
1105 PL_linestart = PL_bufptr = s + prevlen;
1106 PL_bufend = s + SvCUR(PL_linestr);
1108 PL_oldbufptr = s + oldprevlen;
1109 PL_oldoldbufptr = s + oldoldprevlen;
1111 PL_last_uni = s + oldunilen;
1113 PL_last_lop = s + oldloplen;
1116 /* debugger active and we're not compiling the debugger code,
1117 * so store the line into the debugger's array of lines
1119 if (PERLDB_LINE && PL_curstash != PL_debstash)
1120 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1127 PL_skipwhite = newSVpvs("");
1128 curoff = s - SvPVX(PL_linestr);
1129 if (curoff - startoff)
1130 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1139 * Check the unary operators to ensure there's no ambiguity in how they're
1140 * used. An ambiguous piece of code would be:
1142 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1143 * the +5 is its argument.
1153 if (PL_oldoldbufptr != PL_last_uni)
1155 while (isSPACE(*PL_last_uni))
1158 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1160 if ((t = strchr(s, '(')) && t < PL_bufptr)
1163 if (ckWARN_d(WARN_AMBIGUOUS)){
1164 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1165 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1166 (int)(s - PL_last_uni), PL_last_uni);
1171 * LOP : macro to build a list operator. Its behaviour has been replaced
1172 * with a subroutine, S_lop() for which LOP is just another name.
1175 #define LOP(f,x) return lop(f,x,s)
1179 * Build a list operator (or something that might be one). The rules:
1180 * - if we have a next token, then it's a list operator [why?]
1181 * - if the next thing is an opening paren, then it's a function
1182 * - else it's a list operator
1186 S_lop(pTHX_ I32 f, int x, char *s)
1193 PL_last_lop = PL_oldbufptr;
1194 PL_last_lop_op = (OPCODE)f;
1197 return REPORT(LSTOP);
1200 return REPORT(LSTOP);
1203 return REPORT(FUNC);
1206 return REPORT(FUNC);
1208 return REPORT(LSTOP);
1214 * Sets up for an eventual force_next(). start_force(0) basically does
1215 * an unshift, while start_force(-1) does a push. yylex removes items
1220 S_start_force(pTHX_ int where)
1224 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1225 where = PL_lasttoke;
1226 assert(PL_curforce < 0 || PL_curforce == where);
1227 if (PL_curforce != where) {
1228 for (i = PL_lasttoke; i > where; --i) {
1229 PL_nexttoke[i] = PL_nexttoke[i-1];
1233 if (PL_curforce < 0) /* in case of duplicate start_force() */
1234 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1235 PL_curforce = where;
1238 curmad('^', newSVpvs(""));
1239 CURMAD('_', PL_nextwhite);
1244 S_curmad(pTHX_ char slot, SV *sv)
1250 if (PL_curforce < 0)
1251 where = &PL_thismad;
1253 where = &PL_nexttoke[PL_curforce].next_mad;
1256 sv_setpvn(sv, "", 0);
1259 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1261 else if (PL_encoding) {
1262 sv_recode_to_utf8(sv, PL_encoding);
1267 /* keep a slot open for the head of the list? */
1268 if (slot != '_' && *where && (*where)->mad_key == '^') {
1269 (*where)->mad_key = slot;
1270 sv_free((*where)->mad_val);
1271 (*where)->mad_val = (void*)sv;
1274 addmad(newMADsv(slot, sv), where, 0);
1277 # define start_force(where) NOOP
1278 # define curmad(slot, sv) NOOP
1283 * When the lexer realizes it knows the next token (for instance,
1284 * it is reordering tokens for the parser) then it can call S_force_next
1285 * to know what token to return the next time the lexer is called. Caller
1286 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1287 * and possibly PL_expect to ensure the lexer handles the token correctly.
1291 S_force_next(pTHX_ I32 type)
1295 if (PL_curforce < 0)
1296 start_force(PL_lasttoke);
1297 PL_nexttoke[PL_curforce].next_type = type;
1298 if (PL_lex_state != LEX_KNOWNEXT)
1299 PL_lex_defer = PL_lex_state;
1300 PL_lex_state = LEX_KNOWNEXT;
1301 PL_lex_expect = PL_expect;
1304 PL_nexttype[PL_nexttoke] = type;
1306 if (PL_lex_state != LEX_KNOWNEXT) {
1307 PL_lex_defer = PL_lex_state;
1308 PL_lex_expect = PL_expect;
1309 PL_lex_state = LEX_KNOWNEXT;
1315 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1318 SV * const sv = newSVpvn(start,len);
1319 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1326 * When the lexer knows the next thing is a word (for instance, it has
1327 * just seen -> and it knows that the next char is a word char, then
1328 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1332 * char *start : buffer position (must be within PL_linestr)
1333 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1334 * int check_keyword : if true, Perl checks to make sure the word isn't
1335 * a keyword (do this if the word is a label, e.g. goto FOO)
1336 * int allow_pack : if true, : characters will also be allowed (require,
1337 * use, etc. do this)
1338 * int allow_initial_tick : used by the "sub" lexer only.
1342 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1348 start = SKIPSPACE1(start);
1350 if (isIDFIRST_lazy_if(s,UTF) ||
1351 (allow_pack && *s == ':') ||
1352 (allow_initial_tick && *s == '\'') )
1354 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1355 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1357 start_force(PL_curforce);
1359 curmad('X', newSVpvn(start,s-start));
1360 if (token == METHOD) {
1365 PL_expect = XOPERATOR;
1369 curmad('g', newSVpvs( "forced" ));
1370 NEXTVAL_NEXTTOKE.opval
1371 = (OP*)newSVOP(OP_CONST,0,
1372 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1373 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1381 * Called when the lexer wants $foo *foo &foo etc, but the program
1382 * text only contains the "foo" portion. The first argument is a pointer
1383 * to the "foo", and the second argument is the type symbol to prefix.
1384 * Forces the next token to be a "WORD".
1385 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1389 S_force_ident(pTHX_ register const char *s, int kind)
1393 const STRLEN len = strlen(s);
1394 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1395 start_force(PL_curforce);
1396 NEXTVAL_NEXTTOKE.opval = o;
1399 o->op_private = OPpCONST_ENTERED;
1400 /* XXX see note in pp_entereval() for why we forgo typo
1401 warnings if the symbol must be introduced in an eval.
1403 gv_fetchpvn_flags(s, len,
1404 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1406 kind == '$' ? SVt_PV :
1407 kind == '@' ? SVt_PVAV :
1408 kind == '%' ? SVt_PVHV :
1416 Perl_str_to_version(pTHX_ SV *sv)
1421 const char *start = SvPV_const(sv,len);
1422 const char * const end = start + len;
1423 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1424 while (start < end) {
1428 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1433 retval += ((NV)n)/nshift;
1442 * Forces the next token to be a version number.
1443 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1444 * and if "guessing" is TRUE, then no new token is created (and the caller
1445 * must use an alternative parsing method).
1449 S_force_version(pTHX_ char *s, int guessing)
1455 I32 startoff = s - SvPVX(PL_linestr);
1464 while (isDIGIT(*d) || *d == '_' || *d == '.')
1468 start_force(PL_curforce);
1469 curmad('X', newSVpvn(s,d-s));
1472 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1474 s = scan_num(s, &yylval);
1475 version = yylval.opval;
1476 ver = cSVOPx(version)->op_sv;
1477 if (SvPOK(ver) && !SvNIOK(ver)) {
1478 SvUPGRADE(ver, SVt_PVNV);
1479 SvNV_set(ver, str_to_version(ver));
1480 SvNOK_on(ver); /* hint that it is a version */
1483 else if (guessing) {
1486 sv_free(PL_nextwhite); /* let next token collect whitespace */
1488 s = SvPVX(PL_linestr) + startoff;
1496 if (PL_madskills && !version) {
1497 sv_free(PL_nextwhite); /* let next token collect whitespace */
1499 s = SvPVX(PL_linestr) + startoff;
1502 /* NOTE: The parser sees the package name and the VERSION swapped */
1503 start_force(PL_curforce);
1504 NEXTVAL_NEXTTOKE.opval = version;
1512 * Tokenize a quoted string passed in as an SV. It finds the next
1513 * chunk, up to end of string or a backslash. It may make a new
1514 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1519 S_tokeq(pTHX_ SV *sv)
1523 register char *send;
1531 s = SvPV_force(sv, len);
1532 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1535 while (s < send && *s != '\\')
1540 if ( PL_hints & HINT_NEW_STRING ) {
1541 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1547 if (s + 1 < send && (s[1] == '\\'))
1548 s++; /* all that, just for this */
1553 SvCUR_set(sv, d - SvPVX_const(sv));
1555 if ( PL_hints & HINT_NEW_STRING )
1556 return new_constant(NULL, 0, "q", sv, pv, "q");
1561 * Now come three functions related to double-quote context,
1562 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1563 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1564 * interact with PL_lex_state, and create fake ( ... ) argument lists
1565 * to handle functions and concatenation.
1566 * They assume that whoever calls them will be setting up a fake
1567 * join call, because each subthing puts a ',' after it. This lets
1570 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1572 * (I'm not sure whether the spurious commas at the end of lcfirst's
1573 * arguments and join's arguments are created or not).
1578 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1580 * Pattern matching will set PL_lex_op to the pattern-matching op to
1581 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1583 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1585 * Everything else becomes a FUNC.
1587 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1588 * had an OP_CONST or OP_READLINE). This just sets us up for a
1589 * call to S_sublex_push().
1593 S_sublex_start(pTHX)
1596 register const I32 op_type = yylval.ival;
1598 if (op_type == OP_NULL) {
1599 yylval.opval = PL_lex_op;
1603 if (op_type == OP_CONST || op_type == OP_READLINE) {
1604 SV *sv = tokeq(PL_lex_stuff);
1606 if (SvTYPE(sv) == SVt_PVIV) {
1607 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1609 const char * const p = SvPV_const(sv, len);
1610 SV * const nsv = newSVpvn(p, len);
1616 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1617 PL_lex_stuff = NULL;
1618 /* Allow <FH> // "foo" */
1619 if (op_type == OP_READLINE)
1620 PL_expect = XTERMORDORDOR;
1623 else if (op_type == OP_BACKTICK && PL_lex_op) {
1624 /* readpipe() vas overriden */
1625 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1626 yylval.opval = PL_lex_op;
1628 PL_lex_stuff = NULL;
1632 PL_sublex_info.super_state = PL_lex_state;
1633 PL_sublex_info.sub_inwhat = (U16)op_type;
1634 PL_sublex_info.sub_op = PL_lex_op;
1635 PL_lex_state = LEX_INTERPPUSH;
1639 yylval.opval = PL_lex_op;
1649 * Create a new scope to save the lexing state. The scope will be
1650 * ended in S_sublex_done. Returns a '(', starting the function arguments
1651 * to the uc, lc, etc. found before.
1652 * Sets PL_lex_state to LEX_INTERPCONCAT.
1661 PL_lex_state = PL_sublex_info.super_state;
1662 SAVEBOOL(PL_lex_dojoin);
1663 SAVEI32(PL_lex_brackets);
1664 SAVEI32(PL_lex_casemods);
1665 SAVEI32(PL_lex_starts);
1666 SAVEI8(PL_lex_state);
1667 SAVEVPTR(PL_lex_inpat);
1668 SAVEI16(PL_lex_inwhat);
1669 SAVECOPLINE(PL_curcop);
1670 SAVEPPTR(PL_bufptr);
1671 SAVEPPTR(PL_bufend);
1672 SAVEPPTR(PL_oldbufptr);
1673 SAVEPPTR(PL_oldoldbufptr);
1674 SAVEPPTR(PL_last_lop);
1675 SAVEPPTR(PL_last_uni);
1676 SAVEPPTR(PL_linestart);
1677 SAVESPTR(PL_linestr);
1678 SAVEGENERICPV(PL_lex_brackstack);
1679 SAVEGENERICPV(PL_lex_casestack);
1681 PL_linestr = PL_lex_stuff;
1682 PL_lex_stuff = NULL;
1684 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1685 = SvPVX(PL_linestr);
1686 PL_bufend += SvCUR(PL_linestr);
1687 PL_last_lop = PL_last_uni = NULL;
1688 SAVEFREESV(PL_linestr);
1690 PL_lex_dojoin = FALSE;
1691 PL_lex_brackets = 0;
1692 Newx(PL_lex_brackstack, 120, char);
1693 Newx(PL_lex_casestack, 12, char);
1694 PL_lex_casemods = 0;
1695 *PL_lex_casestack = '\0';
1697 PL_lex_state = LEX_INTERPCONCAT;
1698 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1700 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1701 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1702 PL_lex_inpat = PL_sublex_info.sub_op;
1704 PL_lex_inpat = NULL;
1711 * Restores lexer state after a S_sublex_push.
1718 if (!PL_lex_starts++) {
1719 SV * const sv = newSVpvs("");
1720 if (SvUTF8(PL_linestr))
1722 PL_expect = XOPERATOR;
1723 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1727 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1728 PL_lex_state = LEX_INTERPCASEMOD;
1732 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1733 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1734 PL_linestr = PL_lex_repl;
1736 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1737 PL_bufend += SvCUR(PL_linestr);
1738 PL_last_lop = PL_last_uni = NULL;
1739 SAVEFREESV(PL_linestr);
1740 PL_lex_dojoin = FALSE;
1741 PL_lex_brackets = 0;
1742 PL_lex_casemods = 0;
1743 *PL_lex_casestack = '\0';
1745 if (SvEVALED(PL_lex_repl)) {
1746 PL_lex_state = LEX_INTERPNORMAL;
1748 /* we don't clear PL_lex_repl here, so that we can check later
1749 whether this is an evalled subst; that means we rely on the
1750 logic to ensure sublex_done() is called again only via the
1751 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1754 PL_lex_state = LEX_INTERPCONCAT;
1764 PL_endwhite = newSVpvs("");
1765 sv_catsv(PL_endwhite, PL_thiswhite);
1769 sv_setpvn(PL_thistoken,"",0);
1771 PL_realtokenstart = -1;
1775 PL_bufend = SvPVX(PL_linestr);
1776 PL_bufend += SvCUR(PL_linestr);
1777 PL_expect = XOPERATOR;
1778 PL_sublex_info.sub_inwhat = 0;
1786 Extracts a pattern, double-quoted string, or transliteration. This
1789 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1790 processing a pattern (PL_lex_inpat is true), a transliteration
1791 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1793 Returns a pointer to the character scanned up to. If this is
1794 advanced from the start pointer supplied (i.e. if anything was
1795 successfully parsed), will leave an OP for the substring scanned
1796 in yylval. Caller must intuit reason for not parsing further
1797 by looking at the next characters herself.
1801 double-quoted style: \r and \n
1802 regexp special ones: \D \s
1805 case and quoting: \U \Q \E
1806 stops on @ and $, but not for $ as tail anchor
1808 In transliterations:
1809 characters are VERY literal, except for - not at the start or end
1810 of the string, which indicates a range. If the range is in bytes,
1811 scan_const expands the range to the full set of intermediate
1812 characters. If the range is in utf8, the hyphen is replaced with
1813 a certain range mark which will be handled by pmtrans() in op.c.
1815 In double-quoted strings:
1817 double-quoted style: \r and \n
1819 deprecated backrefs: \1 (in substitution replacements)
1820 case and quoting: \U \Q \E
1823 scan_const does *not* construct ops to handle interpolated strings.
1824 It stops processing as soon as it finds an embedded $ or @ variable
1825 and leaves it to the caller to work out what's going on.
1827 embedded arrays (whether in pattern or not) could be:
1828 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1830 $ in double-quoted strings must be the symbol of an embedded scalar.
1832 $ in pattern could be $foo or could be tail anchor. Assumption:
1833 it's a tail anchor if $ is the last thing in the string, or if it's
1834 followed by one of "()| \r\n\t"
1836 \1 (backreferences) are turned into $1
1838 The structure of the code is
1839 while (there's a character to process) {
1840 handle transliteration ranges
1841 skip regexp comments /(?#comment)/ and codes /(?{code})/
1842 skip #-initiated comments in //x patterns
1843 check for embedded arrays
1844 check for embedded scalars
1846 leave intact backslashes from leaveit (below)
1847 deprecate \1 in substitution replacements
1848 handle string-changing backslashes \l \U \Q \E, etc.
1849 switch (what was escaped) {
1850 handle \- in a transliteration (becomes a literal -)
1851 handle \132 (octal characters)
1852 handle \x15 and \x{1234} (hex characters)
1853 handle \N{name} (named characters)
1854 handle \cV (control characters)
1855 handle printf-style backslashes (\f, \r, \n, etc)
1857 } (end if backslash)
1858 } (end while character to read)
1863 S_scan_const(pTHX_ char *start)
1866 register char *send = PL_bufend; /* end of the constant */
1867 SV *sv = newSV(send - start); /* sv for the constant */
1868 register char *s = start; /* start of the constant */
1869 register char *d = SvPVX(sv); /* destination for copies */
1870 bool dorange = FALSE; /* are we in a translit range? */
1871 bool didrange = FALSE; /* did we just finish a range? */
1872 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1873 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1876 UV literal_endpoint = 0;
1877 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1880 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1881 /* If we are doing a trans and we know we want UTF8 set expectation */
1882 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1883 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1887 while (s < send || dorange) {
1888 /* get transliterations out of the way (they're most literal) */
1889 if (PL_lex_inwhat == OP_TRANS) {
1890 /* expand a range A-Z to the full set of characters. AIE! */
1892 I32 i; /* current expanded character */
1893 I32 min; /* first character in range */
1894 I32 max; /* last character in range */
1905 char * const c = (char*)utf8_hop((U8*)d, -1);
1909 *c = (char)UTF_TO_NATIVE(0xff);
1910 /* mark the range as done, and continue */
1916 i = d - SvPVX_const(sv); /* remember current offset */
1919 SvLEN(sv) + (has_utf8 ?
1920 (512 - UTF_CONTINUATION_MARK +
1923 /* How many two-byte within 0..255: 128 in UTF-8,
1924 * 96 in UTF-8-mod. */
1926 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1928 d = SvPVX(sv) + i; /* refresh d after realloc */
1932 for (j = 0; j <= 1; j++) {
1933 char * const c = (char*)utf8_hop((U8*)d, -1);
1934 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1940 max = (U8)0xff; /* only to \xff */
1941 uvmax = uv; /* \x{100} to uvmax */
1943 d = c; /* eat endpoint chars */
1948 d -= 2; /* eat the first char and the - */
1949 min = (U8)*d; /* first char in range */
1950 max = (U8)d[1]; /* last char in range */
1957 "Invalid range \"%c-%c\" in transliteration operator",
1958 (char)min, (char)max);
1962 if (literal_endpoint == 2 &&
1963 ((isLOWER(min) && isLOWER(max)) ||
1964 (isUPPER(min) && isUPPER(max)))) {
1966 for (i = min; i <= max; i++)
1968 *d++ = NATIVE_TO_NEED(has_utf8,i);
1970 for (i = min; i <= max; i++)
1972 *d++ = NATIVE_TO_NEED(has_utf8,i);
1977 for (i = min; i <= max; i++)
1980 const U8 ch = (U8)NATIVE_TO_UTF(i);
1981 if (UNI_IS_INVARIANT(ch))
1984 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1985 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1994 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1996 *d++ = (char)UTF_TO_NATIVE(0xff);
1998 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2002 /* mark the range as done, and continue */
2006 literal_endpoint = 0;
2011 /* range begins (ignore - as first or last char) */
2012 else if (*s == '-' && s+1 < send && s != start) {
2014 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2021 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2031 literal_endpoint = 0;
2032 native_range = TRUE;
2037 /* if we get here, we're not doing a transliteration */
2039 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2040 except for the last char, which will be done separately. */
2041 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2043 while (s+1 < send && *s != ')')
2044 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2046 else if (s[2] == '{' /* This should match regcomp.c */
2047 || (s[2] == '?' && s[3] == '{'))
2050 char *regparse = s + (s[2] == '{' ? 3 : 4);
2053 while (count && (c = *regparse)) {
2054 if (c == '\\' && regparse[1])
2062 if (*regparse != ')')
2063 regparse--; /* Leave one char for continuation. */
2064 while (s < regparse)
2065 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2069 /* likewise skip #-initiated comments in //x patterns */
2070 else if (*s == '#' && PL_lex_inpat &&
2071 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2072 while (s+1 < send && *s != '\n')
2073 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2076 /* check for embedded arrays
2077 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2079 else if (*s == '@' && s[1]) {
2080 if (isALNUM_lazy_if(s+1,UTF))
2082 if (strchr(":'{$", s[1]))
2084 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2085 break; /* in regexp, neither @+ nor @- are interpolated */
2088 /* check for embedded scalars. only stop if we're sure it's a
2091 else if (*s == '$') {
2092 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2094 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2095 break; /* in regexp, $ might be tail anchor */
2098 /* End of else if chain - OP_TRANS rejoin rest */
2101 if (*s == '\\' && s+1 < send) {
2104 /* deprecate \1 in strings and substitution replacements */
2105 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2106 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2108 if (ckWARN(WARN_SYNTAX))
2109 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2114 /* string-change backslash escapes */
2115 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2119 /* skip any other backslash escapes in a pattern */
2120 else if (PL_lex_inpat) {
2121 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2122 goto default_action;
2125 /* if we get here, it's either a quoted -, or a digit */
2128 /* quoted - in transliterations */
2130 if (PL_lex_inwhat == OP_TRANS) {
2137 if ((isALPHA(*s) || isDIGIT(*s)) &&
2139 Perl_warner(aTHX_ packWARN(WARN_MISC),
2140 "Unrecognized escape \\%c passed through",
2142 /* default action is to copy the quoted character */
2143 goto default_action;
2146 /* \132 indicates an octal constant */
2147 case '0': case '1': case '2': case '3':
2148 case '4': case '5': case '6': case '7':
2152 uv = grok_oct(s, &len, &flags, NULL);
2155 goto NUM_ESCAPE_INSERT;
2157 /* \x24 indicates a hex constant */
2161 char* const e = strchr(s, '}');
2162 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2163 PERL_SCAN_DISALLOW_PREFIX;
2168 yyerror("Missing right brace on \\x{}");
2172 uv = grok_hex(s, &len, &flags, NULL);
2178 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2179 uv = grok_hex(s, &len, &flags, NULL);
2185 /* Insert oct or hex escaped character.
2186 * There will always enough room in sv since such
2187 * escapes will be longer than any UTF-8 sequence
2188 * they can end up as. */
2190 /* We need to map to chars to ASCII before doing the tests
2193 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2194 if (!has_utf8 && uv > 255) {
2195 /* Might need to recode whatever we have
2196 * accumulated so far if it contains any
2199 * (Can't we keep track of that and avoid
2200 * this rescan? --jhi)
2204 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2205 if (!NATIVE_IS_INVARIANT(*c)) {
2210 const STRLEN offset = d - SvPVX_const(sv);
2212 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2216 while (src >= (const U8 *)SvPVX_const(sv)) {
2217 if (!NATIVE_IS_INVARIANT(*src)) {
2218 const U8 ch = NATIVE_TO_ASCII(*src);
2219 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2220 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2230 if (has_utf8 || uv > 255) {
2231 d = (char*)uvchr_to_utf8((U8*)d, uv);
2233 if (PL_lex_inwhat == OP_TRANS &&
2234 PL_sublex_info.sub_op) {
2235 PL_sublex_info.sub_op->op_private |=
2236 (PL_lex_repl ? OPpTRANS_FROM_UTF
2240 if (uv > 255 && !dorange)
2241 native_range = FALSE;
2253 /* \N{LATIN SMALL LETTER A} is a named character */
2257 char* e = strchr(s, '}');
2264 yyerror("Missing right brace on \\N{}");
2268 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2270 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2271 PERL_SCAN_DISALLOW_PREFIX;
2274 uv = grok_hex(s, &len, &flags, NULL);
2275 if ( e > s && len != (STRLEN)(e - s) ) {
2279 goto NUM_ESCAPE_INSERT;
2281 res = newSVpvn(s + 1, e - s - 1);
2282 type = newSVpvn(s - 2,e - s + 3);
2283 res = new_constant( NULL, 0, "charnames",
2284 res, NULL, SvPVX(type) );
2287 sv_utf8_upgrade(res);
2288 str = SvPV_const(res,len);
2289 #ifdef EBCDIC_NEVER_MIND
2290 /* charnames uses pack U and that has been
2291 * recently changed to do the below uni->native
2292 * mapping, so this would be redundant (and wrong,
2293 * the code point would be doubly converted).
2294 * But leave this in just in case the pack U change
2295 * gets revoked, but the semantics is still
2296 * desireable for charnames. --jhi */
2298 UV uv = utf8_to_uvchr((const U8*)str, 0);
2301 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2303 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2304 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2305 str = SvPV_const(res, len);
2309 if (!has_utf8 && SvUTF8(res)) {
2310 const char * const ostart = SvPVX_const(sv);
2311 SvCUR_set(sv, d - ostart);
2314 sv_utf8_upgrade(sv);
2315 /* this just broke our allocation above... */
2316 SvGROW(sv, (STRLEN)(send - start));
2317 d = SvPVX(sv) + SvCUR(sv);
2320 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2321 const char * const odest = SvPVX_const(sv);
2323 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2324 d = SvPVX(sv) + (d - odest);
2328 native_range = FALSE; /* \N{} is guessed to be Unicode */
2330 Copy(str, d, len, char);
2337 yyerror("Missing braces on \\N{}");
2340 /* \c is a control character */
2349 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2352 yyerror("Missing control char name in \\c");
2356 /* printf-style backslashes, formfeeds, newlines, etc */
2358 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2361 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2364 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2367 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2370 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2373 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2376 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2382 } /* end if (backslash) */
2389 /* If we started with encoded form, or already know we want it
2390 and then encode the next character */
2391 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2393 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2394 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2397 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2398 const STRLEN off = d - SvPVX_const(sv);
2399 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2401 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2404 if (uv > 255 && !dorange)
2405 native_range = FALSE;
2409 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2411 } /* while loop to process each character */
2413 /* terminate the string and set up the sv */
2415 SvCUR_set(sv, d - SvPVX_const(sv));
2416 if (SvCUR(sv) >= SvLEN(sv))
2417 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2420 if (PL_encoding && !has_utf8) {
2421 sv_recode_to_utf8(sv, PL_encoding);
2427 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2428 PL_sublex_info.sub_op->op_private |=
2429 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2433 /* shrink the sv if we allocated more than we used */
2434 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2435 SvPV_shrink_to_cur(sv);
2438 /* return the substring (via yylval) only if we parsed anything */
2439 if (s > PL_bufptr) {
2440 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2441 sv = new_constant(start, s - start,
2442 (const char *)(PL_lex_inpat ? "qr" : "q"),
2445 (( PL_lex_inwhat == OP_TRANS
2447 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2450 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2457 * Returns TRUE if there's more to the expression (e.g., a subscript),
2460 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2462 * ->[ and ->{ return TRUE
2463 * { and [ outside a pattern are always subscripts, so return TRUE
2464 * if we're outside a pattern and it's not { or [, then return FALSE
2465 * if we're in a pattern and the first char is a {
2466 * {4,5} (any digits around the comma) returns FALSE
2467 * if we're in a pattern and the first char is a [
2469 * [SOMETHING] has a funky algorithm to decide whether it's a
2470 * character class or not. It has to deal with things like
2471 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2472 * anything else returns TRUE
2475 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2478 S_intuit_more(pTHX_ register char *s)
2481 if (PL_lex_brackets)
2483 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2485 if (*s != '{' && *s != '[')
2490 /* In a pattern, so maybe we have {n,m}. */
2507 /* On the other hand, maybe we have a character class */
2510 if (*s == ']' || *s == '^')
2513 /* this is terrifying, and it works */
2514 int weight = 2; /* let's weigh the evidence */
2516 unsigned char un_char = 255, last_un_char;
2517 const char * const send = strchr(s,']');
2518 char tmpbuf[sizeof PL_tokenbuf * 4];
2520 if (!send) /* has to be an expression */
2523 Zero(seen,256,char);
2526 else if (isDIGIT(*s)) {
2528 if (isDIGIT(s[1]) && s[2] == ']')
2534 for (; s < send; s++) {
2535 last_un_char = un_char;
2536 un_char = (unsigned char)*s;
2541 weight -= seen[un_char] * 10;
2542 if (isALNUM_lazy_if(s+1,UTF)) {
2544 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2545 len = (int)strlen(tmpbuf);
2546 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2551 else if (*s == '$' && s[1] &&
2552 strchr("[#!%*<>()-=",s[1])) {
2553 if (/*{*/ strchr("])} =",s[2]))
2562 if (strchr("wds]",s[1]))
2564 else if (seen[(U8)'\''] || seen[(U8)'"'])
2566 else if (strchr("rnftbxcav",s[1]))
2568 else if (isDIGIT(s[1])) {
2570 while (s[1] && isDIGIT(s[1]))
2580 if (strchr("aA01! ",last_un_char))
2582 if (strchr("zZ79~",s[1]))
2584 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2585 weight -= 5; /* cope with negative subscript */
2588 if (!isALNUM(last_un_char)
2589 && !(last_un_char == '$' || last_un_char == '@'
2590 || last_un_char == '&')
2591 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2596 if (keyword(tmpbuf, d - tmpbuf, 0))
2599 if (un_char == last_un_char + 1)
2601 weight -= seen[un_char];
2606 if (weight >= 0) /* probably a character class */
2616 * Does all the checking to disambiguate
2618 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2619 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2621 * First argument is the stuff after the first token, e.g. "bar".
2623 * Not a method if bar is a filehandle.
2624 * Not a method if foo is a subroutine prototyped to take a filehandle.
2625 * Not a method if it's really "Foo $bar"
2626 * Method if it's "foo $bar"
2627 * Not a method if it's really "print foo $bar"
2628 * Method if it's really "foo package::" (interpreted as package->foo)
2629 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2630 * Not a method if bar is a filehandle or package, but is quoted with
2635 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2638 char *s = start + (*start == '$');
2639 char tmpbuf[sizeof PL_tokenbuf];
2647 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2651 const char *proto = SvPVX_const(cv);
2662 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2663 /* start is the beginning of the possible filehandle/object,
2664 * and s is the end of it
2665 * tmpbuf is a copy of it
2668 if (*start == '$') {
2669 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2670 isUPPER(*PL_tokenbuf))
2673 len = start - SvPVX(PL_linestr);
2677 start = SvPVX(PL_linestr) + len;
2681 return *s == '(' ? FUNCMETH : METHOD;
2683 if (!keyword(tmpbuf, len, 0)) {
2684 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2688 soff = s - SvPVX(PL_linestr);
2692 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2693 if (indirgv && GvCVu(indirgv))
2695 /* filehandle or package name makes it a method */
2696 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2698 soff = s - SvPVX(PL_linestr);
2701 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2702 return 0; /* no assumptions -- "=>" quotes bearword */
2704 start_force(PL_curforce);
2705 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2706 newSVpvn(tmpbuf,len));
2707 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2709 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2714 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2716 return *s == '(' ? FUNCMETH : METHOD;
2724 * Return a string of Perl code to load the debugger. If PERL5DB
2725 * is set, it will return the contents of that, otherwise a
2726 * compile-time require of perl5db.pl.
2734 const char * const pdb = PerlEnv_getenv("PERL5DB");
2738 SETERRNO(0,SS_NORMAL);
2739 return "BEGIN { require 'perl5db.pl' }";
2745 /* Encoded script support. filter_add() effectively inserts a
2746 * 'pre-processing' function into the current source input stream.
2747 * Note that the filter function only applies to the current source file
2748 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2750 * The datasv parameter (which may be NULL) can be used to pass
2751 * private data to this instance of the filter. The filter function
2752 * can recover the SV using the FILTER_DATA macro and use it to
2753 * store private buffers and state information.
2755 * The supplied datasv parameter is upgraded to a PVIO type
2756 * and the IoDIRP/IoANY field is used to store the function pointer,
2757 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2758 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2759 * private use must be set using malloc'd pointers.
2763 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2769 if (!PL_rsfp_filters)
2770 PL_rsfp_filters = newAV();
2773 SvUPGRADE(datasv, SVt_PVIO);
2774 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2775 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2776 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2777 FPTR2DPTR(void *, IoANY(datasv)),
2778 SvPV_nolen(datasv)));
2779 av_unshift(PL_rsfp_filters, 1);
2780 av_store(PL_rsfp_filters, 0, datasv) ;
2785 /* Delete most recently added instance of this filter function. */
2787 Perl_filter_del(pTHX_ filter_t funcp)
2793 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2794 FPTR2DPTR(void*, funcp)));
2796 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2798 /* if filter is on top of stack (usual case) just pop it off */
2799 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2800 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2801 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2802 IoANY(datasv) = (void *)NULL;
2803 sv_free(av_pop(PL_rsfp_filters));
2807 /* we need to search for the correct entry and clear it */
2808 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2812 /* Invoke the idxth filter function for the current rsfp. */
2813 /* maxlen 0 = read one text line */
2815 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2820 /* This API is bad. It should have been using unsigned int for maxlen.
2821 Not sure if we want to change the API, but if not we should sanity
2822 check the value here. */
2823 const unsigned int correct_length
2832 if (!PL_rsfp_filters)
2834 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2835 /* Provide a default input filter to make life easy. */
2836 /* Note that we append to the line. This is handy. */
2837 DEBUG_P(PerlIO_printf(Perl_debug_log,
2838 "filter_read %d: from rsfp\n", idx));
2839 if (correct_length) {
2842 const int old_len = SvCUR(buf_sv);
2844 /* ensure buf_sv is large enough */
2845 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2846 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2847 correct_length)) <= 0) {
2848 if (PerlIO_error(PL_rsfp))
2849 return -1; /* error */
2851 return 0 ; /* end of file */
2853 SvCUR_set(buf_sv, old_len + len) ;
2856 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2857 if (PerlIO_error(PL_rsfp))
2858 return -1; /* error */
2860 return 0 ; /* end of file */
2863 return SvCUR(buf_sv);
2865 /* Skip this filter slot if filter has been deleted */
2866 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2867 DEBUG_P(PerlIO_printf(Perl_debug_log,
2868 "filter_read %d: skipped (filter deleted)\n",
2870 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2872 /* Get function pointer hidden within datasv */
2873 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2874 DEBUG_P(PerlIO_printf(Perl_debug_log,
2875 "filter_read %d: via function %p (%s)\n",
2876 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2877 /* Call function. The function is expected to */
2878 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2879 /* Return: <0:error, =0:eof, >0:not eof */
2880 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2884 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2887 #ifdef PERL_CR_FILTER
2888 if (!PL_rsfp_filters) {
2889 filter_add(S_cr_textfilter,NULL);
2892 if (PL_rsfp_filters) {
2894 SvCUR_set(sv, 0); /* start with empty line */
2895 if (FILTER_READ(0, sv, 0) > 0)
2896 return ( SvPVX(sv) ) ;
2901 return (sv_gets(sv, fp, append));
2905 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2910 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2914 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2915 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2917 return GvHV(gv); /* Foo:: */
2920 /* use constant CLASS => 'MyClass' */
2921 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2922 if (gv && GvCV(gv)) {
2923 SV * const sv = cv_const_sv(GvCV(gv));
2925 pkgname = SvPV_nolen_const(sv);
2928 return gv_stashpv(pkgname, 0);
2932 * S_readpipe_override
2933 * Check whether readpipe() is overriden, and generates the appropriate
2934 * optree, provided sublex_start() is called afterwards.
2937 S_readpipe_override(pTHX)
2940 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2941 yylval.ival = OP_BACKTICK;
2943 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2945 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2946 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2947 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2949 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2950 append_elem(OP_LIST,
2951 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2952 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2962 * The intent of this yylex wrapper is to minimize the changes to the
2963 * tokener when we aren't interested in collecting madprops. It remains
2964 * to be seen how successful this strategy will be...
2971 char *s = PL_bufptr;
2973 /* make sure PL_thiswhite is initialized */
2977 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2978 if (PL_pending_ident)
2979 return S_pending_ident(aTHX);
2981 /* previous token ate up our whitespace? */
2982 if (!PL_lasttoke && PL_nextwhite) {
2983 PL_thiswhite = PL_nextwhite;
2987 /* isolate the token, and figure out where it is without whitespace */
2988 PL_realtokenstart = -1;
2992 assert(PL_curforce < 0);
2994 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2995 if (!PL_thistoken) {
2996 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2997 PL_thistoken = newSVpvs("");
2999 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3000 PL_thistoken = newSVpvn(tstart, s - tstart);
3003 if (PL_thismad) /* install head */
3004 CURMAD('X', PL_thistoken);
3007 /* last whitespace of a sublex? */
3008 if (optype == ')' && PL_endwhite) {
3009 CURMAD('X', PL_endwhite);
3014 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3015 if (!PL_thiswhite && !PL_endwhite && !optype) {
3016 sv_free(PL_thistoken);
3021 /* put off final whitespace till peg */
3022 if (optype == ';' && !PL_rsfp) {
3023 PL_nextwhite = PL_thiswhite;
3026 else if (PL_thisopen) {
3027 CURMAD('q', PL_thisopen);
3029 sv_free(PL_thistoken);
3033 /* Store actual token text as madprop X */
3034 CURMAD('X', PL_thistoken);
3038 /* add preceding whitespace as madprop _ */
3039 CURMAD('_', PL_thiswhite);
3043 /* add quoted material as madprop = */
3044 CURMAD('=', PL_thisstuff);
3048 /* add terminating quote as madprop Q */
3049 CURMAD('Q', PL_thisclose);
3053 /* special processing based on optype */
3057 /* opval doesn't need a TOKEN since it can already store mp */
3068 append_madprops(PL_thismad, yylval.opval, 0);
3076 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3085 /* remember any fake bracket that lexer is about to discard */
3086 if (PL_lex_brackets == 1 &&
3087 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3090 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3093 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3094 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3097 break; /* don't bother looking for trailing comment */
3106 /* attach a trailing comment to its statement instead of next token */
3110 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3112 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3114 if (*s == '\n' || *s == '#') {
3115 while (s < PL_bufend && *s != '\n')
3119 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3120 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3137 /* Create new token struct. Note: opvals return early above. */
3138 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3145 S_tokenize_use(pTHX_ int is_use, char *s) {
3147 if (PL_expect != XSTATE)
3148 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3149 is_use ? "use" : "no"));
3151 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3152 s = force_version(s, TRUE);
3153 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3154 start_force(PL_curforce);
3155 NEXTVAL_NEXTTOKE.opval = NULL;
3158 else if (*s == 'v') {
3159 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3160 s = force_version(s, FALSE);
3164 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3165 s = force_version(s, FALSE);
3167 yylval.ival = is_use;
3171 static const char* const exp_name[] =
3172 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3173 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3180 Works out what to call the token just pulled out of the input
3181 stream. The yacc parser takes care of taking the ops we return and
3182 stitching them into a tree.
3188 if read an identifier
3189 if we're in a my declaration
3190 croak if they tried to say my($foo::bar)
3191 build the ops for a my() declaration
3192 if it's an access to a my() variable
3193 are we in a sort block?
3194 croak if my($a); $a <=> $b
3195 build ops for access to a my() variable
3196 if in a dq string, and they've said @foo and we can't find @foo
3198 build ops for a bareword
3199 if we already built the token before, use it.
3204 #pragma segment Perl_yylex
3210 register char *s = PL_bufptr;
3215 /* orig_keyword, gvp, and gv are initialized here because
3216 * jump to the label just_a_word_zero can bypass their
3217 * initialization later. */
3218 I32 orig_keyword = 0;
3223 SV* tmp = newSVpvs("");
3224 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3225 (IV)CopLINE(PL_curcop),
3226 lex_state_names[PL_lex_state],
3227 exp_name[PL_expect],
3228 pv_display(tmp, s, strlen(s), 0, 60));
3231 /* check if there's an identifier for us to look at */
3232 if (PL_pending_ident)
3233 return REPORT(S_pending_ident(aTHX));
3235 /* no identifier pending identification */
3237 switch (PL_lex_state) {
3239 case LEX_NORMAL: /* Some compilers will produce faster */
3240 case LEX_INTERPNORMAL: /* code if we comment these out. */
3244 /* when we've already built the next token, just pull it out of the queue */
3248 yylval = PL_nexttoke[PL_lasttoke].next_val;
3250 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3251 PL_nexttoke[PL_lasttoke].next_mad = 0;
3252 if (PL_thismad && PL_thismad->mad_key == '_') {
3253 PL_thiswhite = (SV*)PL_thismad->mad_val;
3254 PL_thismad->mad_val = 0;
3255 mad_free(PL_thismad);
3260 PL_lex_state = PL_lex_defer;
3261 PL_expect = PL_lex_expect;
3262 PL_lex_defer = LEX_NORMAL;
3263 if (!PL_nexttoke[PL_lasttoke].next_type)
3268 yylval = PL_nextval[PL_nexttoke];
3270 PL_lex_state = PL_lex_defer;
3271 PL_expect = PL_lex_expect;
3272 PL_lex_defer = LEX_NORMAL;
3276 /* FIXME - can these be merged? */
3277 return(PL_nexttoke[PL_lasttoke].next_type);
3279 return REPORT(PL_nexttype[PL_nexttoke]);
3282 /* interpolated case modifiers like \L \U, including \Q and \E.
3283 when we get here, PL_bufptr is at the \
3285 case LEX_INTERPCASEMOD:
3287 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3288 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3290 /* handle \E or end of string */
3291 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3293 if (PL_lex_casemods) {
3294 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3295 PL_lex_casestack[PL_lex_casemods] = '\0';
3297 if (PL_bufptr != PL_bufend
3298 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3300 PL_lex_state = LEX_INTERPCONCAT;
3303 PL_thistoken = newSVpvs("\\E");
3309 while (PL_bufptr != PL_bufend &&
3310 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3312 PL_thiswhite = newSVpvs("");
3313 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3317 if (PL_bufptr != PL_bufend)
3320 PL_lex_state = LEX_INTERPCONCAT;
3324 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3325 "### Saw case modifier\n"); });
3327 if (s[1] == '\\' && s[2] == 'E') {
3330 PL_thiswhite = newSVpvs("");
3331 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3334 PL_lex_state = LEX_INTERPCONCAT;
3339 if (!PL_madskills) /* when just compiling don't need correct */
3340 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3341 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3342 if ((*s == 'L' || *s == 'U') &&
3343 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3344 PL_lex_casestack[--PL_lex_casemods] = '\0';
3347 if (PL_lex_casemods > 10)
3348 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3349 PL_lex_casestack[PL_lex_casemods++] = *s;
3350 PL_lex_casestack[PL_lex_casemods] = '\0';
3351 PL_lex_state = LEX_INTERPCONCAT;
3352 start_force(PL_curforce);
3353 NEXTVAL_NEXTTOKE.ival = 0;
3355 start_force(PL_curforce);
3357 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3359 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3361 NEXTVAL_NEXTTOKE.ival = OP_LC;
3363 NEXTVAL_NEXTTOKE.ival = OP_UC;
3365 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3367 Perl_croak(aTHX_ "panic: yylex");
3369 SV* const tmpsv = newSVpvs("");
3370 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3376 if (PL_lex_starts) {
3382 sv_free(PL_thistoken);
3383 PL_thistoken = newSVpvs("");
3386 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3387 if (PL_lex_casemods == 1 && PL_lex_inpat)
3396 case LEX_INTERPPUSH:
3397 return REPORT(sublex_push());
3399 case LEX_INTERPSTART:
3400 if (PL_bufptr == PL_bufend)
3401 return REPORT(sublex_done());
3402 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3403 "### Interpolated variable\n"); });
3405 PL_lex_dojoin = (*PL_bufptr == '@');
3406 PL_lex_state = LEX_INTERPNORMAL;
3407 if (PL_lex_dojoin) {
3408 start_force(PL_curforce);
3409 NEXTVAL_NEXTTOKE.ival = 0;
3411 start_force(PL_curforce);
3412 force_ident("\"", '$');
3413 start_force(PL_curforce);
3414 NEXTVAL_NEXTTOKE.ival = 0;
3416 start_force(PL_curforce);
3417 NEXTVAL_NEXTTOKE.ival = 0;
3419 start_force(PL_curforce);
3420 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3423 if (PL_lex_starts++) {
3428 sv_free(PL_thistoken);
3429 PL_thistoken = newSVpvs("");
3432 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3433 if (!PL_lex_casemods && PL_lex_inpat)
3440 case LEX_INTERPENDMAYBE:
3441 if (intuit_more(PL_bufptr)) {
3442 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3448 if (PL_lex_dojoin) {
3449 PL_lex_dojoin = FALSE;
3450 PL_lex_state = LEX_INTERPCONCAT;
3454 sv_free(PL_thistoken);
3455 PL_thistoken = newSVpvs("");
3460 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3461 && SvEVALED(PL_lex_repl))
3463 if (PL_bufptr != PL_bufend)
3464 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3468 case LEX_INTERPCONCAT:
3470 if (PL_lex_brackets)
3471 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3473 if (PL_bufptr == PL_bufend)
3474 return REPORT(sublex_done());
3476 if (SvIVX(PL_linestr) == '\'') {
3477 SV *sv = newSVsv(PL_linestr);
3480 else if ( PL_hints & HINT_NEW_RE )
3481 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3482 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3486 s = scan_const(PL_bufptr);
3488 PL_lex_state = LEX_INTERPCASEMOD;
3490 PL_lex_state = LEX_INTERPSTART;
3493 if (s != PL_bufptr) {
3494 start_force(PL_curforce);
3496 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3498 NEXTVAL_NEXTTOKE = yylval;
3501 if (PL_lex_starts++) {
3505 sv_free(PL_thistoken);
3506 PL_thistoken = newSVpvs("");
3509 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3510 if (!PL_lex_casemods && PL_lex_inpat)
3523 PL_lex_state = LEX_NORMAL;
3524 s = scan_formline(PL_bufptr);
3525 if (!PL_lex_formbrack)
3531 PL_oldoldbufptr = PL_oldbufptr;
3537 sv_free(PL_thistoken);
3540 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3544 if (isIDFIRST_lazy_if(s,UTF))
3546 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3549 goto fake_eof; /* emulate EOF on ^D or ^Z */
3558 if (PL_lex_brackets) {
3559 yyerror((const char *)
3561 ? "Format not terminated"
3562 : "Missing right curly or square bracket"));
3564 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3565 "### Tokener got EOF\n");
3569 if (s++ < PL_bufend)
3570 goto retry; /* ignore stray nulls */
3573 if (!PL_in_eval && !PL_preambled) {
3574 PL_preambled = TRUE;
3579 sv_setpv(PL_linestr,incl_perldb());
3580 if (SvCUR(PL_linestr))
3581 sv_catpvs(PL_linestr,";");
3583 while(AvFILLp(PL_preambleav) >= 0) {
3584 SV *tmpsv = av_shift(PL_preambleav);
3585 sv_catsv(PL_linestr, tmpsv);
3586 sv_catpvs(PL_linestr, ";");
3589 sv_free((SV*)PL_preambleav);
3590 PL_preambleav = NULL;
3592 if (PL_minus_n || PL_minus_p) {
3593 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3595 sv_catpvs(PL_linestr,"chomp;");
3598 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3599 || *PL_splitstr == '"')
3600 && strchr(PL_splitstr + 1, *PL_splitstr))
3601 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3603 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3604 bytes can be used as quoting characters. :-) */
3605 const char *splits = PL_splitstr;
3606 sv_catpvs(PL_linestr, "our @F=split(q\0");
3609 if (*splits == '\\')
3610 sv_catpvn(PL_linestr, splits, 1);
3611 sv_catpvn(PL_linestr, splits, 1);
3612 } while (*splits++);
3613 /* This loop will embed the trailing NUL of
3614 PL_linestr as the last thing it does before
3616 sv_catpvs(PL_linestr, ");");
3620 sv_catpvs(PL_linestr,"our @F=split(' ');");
3624 sv_catpvs(PL_linestr,"use feature ':5.10';");
3625 sv_catpvs(PL_linestr, "\n");
3626 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3627 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3628 PL_last_lop = PL_last_uni = NULL;
3629 if (PERLDB_LINE && PL_curstash != PL_debstash)
3630 update_debugger_info(PL_linestr, NULL, 0);
3634 bof = PL_rsfp ? TRUE : FALSE;
3635 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3638 PL_realtokenstart = -1;
3641 if (PL_preprocess && !PL_in_eval)
3642 (void)PerlProc_pclose(PL_rsfp);
3643 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3644 PerlIO_clearerr(PL_rsfp);
3646 (void)PerlIO_close(PL_rsfp);
3648 PL_doextract = FALSE;
3650 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3655 sv_setpv(PL_linestr,
3658 ? ";}continue{print;}" : ";}"));
3659 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3660 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3661 PL_last_lop = PL_last_uni = NULL;
3662 PL_minus_n = PL_minus_p = 0;
3665 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3666 PL_last_lop = PL_last_uni = NULL;
3667 sv_setpvn(PL_linestr,"",0);
3668 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3670 /* If it looks like the start of a BOM or raw UTF-16,
3671 * check if it in fact is. */
3677 #ifdef PERLIO_IS_STDIO
3678 # ifdef __GNU_LIBRARY__
3679 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3680 # define FTELL_FOR_PIPE_IS_BROKEN
3684 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3685 # define FTELL_FOR_PIPE_IS_BROKEN
3690 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3691 /* This loses the possibility to detect the bof
3692 * situation on perl -P when the libc5 is being used.
3693 * Workaround? Maybe attach some extra state to PL_rsfp?
3696 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3698 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3701 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3702 s = swallow_bom((U8*)s);
3706 /* Incest with pod. */
3709 sv_catsv(PL_thiswhite, PL_linestr);
3711 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3712 sv_setpvn(PL_linestr, "", 0);
3713 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3714 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3715 PL_last_lop = PL_last_uni = NULL;
3716 PL_doextract = FALSE;
3720 } while (PL_doextract);
3721 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3722 if (PERLDB_LINE && PL_curstash != PL_debstash)
3723 update_debugger_info(PL_linestr, NULL, 0);
3724 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3725 PL_last_lop = PL_last_uni = NULL;
3726 if (CopLINE(PL_curcop) == 1) {
3727 while (s < PL_bufend && isSPACE(*s))
3729 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3733 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3737 if (*s == '#' && *(s+1) == '!')
3739 #ifdef ALTERNATE_SHEBANG
3741 static char const as[] = ALTERNATE_SHEBANG;
3742 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3743 d = s + (sizeof(as) - 1);
3745 #endif /* ALTERNATE_SHEBANG */
3754 while (*d && !isSPACE(*d))
3758 #ifdef ARG_ZERO_IS_SCRIPT
3759 if (ipathend > ipath) {
3761 * HP-UX (at least) sets argv[0] to the script name,
3762 * which makes $^X incorrect. And Digital UNIX and Linux,
3763 * at least, set argv[0] to the basename of the Perl
3764 * interpreter. So, having found "#!", we'll set it right.
3766 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3768 assert(SvPOK(x) || SvGMAGICAL(x));
3769 if (sv_eq(x, CopFILESV(PL_curcop))) {
3770 sv_setpvn(x, ipath, ipathend - ipath);
3776 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3777 const char * const lstart = SvPV_const(x,llen);
3779 bstart += blen - llen;
3780 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3781 sv_setpvn(x, ipath, ipathend - ipath);
3786 TAINT_NOT; /* $^X is always tainted, but that's OK */
3788 #endif /* ARG_ZERO_IS_SCRIPT */
3793 d = instr(s,"perl -");
3795 d = instr(s,"perl");
3797 /* avoid getting into infinite loops when shebang
3798 * line contains "Perl" rather than "perl" */
3800 for (d = ipathend-4; d >= ipath; --d) {
3801 if ((*d == 'p' || *d == 'P')
3802 && !ibcmp(d, "perl", 4))
3812 #ifdef ALTERNATE_SHEBANG
3814 * If the ALTERNATE_SHEBANG on this system starts with a
3815 * character that can be part of a Perl expression, then if
3816 * we see it but not "perl", we're probably looking at the
3817 * start of Perl code, not a request to hand off to some
3818 * other interpreter. Similarly, if "perl" is there, but
3819 * not in the first 'word' of the line, we assume the line
3820 * contains the start of the Perl program.
3822 if (d && *s != '#') {
3823 const char *c = ipath;
3824 while (*c && !strchr("; \t\r\n\f\v#", *c))
3827 d = NULL; /* "perl" not in first word; ignore */
3829 *s = '#'; /* Don't try to parse shebang line */
3831 #endif /* ALTERNATE_SHEBANG */
3832 #ifndef MACOS_TRADITIONAL
3837 !instr(s,"indir") &&
3838 instr(PL_origargv[0],"perl"))
3845 while (s < PL_bufend && isSPACE(*s))
3847 if (s < PL_bufend) {
3848 Newxz(newargv,PL_origargc+3,char*);
3850 while (s < PL_bufend && !isSPACE(*s))
3853 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3856 newargv = PL_origargv;
3859 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3861 Perl_croak(aTHX_ "Can't exec %s", ipath);
3865 while (*d && !isSPACE(*d))
3867 while (SPACE_OR_TAB(*d))
3871 const bool switches_done = PL_doswitches;
3872 const U32 oldpdb = PL_perldb;
3873 const bool oldn = PL_minus_n;
3874 const bool oldp = PL_minus_p;
3877 if (*d == 'M' || *d == 'm' || *d == 'C') {
3878 const char * const m = d;
3879 while (*d && !isSPACE(*d))
3881 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3884 d = moreswitches(d);
3886 if (PL_doswitches && !switches_done) {
3887 int argc = PL_origargc;
3888 char **argv = PL_origargv;
3891 } while (argc && argv[0][0] == '-' && argv[0][1]);
3892 init_argv_symbols(argc,argv);
3894 if ((PERLDB_LINE && !oldpdb) ||
3895 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3896 /* if we have already added "LINE: while (<>) {",
3897 we must not do it again */
3899 sv_setpvn(PL_linestr, "", 0);
3900 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3901 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3902 PL_last_lop = PL_last_uni = NULL;
3903 PL_preambled = FALSE;
3905 (void)gv_fetchfile(PL_origfilename);
3912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3914 PL_lex_state = LEX_FORMLINE;
3919 #ifdef PERL_STRICT_CR
3920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3924 case ' ': case '\t': case '\f': case 013:
3925 #ifdef MACOS_TRADITIONAL
3929 PL_realtokenstart = -1;
3931 PL_thiswhite = newSVpvs("");
3932 sv_catpvn(PL_thiswhite, s, 1);
3939 PL_realtokenstart = -1;
3943 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3944 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3945 /* handle eval qq[#line 1 "foo"\n ...] */
3946 CopLINE_dec(PL_curcop);
3949 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3951 if (!PL_in_eval || PL_rsfp)
3956 while (d < PL_bufend && *d != '\n')
3960 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3961 Perl_croak(aTHX_ "panic: input overflow");
3964 PL_thiswhite = newSVpvn(s, d - s);
3969 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3971 PL_lex_state = LEX_FORMLINE;
3977 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3978 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3981 TOKEN(PEG); /* make sure any #! line is accessible */
3986 /* if (PL_madskills && PL_lex_formbrack) { */
3988 while (d < PL_bufend && *d != '\n')
3992 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3993 Perl_croak(aTHX_ "panic: input overflow");
3994 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3996 PL_thiswhite = newSVpvs("");
3997 if (CopLINE(PL_curcop) == 1) {
3998 sv_setpvn(PL_thiswhite, "", 0);
4001 sv_catpvn(PL_thiswhite, s, d - s);
4015 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4023 while (s < PL_bufend && SPACE_OR_TAB(*s))
4026 if (strnEQ(s,"=>",2)) {
4027 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4028 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4029 OPERATOR('-'); /* unary minus */
4031 PL_last_uni = PL_oldbufptr;
4033 case 'r': ftst = OP_FTEREAD; break;
4034 case 'w': ftst = OP_FTEWRITE; break;
4035 case 'x': ftst = OP_FTEEXEC; break;
4036 case 'o': ftst = OP_FTEOWNED; break;
4037 case 'R': ftst = OP_FTRREAD; break;
4038 case 'W': ftst = OP_FTRWRITE; break;
4039 case 'X': ftst = OP_FTREXEC; break;
4040 case 'O': ftst = OP_FTROWNED; break;
4041 case 'e': ftst = OP_FTIS; break;
4042 case 'z': ftst = OP_FTZERO; break;
4043 case 's': ftst = OP_FTSIZE; break;
4044 case 'f': ftst = OP_FTFILE; break;
4045 case 'd': ftst = OP_FTDIR; break;
4046 case 'l': ftst = OP_FTLINK; break;
4047 case 'p': ftst = OP_FTPIPE; break;
4048 case 'S': ftst = OP_FTSOCK; break;
4049 case 'u': ftst = OP_FTSUID; break;
4050 case 'g': ftst = OP_FTSGID; break;
4051 case 'k': ftst = OP_FTSVTX; break;
4052 case 'b': ftst = OP_FTBLK; break;
4053 case 'c': ftst = OP_FTCHR; break;
4054 case 't': ftst = OP_FTTTY; break;
4055 case 'T': ftst = OP_FTTEXT; break;
4056 case 'B': ftst = OP_FTBINARY; break;
4057 case 'M': case 'A': case 'C':
4058 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4060 case 'M': ftst = OP_FTMTIME; break;
4061 case 'A': ftst = OP_FTATIME; break;
4062 case 'C': ftst = OP_FTCTIME; break;
4070 PL_last_lop_op = (OPCODE)ftst;
4071 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4072 "### Saw file test %c\n", (int)tmp);
4077 /* Assume it was a minus followed by a one-letter named
4078 * subroutine call (or a -bareword), then. */
4079 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4080 "### '-%c' looked like a file test but was not\n",
4087 const char tmp = *s++;
4090 if (PL_expect == XOPERATOR)
4095 else if (*s == '>') {
4098 if (isIDFIRST_lazy_if(s,UTF)) {
4099 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4107 if (PL_expect == XOPERATOR)
4110 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4112 OPERATOR('-'); /* unary minus */
4118 const char tmp = *s++;
4121 if (PL_expect == XOPERATOR)
4126 if (PL_expect == XOPERATOR)
4129 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4136 if (PL_expect != XOPERATOR) {
4137 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4138 PL_expect = XOPERATOR;
4139 force_ident(PL_tokenbuf, '*');
4152 if (PL_expect == XOPERATOR) {
4156 PL_tokenbuf[0] = '%';
4157 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4158 sizeof PL_tokenbuf - 1, FALSE);
4159 if (!PL_tokenbuf[1]) {
4162 PL_pending_ident = '%';
4173 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4180 const char tmp = *s++;
4186 goto just_a_word_zero_gv;
4189 switch (PL_expect) {
4195 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4197 PL_bufptr = s; /* update in case we back off */
4203 PL_expect = XTERMBLOCK;
4206 stuffstart = s - SvPVX(PL_linestr) - 1;
4210 while (isIDFIRST_lazy_if(s,UTF)) {
4213 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4214 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4215 if (tmp < 0) tmp = -tmp;
4230 sv = newSVpvn(s, len);
4232 d = scan_str(d,TRUE,TRUE);
4234 /* MUST advance bufptr here to avoid bogus
4235 "at end of line" context messages from yyerror().
4237 PL_bufptr = s + len;
4238 yyerror("Unterminated attribute parameter in attribute list");
4242 return REPORT(0); /* EOF indicator */
4246 sv_catsv(sv, PL_lex_stuff);
4247 attrs = append_elem(OP_LIST, attrs,
4248 newSVOP(OP_CONST, 0, sv));
4249 SvREFCNT_dec(PL_lex_stuff);
4250 PL_lex_stuff = NULL;
4253 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4255 if (PL_in_my == KEY_our) {
4257 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4259 /* skip to avoid loading attributes.pm */
4261 deprecate(":unique");
4264 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4267 /* NOTE: any CV attrs applied here need to be part of
4268 the CVf_BUILTIN_ATTRS define in cv.h! */
4269 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4271 CvLVALUE_on(PL_compcv);
4273 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4275 CvLOCKED_on(PL_compcv);
4277 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4279 CvMETHOD_on(PL_compcv);
4281 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4283 CvASSERTION_on(PL_compcv);
4285 /* After we've set the flags, it could be argued that
4286 we don't need to do the attributes.pm-based setting
4287 process, and shouldn't bother appending recognized
4288 flags. To experiment with that, uncomment the
4289 following "else". (Note that's already been
4290 uncommented. That keeps the above-applied built-in
4291 attributes from being intercepted (and possibly
4292 rejected) by a package's attribute routines, but is
4293 justified by the performance win for the common case
4294 of applying only built-in attributes.) */
4296 attrs = append_elem(OP_LIST, attrs,
4297 newSVOP(OP_CONST, 0,
4301 if (*s == ':' && s[1] != ':')
4304 break; /* require real whitespace or :'s */
4305 /* XXX losing whitespace on sequential attributes here */
4309 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4310 if (*s != ';' && *s != '}' && *s != tmp
4311 && (tmp != '=' || *s != ')')) {
4312 const char q = ((*s == '\'') ? '"' : '\'');
4313 /* If here for an expression, and parsed no attrs, back
4315 if (tmp == '=' && !attrs) {
4319 /* MUST advance bufptr here to avoid bogus "at end of line"
4320 context messages from yyerror().
4323 yyerror( (const char *)
4325 ? Perl_form(aTHX_ "Invalid separator character "
4326 "%c%c%c in attribute list", q, *s, q)
4327 : "Unterminated attribute list" ) );
4335 start_force(PL_curforce);
4336 NEXTVAL_NEXTTOKE.opval = attrs;
4337 CURMAD('_', PL_nextwhite);
4342 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4343 (s - SvPVX(PL_linestr)) - stuffstart);
4351 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4352 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4360 const char tmp = *s++;
4365 const char tmp = *s++;
4373 if (PL_lex_brackets <= 0)
4374 yyerror("Unmatched right square bracket");
4377 if (PL_lex_state == LEX_INTERPNORMAL) {
4378 if (PL_lex_brackets == 0) {
4379 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4380 PL_lex_state = LEX_INTERPEND;
4387 if (PL_lex_brackets > 100) {
4388 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4390 switch (PL_expect) {
4392 if (PL_lex_formbrack) {
4396 if (PL_oldoldbufptr == PL_last_lop)
4397 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4399 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4400 OPERATOR(HASHBRACK);
4402 while (s < PL_bufend && SPACE_OR_TAB(*s))
4405 PL_tokenbuf[0] = '\0';
4406 if (d < PL_bufend && *d == '-') {
4407 PL_tokenbuf[0] = '-';
4409 while (d < PL_bufend && SPACE_OR_TAB(*d))
4412 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4413 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4415 while (d < PL_bufend && SPACE_OR_TAB(*d))
4418 const char minus = (PL_tokenbuf[0] == '-');
4419 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4427 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4432 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4437 if (PL_oldoldbufptr == PL_last_lop)
4438 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4440 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4443 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4445 /* This hack is to get the ${} in the message. */
4447 yyerror("syntax error");
4450 OPERATOR(HASHBRACK);
4452 /* This hack serves to disambiguate a pair of curlies
4453 * as being a block or an anon hash. Normally, expectation
4454 * determines that, but in cases where we're not in a
4455 * position to expect anything in particular (like inside
4456 * eval"") we have to resolve the ambiguity. This code
4457 * covers the case where the first term in the curlies is a
4458 * quoted string. Most other cases need to be explicitly
4459 * disambiguated by prepending a "+" before the opening
4460 * curly in order to force resolution as an anon hash.
4462 * XXX should probably propagate the outer expectation
4463 * into eval"" to rely less on this hack, but that could
4464 * potentially break current behavior of eval"".
4468 if (*s == '\'' || *s == '"' || *s == '`') {
4469 /* common case: get past first string, handling escapes */
4470 for (t++; t < PL_bufend && *t != *s;)
4471 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4475 else if (*s == 'q') {
4478 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4481 /* skip q//-like construct */
4483 char open, close, term;
4486 while (t < PL_bufend && isSPACE(*t))
4488 /* check for q => */
4489 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4490 OPERATOR(HASHBRACK);
4494 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4498 for (t++; t < PL_bufend; t++) {
4499 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4501 else if (*t == open)
4505 for (t++; t < PL_bufend; t++) {
4506 if (*t == '\\' && t+1 < PL_bufend)
4508 else if (*t == close && --brackets <= 0)
4510 else if (*t == open)
4517 /* skip plain q word */
4518 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4521 else if (isALNUM_lazy_if(t,UTF)) {
4523 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4526 while (t < PL_bufend && isSPACE(*t))
4528 /* if comma follows first term, call it an anon hash */
4529 /* XXX it could be a comma expression with loop modifiers */
4530 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4531 || (*t == '=' && t[1] == '>')))
4532 OPERATOR(HASHBRACK);
4533 if (PL_expect == XREF)
4536 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4542 yylval.ival = CopLINE(PL_curcop);
4543 if (isSPACE(*s) || *s == '#')
4544 PL_copline = NOLINE; /* invalidate current command line number */
4549 if (PL_lex_brackets <= 0)
4550 yyerror("Unmatched right curly bracket");
4552 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4553 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4554 PL_lex_formbrack = 0;
4555 if (PL_lex_state == LEX_INTERPNORMAL) {
4556 if (PL_lex_brackets == 0) {
4557 if (PL_expect & XFAKEBRACK) {
4558 PL_expect &= XENUMMASK;
4559 PL_lex_state = LEX_INTERPEND;
4564 PL_thiswhite = newSVpvs("");
4565 sv_catpvn(PL_thiswhite,"}",1);
4568 return yylex(); /* ignore fake brackets */
4570 if (*s == '-' && s[1] == '>')
4571 PL_lex_state = LEX_INTERPENDMAYBE;
4572 else if (*s != '[' && *s != '{')
4573 PL_lex_state = LEX_INTERPEND;
4576 if (PL_expect & XFAKEBRACK) {
4577 PL_expect &= XENUMMASK;
4579 return yylex(); /* ignore fake brackets */
4581 start_force(PL_curforce);
4583 curmad('X', newSVpvn(s-1,1));
4584 CURMAD('_', PL_thiswhite);
4589 PL_thistoken = newSVpvs("");
4597 if (PL_expect == XOPERATOR) {
4598 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4599 && isIDFIRST_lazy_if(s,UTF))
4601 CopLINE_dec(PL_curcop);
4602 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4603 CopLINE_inc(PL_curcop);
4608 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4610 PL_expect = XOPERATOR;
4611 force_ident(PL_tokenbuf, '&');
4615 yylval.ival = (OPpENTERSUB_AMPER<<8);
4627 const char tmp = *s++;
4634 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4635 && strchr("+-*/%.^&|<",tmp))
4636 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4637 "Reversed %c= operator",(int)tmp);
4639 if (PL_expect == XSTATE && isALPHA(tmp) &&
4640 (s == PL_linestart+1 || s[-2] == '\n') )
4642 if (PL_in_eval && !PL_rsfp) {
4647 if (strnEQ(s,"=cut",4)) {
4663 PL_thiswhite = newSVpvs("");
4664 sv_catpvn(PL_thiswhite, PL_linestart,
4665 PL_bufend - PL_linestart);
4669 PL_doextract = TRUE;
4673 if (PL_lex_brackets < PL_lex_formbrack) {
4675 #ifdef PERL_STRICT_CR
4676 while (SPACE_OR_TAB(*t))
4678 while (SPACE_OR_TAB(*t) || *t == '\r')
4681 if (*t == '\n' || *t == '#') {
4692 const char tmp = *s++;
4694 /* was this !=~ where !~ was meant?
4695 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4697 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4698 const char *t = s+1;
4700 while (t < PL_bufend && isSPACE(*t))
4703 if (*t == '/' || *t == '?' ||
4704 ((*t == 'm' || *t == 's' || *t == 'y')
4705 && !isALNUM(t[1])) ||
4706 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4707 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4708 "!=~ should be !~");
4718 if (PL_expect != XOPERATOR) {
4719 if (s[1] != '<' && !strchr(s,'>'))
4722 s = scan_heredoc(s);
4724 s = scan_inputsymbol(s);
4725 TERM(sublex_start());
4731 SHop(OP_LEFT_SHIFT);
4745 const char tmp = *s++;
4747 SHop(OP_RIGHT_SHIFT);
4748 else if (tmp == '=')
4757 if (PL_expect == XOPERATOR) {
4758 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4760 deprecate_old(commaless_variable_list);
4761 return REPORT(','); /* grandfather non-comma-format format */
4765 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4766 PL_tokenbuf[0] = '@';
4767 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4768 sizeof PL_tokenbuf - 1, FALSE);
4769 if (PL_expect == XOPERATOR)
4770 no_op("Array length", s);
4771 if (!PL_tokenbuf[1])
4773 PL_expect = XOPERATOR;
4774 PL_pending_ident = '#';
4778 PL_tokenbuf[0] = '$';
4779 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4780 sizeof PL_tokenbuf - 1, FALSE);
4781 if (PL_expect == XOPERATOR)
4783 if (!PL_tokenbuf[1]) {
4785 yyerror("Final $ should be \\$ or $name");
4789 /* This kludge not intended to be bulletproof. */
4790 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4791 yylval.opval = newSVOP(OP_CONST, 0,
4792 newSViv(CopARYBASE_get(&PL_compiling)));
4793 yylval.opval->op_private = OPpCONST_ARYBASE;
4799 const char tmp = *s;
4800 if (PL_lex_state == LEX_NORMAL)
4803 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4804 && intuit_more(s)) {
4806 PL_tokenbuf[0] = '@';
4807 if (ckWARN(WARN_SYNTAX)) {
4810 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4813 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4814 while (t < PL_bufend && *t != ']')
4816 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4817 "Multidimensional syntax %.*s not supported",
4818 (int)((t - PL_bufptr) + 1), PL_bufptr);
4822 else if (*s == '{') {
4824 PL_tokenbuf[0] = '%';
4825 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4826 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4828 char tmpbuf[sizeof PL_tokenbuf];
4831 } while (isSPACE(*t));
4832 if (isIDFIRST_lazy_if(t,UTF)) {
4834 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4838 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4839 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4840 "You need to quote \"%s\"",
4847 PL_expect = XOPERATOR;
4848 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4849 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4850 if (!islop || PL_last_lop_op == OP_GREPSTART)
4851 PL_expect = XOPERATOR;
4852 else if (strchr("$@\"'`q", *s))
4853 PL_expect = XTERM; /* e.g. print $fh "foo" */
4854 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4855 PL_expect = XTERM; /* e.g. print $fh &sub */
4856 else if (isIDFIRST_lazy_if(s,UTF)) {
4857 char tmpbuf[sizeof PL_tokenbuf];
4859 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4860 if ((t2 = keyword(tmpbuf, len, 0))) {
4861 /* binary operators exclude handle interpretations */
4873 PL_expect = XTERM; /* e.g. print $fh length() */
4878 PL_expect = XTERM; /* e.g. print $fh subr() */
4881 else if (isDIGIT(*s))
4882 PL_expect = XTERM; /* e.g. print $fh 3 */
4883 else if (*s == '.' && isDIGIT(s[1]))
4884 PL_expect = XTERM; /* e.g. print $fh .3 */
4885 else if ((*s == '?' || *s == '-' || *s == '+')
4886 && !isSPACE(s[1]) && s[1] != '=')
4887 PL_expect = XTERM; /* e.g. print $fh -1 */
4888 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4890 PL_expect = XTERM; /* e.g. print $fh /.../
4891 XXX except DORDOR operator
4893 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4895 PL_expect = XTERM; /* print $fh <<"EOF" */
4898 PL_pending_ident = '$';
4902 if (PL_expect == XOPERATOR)
4904 PL_tokenbuf[0] = '@';
4905 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4906 if (!PL_tokenbuf[1]) {
4909 if (PL_lex_state == LEX_NORMAL)
4911 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4913 PL_tokenbuf[0] = '%';
4915 /* Warn about @ where they meant $. */
4916 if (*s == '[' || *s == '{') {
4917 if (ckWARN(WARN_SYNTAX)) {
4918 const char *t = s + 1;
4919 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4921 if (*t == '}' || *t == ']') {
4923 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4924 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4925 "Scalar value %.*s better written as $%.*s",
4926 (int)(t-PL_bufptr), PL_bufptr,
4927 (int)(t-PL_bufptr-1), PL_bufptr+1);
4932 PL_pending_ident = '@';
4935 case '/': /* may be division, defined-or, or pattern */
4936 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4940 case '?': /* may either be conditional or pattern */
4941 if(PL_expect == XOPERATOR) {
4949 /* A // operator. */
4959 /* Disable warning on "study /blah/" */
4960 if (PL_oldoldbufptr == PL_last_uni
4961 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4962 || memNE(PL_last_uni, "study", 5)
4963 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4966 s = scan_pat(s,OP_MATCH);
4967 TERM(sublex_start());
4971 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4972 #ifdef PERL_STRICT_CR
4975 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4977 && (s == PL_linestart || s[-1] == '\n') )
4979 PL_lex_formbrack = 0;
4983 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4989 yylval.ival = OPf_SPECIAL;
4995 if (PL_expect != XOPERATOR)
5000 case '0': case '1': case '2': case '3': case '4':
5001 case '5': case '6': case '7': case '8': case '9':
5002 s = scan_num(s, &yylval);
5003 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5004 if (PL_expect == XOPERATOR)
5009 s = scan_str(s,!!PL_madskills,FALSE);
5010 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5011 if (PL_expect == XOPERATOR) {
5012 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5014 deprecate_old(commaless_variable_list);
5015 return REPORT(','); /* grandfather non-comma-format format */
5022 yylval.ival = OP_CONST;
5023 TERM(sublex_start());
5026 s = scan_str(s,!!PL_madskills,FALSE);
5027 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5028 if (PL_expect == XOPERATOR) {
5029 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5031 deprecate_old(commaless_variable_list);
5032 return REPORT(','); /* grandfather non-comma-format format */
5039 yylval.ival = OP_CONST;
5040 /* FIXME. I think that this can be const if char *d is replaced by
5041 more localised variables. */
5042 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5043 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5044 yylval.ival = OP_STRINGIFY;
5048 TERM(sublex_start());
5051 s = scan_str(s,!!PL_madskills,FALSE);
5052 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5053 if (PL_expect == XOPERATOR)
5054 no_op("Backticks",s);
5057 readpipe_override();
5058 TERM(sublex_start());
5062 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5063 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5065 if (PL_expect == XOPERATOR)
5066 no_op("Backslash",s);
5070 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5071 char *start = s + 2;
5072 while (isDIGIT(*start) || *start == '_')
5074 if (*start == '.' && isDIGIT(start[1])) {
5075 s = scan_num(s, &yylval);
5078 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5079 else if (!isALPHA(*start) && (PL_expect == XTERM
5080 || PL_expect == XREF || PL_expect == XSTATE
5081 || PL_expect == XTERMORDORDOR)) {
5082 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5083 const char c = *start;
5086 gv = gv_fetchpv(s, 0, SVt_PVCV);
5089 s = scan_num(s, &yylval);
5096 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5138 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5140 /* Some keywords can be followed by any delimiter, including ':' */
5141 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5142 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5143 (PL_tokenbuf[0] == 'q' &&
5144 strchr("qwxr", PL_tokenbuf[1])))));
5146 /* x::* is just a word, unless x is "CORE" */
5147 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5151 while (d < PL_bufend && isSPACE(*d))
5152 d++; /* no comments skipped here, or s### is misparsed */
5154 /* Is this a label? */
5155 if (!tmp && PL_expect == XSTATE
5156 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5158 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5163 /* Check for keywords */
5164 tmp = keyword(PL_tokenbuf, len, 0);
5166 /* Is this a word before a => operator? */
5167 if (*d == '=' && d[1] == '>') {
5170 = (OP*)newSVOP(OP_CONST, 0,
5171 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5172 yylval.opval->op_private = OPpCONST_BARE;
5176 if (tmp < 0) { /* second-class keyword? */
5177 GV *ogv = NULL; /* override (winner) */
5178 GV *hgv = NULL; /* hidden (loser) */
5179 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5181 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5184 if (GvIMPORTED_CV(gv))
5186 else if (! CvMETHOD(cv))
5190 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5191 (gv = *gvp) && isGV_with_GP(gv) &&
5192 GvCVu(gv) && GvIMPORTED_CV(gv))
5199 tmp = 0; /* overridden by import or by GLOBAL */
5202 && -tmp==KEY_lock /* XXX generalizable kludge */
5204 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5206 tmp = 0; /* any sub overrides "weak" keyword */
5208 else { /* no override */
5210 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5211 Perl_warner(aTHX_ packWARN(WARN_MISC),
5212 "dump() better written as CORE::dump()");
5216 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5217 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5218 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5219 "Ambiguous call resolved as CORE::%s(), %s",
5220 GvENAME(hgv), "qualify as such or use &");
5227 default: /* not a keyword */
5228 /* Trade off - by using this evil construction we can pull the
5229 variable gv into the block labelled keylookup. If not, then
5230 we have to give it function scope so that the goto from the
5231 earlier ':' case doesn't bypass the initialisation. */
5233 just_a_word_zero_gv:
5241 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5244 SV *nextPL_nextwhite = 0;
5248 /* Get the rest if it looks like a package qualifier */
5250 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5252 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5255 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5256 *s == '\'' ? "'" : "::");
5261 if (PL_expect == XOPERATOR) {
5262 if (PL_bufptr == PL_linestart) {
5263 CopLINE_dec(PL_curcop);
5264 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5265 CopLINE_inc(PL_curcop);
5268 no_op("Bareword",s);
5271 /* Look for a subroutine with this name in current package,
5272 unless name is "Foo::", in which case Foo is a bearword
5273 (and a package name). */
5275 if (len > 2 && !PL_madskills &&
5276 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5278 if (ckWARN(WARN_BAREWORD)
5279 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5280 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5281 "Bareword \"%s\" refers to nonexistent package",
5284 PL_tokenbuf[len] = '\0';
5290 /* Mustn't actually add anything to a symbol table.
5291 But also don't want to "initialise" any placeholder
5292 constants that might already be there into full
5293 blown PVGVs with attached PVCV. */
5294 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5295 GV_NOADD_NOINIT, SVt_PVCV);
5300 /* if we saw a global override before, get the right name */
5303 sv = newSVpvs("CORE::GLOBAL::");
5304 sv_catpv(sv,PL_tokenbuf);
5307 /* If len is 0, newSVpv does strlen(), which is correct.
5308 If len is non-zero, then it will be the true length,
5309 and so the scalar will be created correctly. */
5310 sv = newSVpv(PL_tokenbuf,len);
5313 if (PL_madskills && !PL_thistoken) {
5314 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5315 PL_thistoken = newSVpv(start,s - start);
5316 PL_realtokenstart = s - SvPVX(PL_linestr);
5320 /* Presume this is going to be a bareword of some sort. */
5323 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5324 yylval.opval->op_private = OPpCONST_BARE;
5325 /* UTF-8 package name? */
5326 if (UTF && !IN_BYTES &&
5327 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5330 /* And if "Foo::", then that's what it certainly is. */
5335 /* Do the explicit type check so that we don't need to force
5336 the initialisation of the symbol table to have a real GV.
5337 Beware - gv may not really be a PVGV, cv may not really be
5338 a PVCV, (because of the space optimisations that gv_init
5339 understands) But they're true if for this symbol there is
5340 respectively a typeglob and a subroutine.
5342 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5343 /* Real typeglob, so get the real subroutine: */
5345 /* A proxy for a subroutine in this package? */
5346 : SvOK(gv) ? (CV *) gv : NULL)
5349 /* See if it's the indirect object for a list operator. */
5351 if (PL_oldoldbufptr &&
5352 PL_oldoldbufptr < PL_bufptr &&
5353 (PL_oldoldbufptr == PL_last_lop
5354 || PL_oldoldbufptr == PL_last_uni) &&
5355 /* NO SKIPSPACE BEFORE HERE! */
5356 (PL_expect == XREF ||
5357 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5359 bool immediate_paren = *s == '(';
5361 /* (Now we can afford to cross potential line boundary.) */
5362 s = SKIPSPACE2(s,nextPL_nextwhite);
5364 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5367 /* Two barewords in a row may indicate method call. */
5369 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5370 (tmp = intuit_method(s, gv, cv)))
5373 /* If not a declared subroutine, it's an indirect object. */
5374 /* (But it's an indir obj regardless for sort.) */
5375 /* Also, if "_" follows a filetest operator, it's a bareword */
5378 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5380 (PL_last_lop_op != OP_MAPSTART &&
5381 PL_last_lop_op != OP_GREPSTART))))
5382 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5383 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5386 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5391 PL_expect = XOPERATOR;
5394 s = SKIPSPACE2(s,nextPL_nextwhite);
5395 PL_nextwhite = nextPL_nextwhite;
5400 /* Is this a word before a => operator? */
5401 if (*s == '=' && s[1] == '>' && !pkgname) {
5403 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5404 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5405 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5409 /* If followed by a paren, it's certainly a subroutine. */
5414 while (SPACE_OR_TAB(*d))
5416 if (*d == ')' && (sv = gv_const_sv(gv))) {
5420 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5421 sv_catpvn(PL_thistoken, par, s - par);
5423 sv_free(PL_nextwhite);
5434 PL_nextwhite = PL_thiswhite;
5437 start_force(PL_curforce);
5439 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5440 PL_expect = XOPERATOR;
5443 PL_nextwhite = nextPL_nextwhite;
5444 curmad('X', PL_thistoken);
5445 PL_thistoken = newSVpvs("");
5453 /* If followed by var or block, call it a method (unless sub) */
5455 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5456 PL_last_lop = PL_oldbufptr;
5457 PL_last_lop_op = OP_METHOD;
5461 /* If followed by a bareword, see if it looks like indir obj. */
5464 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5465 && (tmp = intuit_method(s, gv, cv)))
5468 /* Not a method, so call it a subroutine (if defined) */
5471 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5472 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5473 "Ambiguous use of -%s resolved as -&%s()",
5474 PL_tokenbuf, PL_tokenbuf);
5475 /* Check for a constant sub */
5476 if ((sv = gv_const_sv(gv)) && !PL_madskills) {
5478 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5479 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5480 yylval.opval->op_private = 0;
5484 /* Resolve to GV now. */
5485 if (SvTYPE(gv) != SVt_PVGV) {
5486 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5487 assert (SvTYPE(gv) == SVt_PVGV);
5488 /* cv must have been some sort of placeholder, so
5489 now needs replacing with a real code reference. */
5493 op_free(yylval.opval);
5494 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5495 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5496 PL_last_lop = PL_oldbufptr;
5497 PL_last_lop_op = OP_ENTERSUB;
5498 /* Is there a prototype? */
5506 const char *proto = SvPV_const((SV*)cv, protolen);
5509 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5511 while (*proto == ';')
5513 if (*proto == '&' && *s == '{') {
5514 sv_setpv(PL_subname,
5517 "__ANON__" : "__ANON__::__ANON__"));
5524 PL_nextwhite = PL_thiswhite;
5527 start_force(PL_curforce);
5528 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5531 PL_nextwhite = nextPL_nextwhite;
5532 curmad('X', PL_thistoken);
5533 PL_thistoken = newSVpvs("");
5540 /* Guess harder when madskills require "best effort". */
5541 if (PL_madskills && (!gv || !GvCVu(gv))) {
5542 int probable_sub = 0;
5543 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5545 else if (isALPHA(*s)) {
5549 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5550 if (!keyword(tmpbuf, tmplen, 0))
5553 while (d < PL_bufend && isSPACE(*d))
5555 if (*d == '=' && d[1] == '>')
5560 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5561 op_free(yylval.opval);
5562 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5563 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5564 PL_last_lop = PL_oldbufptr;
5565 PL_last_lop_op = OP_ENTERSUB;
5566 PL_nextwhite = PL_thiswhite;
5568 start_force(PL_curforce);
5569 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5571 PL_nextwhite = nextPL_nextwhite;
5572 curmad('X', PL_thistoken);
5573 PL_thistoken = newSVpvs("");
5578 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5585 /* Call it a bare word */
5587 if (PL_hints & HINT_STRICT_SUBS)
5588 yylval.opval->op_private |= OPpCONST_STRICT;
5591 if (lastchar != '-') {
5592 if (ckWARN(WARN_RESERVED)) {
5596 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5597 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5604 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5605 && ckWARN_d(WARN_AMBIGUOUS)) {
5606 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5607 "Operator or semicolon missing before %c%s",
5608 lastchar, PL_tokenbuf);
5609 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5610 "Ambiguous use of %c resolved as operator %c",
5611 lastchar, lastchar);
5617 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5618 newSVpv(CopFILE(PL_curcop),0));
5622 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5623 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5626 case KEY___PACKAGE__:
5627 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5629 ? newSVhek(HvNAME_HEK(PL_curstash))
5636 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5637 const char *pname = "main";
5638 if (PL_tokenbuf[2] == 'D')
5639 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5640 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5644 GvIOp(gv) = newIO();
5645 IoIFP(GvIOp(gv)) = PL_rsfp;
5646 #if defined(HAS_FCNTL) && defined(F_SETFD)
5648 const int fd = PerlIO_fileno(PL_rsfp);
5649 fcntl(fd,F_SETFD,fd >= 3);
5652 /* Mark this internal pseudo-handle as clean */
5653 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5655 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5656 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5657 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5659 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5660 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5661 /* if the script was opened in binmode, we need to revert
5662 * it to text mode for compatibility; but only iff it has CRs
5663 * XXX this is a questionable hack at best. */
5664 if (PL_bufend-PL_bufptr > 2
5665 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5668 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5669 loc = PerlIO_tell(PL_rsfp);
5670 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5673 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5675 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5676 #endif /* NETWARE */
5677 #ifdef PERLIO_IS_STDIO /* really? */
5678 # if defined(__BORLANDC__)
5679 /* XXX see note in do_binmode() */
5680 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5684 PerlIO_seek(PL_rsfp, loc, 0);
5688 #ifdef PERLIO_LAYERS
5691 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5692 else if (PL_encoding) {
5699 XPUSHs(PL_encoding);
5701 call_method("name", G_SCALAR);
5705 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5706 Perl_form(aTHX_ ":encoding(%"SVf")",
5715 if (PL_realtokenstart >= 0) {
5716 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5718 PL_endwhite = newSVpvs("");
5719 sv_catsv(PL_endwhite, PL_thiswhite);
5721 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5722 PL_realtokenstart = -1;
5724 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5725 SvCUR(PL_endwhite))) != Nullch) ;
5740 if (PL_expect == XSTATE) {
5747 if (*s == ':' && s[1] == ':') {
5750 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5751 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5752 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5755 else if (tmp == KEY_require || tmp == KEY_do)
5756 /* that's a way to remember we saw "CORE::" */
5769 LOP(OP_ACCEPT,XTERM);
5775 LOP(OP_ATAN2,XTERM);
5781 LOP(OP_BINMODE,XTERM);
5784 LOP(OP_BLESS,XTERM);
5793 /* When 'use switch' is in effect, continue has a dual
5794 life as a control operator. */
5796 if (!FEATURE_IS_ENABLED("switch"))
5799 /* We have to disambiguate the two senses of
5800 "continue". If the next token is a '{' then
5801 treat it as the start of a continue block;
5802 otherwise treat it as a control operator.
5814 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5831 if (!PL_cryptseen) {
5832 PL_cryptseen = TRUE;
5836 LOP(OP_CRYPT,XTERM);
5839 LOP(OP_CHMOD,XTERM);
5842 LOP(OP_CHOWN,XTERM);
5845 LOP(OP_CONNECT,XTERM);
5864 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5865 if (orig_keyword == KEY_do) {
5874 PL_hints |= HINT_BLOCK_SCOPE;
5884 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5885 LOP(OP_DBMOPEN,XTERM);
5891 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5898 yylval.ival = CopLINE(PL_curcop);
5914 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5915 UNIBRACK(OP_ENTEREVAL);
5933 case KEY_endhostent:
5939 case KEY_endservent:
5942 case KEY_endprotoent:
5953 yylval.ival = CopLINE(PL_curcop);
5955 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5958 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5961 if ((PL_bufend - p) >= 3 &&
5962 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5964 else if ((PL_bufend - p) >= 4 &&
5965 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5968 if (isIDFIRST_lazy_if(p,UTF)) {
5969 p = scan_ident(p, PL_bufend,
5970 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5974 Perl_croak(aTHX_ "Missing $ on loop variable");
5976 s = SvPVX(PL_linestr) + soff;
5982 LOP(OP_FORMLINE,XTERM);
5988 LOP(OP_FCNTL,XTERM);
5994 LOP(OP_FLOCK,XTERM);
6003 LOP(OP_GREPSTART, XREF);
6006 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6021 case KEY_getpriority:
6022 LOP(OP_GETPRIORITY,XTERM);
6024 case KEY_getprotobyname:
6027 case KEY_getprotobynumber:
6028 LOP(OP_GPBYNUMBER,XTERM);
6030 case KEY_getprotoent:
6042 case KEY_getpeername:
6043 UNI(OP_GETPEERNAME);
6045 case KEY_gethostbyname:
6048 case KEY_gethostbyaddr:
6049 LOP(OP_GHBYADDR,XTERM);
6051 case KEY_gethostent:
6054 case KEY_getnetbyname:
6057 case KEY_getnetbyaddr:
6058 LOP(OP_GNBYADDR,XTERM);
6063 case KEY_getservbyname:
6064 LOP(OP_GSBYNAME,XTERM);
6066 case KEY_getservbyport:
6067 LOP(OP_GSBYPORT,XTERM);
6069 case KEY_getservent:
6072 case KEY_getsockname:
6073 UNI(OP_GETSOCKNAME);
6075 case KEY_getsockopt:
6076 LOP(OP_GSOCKOPT,XTERM);
6091 yylval.ival = CopLINE(PL_curcop);
6102 yylval.ival = CopLINE(PL_curcop);
6106 LOP(OP_INDEX,XTERM);
6112 LOP(OP_IOCTL,XTERM);
6124 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6156 LOP(OP_LISTEN,XTERM);
6165 s = scan_pat(s,OP_MATCH);
6166 TERM(sublex_start());
6169 LOP(OP_MAPSTART, XREF);
6172 LOP(OP_MKDIR,XTERM);
6175 LOP(OP_MSGCTL,XTERM);
6178 LOP(OP_MSGGET,XTERM);
6181 LOP(OP_MSGRCV,XTERM);
6184 LOP(OP_MSGSND,XTERM);
6189 PL_in_my = (U16)tmp;
6191 if (isIDFIRST_lazy_if(s,UTF)) {
6195 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6196 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6198 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6199 if (!PL_in_my_stash) {
6202 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6206 if (PL_madskills) { /* just add type to declarator token */
6207 sv_catsv(PL_thistoken, PL_nextwhite);
6209 sv_catpvn(PL_thistoken, start, s - start);
6217 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6224 s = tokenize_use(0, s);
6228 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6235 if (isIDFIRST_lazy_if(s,UTF)) {
6237 for (d = s; isALNUM_lazy_if(d,UTF);)
6239 for (t=d; isSPACE(*t);)
6241 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6243 && !(t[0] == '=' && t[1] == '>')
6245 int parms_len = (int)(d-s);
6246 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6247 "Precedence problem: open %.*s should be open(%.*s)",
6248 parms_len, s, parms_len, s);
6254 yylval.ival = OP_OR;
6264 LOP(OP_OPEN_DIR,XTERM);
6267 checkcomma(s,PL_tokenbuf,"filehandle");
6271 checkcomma(s,PL_tokenbuf,"filehandle");
6290 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6294 LOP(OP_PIPE_OP,XTERM);
6297 s = scan_str(s,!!PL_madskills,FALSE);
6300 yylval.ival = OP_CONST;
6301 TERM(sublex_start());
6307 s = scan_str(s,!!PL_madskills,FALSE);
6310 PL_expect = XOPERATOR;
6312 if (SvCUR(PL_lex_stuff)) {
6315 d = SvPV_force(PL_lex_stuff, len);
6317 for (; isSPACE(*d) && len; --len, ++d)
6322 if (!warned && ckWARN(WARN_QW)) {
6323 for (; !isSPACE(*d) && len; --len, ++d) {
6325 Perl_warner(aTHX_ packWARN(WARN_QW),
6326 "Possible attempt to separate words with commas");
6329 else if (*d == '#') {
6330 Perl_warner(aTHX_ packWARN(WARN_QW),
6331 "Possible attempt to put comments in qw() list");
6337 for (; !isSPACE(*d) && len; --len, ++d)
6340 sv = newSVpvn(b, d-b);
6341 if (DO_UTF8(PL_lex_stuff))
6343 words = append_elem(OP_LIST, words,
6344 newSVOP(OP_CONST, 0, tokeq(sv)));
6348 start_force(PL_curforce);
6349 NEXTVAL_NEXTTOKE.opval = words;
6354 SvREFCNT_dec(PL_lex_stuff);
6355 PL_lex_stuff = NULL;
6361 s = scan_str(s,!!PL_madskills,FALSE);
6364 yylval.ival = OP_STRINGIFY;
6365 if (SvIVX(PL_lex_stuff) == '\'')
6366 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6367 TERM(sublex_start());
6370 s = scan_pat(s,OP_QR);
6371 TERM(sublex_start());
6374 s = scan_str(s,!!PL_madskills,FALSE);
6377 readpipe_override();
6378 TERM(sublex_start());
6386 s = force_version(s, FALSE);
6388 else if (*s != 'v' || !isDIGIT(s[1])
6389 || (s = force_version(s, TRUE), *s == 'v'))
6391 *PL_tokenbuf = '\0';
6392 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6393 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6394 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6396 yyerror("<> should be quotes");
6398 if (orig_keyword == KEY_require) {
6406 PL_last_uni = PL_oldbufptr;
6407 PL_last_lop_op = OP_REQUIRE;
6409 return REPORT( (int)REQUIRE );
6415 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6419 LOP(OP_RENAME,XTERM);
6428 LOP(OP_RINDEX,XTERM);
6438 UNIDOR(OP_READLINE);
6442 UNIDOR(OP_BACKTICK);
6451 LOP(OP_REVERSE,XTERM);
6454 UNIDOR(OP_READLINK);
6462 TERM(sublex_start());
6464 TOKEN(1); /* force error */
6467 checkcomma(s,PL_tokenbuf,"filehandle");
6477 LOP(OP_SELECT,XTERM);
6483 LOP(OP_SEMCTL,XTERM);
6486 LOP(OP_SEMGET,XTERM);
6489 LOP(OP_SEMOP,XTERM);
6495 LOP(OP_SETPGRP,XTERM);
6497 case KEY_setpriority:
6498 LOP(OP_SETPRIORITY,XTERM);
6500 case KEY_sethostent:
6506 case KEY_setservent:
6509 case KEY_setprotoent:
6519 LOP(OP_SEEKDIR,XTERM);
6521 case KEY_setsockopt:
6522 LOP(OP_SSOCKOPT,XTERM);
6528 LOP(OP_SHMCTL,XTERM);
6531 LOP(OP_SHMGET,XTERM);
6534 LOP(OP_SHMREAD,XTERM);
6537 LOP(OP_SHMWRITE,XTERM);
6540 LOP(OP_SHUTDOWN,XTERM);
6549 LOP(OP_SOCKET,XTERM);
6551 case KEY_socketpair:
6552 LOP(OP_SOCKPAIR,XTERM);
6555 checkcomma(s,PL_tokenbuf,"subroutine name");
6557 if (*s == ';' || *s == ')') /* probably a close */
6558 Perl_croak(aTHX_ "sort is now a reserved word");
6560 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6564 LOP(OP_SPLIT,XTERM);
6567 LOP(OP_SPRINTF,XTERM);
6570 LOP(OP_SPLICE,XTERM);
6585 LOP(OP_SUBSTR,XTERM);
6591 char tmpbuf[sizeof PL_tokenbuf];
6592 SSize_t tboffset = 0;
6593 expectation attrful;
6594 bool have_name, have_proto;
6595 const int key = tmp;
6600 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6601 SV *subtoken = newSVpvn(tstart, s - tstart);
6605 s = SKIPSPACE2(s,tmpwhite);
6610 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6611 (*s == ':' && s[1] == ':'))
6618 attrful = XATTRBLOCK;
6619 /* remember buffer pos'n for later force_word */
6620 tboffset = s - PL_oldbufptr;
6621 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6624 nametoke = newSVpvn(s, d - s);
6626 if (memchr(tmpbuf, ':', len))
6627 sv_setpvn(PL_subname, tmpbuf, len);
6629 sv_setsv(PL_subname,PL_curstname);
6630 sv_catpvs(PL_subname,"::");
6631 sv_catpvn(PL_subname,tmpbuf,len);
6638 CURMAD('X', nametoke);
6639 CURMAD('_', tmpwhite);
6640 (void) force_word(PL_oldbufptr + tboffset, WORD,
6643 s = SKIPSPACE2(d,tmpwhite);
6650 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6651 PL_expect = XTERMBLOCK;
6652 attrful = XATTRTERM;
6653 sv_setpvn(PL_subname,"?",1);
6657 if (key == KEY_format) {
6659 PL_lex_formbrack = PL_lex_brackets + 1;
6661 PL_thistoken = subtoken;
6665 (void) force_word(PL_oldbufptr + tboffset, WORD,
6671 /* Look for a prototype */
6674 bool bad_proto = FALSE;
6675 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6677 s = scan_str(s,!!PL_madskills,FALSE);
6679 Perl_croak(aTHX_ "Prototype not terminated");
6680 /* strip spaces and check for bad characters */
6681 d = SvPVX(PL_lex_stuff);
6683 for (p = d; *p; ++p) {
6686 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6692 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6693 "Illegal character in prototype for %"SVf" : %s",
6694 SVfARG(PL_subname), d);
6695 SvCUR_set(PL_lex_stuff, tmp);
6700 CURMAD('q', PL_thisopen);
6701 CURMAD('_', tmpwhite);
6702 CURMAD('=', PL_thisstuff);
6703 CURMAD('Q', PL_thisclose);
6704 NEXTVAL_NEXTTOKE.opval =
6705 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6706 PL_lex_stuff = Nullsv;
6709 s = SKIPSPACE2(s,tmpwhite);
6717 if (*s == ':' && s[1] != ':')
6718 PL_expect = attrful;
6719 else if (*s != '{' && key == KEY_sub) {
6721 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6723 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6730 curmad('^', newSVpvs(""));
6731 CURMAD('_', tmpwhite);
6735 PL_thistoken = subtoken;
6738 NEXTVAL_NEXTTOKE.opval =
6739 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6740 PL_lex_stuff = NULL;
6745 sv_setpv(PL_subname,
6747 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6751 (void) force_word(PL_oldbufptr + tboffset, WORD,
6761 LOP(OP_SYSTEM,XREF);
6764 LOP(OP_SYMLINK,XTERM);
6767 LOP(OP_SYSCALL,XTERM);
6770 LOP(OP_SYSOPEN,XTERM);
6773 LOP(OP_SYSSEEK,XTERM);
6776 LOP(OP_SYSREAD,XTERM);
6779 LOP(OP_SYSWRITE,XTERM);
6783 TERM(sublex_start());
6804 LOP(OP_TRUNCATE,XTERM);
6816 yylval.ival = CopLINE(PL_curcop);
6820 yylval.ival = CopLINE(PL_curcop);
6824 LOP(OP_UNLINK,XTERM);
6830 LOP(OP_UNPACK,XTERM);
6833 LOP(OP_UTIME,XTERM);
6839 LOP(OP_UNSHIFT,XTERM);
6842 s = tokenize_use(1, s);
6852 yylval.ival = CopLINE(PL_curcop);
6856 yylval.ival = CopLINE(PL_curcop);
6860 PL_hints |= HINT_BLOCK_SCOPE;
6867 LOP(OP_WAITPID,XTERM);
6876 ctl_l[0] = toCTRL('L');
6878 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6881 /* Make sure $^L is defined */
6882 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6887 if (PL_expect == XOPERATOR)
6893 yylval.ival = OP_XOR;
6898 TERM(sublex_start());
6903 #pragma segment Main
6907 S_pending_ident(pTHX)
6912 /* pit holds the identifier we read and pending_ident is reset */
6913 char pit = PL_pending_ident;
6914 PL_pending_ident = 0;
6916 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6917 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6918 "### Pending identifier '%s'\n", PL_tokenbuf); });
6920 /* if we're in a my(), we can't allow dynamics here.
6921 $foo'bar has already been turned into $foo::bar, so
6922 just check for colons.
6924 if it's a legal name, the OP is a PADANY.
6927 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6928 if (strchr(PL_tokenbuf,':'))
6929 yyerror(Perl_form(aTHX_ "No package name allowed for "
6930 "variable %s in \"our\"",
6932 tmp = allocmy(PL_tokenbuf);
6935 if (strchr(PL_tokenbuf,':'))
6936 yyerror(Perl_form(aTHX_ PL_no_myglob,
6937 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6939 yylval.opval = newOP(OP_PADANY, 0);
6940 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6946 build the ops for accesses to a my() variable.
6948 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6949 then used in a comparison. This catches most, but not
6950 all cases. For instance, it catches
6951 sort { my($a); $a <=> $b }
6953 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6954 (although why you'd do that is anyone's guess).
6957 if (!strchr(PL_tokenbuf,':')) {
6959 tmp = pad_findmy(PL_tokenbuf);
6960 if (tmp != NOT_IN_PAD) {
6961 /* might be an "our" variable" */
6962 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6963 /* build ops for a bareword */
6964 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6965 HEK * const stashname = HvNAME_HEK(stash);
6966 SV * const sym = newSVhek(stashname);
6967 sv_catpvs(sym, "::");
6968 sv_catpv(sym, PL_tokenbuf+1);
6969 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6970 yylval.opval->op_private = OPpCONST_ENTERED;
6973 ? (GV_ADDMULTI | GV_ADDINEVAL)
6976 ((PL_tokenbuf[0] == '$') ? SVt_PV
6977 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6982 /* if it's a sort block and they're naming $a or $b */
6983 if (PL_last_lop_op == OP_SORT &&
6984 PL_tokenbuf[0] == '$' &&
6985 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6988 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6989 d < PL_bufend && *d != '\n';
6992 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6993 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6999 yylval.opval = newOP(OP_PADANY, 0);
7000 yylval.opval->op_targ = tmp;
7006 Whine if they've said @foo in a doublequoted string,
7007 and @foo isn't a variable we can find in the symbol
7010 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7011 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
7012 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7013 && ckWARN(WARN_AMBIGUOUS)
7014 /* DO NOT warn for @- and @+ */
7015 && !( PL_tokenbuf[2] == '\0' &&
7016 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7019 /* Downgraded from fatal to warning 20000522 mjd */
7020 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7021 "Possible unintended interpolation of %s in string",
7026 /* build ops for a bareword */
7027 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7028 yylval.opval->op_private = OPpCONST_ENTERED;
7031 /* If the identifier refers to a stash, don't autovivify it.
7032 * Change 24660 had the side effect of causing symbol table
7033 * hashes to always be defined, even if they were freshly
7034 * created and the only reference in the entire program was
7035 * the single statement with the defined %foo::bar:: test.
7036 * It appears that all code in the wild doing this actually
7037 * wants to know whether sub-packages have been loaded, so
7038 * by avoiding auto-vivifying symbol tables, we ensure that
7039 * defined %foo::bar:: continues to be false, and the existing
7040 * tests still give the expected answers, even though what
7041 * they're actually testing has now changed subtly.
7043 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7045 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7046 ((PL_tokenbuf[0] == '$') ? SVt_PV
7047 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7053 * The following code was generated by perl_keyword.pl.
7057 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7062 case 1: /* 5 tokens of length 1 */
7094 case 2: /* 18 tokens of length 2 */
7240 case 3: /* 29 tokens of length 3 */
7244 if (name[1] == 'N' &&
7307 if (name[1] == 'i' &&
7329 return (all_keywords || FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7347 if (name[1] == 'o' &&
7356 if (name[1] == 'e' &&
7365 if (name[1] == 'n' &&
7374 if (name[1] == 'o' &&
7383 if (name[1] == 'a' &&
7392 if (name[1] == 'o' &&
7454 if (name[1] == 'e' &&
7468 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7494 if (name[1] == 'i' &&
7503 if (name[1] == 's' &&
7512 if (name[1] == 'e' &&
7521 if (name[1] == 'o' &&
7533 case 4: /* 41 tokens of length 4 */
7537 if (name[1] == 'O' &&
7547 if (name[1] == 'N' &&
7557 if (name[1] == 'i' &&
7567 if (name[1] == 'h' &&
7577 if (name[1] == 'u' &&
7590 if (name[2] == 'c' &&
7599 if (name[2] == 's' &&
7608 if (name[2] == 'a' &&
7644 if (name[1] == 'o' &&
7657 if (name[2] == 't' &&
7666 if (name[2] == 'o' &&
7675 if (name[2] == 't' &&
7684 if (name[2] == 'e' &&
7697 if (name[1] == 'o' &&
7710 if (name[2] == 'y' &&
7719 if (name[2] == 'l' &&
7735 if (name[2] == 's' &&
7744 if (name[2] == 'n' &&
7753 if (name[2] == 'c' &&
7766 if (name[1] == 'e' &&
7776 if (name[1] == 'p' &&
7789 if (name[2] == 'c' &&
7798 if (name[2] == 'p' &&
7807 if (name[2] == 's' &&
7823 if (name[2] == 'n' &&
7893 if (name[2] == 'r' &&
7902 if (name[2] == 'r' &&
7911 if (name[2] == 'a' &&
7927 if (name[2] == 'l' &&
7989 if (name[2] == 'e' &&
7992 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8005 case 5: /* 39 tokens of length 5 */
8009 if (name[1] == 'E' &&
8020 if (name[1] == 'H' &&
8034 if (name[2] == 'a' &&
8044 if (name[2] == 'a' &&
8061 if (name[2] == 'e' &&
8071 if (name[2] == 'e' &&
8075 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8091 if (name[3] == 'i' &&
8100 if (name[3] == 'o' &&
8136 if (name[2] == 'o' &&
8146 if (name[2] == 'y' &&
8160 if (name[1] == 'l' &&
8174 if (name[2] == 'n' &&
8184 if (name[2] == 'o' &&
8198 if (name[1] == 'i' &&
8203 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8212 if (name[2] == 'd' &&
8222 if (name[2] == 'c' &&
8239 if (name[2] == 'c' &&
8249 if (name[2] == 't' &&
8263 if (name[1] == 'k' &&
8274 if (name[1] == 'r' &&
8288 if (name[2] == 's' &&
8298 if (name[2] == 'd' &&
8315 if (name[2] == 'm' &&
8325 if (name[2] == 'i' &&
8335 if (name[2] == 'e' &&
8345 if (name[2] == 'l' &&
8355 if (name[2] == 'a' &&
8368 if (name[3] == 't' &&
8371 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8377 if (name[3] == 'd' &&
8394 if (name[1] == 'i' &&
8408 if (name[2] == 'a' &&
8421 if (name[3] == 'e' &&
8456 if (name[2] == 'i' &&
8473 if (name[2] == 'i' &&
8483 if (name[2] == 'i' &&
8500 case 6: /* 33 tokens of length 6 */
8504 if (name[1] == 'c' &&
8519 if (name[2] == 'l' &&
8530 if (name[2] == 'r' &&
8545 if (name[1] == 'e' &&
8560 if (name[2] == 's' &&
8565 if(ckWARN_d(WARN_SYNTAX))
8566 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8572 if (name[2] == 'i' &&
8590 if (name[2] == 'l' &&
8601 if (name[2] == 'r' &&
8616 if (name[1] == 'm' &&
8631 if (name[2] == 'n' &&
8642 if (name[2] == 's' &&
8657 if (name[1] == 's' &&
8663 if (name[4] == 't' &&
8672 if (name[4] == 'e' &&
8681 if (name[4] == 'c' &&
8690 if (name[4] == 'n' &&
8706 if (name[1] == 'r' &&
8724 if (name[3] == 'a' &&
8734 if (name[3] == 'u' &&
8748 if (name[2] == 'n' &&
8766 if (name[2] == 'a' &&
8780 if (name[3] == 'e' &&
8793 if (name[4] == 't' &&
8802 if (name[4] == 'e' &&
8824 if (name[4] == 't' &&
8833 if (name[4] == 'e' &&
8849 if (name[2] == 'c' &&
8860 if (name[2] == 'l' &&
8871 if (name[2] == 'b' &&
8882 if (name[2] == 's' &&
8905 if (name[4] == 's' &&
8914 if (name[4] == 'n' &&
8927 if (name[3] == 'a' &&
8944 if (name[1] == 'a' &&
8959 case 7: /* 29 tokens of length 7 */
8963 if (name[1] == 'E' &&
8976 if (name[1] == '_' &&
8989 if (name[1] == 'i' &&
8996 return -KEY_binmode;
9002 if (name[1] == 'o' &&
9009 return -KEY_connect;
9018 if (name[2] == 'm' &&
9024 return -KEY_dbmopen;
9035 if (name[4] == 'u' &&
9039 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9045 if (name[4] == 'n' &&
9066 if (name[1] == 'o' &&
9079 if (name[1] == 'e' &&
9086 if (name[5] == 'r' &&
9089 return -KEY_getpgrp;
9095 if (name[5] == 'i' &&
9098 return -KEY_getppid;
9111 if (name[1] == 'c' &&
9118 return -KEY_lcfirst;
9124 if (name[1] == 'p' &&
9131 return -KEY_opendir;
9137 if (name[1] == 'a' &&
9155 if (name[3] == 'd' &&
9160 return -KEY_readdir;
9166 if (name[3] == 'u' &&
9177 if (name[3] == 'e' &&
9182 return -KEY_reverse;
9201 if (name[3] == 'k' &&
9206 return -KEY_seekdir;
9212 if (name[3] == 'p' &&
9217 return -KEY_setpgrp;
9227 if (name[2] == 'm' &&
9233 return -KEY_shmread;
9239 if (name[2] == 'r' &&
9245 return -KEY_sprintf;
9254 if (name[3] == 'l' &&
9259 return -KEY_symlink;
9268 if (name[4] == 'a' &&
9272 return -KEY_syscall;
9278 if (name[4] == 'p' &&
9282 return -KEY_sysopen;
9288 if (name[4] == 'e' &&
9292 return -KEY_sysread;
9298 if (name[4] == 'e' &&
9302 return -KEY_sysseek;
9320 if (name[1] == 'e' &&
9327 return -KEY_telldir;
9336 if (name[2] == 'f' &&
9342 return -KEY_ucfirst;
9348 if (name[2] == 's' &&
9354 return -KEY_unshift;
9364 if (name[1] == 'a' &&
9371 return -KEY_waitpid;
9380 case 8: /* 26 tokens of length 8 */
9384 if (name[1] == 'U' &&
9392 return KEY_AUTOLOAD;
9403 if (name[3] == 'A' &&
9409 return KEY___DATA__;
9415 if (name[3] == 'I' &&
9421 return -KEY___FILE__;
9427 if (name[3] == 'I' &&
9433 return -KEY___LINE__;
9449 if (name[2] == 'o' &&
9456 return -KEY_closedir;
9462 if (name[2] == 'n' &&
9469 return -KEY_continue;
9479 if (name[1] == 'b' &&
9487 return -KEY_dbmclose;
9493 if (name[1] == 'n' &&
9499 if (name[4] == 'r' &&
9504 return -KEY_endgrent;
9510 if (name[4] == 'w' &&
9515 return -KEY_endpwent;
9528 if (name[1] == 'o' &&
9536 return -KEY_formline;
9542 if (name[1] == 'e' &&
9553 if (name[6] == 'n' &&
9556 return -KEY_getgrent;
9562 if (name[6] == 'i' &&
9565 return -KEY_getgrgid;
9571 if (name[6] == 'a' &&
9574 return -KEY_getgrnam;
9587 if (name[4] == 'o' &&
9592 return -KEY_getlogin;
9603 if (name[6] == 'n' &&
9606 return -KEY_getpwent;
9612 if (name[6] == 'a' &&
9615 return -KEY_getpwnam;
9621 if (name[6] == 'i' &&
9624 return -KEY_getpwuid;
9644 if (name[1] == 'e' &&
9651 if (name[5] == 'i' &&
9658 return -KEY_readline;
9663 return -KEY_readlink;
9674 if (name[5] == 'i' &&
9678 return -KEY_readpipe;
9699 if (name[4] == 'r' &&
9704 return -KEY_setgrent;
9710 if (name[4] == 'w' &&
9715 return -KEY_setpwent;
9731 if (name[3] == 'w' &&
9737 return -KEY_shmwrite;
9743 if (name[3] == 't' &&
9749 return -KEY_shutdown;
9759 if (name[2] == 's' &&
9766 return -KEY_syswrite;
9776 if (name[1] == 'r' &&
9784 return -KEY_truncate;
9793 case 9: /* 9 tokens of length 9 */
9797 if (name[1] == 'N' &&
9806 return KEY_UNITCHECK;
9812 if (name[1] == 'n' &&
9821 return -KEY_endnetent;
9827 if (name[1] == 'e' &&
9836 return -KEY_getnetent;
9842 if (name[1] == 'o' &&
9851 return -KEY_localtime;
9857 if (name[1] == 'r' &&
9866 return KEY_prototype;
9872 if (name[1] == 'u' &&
9881 return -KEY_quotemeta;
9887 if (name[1] == 'e' &&
9896 return -KEY_rewinddir;
9902 if (name[1] == 'e' &&
9911 return -KEY_setnetent;
9917 if (name[1] == 'a' &&
9926 return -KEY_wantarray;
9935 case 10: /* 9 tokens of length 10 */
9939 if (name[1] == 'n' &&
9945 if (name[4] == 'o' &&
9952 return -KEY_endhostent;
9958 if (name[4] == 'e' &&
9965 return -KEY_endservent;
9978 if (name[1] == 'e' &&
9984 if (name[4] == 'o' &&
9991 return -KEY_gethostent;
10000 if (name[5] == 'r' &&
10006 return -KEY_getservent;
10012 if (name[5] == 'c' &&
10018 return -KEY_getsockopt;
10038 if (name[2] == 't')
10043 if (name[4] == 'o' &&
10050 return -KEY_sethostent;
10059 if (name[5] == 'r' &&
10065 return -KEY_setservent;
10071 if (name[5] == 'c' &&
10077 return -KEY_setsockopt;
10094 if (name[2] == 'c' &&
10103 return -KEY_socketpair;
10116 case 11: /* 8 tokens of length 11 */
10120 if (name[1] == '_' &&
10130 { /* __PACKAGE__ */
10131 return -KEY___PACKAGE__;
10137 if (name[1] == 'n' &&
10147 { /* endprotoent */
10148 return -KEY_endprotoent;
10154 if (name[1] == 'e' &&
10163 if (name[5] == 'e' &&
10169 { /* getpeername */
10170 return -KEY_getpeername;
10179 if (name[6] == 'o' &&
10184 { /* getpriority */
10185 return -KEY_getpriority;
10191 if (name[6] == 't' &&
10196 { /* getprotoent */
10197 return -KEY_getprotoent;
10211 if (name[4] == 'o' &&
10218 { /* getsockname */
10219 return -KEY_getsockname;
10232 if (name[1] == 'e' &&
10240 if (name[6] == 'o' &&
10245 { /* setpriority */
10246 return -KEY_setpriority;
10252 if (name[6] == 't' &&
10257 { /* setprotoent */
10258 return -KEY_setprotoent;
10274 case 12: /* 2 tokens of length 12 */
10275 if (name[0] == 'g' &&
10287 if (name[9] == 'd' &&
10290 { /* getnetbyaddr */
10291 return -KEY_getnetbyaddr;
10297 if (name[9] == 'a' &&
10300 { /* getnetbyname */
10301 return -KEY_getnetbyname;
10313 case 13: /* 4 tokens of length 13 */
10314 if (name[0] == 'g' &&
10321 if (name[4] == 'o' &&
10330 if (name[10] == 'd' &&
10333 { /* gethostbyaddr */
10334 return -KEY_gethostbyaddr;
10340 if (name[10] == 'a' &&
10343 { /* gethostbyname */
10344 return -KEY_gethostbyname;
10357 if (name[4] == 'e' &&
10366 if (name[10] == 'a' &&
10369 { /* getservbyname */
10370 return -KEY_getservbyname;
10376 if (name[10] == 'o' &&
10379 { /* getservbyport */
10380 return -KEY_getservbyport;
10399 case 14: /* 1 tokens of length 14 */
10400 if (name[0] == 'g' &&
10414 { /* getprotobyname */
10415 return -KEY_getprotobyname;
10420 case 16: /* 1 tokens of length 16 */
10421 if (name[0] == 'g' &&
10437 { /* getprotobynumber */
10438 return -KEY_getprotobynumber;
10452 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10456 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10457 if (ckWARN(WARN_SYNTAX)) {
10460 for (w = s+2; *w && level; w++) {
10463 else if (*w == ')')
10466 while (isSPACE(*w))
10468 /* the list of chars below is for end of statements or
10469 * block / parens, boolean operators (&&, ||, //) and branch
10470 * constructs (or, and, if, until, unless, while, err, for).
10471 * Not a very solid hack... */
10472 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10473 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10474 "%s (...) interpreted as function",name);
10477 while (s < PL_bufend && isSPACE(*s))
10481 while (s < PL_bufend && isSPACE(*s))
10483 if (isIDFIRST_lazy_if(s,UTF)) {
10484 const char * const w = s++;
10485 while (isALNUM_lazy_if(s,UTF))
10487 while (s < PL_bufend && isSPACE(*s))
10491 if (keyword(w, s - w, 0))
10494 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10495 if (gv && GvCVu(gv))
10497 Perl_croak(aTHX_ "No comma allowed after %s", what);
10502 /* Either returns sv, or mortalizes sv and returns a new SV*.
10503 Best used as sv=new_constant(..., sv, ...).
10504 If s, pv are NULL, calls subroutine with one argument,
10505 and type is used with error messages only. */
10508 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10512 HV * const table = GvHV(PL_hintgv); /* ^H */
10516 const char *why1 = "", *why2 = "", *why3 = "";
10518 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10521 why2 = (const char *)
10522 (strEQ(key,"charnames")
10523 ? "(possibly a missing \"use charnames ...\")"
10525 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10526 (type ? type: "undef"), why2);
10528 /* This is convoluted and evil ("goto considered harmful")
10529 * but I do not understand the intricacies of all the different
10530 * failure modes of %^H in here. The goal here is to make
10531 * the most probable error message user-friendly. --jhi */
10536 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10537 (type ? type: "undef"), why1, why2, why3);
10539 yyerror(SvPVX_const(msg));
10543 cvp = hv_fetch(table, key, strlen(key), FALSE);
10544 if (!cvp || !SvOK(*cvp)) {
10547 why3 = "} is not defined";
10550 sv_2mortal(sv); /* Parent created it permanently */
10553 pv = sv_2mortal(newSVpvn(s, len));
10555 typesv = sv_2mortal(newSVpv(type, 0));
10557 typesv = &PL_sv_undef;
10559 PUSHSTACKi(PERLSI_OVERLOAD);
10571 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10575 /* Check the eval first */
10576 if (!PL_in_eval && SvTRUE(ERRSV)) {
10577 sv_catpvs(ERRSV, "Propagated");
10578 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10580 res = SvREFCNT_inc_simple(sv);
10584 SvREFCNT_inc_simple_void(res);
10593 why1 = "Call to &{$^H{";
10595 why3 = "}} did not return a defined value";
10603 /* Returns a NUL terminated string, with the length of the string written to
10607 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10610 register char *d = dest;
10611 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10614 Perl_croak(aTHX_ ident_too_long);
10615 if (isALNUM(*s)) /* UTF handled below */
10617 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10622 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10626 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10627 char *t = s + UTF8SKIP(s);
10629 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10633 Perl_croak(aTHX_ ident_too_long);
10634 Copy(s, d, len, char);
10647 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10650 char *bracket = NULL;
10652 register char *d = dest;
10653 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10658 while (isDIGIT(*s)) {
10660 Perl_croak(aTHX_ ident_too_long);
10667 Perl_croak(aTHX_ ident_too_long);
10668 if (isALNUM(*s)) /* UTF handled below */
10670 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10675 else if (*s == ':' && s[1] == ':') {
10679 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10680 char *t = s + UTF8SKIP(s);
10681 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10683 if (d + (t - s) > e)
10684 Perl_croak(aTHX_ ident_too_long);
10685 Copy(s, d, t - s, char);
10696 if (PL_lex_state != LEX_NORMAL)
10697 PL_lex_state = LEX_INTERPENDMAYBE;
10700 if (*s == '$' && s[1] &&
10701 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10714 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10719 if (isSPACE(s[-1])) {
10721 const char ch = *s++;
10722 if (!SPACE_OR_TAB(ch)) {
10728 if (isIDFIRST_lazy_if(d,UTF)) {
10732 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10733 end += UTF8SKIP(end);
10734 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10735 end += UTF8SKIP(end);
10737 Copy(s, d, end - s, char);
10742 while ((isALNUM(*s) || *s == ':') && d < e)
10745 Perl_croak(aTHX_ ident_too_long);
10748 while (s < send && SPACE_OR_TAB(*s))
10750 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10751 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10752 const char * const brack =
10754 ((*s == '[') ? "[...]" : "{...}");
10755 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10756 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10757 funny, dest, brack, funny, dest, brack);
10760 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10764 /* Handle extended ${^Foo} variables
10765 * 1999-02-27 mjd-perl-patch@plover.com */
10766 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10770 while (isALNUM(*s) && d < e) {
10774 Perl_croak(aTHX_ ident_too_long);
10779 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10780 PL_lex_state = LEX_INTERPEND;
10783 if (PL_lex_state == LEX_NORMAL) {
10784 if (ckWARN(WARN_AMBIGUOUS) &&
10785 (keyword(dest, d - dest, 0)
10786 || get_cvn_flags(dest, d - dest, 0)))
10790 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10791 "Ambiguous use of %c{%s} resolved to %c%s",
10792 funny, dest, funny, dest);
10797 s = bracket; /* let the parser handle it */
10801 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10802 PL_lex_state = LEX_INTERPEND;
10807 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10809 PERL_UNUSED_CONTEXT;
10813 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10814 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10815 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10816 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10817 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10823 S_scan_pat(pTHX_ char *start, I32 type)
10827 char *s = scan_str(start,!!PL_madskills,FALSE);
10828 const char * const valid_flags =
10829 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10836 const char * const delimiter = skipspace(start);
10840 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10841 : "Search pattern not terminated" ));
10844 pm = (PMOP*)newPMOP(type, 0);
10845 if (PL_multi_open == '?') {
10846 /* This is the only point in the code that sets PMf_ONCE: */
10847 pm->op_pmflags |= PMf_ONCE;
10849 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10850 allows us to restrict the list needed by reset to just the ??
10852 assert(type != OP_TRANS);
10854 MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10857 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10860 elements = mg->mg_len / sizeof(PMOP**);
10861 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10862 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10863 mg->mg_len = elements * sizeof(PMOP**);
10864 PmopSTASH_set(pm,PL_curstash);
10870 while (*s && strchr(valid_flags, *s))
10871 pmflag(&pm->op_pmflags,*s++);
10873 if (PL_madskills && modstart != s) {
10874 SV* tmptoken = newSVpvn(modstart, s - modstart);
10875 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10878 /* issue a warning if /c is specified,but /g is not */
10879 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10880 && ckWARN(WARN_REGEXP))
10882 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10883 "Use of /c modifier is meaningless without /g" );
10886 PL_lex_op = (OP*)pm;
10887 yylval.ival = OP_MATCH;
10892 S_scan_subst(pTHX_ char *start)
10903 yylval.ival = OP_NULL;
10905 s = scan_str(start,!!PL_madskills,FALSE);
10908 Perl_croak(aTHX_ "Substitution pattern not terminated");
10910 if (s[-1] == PL_multi_open)
10913 if (PL_madskills) {
10914 CURMAD('q', PL_thisopen);
10915 CURMAD('_', PL_thiswhite);
10916 CURMAD('E', PL_thisstuff);
10917 CURMAD('Q', PL_thisclose);
10918 PL_realtokenstart = s - SvPVX(PL_linestr);
10922 first_start = PL_multi_start;
10923 s = scan_str(s,!!PL_madskills,FALSE);
10925 if (PL_lex_stuff) {
10926 SvREFCNT_dec(PL_lex_stuff);
10927 PL_lex_stuff = NULL;
10929 Perl_croak(aTHX_ "Substitution replacement not terminated");
10931 PL_multi_start = first_start; /* so whole substitution is taken together */
10933 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10936 if (PL_madskills) {
10937 CURMAD('z', PL_thisopen);
10938 CURMAD('R', PL_thisstuff);
10939 CURMAD('Z', PL_thisclose);
10945 if (*s == EXEC_PAT_MOD) {
10949 else if (strchr(S_PAT_MODS, *s))
10950 pmflag(&pm->op_pmflags,*s++);
10956 if (PL_madskills) {
10958 curmad('m', newSVpvn(modstart, s - modstart));
10959 append_madprops(PL_thismad, (OP*)pm, 0);
10963 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10964 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10968 SV * const repl = newSVpvs("");
10970 PL_sublex_info.super_bufptr = s;
10971 PL_sublex_info.super_bufend = PL_bufend;
10973 pm->op_pmflags |= PMf_EVAL;
10975 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
10976 sv_catpvs(repl, "{");
10977 sv_catsv(repl, PL_lex_repl);
10978 if (strchr(SvPVX(PL_lex_repl), '#'))
10979 sv_catpvs(repl, "\n");
10980 sv_catpvs(repl, "}");
10982 SvREFCNT_dec(PL_lex_repl);
10983 PL_lex_repl = repl;
10986 PL_lex_op = (OP*)pm;
10987 yylval.ival = OP_SUBST;
10992 S_scan_trans(pTHX_ char *start)
11005 yylval.ival = OP_NULL;
11007 s = scan_str(start,!!PL_madskills,FALSE);
11009 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11011 if (s[-1] == PL_multi_open)
11014 if (PL_madskills) {
11015 CURMAD('q', PL_thisopen);
11016 CURMAD('_', PL_thiswhite);
11017 CURMAD('E', PL_thisstuff);
11018 CURMAD('Q', PL_thisclose);
11019 PL_realtokenstart = s - SvPVX(PL_linestr);
11023 s = scan_str(s,!!PL_madskills,FALSE);
11025 if (PL_lex_stuff) {
11026 SvREFCNT_dec(PL_lex_stuff);
11027 PL_lex_stuff = NULL;
11029 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11031 if (PL_madskills) {
11032 CURMAD('z', PL_thisopen);
11033 CURMAD('R', PL_thisstuff);
11034 CURMAD('Z', PL_thisclose);
11037 complement = del = squash = 0;
11044 complement = OPpTRANS_COMPLEMENT;
11047 del = OPpTRANS_DELETE;
11050 squash = OPpTRANS_SQUASH;
11059 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11060 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11061 o->op_private &= ~OPpTRANS_ALL;
11062 o->op_private |= del|squash|complement|
11063 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11064 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11067 yylval.ival = OP_TRANS;
11070 if (PL_madskills) {
11072 curmad('m', newSVpvn(modstart, s - modstart));
11073 append_madprops(PL_thismad, o, 0);
11082 S_scan_heredoc(pTHX_ register char *s)
11086 I32 op_type = OP_SCALAR;
11090 const char *found_newline;
11094 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11096 I32 stuffstart = s - SvPVX(PL_linestr);
11099 PL_realtokenstart = -1;
11104 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11108 while (SPACE_OR_TAB(*peek))
11110 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11113 s = delimcpy(d, e, s, PL_bufend, term, &len);
11123 if (!isALNUM_lazy_if(s,UTF))
11124 deprecate_old("bare << to mean <<\"\"");
11125 for (; isALNUM_lazy_if(s,UTF); s++) {
11130 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11131 Perl_croak(aTHX_ "Delimiter for here document is too long");
11134 len = d - PL_tokenbuf;
11137 if (PL_madskills) {
11138 tstart = PL_tokenbuf + !outer;
11139 PL_thisclose = newSVpvn(tstart, len - !outer);
11140 tstart = SvPVX(PL_linestr) + stuffstart;
11141 PL_thisopen = newSVpvn(tstart, s - tstart);
11142 stuffstart = s - SvPVX(PL_linestr);
11145 #ifndef PERL_STRICT_CR
11146 d = strchr(s, '\r');
11148 char * const olds = s;
11150 while (s < PL_bufend) {
11156 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11165 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11172 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11173 herewas = newSVpvn(s,PL_bufend-s);
11177 herewas = newSVpvn(s-1,found_newline-s+1);
11180 herewas = newSVpvn(s,found_newline-s);
11184 if (PL_madskills) {
11185 tstart = SvPVX(PL_linestr) + stuffstart;
11187 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11189 PL_thisstuff = newSVpvn(tstart, s - tstart);
11192 s += SvCUR(herewas);
11195 stuffstart = s - SvPVX(PL_linestr);
11201 tmpstr = newSV_type(SVt_PVIV);
11202 SvGROW(tmpstr, 80);
11203 if (term == '\'') {
11204 op_type = OP_CONST;
11205 SvIV_set(tmpstr, -1);
11207 else if (term == '`') {
11208 op_type = OP_BACKTICK;
11209 SvIV_set(tmpstr, '\\');
11213 PL_multi_start = CopLINE(PL_curcop);
11214 PL_multi_open = PL_multi_close = '<';
11215 term = *PL_tokenbuf;
11216 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11217 char * const bufptr = PL_sublex_info.super_bufptr;
11218 char * const bufend = PL_sublex_info.super_bufend;
11219 char * const olds = s - SvCUR(herewas);
11220 s = strchr(bufptr, '\n');
11224 while (s < bufend &&
11225 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11227 CopLINE_inc(PL_curcop);
11230 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11231 missingterm(PL_tokenbuf);
11233 sv_setpvn(herewas,bufptr,d-bufptr+1);
11234 sv_setpvn(tmpstr,d+1,s-d);
11236 sv_catpvn(herewas,s,bufend-s);
11237 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11244 while (s < PL_bufend &&
11245 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11247 CopLINE_inc(PL_curcop);
11249 if (s >= PL_bufend) {
11250 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11251 missingterm(PL_tokenbuf);
11253 sv_setpvn(tmpstr,d+1,s-d);
11255 if (PL_madskills) {
11257 sv_catpvn(PL_thisstuff, d + 1, s - d);
11259 PL_thisstuff = newSVpvn(d + 1, s - d);
11260 stuffstart = s - SvPVX(PL_linestr);
11264 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11266 sv_catpvn(herewas,s,PL_bufend-s);
11267 sv_setsv(PL_linestr,herewas);
11268 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11269 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11270 PL_last_lop = PL_last_uni = NULL;
11273 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11274 while (s >= PL_bufend) { /* multiple line string? */
11276 if (PL_madskills) {
11277 tstart = SvPVX(PL_linestr) + stuffstart;
11279 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11281 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11285 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11286 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11287 missingterm(PL_tokenbuf);
11290 stuffstart = s - SvPVX(PL_linestr);
11292 CopLINE_inc(PL_curcop);
11293 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11294 PL_last_lop = PL_last_uni = NULL;
11295 #ifndef PERL_STRICT_CR
11296 if (PL_bufend - PL_linestart >= 2) {
11297 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11298 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11300 PL_bufend[-2] = '\n';
11302 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11304 else if (PL_bufend[-1] == '\r')
11305 PL_bufend[-1] = '\n';
11307 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11308 PL_bufend[-1] = '\n';
11310 if (PERLDB_LINE && PL_curstash != PL_debstash)
11311 update_debugger_info(PL_linestr, NULL, 0);
11312 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11313 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11314 *(SvPVX(PL_linestr) + off ) = ' ';
11315 sv_catsv(PL_linestr,herewas);
11316 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11317 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11321 sv_catsv(tmpstr,PL_linestr);
11326 PL_multi_end = CopLINE(PL_curcop);
11327 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11328 SvPV_shrink_to_cur(tmpstr);
11330 SvREFCNT_dec(herewas);
11332 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11334 else if (PL_encoding)
11335 sv_recode_to_utf8(tmpstr, PL_encoding);
11337 PL_lex_stuff = tmpstr;
11338 yylval.ival = op_type;
11342 /* scan_inputsymbol
11343 takes: current position in input buffer
11344 returns: new position in input buffer
11345 side-effects: yylval and lex_op are set.
11350 <FH> read from filehandle
11351 <pkg::FH> read from package qualified filehandle
11352 <pkg'FH> read from package qualified filehandle
11353 <$fh> read from filehandle in $fh
11354 <*.h> filename glob
11359 S_scan_inputsymbol(pTHX_ char *start)
11362 register char *s = start; /* current position in buffer */
11366 char *d = PL_tokenbuf; /* start of temp holding space */
11367 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11369 end = strchr(s, '\n');
11372 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11374 /* die if we didn't have space for the contents of the <>,
11375 or if it didn't end, or if we see a newline
11378 if (len >= (I32)sizeof PL_tokenbuf)
11379 Perl_croak(aTHX_ "Excessively long <> operator");
11381 Perl_croak(aTHX_ "Unterminated <> operator");
11386 Remember, only scalar variables are interpreted as filehandles by
11387 this code. Anything more complex (e.g., <$fh{$num}>) will be
11388 treated as a glob() call.
11389 This code makes use of the fact that except for the $ at the front,
11390 a scalar variable and a filehandle look the same.
11392 if (*d == '$' && d[1]) d++;
11394 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11395 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11398 /* If we've tried to read what we allow filehandles to look like, and
11399 there's still text left, then it must be a glob() and not a getline.
11400 Use scan_str to pull out the stuff between the <> and treat it
11401 as nothing more than a string.
11404 if (d - PL_tokenbuf != len) {
11405 yylval.ival = OP_GLOB;
11407 s = scan_str(start,!!PL_madskills,FALSE);
11409 Perl_croak(aTHX_ "Glob not terminated");
11413 bool readline_overriden = FALSE;
11416 /* we're in a filehandle read situation */
11419 /* turn <> into <ARGV> */
11421 Copy("ARGV",d,5,char);
11423 /* Check whether readline() is overriden */
11424 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11426 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11428 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11429 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11430 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11431 readline_overriden = TRUE;
11433 /* if <$fh>, create the ops to turn the variable into a
11437 /* try to find it in the pad for this block, otherwise find
11438 add symbol table ops
11440 const PADOFFSET tmp = pad_findmy(d);
11441 if (tmp != NOT_IN_PAD) {
11442 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11443 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11444 HEK * const stashname = HvNAME_HEK(stash);
11445 SV * const sym = sv_2mortal(newSVhek(stashname));
11446 sv_catpvs(sym, "::");
11447 sv_catpv(sym, d+1);
11452 OP * const o = newOP(OP_PADSV, 0);
11454 PL_lex_op = readline_overriden
11455 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11456 append_elem(OP_LIST, o,
11457 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11458 : (OP*)newUNOP(OP_READLINE, 0, o);
11467 ? (GV_ADDMULTI | GV_ADDINEVAL)
11470 PL_lex_op = readline_overriden
11471 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11472 append_elem(OP_LIST,
11473 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11474 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11475 : (OP*)newUNOP(OP_READLINE, 0,
11476 newUNOP(OP_RV2SV, 0,
11477 newGVOP(OP_GV, 0, gv)));
11479 if (!readline_overriden)
11480 PL_lex_op->op_flags |= OPf_SPECIAL;
11481 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11482 yylval.ival = OP_NULL;
11485 /* If it's none of the above, it must be a literal filehandle
11486 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11488 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11489 PL_lex_op = readline_overriden
11490 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11491 append_elem(OP_LIST,
11492 newGVOP(OP_GV, 0, gv),
11493 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11494 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11495 yylval.ival = OP_NULL;
11504 takes: start position in buffer
11505 keep_quoted preserve \ on the embedded delimiter(s)
11506 keep_delims preserve the delimiters around the string
11507 returns: position to continue reading from buffer
11508 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11509 updates the read buffer.
11511 This subroutine pulls a string out of the input. It is called for:
11512 q single quotes q(literal text)
11513 ' single quotes 'literal text'
11514 qq double quotes qq(interpolate $here please)
11515 " double quotes "interpolate $here please"
11516 qx backticks qx(/bin/ls -l)
11517 ` backticks `/bin/ls -l`
11518 qw quote words @EXPORT_OK = qw( func() $spam )
11519 m// regexp match m/this/
11520 s/// regexp substitute s/this/that/
11521 tr/// string transliterate tr/this/that/
11522 y/// string transliterate y/this/that/
11523 ($*@) sub prototypes sub foo ($)
11524 (stuff) sub attr parameters sub foo : attr(stuff)
11525 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11527 In most of these cases (all but <>, patterns and transliterate)
11528 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11529 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11530 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11533 It skips whitespace before the string starts, and treats the first
11534 character as the delimiter. If the delimiter is one of ([{< then
11535 the corresponding "close" character )]}> is used as the closing
11536 delimiter. It allows quoting of delimiters, and if the string has
11537 balanced delimiters ([{<>}]) it allows nesting.
11539 On success, the SV with the resulting string is put into lex_stuff or,
11540 if that is already non-NULL, into lex_repl. The second case occurs only
11541 when parsing the RHS of the special constructs s/// and tr/// (y///).
11542 For convenience, the terminating delimiter character is stuffed into
11547 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11550 SV *sv; /* scalar value: string */
11551 const char *tmps; /* temp string, used for delimiter matching */
11552 register char *s = start; /* current position in the buffer */
11553 register char term; /* terminating character */
11554 register char *to; /* current position in the sv's data */
11555 I32 brackets = 1; /* bracket nesting level */
11556 bool has_utf8 = FALSE; /* is there any utf8 content? */
11557 I32 termcode; /* terminating char. code */
11558 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11559 STRLEN termlen; /* length of terminating string */
11560 int last_off = 0; /* last position for nesting bracket */
11566 /* skip space before the delimiter */
11572 if (PL_realtokenstart >= 0) {
11573 stuffstart = PL_realtokenstart;
11574 PL_realtokenstart = -1;
11577 stuffstart = start - SvPVX(PL_linestr);
11579 /* mark where we are, in case we need to report errors */
11582 /* after skipping whitespace, the next character is the terminator */
11585 termcode = termstr[0] = term;
11589 termcode = utf8_to_uvchr((U8*)s, &termlen);
11590 Copy(s, termstr, termlen, U8);
11591 if (!UTF8_IS_INVARIANT(term))
11595 /* mark where we are */
11596 PL_multi_start = CopLINE(PL_curcop);
11597 PL_multi_open = term;
11599 /* find corresponding closing delimiter */
11600 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11601 termcode = termstr[0] = term = tmps[5];
11603 PL_multi_close = term;
11605 /* create a new SV to hold the contents. 79 is the SV's initial length.
11606 What a random number. */
11607 sv = newSV_type(SVt_PVIV);
11609 SvIV_set(sv, termcode);
11610 (void)SvPOK_only(sv); /* validate pointer */
11612 /* move past delimiter and try to read a complete string */
11614 sv_catpvn(sv, s, termlen);
11617 tstart = SvPVX(PL_linestr) + stuffstart;
11618 if (!PL_thisopen && !keep_delims) {
11619 PL_thisopen = newSVpvn(tstart, s - tstart);
11620 stuffstart = s - SvPVX(PL_linestr);
11624 if (PL_encoding && !UTF) {
11628 int offset = s - SvPVX_const(PL_linestr);
11629 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11630 &offset, (char*)termstr, termlen);
11631 const char * const ns = SvPVX_const(PL_linestr) + offset;
11632 char * const svlast = SvEND(sv) - 1;
11634 for (; s < ns; s++) {
11635 if (*s == '\n' && !PL_rsfp)
11636 CopLINE_inc(PL_curcop);
11639 goto read_more_line;
11641 /* handle quoted delimiters */
11642 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11644 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11646 if ((svlast-1 - t) % 2) {
11647 if (!keep_quoted) {
11648 *(svlast-1) = term;
11650 SvCUR_set(sv, SvCUR(sv) - 1);
11655 if (PL_multi_open == PL_multi_close) {
11661 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11662 /* At here, all closes are "was quoted" one,
11663 so we don't check PL_multi_close. */
11665 if (!keep_quoted && *(t+1) == PL_multi_open)
11670 else if (*t == PL_multi_open)
11678 SvCUR_set(sv, w - SvPVX_const(sv));
11680 last_off = w - SvPVX(sv);
11681 if (--brackets <= 0)
11686 if (!keep_delims) {
11687 SvCUR_set(sv, SvCUR(sv) - 1);
11693 /* extend sv if need be */
11694 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11695 /* set 'to' to the next character in the sv's string */
11696 to = SvPVX(sv)+SvCUR(sv);
11698 /* if open delimiter is the close delimiter read unbridle */
11699 if (PL_multi_open == PL_multi_close) {
11700 for (; s < PL_bufend; s++,to++) {
11701 /* embedded newlines increment the current line number */
11702 if (*s == '\n' && !PL_rsfp)
11703 CopLINE_inc(PL_curcop);
11704 /* handle quoted delimiters */
11705 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11706 if (!keep_quoted && s[1] == term)
11708 /* any other quotes are simply copied straight through */
11712 /* terminate when run out of buffer (the for() condition), or
11713 have found the terminator */
11714 else if (*s == term) {
11717 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11720 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11726 /* if the terminator isn't the same as the start character (e.g.,
11727 matched brackets), we have to allow more in the quoting, and
11728 be prepared for nested brackets.
11731 /* read until we run out of string, or we find the terminator */
11732 for (; s < PL_bufend; s++,to++) {
11733 /* embedded newlines increment the line count */
11734 if (*s == '\n' && !PL_rsfp)
11735 CopLINE_inc(PL_curcop);
11736 /* backslashes can escape the open or closing characters */
11737 if (*s == '\\' && s+1 < PL_bufend) {
11738 if (!keep_quoted &&
11739 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11744 /* allow nested opens and closes */
11745 else if (*s == PL_multi_close && --brackets <= 0)
11747 else if (*s == PL_multi_open)
11749 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11754 /* terminate the copied string and update the sv's end-of-string */
11756 SvCUR_set(sv, to - SvPVX_const(sv));
11759 * this next chunk reads more into the buffer if we're not done yet
11763 break; /* handle case where we are done yet :-) */
11765 #ifndef PERL_STRICT_CR
11766 if (to - SvPVX_const(sv) >= 2) {
11767 if ((to[-2] == '\r' && to[-1] == '\n') ||
11768 (to[-2] == '\n' && to[-1] == '\r'))
11772 SvCUR_set(sv, to - SvPVX_const(sv));
11774 else if (to[-1] == '\r')
11777 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11782 /* if we're out of file, or a read fails, bail and reset the current
11783 line marker so we can report where the unterminated string began
11786 if (PL_madskills) {
11787 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11789 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11791 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11795 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11797 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11803 /* we read a line, so increment our line counter */
11804 CopLINE_inc(PL_curcop);
11806 /* update debugger info */
11807 if (PERLDB_LINE && PL_curstash != PL_debstash)
11808 update_debugger_info(PL_linestr, NULL, 0);
11810 /* having changed the buffer, we must update PL_bufend */
11811 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11812 PL_last_lop = PL_last_uni = NULL;
11815 /* at this point, we have successfully read the delimited string */
11817 if (!PL_encoding || UTF) {
11819 if (PL_madskills) {
11820 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11821 const int len = s - tstart;
11823 sv_catpvn(PL_thisstuff, tstart, len);
11825 PL_thisstuff = newSVpvn(tstart, len);
11826 if (!PL_thisclose && !keep_delims)
11827 PL_thisclose = newSVpvn(s,termlen);
11832 sv_catpvn(sv, s, termlen);
11837 if (PL_madskills) {
11838 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11839 const int len = s - tstart - termlen;
11841 sv_catpvn(PL_thisstuff, tstart, len);
11843 PL_thisstuff = newSVpvn(tstart, len);
11844 if (!PL_thisclose && !keep_delims)
11845 PL_thisclose = newSVpvn(s - termlen,termlen);
11849 if (has_utf8 || PL_encoding)
11852 PL_multi_end = CopLINE(PL_curcop);
11854 /* if we allocated too much space, give some back */
11855 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11856 SvLEN_set(sv, SvCUR(sv) + 1);
11857 SvPV_renew(sv, SvLEN(sv));
11860 /* decide whether this is the first or second quoted string we've read
11873 takes: pointer to position in buffer
11874 returns: pointer to new position in buffer
11875 side-effects: builds ops for the constant in yylval.op
11877 Read a number in any of the formats that Perl accepts:
11879 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11880 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11883 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11885 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11888 If it reads a number without a decimal point or an exponent, it will
11889 try converting the number to an integer and see if it can do so
11890 without loss of precision.
11894 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11897 register const char *s = start; /* current position in buffer */
11898 register char *d; /* destination in temp buffer */
11899 register char *e; /* end of temp buffer */
11900 NV nv; /* number read, as a double */
11901 SV *sv = NULL; /* place to put the converted number */
11902 bool floatit; /* boolean: int or float? */
11903 const char *lastub = NULL; /* position of last underbar */
11904 static char const number_too_long[] = "Number too long";
11906 /* We use the first character to decide what type of number this is */
11910 Perl_croak(aTHX_ "panic: scan_num");
11912 /* if it starts with a 0, it could be an octal number, a decimal in
11913 0.13 disguise, or a hexadecimal number, or a binary number. */
11917 u holds the "number so far"
11918 shift the power of 2 of the base
11919 (hex == 4, octal == 3, binary == 1)
11920 overflowed was the number more than we can hold?
11922 Shift is used when we add a digit. It also serves as an "are
11923 we in octal/hex/binary?" indicator to disallow hex characters
11924 when in octal mode.
11929 bool overflowed = FALSE;
11930 bool just_zero = TRUE; /* just plain 0 or binary number? */
11931 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11932 static const char* const bases[5] =
11933 { "", "binary", "", "octal", "hexadecimal" };
11934 static const char* const Bases[5] =
11935 { "", "Binary", "", "Octal", "Hexadecimal" };
11936 static const char* const maxima[5] =
11938 "0b11111111111111111111111111111111",
11942 const char *base, *Base, *max;
11944 /* check for hex */
11949 } else if (s[1] == 'b') {
11954 /* check for a decimal in disguise */
11955 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11957 /* so it must be octal */
11964 if (ckWARN(WARN_SYNTAX))
11965 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11966 "Misplaced _ in number");
11970 base = bases[shift];
11971 Base = Bases[shift];
11972 max = maxima[shift];
11974 /* read the rest of the number */
11976 /* x is used in the overflow test,
11977 b is the digit we're adding on. */
11982 /* if we don't mention it, we're done */
11986 /* _ are ignored -- but warned about if consecutive */
11988 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11989 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11990 "Misplaced _ in number");
11994 /* 8 and 9 are not octal */
11995 case '8': case '9':
11997 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12001 case '2': case '3': case '4':
12002 case '5': case '6': case '7':
12004 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12007 case '0': case '1':
12008 b = *s++ & 15; /* ASCII digit -> value of digit */
12012 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12013 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12014 /* make sure they said 0x */
12017 b = (*s++ & 7) + 9;
12019 /* Prepare to put the digit we have onto the end
12020 of the number so far. We check for overflows.
12026 x = u << shift; /* make room for the digit */
12028 if ((x >> shift) != u
12029 && !(PL_hints & HINT_NEW_BINARY)) {
12032 if (ckWARN_d(WARN_OVERFLOW))
12033 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12034 "Integer overflow in %s number",
12037 u = x | b; /* add the digit to the end */
12040 n *= nvshift[shift];
12041 /* If an NV has not enough bits in its
12042 * mantissa to represent an UV this summing of
12043 * small low-order numbers is a waste of time
12044 * (because the NV cannot preserve the
12045 * low-order bits anyway): we could just
12046 * remember when did we overflow and in the
12047 * end just multiply n by the right
12055 /* if we get here, we had success: make a scalar value from
12060 /* final misplaced underbar check */
12061 if (s[-1] == '_') {
12062 if (ckWARN(WARN_SYNTAX))
12063 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12068 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12069 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12070 "%s number > %s non-portable",
12076 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12077 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12078 "%s number > %s non-portable",
12083 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12084 sv = new_constant(start, s - start, "integer",
12086 else if (PL_hints & HINT_NEW_BINARY)
12087 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
12092 handle decimal numbers.
12093 we're also sent here when we read a 0 as the first digit
12095 case '1': case '2': case '3': case '4': case '5':
12096 case '6': case '7': case '8': case '9': case '.':
12099 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12102 /* read next group of digits and _ and copy into d */
12103 while (isDIGIT(*s) || *s == '_') {
12104 /* skip underscores, checking for misplaced ones
12108 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12109 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12110 "Misplaced _ in number");
12114 /* check for end of fixed-length buffer */
12116 Perl_croak(aTHX_ number_too_long);
12117 /* if we're ok, copy the character */
12122 /* final misplaced underbar check */
12123 if (lastub && s == lastub + 1) {
12124 if (ckWARN(WARN_SYNTAX))
12125 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12128 /* read a decimal portion if there is one. avoid
12129 3..5 being interpreted as the number 3. followed
12132 if (*s == '.' && s[1] != '.') {
12137 if (ckWARN(WARN_SYNTAX))
12138 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12139 "Misplaced _ in number");
12143 /* copy, ignoring underbars, until we run out of digits.
12145 for (; isDIGIT(*s) || *s == '_'; s++) {
12146 /* fixed length buffer check */
12148 Perl_croak(aTHX_ number_too_long);
12150 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12151 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12152 "Misplaced _ in number");
12158 /* fractional part ending in underbar? */
12159 if (s[-1] == '_') {
12160 if (ckWARN(WARN_SYNTAX))
12161 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12162 "Misplaced _ in number");
12164 if (*s == '.' && isDIGIT(s[1])) {
12165 /* oops, it's really a v-string, but without the "v" */
12171 /* read exponent part, if present */
12172 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12176 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12177 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12179 /* stray preinitial _ */
12181 if (ckWARN(WARN_SYNTAX))
12182 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12183 "Misplaced _ in number");
12187 /* allow positive or negative exponent */
12188 if (*s == '+' || *s == '-')
12191 /* stray initial _ */
12193 if (ckWARN(WARN_SYNTAX))
12194 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12195 "Misplaced _ in number");
12199 /* read digits of exponent */
12200 while (isDIGIT(*s) || *s == '_') {
12203 Perl_croak(aTHX_ number_too_long);
12207 if (((lastub && s == lastub + 1) ||
12208 (!isDIGIT(s[1]) && s[1] != '_'))
12209 && ckWARN(WARN_SYNTAX))
12210 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12211 "Misplaced _ in number");
12218 /* make an sv from the string */
12222 We try to do an integer conversion first if no characters
12223 indicating "float" have been found.
12228 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12230 if (flags == IS_NUMBER_IN_UV) {
12232 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12235 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12236 if (uv <= (UV) IV_MIN)
12237 sv_setiv(sv, -(IV)uv);
12244 /* terminate the string */
12246 nv = Atof(PL_tokenbuf);
12250 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12251 (PL_hints & HINT_NEW_INTEGER) )
12252 sv = new_constant(PL_tokenbuf,
12255 (floatit ? "float" : "integer"),
12259 /* if it starts with a v, it could be a v-string */
12262 sv = newSV(5); /* preallocate storage space */
12263 s = scan_vstring(s, PL_bufend, sv);
12267 /* make the op for the constant and return */
12270 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12272 lvalp->opval = NULL;
12278 S_scan_formline(pTHX_ register char *s)
12281 register char *eol;
12283 SV * const stuff = newSVpvs("");
12284 bool needargs = FALSE;
12285 bool eofmt = FALSE;
12287 char *tokenstart = s;
12290 if (PL_madskills) {
12291 savewhite = PL_thiswhite;
12296 while (!needargs) {
12299 #ifdef PERL_STRICT_CR
12300 while (SPACE_OR_TAB(*t))
12303 while (SPACE_OR_TAB(*t) || *t == '\r')
12306 if (*t == '\n' || t == PL_bufend) {
12311 if (PL_in_eval && !PL_rsfp) {
12312 eol = (char *) memchr(s,'\n',PL_bufend-s);
12317 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12319 for (t = s; t < eol; t++) {
12320 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12322 goto enough; /* ~~ must be first line in formline */
12324 if (*t == '@' || *t == '^')
12328 sv_catpvn(stuff, s, eol-s);
12329 #ifndef PERL_STRICT_CR
12330 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12331 char *end = SvPVX(stuff) + SvCUR(stuff);
12334 SvCUR_set(stuff, SvCUR(stuff) - 1);
12344 if (PL_madskills) {
12346 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12348 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12351 s = filter_gets(PL_linestr, PL_rsfp, 0);
12353 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12355 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12357 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12358 PL_last_lop = PL_last_uni = NULL;
12367 if (SvCUR(stuff)) {
12370 PL_lex_state = LEX_NORMAL;
12371 start_force(PL_curforce);
12372 NEXTVAL_NEXTTOKE.ival = 0;
12376 PL_lex_state = LEX_FORMLINE;
12378 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12380 else if (PL_encoding)
12381 sv_recode_to_utf8(stuff, PL_encoding);
12383 start_force(PL_curforce);
12384 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12386 start_force(PL_curforce);
12387 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12391 SvREFCNT_dec(stuff);
12393 PL_lex_formbrack = 0;
12397 if (PL_madskills) {
12399 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12401 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12402 PL_thiswhite = savewhite;
12414 PL_cshlen = strlen(PL_cshname);
12416 #if defined(USE_ITHREADS)
12417 PERL_UNUSED_CONTEXT;
12423 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12426 const I32 oldsavestack_ix = PL_savestack_ix;
12427 CV* const outsidecv = PL_compcv;
12430 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12432 SAVEI32(PL_subline);
12433 save_item(PL_subname);
12434 SAVESPTR(PL_compcv);
12436 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12437 CvFLAGS(PL_compcv) |= flags;
12439 PL_subline = CopLINE(PL_curcop);
12440 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12441 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12442 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12444 return oldsavestack_ix;
12448 #pragma segment Perl_yylex
12451 Perl_yywarn(pTHX_ const char *s)
12454 PL_in_eval |= EVAL_WARNONLY;
12456 PL_in_eval &= ~EVAL_WARNONLY;
12461 Perl_yyerror(pTHX_ const char *s)
12464 const char *where = NULL;
12465 const char *context = NULL;
12468 int yychar = PL_parser->yychar;
12470 if (!yychar || (yychar == ';' && !PL_rsfp))
12472 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12473 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12474 PL_oldbufptr != PL_bufptr) {
12477 The code below is removed for NetWare because it abends/crashes on NetWare
12478 when the script has error such as not having the closing quotes like:
12479 if ($var eq "value)
12480 Checking of white spaces is anyway done in NetWare code.
12483 while (isSPACE(*PL_oldoldbufptr))
12486 context = PL_oldoldbufptr;
12487 contlen = PL_bufptr - PL_oldoldbufptr;
12489 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12490 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12493 The code below is removed for NetWare because it abends/crashes on NetWare
12494 when the script has error such as not having the closing quotes like:
12495 if ($var eq "value)
12496 Checking of white spaces is anyway done in NetWare code.
12499 while (isSPACE(*PL_oldbufptr))
12502 context = PL_oldbufptr;
12503 contlen = PL_bufptr - PL_oldbufptr;
12505 else if (yychar > 255)
12506 where = "next token ???";
12507 else if (yychar == -2) { /* YYEMPTY */
12508 if (PL_lex_state == LEX_NORMAL ||
12509 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12510 where = "at end of line";
12511 else if (PL_lex_inpat)
12512 where = "within pattern";
12514 where = "within string";
12517 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12519 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12520 else if (isPRINT_LC(yychar))
12521 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12523 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12524 where = SvPVX_const(where_sv);
12526 msg = sv_2mortal(newSVpv(s, 0));
12527 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12528 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12530 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12532 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12533 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12534 Perl_sv_catpvf(aTHX_ msg,
12535 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12536 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12539 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12543 if (PL_error_count >= 10) {
12544 if (PL_in_eval && SvCUR(ERRSV))
12545 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12546 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12548 Perl_croak(aTHX_ "%s has too many errors.\n",
12549 OutCopFILE(PL_curcop));
12552 PL_in_my_stash = NULL;
12556 #pragma segment Main
12560 S_swallow_bom(pTHX_ U8 *s)
12563 const STRLEN slen = SvCUR(PL_linestr);
12566 if (s[1] == 0xFE) {
12567 /* UTF-16 little-endian? (or UTF32-LE?) */
12568 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12569 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12570 #ifndef PERL_NO_UTF16_FILTER
12571 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12574 if (PL_bufend > (char*)s) {
12578 filter_add(utf16rev_textfilter, NULL);
12579 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12580 utf16_to_utf8_reversed(s, news,
12581 PL_bufend - (char*)s - 1,
12583 sv_setpvn(PL_linestr, (const char*)news, newlen);
12585 s = (U8*)SvPVX(PL_linestr);
12586 Copy(news, s, newlen, U8);
12590 SvUTF8_on(PL_linestr);
12591 s = (U8*)SvPVX(PL_linestr);
12593 /* FIXME - is this a general bug fix? */
12596 PL_bufend = SvPVX(PL_linestr) + newlen;
12599 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12604 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12605 #ifndef PERL_NO_UTF16_FILTER
12606 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12609 if (PL_bufend > (char *)s) {
12613 filter_add(utf16_textfilter, NULL);
12614 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12615 utf16_to_utf8(s, news,
12616 PL_bufend - (char*)s,
12618 sv_setpvn(PL_linestr, (const char*)news, newlen);
12620 SvUTF8_on(PL_linestr);
12621 s = (U8*)SvPVX(PL_linestr);
12622 PL_bufend = SvPVX(PL_linestr) + newlen;
12625 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12630 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12631 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12632 s += 3; /* UTF-8 */
12638 if (s[2] == 0xFE && s[3] == 0xFF) {
12639 /* UTF-32 big-endian */
12640 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12643 else if (s[2] == 0 && s[3] != 0) {
12646 * are a good indicator of UTF-16BE. */
12647 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12653 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12654 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12655 s += 4; /* UTF-8 */
12661 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12664 * are a good indicator of UTF-16LE. */
12665 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12674 * Restore a source filter.
12678 restore_rsfp(pTHX_ void *f)
12681 PerlIO * const fp = (PerlIO*)f;
12683 if (PL_rsfp == PerlIO_stdin())
12684 PerlIO_clearerr(PL_rsfp);
12685 else if (PL_rsfp && (PL_rsfp != fp))
12686 PerlIO_close(PL_rsfp);
12690 #ifndef PERL_NO_UTF16_FILTER
12692 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12695 const STRLEN old = SvCUR(sv);
12696 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12697 DEBUG_P(PerlIO_printf(Perl_debug_log,
12698 "utf16_textfilter(%p): %d %d (%d)\n",
12699 FPTR2DPTR(void *, utf16_textfilter),
12700 idx, maxlen, (int) count));
12704 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12705 Copy(SvPVX_const(sv), tmps, old, char);
12706 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12707 SvCUR(sv) - old, &newlen);
12708 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12710 DEBUG_P({sv_dump(sv);});
12715 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12718 const STRLEN old = SvCUR(sv);
12719 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12720 DEBUG_P(PerlIO_printf(Perl_debug_log,
12721 "utf16rev_textfilter(%p): %d %d (%d)\n",
12722 FPTR2DPTR(void *, utf16rev_textfilter),
12723 idx, maxlen, (int) count));
12727 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12728 Copy(SvPVX_const(sv), tmps, old, char);
12729 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12730 SvCUR(sv) - old, &newlen);
12731 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12733 DEBUG_P({ sv_dump(sv); });
12739 Returns a pointer to the next character after the parsed
12740 vstring, as well as updating the passed in sv.
12742 Function must be called like
12745 s = scan_vstring(s,e,sv);
12747 where s and e are the start and end of the string.
12748 The sv should already be large enough to store the vstring
12749 passed in, for performance reasons.
12754 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12757 const char *pos = s;
12758 const char *start = s;
12759 if (*pos == 'v') pos++; /* get past 'v' */
12760 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12762 if ( *pos != '.') {
12763 /* this may not be a v-string if followed by => */
12764 const char *next = pos;
12765 while (next < e && isSPACE(*next))
12767 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12768 /* return string not v-string */
12769 sv_setpvn(sv,(char *)s,pos-s);
12770 return (char *)pos;
12774 if (!isALPHA(*pos)) {
12775 U8 tmpbuf[UTF8_MAXBYTES+1];
12778 s++; /* get past 'v' */
12780 sv_setpvn(sv, "", 0);
12783 /* this is atoi() that tolerates underscores */
12786 const char *end = pos;
12788 while (--end >= s) {
12790 const UV orev = rev;
12791 rev += (*end - '0') * mult;
12793 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12794 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12795 "Integer overflow in decimal number");
12799 if (rev > 0x7FFFFFFF)
12800 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12802 /* Append native character for the rev point */
12803 tmpend = uvchr_to_utf8(tmpbuf, rev);
12804 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12805 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12807 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12813 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12817 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12825 * c-indentation-style: bsd
12826 * c-basic-offset: 4
12827 * indent-tabs-mode: t
12830 * ex: set ts=8 sts=4 sw=4 noet: