3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define new_constant(a,b,c,d,e,f,g) \
27 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
29 #define yylval (PL_parser->yylval)
31 /* YYINITDEPTH -- initial size of the parser's stacks. */
32 #define YYINITDEPTH 200
34 /* XXX temporary backwards compatibility */
35 #define PL_lex_brackets (PL_parser->lex_brackets)
36 #define PL_lex_brackstack (PL_parser->lex_brackstack)
37 #define PL_lex_casemods (PL_parser->lex_casemods)
38 #define PL_lex_casestack (PL_parser->lex_casestack)
39 #define PL_lex_defer (PL_parser->lex_defer)
40 #define PL_lex_dojoin (PL_parser->lex_dojoin)
41 #define PL_lex_expect (PL_parser->lex_expect)
42 #define PL_lex_formbrack (PL_parser->lex_formbrack)
43 #define PL_lex_inpat (PL_parser->lex_inpat)
44 #define PL_lex_inwhat (PL_parser->lex_inwhat)
45 #define PL_lex_op (PL_parser->lex_op)
46 #define PL_lex_repl (PL_parser->lex_repl)
47 #define PL_lex_starts (PL_parser->lex_starts)
48 #define PL_lex_stuff (PL_parser->lex_stuff)
49 #define PL_multi_start (PL_parser->multi_start)
50 #define PL_multi_open (PL_parser->multi_open)
51 #define PL_multi_close (PL_parser->multi_close)
52 #define PL_pending_ident (PL_parser->pending_ident)
53 #define PL_preambled (PL_parser->preambled)
54 #define PL_sublex_info (PL_parser->sublex_info)
55 #define PL_linestr (PL_parser->linestr)
56 #define PL_expect (PL_parser->expect)
57 #define PL_copline (PL_parser->copline)
58 #define PL_bufptr (PL_parser->bufptr)
59 #define PL_oldbufptr (PL_parser->oldbufptr)
60 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
61 #define PL_linestart (PL_parser->linestart)
62 #define PL_bufend (PL_parser->bufend)
63 #define PL_last_uni (PL_parser->last_uni)
64 #define PL_last_lop (PL_parser->last_lop)
65 #define PL_last_lop_op (PL_parser->last_lop_op)
66 #define PL_lex_state (PL_parser->lex_state)
67 #define PL_rsfp (PL_parser->rsfp)
68 #define PL_rsfp_filters (PL_parser->rsfp_filters)
69 #define PL_in_my (PL_parser->in_my)
70 #define PL_in_my_stash (PL_parser->in_my_stash)
71 #define PL_tokenbuf (PL_parser->tokenbuf)
72 #define PL_multi_end (PL_parser->multi_end)
73 #define PL_error_count (PL_parser->error_count)
76 # define PL_endwhite (PL_parser->endwhite)
77 # define PL_faketokens (PL_parser->faketokens)
78 # define PL_lasttoke (PL_parser->lasttoke)
79 # define PL_nextwhite (PL_parser->nextwhite)
80 # define PL_realtokenstart (PL_parser->realtokenstart)
81 # define PL_skipwhite (PL_parser->skipwhite)
82 # define PL_thisclose (PL_parser->thisclose)
83 # define PL_thismad (PL_parser->thismad)
84 # define PL_thisopen (PL_parser->thisopen)
85 # define PL_thisstuff (PL_parser->thisstuff)
86 # define PL_thistoken (PL_parser->thistoken)
87 # define PL_thiswhite (PL_parser->thiswhite)
88 # define PL_thiswhite (PL_parser->thiswhite)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_curforce (PL_parser->curforce)
92 # define PL_nexttoke (PL_parser->nexttoke)
93 # define PL_nexttype (PL_parser->nexttype)
94 # define PL_nextval (PL_parser->nextval)
98 S_pending_ident(pTHX);
100 static const char ident_too_long[] = "Identifier too long";
101 static const char commaless_variable_list[] = "comma-less variable list";
103 #ifndef PERL_NO_UTF16_FILTER
104 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
105 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
109 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
110 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
112 # define CURMAD(slot,sv)
113 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
116 #define XFAKEBRACK 128
117 #define XENUMMASK 127
119 #ifdef USE_UTF8_SCRIPTS
120 # define UTF (!IN_BYTES)
122 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
125 /* In variables named $^X, these are the legal values for X.
126 * 1999-02-27 mjd-perl-patch@plover.com */
127 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
129 /* On MacOS, respect nonbreaking spaces */
130 #ifdef MACOS_TRADITIONAL
131 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
133 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136 /* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
138 * can get by with a single comparison (if the compiler is smart enough).
141 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
144 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149 /* at end of code, eg "$x" followed by: */
150 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155 #define LEX_INTERPCONST 2 /* NOT USED */
156 #define LEX_FORMLINE 1 /* expecting a format line */
157 #define LEX_KNOWNEXT 0 /* next token known; just return it */
161 static const char* const lex_state_names[] = {
180 #include "keywords.h"
182 /* CLINE is a macro that ensures PL_copline has a sane value */
187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 # define SKIPSPACE0(s) skipspace0(s)
191 # define SKIPSPACE1(s) skipspace1(s)
192 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193 # define PEEKSPACE(s) skipspace2(s,0)
195 # define SKIPSPACE0(s) skipspace(s)
196 # define SKIPSPACE1(s) skipspace(s)
197 # define SKIPSPACE2(s,tsv) skipspace(s)
198 # define PEEKSPACE(s) skipspace(s)
202 * Convenience functions to return different tokens and prime the
203 * lexer for the next token. They all take an argument.
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
215 * FUN1 : not used, except for not, which isn't a UNIOP
216 * BOop : bitwise or or xor
218 * SHop : shift operator
219 * PWop : power operator
220 * PMop : pattern-matching operator
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
224 * Rop : relational operator <= != gt
226 * Also see LOP and lop() below.
229 #ifdef DEBUGGING /* Serve -DT. */
230 # define REPORT(retval) tokereport((I32)retval)
232 # define REPORT(retval) (retval)
235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
256 /* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
261 #define UNI2(f,x) { \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
268 return REPORT( (int)FUNC1 ); \
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
272 #define UNI(f) UNI2(f,XTERM)
273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
275 #define UNIBRACK(f) { \
278 PL_last_uni = PL_oldbufptr; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 /* grandfather return to old style */
286 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
290 /* how to interpret the yylval associated with the token */
294 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
300 static struct debug_tokens {
302 enum token_type type;
304 } const debug_tokens[] =
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
371 { 0, TOKENTYPE_NONE, NULL }
374 /* dump the returned token in rv, plus any optional arg in yylval */
377 S_tokereport(pTHX_ I32 rv)
381 const char *name = NULL;
382 enum token_type type = TOKENTYPE_NONE;
383 const struct debug_tokens *p;
384 SV* const report = newSVpvs("<== ");
386 for (p = debug_tokens; p->token; p++) {
387 if (p->token == (int)rv) {
394 Perl_sv_catpv(aTHX_ report, name);
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
398 sv_catpvs(report, "EOF");
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[yylval.ival]);
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
415 case TOKENTYPE_OPVAL:
417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
418 PL_op_name[yylval.opval->op_type]);
419 if (yylval.opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(yylval.opval)));
426 sv_catpvs(report, "(opval=null)");
429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
435 /* print the buffer with suitable escapes */
438 S_printbuf(pTHX_ const char* fmt, const char* s)
440 SV* const tmp = newSVpvs("");
441 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
450 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
451 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
455 S_ao(pTHX_ int toketype)
458 if (*PL_bufptr == '=') {
460 if (toketype == ANDAND)
461 yylval.ival = OP_ANDASSIGN;
462 else if (toketype == OROR)
463 yylval.ival = OP_ORASSIGN;
464 else if (toketype == DORDOR)
465 yylval.ival = OP_DORASSIGN;
473 * When Perl expects an operator and finds something else, no_op
474 * prints the warning. It always prints "<something> found where
475 * operator expected. It prints "Missing semicolon on previous line?"
476 * if the surprise occurs at the start of the line. "do you need to
477 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
478 * where the compiler doesn't know if foo is a method call or a function.
479 * It prints "Missing operator before end of line" if there's nothing
480 * after the missing operator, or "... before <...>" if there is something
481 * after the missing operator.
485 S_no_op(pTHX_ const char *what, char *s)
488 char * const oldbp = PL_bufptr;
489 const bool is_first = (PL_oldbufptr == PL_linestart);
495 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
496 if (ckWARN_d(WARN_SYNTAX)) {
498 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
499 "\t(Missing semicolon on previous line?)\n");
500 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
502 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
504 if (t < PL_bufptr && isSPACE(*t))
505 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
506 "\t(Do you need to predeclare %.*s?)\n",
507 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
512 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
520 * Complain about missing quote/regexp/heredoc terminator.
521 * If it's called with NULL then it cauterizes the line buffer.
522 * If we're in a delimited string and the delimiter is a control
523 * character, it's reformatted into a two-char sequence like ^C.
528 S_missingterm(pTHX_ char *s)
534 char * const nl = strrchr(s,'\n');
540 iscntrl(PL_multi_close)
542 PL_multi_close < 32 || PL_multi_close == 127
546 tmpbuf[1] = (char)toCTRL(PL_multi_close);
551 *tmpbuf = (char)PL_multi_close;
555 q = strchr(s,'"') ? '\'' : '"';
556 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
559 #define FEATURE_IS_ENABLED(name) \
560 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
561 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
563 * S_feature_is_enabled
564 * Check whether the named feature is enabled.
567 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
570 HV * const hinthv = GvHV(PL_hintgv);
571 char he_name[32] = "feature_";
572 (void) my_strlcpy(&he_name[8], name, 24);
574 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
582 Perl_deprecate(pTHX_ const char *s)
584 if (ckWARN(WARN_DEPRECATED))
585 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
589 Perl_deprecate_old(pTHX_ const char *s)
591 /* This function should NOT be called for any new deprecated warnings */
592 /* Use Perl_deprecate instead */
594 /* It is here to maintain backward compatibility with the pre-5.8 */
595 /* warnings category hierarchy. The "deprecated" category used to */
596 /* live under the "syntax" category. It is now a top-level category */
597 /* in its own right. */
599 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
600 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
601 "Use of %s is deprecated", s);
605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
606 * utf16-to-utf8-reversed.
609 #ifdef PERL_CR_FILTER
613 register const char *s = SvPVX_const(sv);
614 register const char * const e = s + SvCUR(sv);
615 /* outer loop optimized to do nothing if there are no CR-LFs */
617 if (*s++ == '\r' && *s == '\n') {
618 /* hit a CR-LF, need to copy the rest */
619 register char *d = s - 1;
622 if (*s == '\r' && s[1] == '\n')
633 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
635 const I32 count = FILTER_READ(idx+1, sv, maxlen);
636 if (count > 0 && !maxlen)
647 * Create a parser object and initialise its parser and lexer fields
649 * rsfp is the opened file handle to read from (if any),
651 * line holds any initial content already read from the file (or in
652 * the case of no file, such as an eval, the whole contents);
654 * new_filter indicates that this is a new file and it shouldn't inherit
655 * the filters from the current parser (ie require).
659 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
662 const char *s = NULL;
664 yy_parser *parser, *oparser;
666 /* create and initialise a parser */
668 Newxz(parser, 1, yy_parser);
669 parser->old_parser = oparser = PL_parser;
672 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
673 parser->ps = parser->stack;
674 parser->stack_size = YYINITDEPTH;
676 parser->stack->state = 0;
677 parser->yyerrstatus = 0;
678 parser->yychar = YYEMPTY; /* Cause a token to be read. */
680 /* on scope exit, free this parser and restore any outer one */
682 parser->saved_curcop = PL_curcop;
684 /* initialise lexer state */
687 parser->curforce = -1;
689 parser->nexttoke = 0;
691 parser->copline = NOLINE;
692 parser->lex_state = LEX_NORMAL;
693 parser->expect = XSTATE;
695 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
696 : (AV*)SvREFCNT_inc(oparser->rsfp_filters);
698 Newx(parser->lex_brackstack, 120, char);
699 Newx(parser->lex_casestack, 12, char);
700 *parser->lex_casestack = '\0';
703 s = SvPV_const(line, len);
709 parser->linestr = newSVpvs("\n;");
710 } else if (SvREADONLY(line) || s[len-1] != ';') {
711 parser->linestr = newSVsv(line);
713 sv_catpvs(parser->linestr, "\n;");
716 SvREFCNT_inc_simple_void_NN(line);
717 parser->linestr = line;
719 parser->oldoldbufptr =
722 parser->linestart = SvPVX(parser->linestr);
723 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
724 parser->last_lop = parser->last_uni = NULL;
728 /* delete a parser object */
731 Perl_parser_free(pTHX_ const yy_parser *parser)
733 PL_curcop = parser->saved_curcop;
734 SvREFCNT_dec(parser->linestr);
736 if (parser->rsfp == PerlIO_stdin())
737 PerlIO_clearerr(parser->rsfp);
738 else if (parser->rsfp && parser->old_parser
739 && parser->rsfp != parser->old_parser->rsfp)
740 PerlIO_close(parser->rsfp);
741 SvREFCNT_dec(parser->rsfp_filters);
743 Safefree(parser->stack);
744 Safefree(parser->lex_brackstack);
745 Safefree(parser->lex_casestack);
746 PL_parser = parser->old_parser;
753 * Finalizer for lexing operations. Must be called when the parser is
754 * done with the lexer.
761 PL_doextract = FALSE;
766 * This subroutine has nothing to do with tilting, whether at windmills
767 * or pinball tables. Its name is short for "increment line". It
768 * increments the current line number in CopLINE(PL_curcop) and checks
769 * to see whether the line starts with a comment of the form
770 * # line 500 "foo.pm"
771 * If so, it sets the current line number and file to the values in the comment.
775 S_incline(pTHX_ const char *s)
782 CopLINE_inc(PL_curcop);
785 while (SPACE_OR_TAB(*s))
787 if (strnEQ(s, "line", 4))
791 if (SPACE_OR_TAB(*s))
795 while (SPACE_OR_TAB(*s))
803 while (SPACE_OR_TAB(*s))
805 if (*s == '"' && (t = strchr(s+1, '"'))) {
815 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
817 if (*e != '\n' && *e != '\0')
818 return; /* false alarm */
821 const STRLEN len = t - s;
823 const char * const cf = CopFILE(PL_curcop);
824 STRLEN tmplen = cf ? strlen(cf) : 0;
825 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
826 /* must copy *{"::_<(eval N)[oldfilename:L]"}
827 * to *{"::_<newfilename"} */
828 /* However, the long form of evals is only turned on by the
829 debugger - usually they're "(eval %lu)" */
833 STRLEN tmplen2 = len;
834 if (tmplen + 2 <= sizeof smallbuf)
837 Newx(tmpbuf, tmplen + 2, char);
840 memcpy(tmpbuf + 2, cf, tmplen);
842 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
847 if (tmplen2 + 2 <= sizeof smallbuf)
850 Newx(tmpbuf2, tmplen2 + 2, char);
852 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
853 /* Either they malloc'd it, or we malloc'd it,
854 so no prefix is present in ours. */
859 memcpy(tmpbuf2 + 2, s, tmplen2);
862 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
864 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
865 /* adjust ${"::_<newfilename"} to store the new file name */
866 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
867 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
868 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
871 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
873 if (tmpbuf != smallbuf) Safefree(tmpbuf);
876 CopFILE_free(PL_curcop);
877 CopFILE_setn(PL_curcop, s, len);
879 CopLINE_set(PL_curcop, atoi(n)-1);
883 /* skip space before PL_thistoken */
886 S_skipspace0(pTHX_ register char *s)
893 PL_thiswhite = newSVpvs("");
894 sv_catsv(PL_thiswhite, PL_skipwhite);
895 sv_free(PL_skipwhite);
898 PL_realtokenstart = s - SvPVX(PL_linestr);
902 /* skip space after PL_thistoken */
905 S_skipspace1(pTHX_ register char *s)
907 const char *start = s;
908 I32 startoff = start - SvPVX(PL_linestr);
913 start = SvPVX(PL_linestr) + startoff;
914 if (!PL_thistoken && PL_realtokenstart >= 0) {
915 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
916 PL_thistoken = newSVpvn(tstart, start - tstart);
918 PL_realtokenstart = -1;
921 PL_nextwhite = newSVpvs("");
922 sv_catsv(PL_nextwhite, PL_skipwhite);
923 sv_free(PL_skipwhite);
930 S_skipspace2(pTHX_ register char *s, SV **svp)
933 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
934 const I32 startoff = s - SvPVX(PL_linestr);
937 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
938 if (!PL_madskills || !svp)
940 start = SvPVX(PL_linestr) + startoff;
941 if (!PL_thistoken && PL_realtokenstart >= 0) {
942 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
943 PL_thistoken = newSVpvn(tstart, start - tstart);
944 PL_realtokenstart = -1;
949 sv_setsv(*svp, PL_skipwhite);
950 sv_free(PL_skipwhite);
959 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
961 AV *av = CopFILEAVx(PL_curcop);
963 SV * const sv = newSV_type(SVt_PVMG);
965 sv_setsv(sv, orig_sv);
967 sv_setpvn(sv, buf, len);
970 av_store(av, (I32)CopLINE(PL_curcop), sv);
976 * Called to gobble the appropriate amount and type of whitespace.
977 * Skips comments as well.
981 S_skipspace(pTHX_ register char *s)
986 int startoff = s - SvPVX(PL_linestr);
989 sv_free(PL_skipwhite);
994 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
995 while (s < PL_bufend && SPACE_OR_TAB(*s))
1005 SSize_t oldprevlen, oldoldprevlen;
1006 SSize_t oldloplen = 0, oldunilen = 0;
1007 while (s < PL_bufend && isSPACE(*s)) {
1008 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1013 if (s < PL_bufend && *s == '#') {
1014 while (s < PL_bufend && *s != '\n')
1016 if (s < PL_bufend) {
1018 if (PL_in_eval && !PL_rsfp) {
1025 /* only continue to recharge the buffer if we're at the end
1026 * of the buffer, we're not reading from a source filter, and
1027 * we're in normal lexing mode
1029 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1030 PL_lex_state == LEX_FORMLINE)
1037 /* try to recharge the buffer */
1039 curoff = s - SvPVX(PL_linestr);
1042 if ((s = filter_gets(PL_linestr, PL_rsfp,
1043 (prevlen = SvCUR(PL_linestr)))) == NULL)
1046 if (PL_madskills && curoff != startoff) {
1048 PL_skipwhite = newSVpvs("");
1049 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1053 /* mustn't throw out old stuff yet if madpropping */
1054 SvCUR(PL_linestr) = curoff;
1055 s = SvPVX(PL_linestr) + curoff;
1057 if (curoff && s[-1] == '\n')
1061 /* end of file. Add on the -p or -n magic */
1062 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1065 sv_catpvs(PL_linestr,
1066 ";}continue{print or die qq(-p destination: $!\\n);}");
1068 sv_setpvs(PL_linestr,
1069 ";}continue{print or die qq(-p destination: $!\\n);}");
1071 PL_minus_n = PL_minus_p = 0;
1073 else if (PL_minus_n) {
1075 sv_catpvn(PL_linestr, ";}", 2);
1077 sv_setpvn(PL_linestr, ";}", 2);
1083 sv_catpvn(PL_linestr,";", 1);
1085 sv_setpvn(PL_linestr,";", 1);
1088 /* reset variables for next time we lex */
1089 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1095 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1096 PL_last_lop = PL_last_uni = NULL;
1098 /* Close the filehandle. Could be from -P preprocessor,
1099 * STDIN, or a regular file. If we were reading code from
1100 * STDIN (because the commandline held no -e or filename)
1101 * then we don't close it, we reset it so the code can
1102 * read from STDIN too.
1105 if (PL_preprocess && !PL_in_eval)
1106 (void)PerlProc_pclose(PL_rsfp);
1107 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1108 PerlIO_clearerr(PL_rsfp);
1110 (void)PerlIO_close(PL_rsfp);
1115 /* not at end of file, so we only read another line */
1116 /* make corresponding updates to old pointers, for yyerror() */
1117 oldprevlen = PL_oldbufptr - PL_bufend;
1118 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1120 oldunilen = PL_last_uni - PL_bufend;
1122 oldloplen = PL_last_lop - PL_bufend;
1123 PL_linestart = PL_bufptr = s + prevlen;
1124 PL_bufend = s + SvCUR(PL_linestr);
1126 PL_oldbufptr = s + oldprevlen;
1127 PL_oldoldbufptr = s + oldoldprevlen;
1129 PL_last_uni = s + oldunilen;
1131 PL_last_lop = s + oldloplen;
1134 /* debugger active and we're not compiling the debugger code,
1135 * so store the line into the debugger's array of lines
1137 if (PERLDB_LINE && PL_curstash != PL_debstash)
1138 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1145 PL_skipwhite = newSVpvs("");
1146 curoff = s - SvPVX(PL_linestr);
1147 if (curoff - startoff)
1148 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1157 * Check the unary operators to ensure there's no ambiguity in how they're
1158 * used. An ambiguous piece of code would be:
1160 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1161 * the +5 is its argument.
1171 if (PL_oldoldbufptr != PL_last_uni)
1173 while (isSPACE(*PL_last_uni))
1176 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1178 if ((t = strchr(s, '(')) && t < PL_bufptr)
1181 if (ckWARN_d(WARN_AMBIGUOUS)){
1182 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1183 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1184 (int)(s - PL_last_uni), PL_last_uni);
1189 * LOP : macro to build a list operator. Its behaviour has been replaced
1190 * with a subroutine, S_lop() for which LOP is just another name.
1193 #define LOP(f,x) return lop(f,x,s)
1197 * Build a list operator (or something that might be one). The rules:
1198 * - if we have a next token, then it's a list operator [why?]
1199 * - if the next thing is an opening paren, then it's a function
1200 * - else it's a list operator
1204 S_lop(pTHX_ I32 f, int x, char *s)
1211 PL_last_lop = PL_oldbufptr;
1212 PL_last_lop_op = (OPCODE)f;
1215 return REPORT(LSTOP);
1218 return REPORT(LSTOP);
1221 return REPORT(FUNC);
1224 return REPORT(FUNC);
1226 return REPORT(LSTOP);
1232 * Sets up for an eventual force_next(). start_force(0) basically does
1233 * an unshift, while start_force(-1) does a push. yylex removes items
1238 S_start_force(pTHX_ int where)
1242 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1243 where = PL_lasttoke;
1244 assert(PL_curforce < 0 || PL_curforce == where);
1245 if (PL_curforce != where) {
1246 for (i = PL_lasttoke; i > where; --i) {
1247 PL_nexttoke[i] = PL_nexttoke[i-1];
1251 if (PL_curforce < 0) /* in case of duplicate start_force() */
1252 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1253 PL_curforce = where;
1256 curmad('^', newSVpvs(""));
1257 CURMAD('_', PL_nextwhite);
1262 S_curmad(pTHX_ char slot, SV *sv)
1268 if (PL_curforce < 0)
1269 where = &PL_thismad;
1271 where = &PL_nexttoke[PL_curforce].next_mad;
1274 sv_setpvn(sv, "", 0);
1277 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1279 else if (PL_encoding) {
1280 sv_recode_to_utf8(sv, PL_encoding);
1285 /* keep a slot open for the head of the list? */
1286 if (slot != '_' && *where && (*where)->mad_key == '^') {
1287 (*where)->mad_key = slot;
1288 sv_free((SV*)((*where)->mad_val));
1289 (*where)->mad_val = (void*)sv;
1292 addmad(newMADsv(slot, sv), where, 0);
1295 # define start_force(where) NOOP
1296 # define curmad(slot, sv) NOOP
1301 * When the lexer realizes it knows the next token (for instance,
1302 * it is reordering tokens for the parser) then it can call S_force_next
1303 * to know what token to return the next time the lexer is called. Caller
1304 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1305 * and possibly PL_expect to ensure the lexer handles the token correctly.
1309 S_force_next(pTHX_ I32 type)
1313 if (PL_curforce < 0)
1314 start_force(PL_lasttoke);
1315 PL_nexttoke[PL_curforce].next_type = type;
1316 if (PL_lex_state != LEX_KNOWNEXT)
1317 PL_lex_defer = PL_lex_state;
1318 PL_lex_state = LEX_KNOWNEXT;
1319 PL_lex_expect = PL_expect;
1322 PL_nexttype[PL_nexttoke] = type;
1324 if (PL_lex_state != LEX_KNOWNEXT) {
1325 PL_lex_defer = PL_lex_state;
1326 PL_lex_expect = PL_expect;
1327 PL_lex_state = LEX_KNOWNEXT;
1333 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1336 SV * const sv = newSVpvn(start,len);
1337 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1344 * When the lexer knows the next thing is a word (for instance, it has
1345 * just seen -> and it knows that the next char is a word char, then
1346 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1350 * char *start : buffer position (must be within PL_linestr)
1351 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1352 * int check_keyword : if true, Perl checks to make sure the word isn't
1353 * a keyword (do this if the word is a label, e.g. goto FOO)
1354 * int allow_pack : if true, : characters will also be allowed (require,
1355 * use, etc. do this)
1356 * int allow_initial_tick : used by the "sub" lexer only.
1360 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1366 start = SKIPSPACE1(start);
1368 if (isIDFIRST_lazy_if(s,UTF) ||
1369 (allow_pack && *s == ':') ||
1370 (allow_initial_tick && *s == '\'') )
1372 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1373 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1375 start_force(PL_curforce);
1377 curmad('X', newSVpvn(start,s-start));
1378 if (token == METHOD) {
1383 PL_expect = XOPERATOR;
1387 curmad('g', newSVpvs( "forced" ));
1388 NEXTVAL_NEXTTOKE.opval
1389 = (OP*)newSVOP(OP_CONST,0,
1390 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1391 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1399 * Called when the lexer wants $foo *foo &foo etc, but the program
1400 * text only contains the "foo" portion. The first argument is a pointer
1401 * to the "foo", and the second argument is the type symbol to prefix.
1402 * Forces the next token to be a "WORD".
1403 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1407 S_force_ident(pTHX_ register const char *s, int kind)
1411 const STRLEN len = strlen(s);
1412 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1413 start_force(PL_curforce);
1414 NEXTVAL_NEXTTOKE.opval = o;
1417 o->op_private = OPpCONST_ENTERED;
1418 /* XXX see note in pp_entereval() for why we forgo typo
1419 warnings if the symbol must be introduced in an eval.
1421 gv_fetchpvn_flags(s, len,
1422 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1424 kind == '$' ? SVt_PV :
1425 kind == '@' ? SVt_PVAV :
1426 kind == '%' ? SVt_PVHV :
1434 Perl_str_to_version(pTHX_ SV *sv)
1439 const char *start = SvPV_const(sv,len);
1440 const char * const end = start + len;
1441 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1442 while (start < end) {
1446 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1451 retval += ((NV)n)/nshift;
1460 * Forces the next token to be a version number.
1461 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1462 * and if "guessing" is TRUE, then no new token is created (and the caller
1463 * must use an alternative parsing method).
1467 S_force_version(pTHX_ char *s, int guessing)
1473 I32 startoff = s - SvPVX(PL_linestr);
1482 while (isDIGIT(*d) || *d == '_' || *d == '.')
1486 start_force(PL_curforce);
1487 curmad('X', newSVpvn(s,d-s));
1490 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1492 s = scan_num(s, &yylval);
1493 version = yylval.opval;
1494 ver = cSVOPx(version)->op_sv;
1495 if (SvPOK(ver) && !SvNIOK(ver)) {
1496 SvUPGRADE(ver, SVt_PVNV);
1497 SvNV_set(ver, str_to_version(ver));
1498 SvNOK_on(ver); /* hint that it is a version */
1501 else if (guessing) {
1504 sv_free(PL_nextwhite); /* let next token collect whitespace */
1506 s = SvPVX(PL_linestr) + startoff;
1514 if (PL_madskills && !version) {
1515 sv_free(PL_nextwhite); /* let next token collect whitespace */
1517 s = SvPVX(PL_linestr) + startoff;
1520 /* NOTE: The parser sees the package name and the VERSION swapped */
1521 start_force(PL_curforce);
1522 NEXTVAL_NEXTTOKE.opval = version;
1530 * Tokenize a quoted string passed in as an SV. It finds the next
1531 * chunk, up to end of string or a backslash. It may make a new
1532 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1537 S_tokeq(pTHX_ SV *sv)
1541 register char *send;
1549 s = SvPV_force(sv, len);
1550 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1553 while (s < send && *s != '\\')
1558 if ( PL_hints & HINT_NEW_STRING ) {
1559 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1565 if (s + 1 < send && (s[1] == '\\'))
1566 s++; /* all that, just for this */
1571 SvCUR_set(sv, d - SvPVX_const(sv));
1573 if ( PL_hints & HINT_NEW_STRING )
1574 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1579 * Now come three functions related to double-quote context,
1580 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1581 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1582 * interact with PL_lex_state, and create fake ( ... ) argument lists
1583 * to handle functions and concatenation.
1584 * They assume that whoever calls them will be setting up a fake
1585 * join call, because each subthing puts a ',' after it. This lets
1588 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1590 * (I'm not sure whether the spurious commas at the end of lcfirst's
1591 * arguments and join's arguments are created or not).
1596 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1598 * Pattern matching will set PL_lex_op to the pattern-matching op to
1599 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1601 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1603 * Everything else becomes a FUNC.
1605 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1606 * had an OP_CONST or OP_READLINE). This just sets us up for a
1607 * call to S_sublex_push().
1611 S_sublex_start(pTHX)
1614 register const I32 op_type = yylval.ival;
1616 if (op_type == OP_NULL) {
1617 yylval.opval = PL_lex_op;
1621 if (op_type == OP_CONST || op_type == OP_READLINE) {
1622 SV *sv = tokeq(PL_lex_stuff);
1624 if (SvTYPE(sv) == SVt_PVIV) {
1625 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1627 const char * const p = SvPV_const(sv, len);
1628 SV * const nsv = newSVpvn(p, len);
1634 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1635 PL_lex_stuff = NULL;
1636 /* Allow <FH> // "foo" */
1637 if (op_type == OP_READLINE)
1638 PL_expect = XTERMORDORDOR;
1641 else if (op_type == OP_BACKTICK && PL_lex_op) {
1642 /* readpipe() vas overriden */
1643 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1644 yylval.opval = PL_lex_op;
1646 PL_lex_stuff = NULL;
1650 PL_sublex_info.super_state = PL_lex_state;
1651 PL_sublex_info.sub_inwhat = (U16)op_type;
1652 PL_sublex_info.sub_op = PL_lex_op;
1653 PL_lex_state = LEX_INTERPPUSH;
1657 yylval.opval = PL_lex_op;
1667 * Create a new scope to save the lexing state. The scope will be
1668 * ended in S_sublex_done. Returns a '(', starting the function arguments
1669 * to the uc, lc, etc. found before.
1670 * Sets PL_lex_state to LEX_INTERPCONCAT.
1679 PL_lex_state = PL_sublex_info.super_state;
1680 SAVEBOOL(PL_lex_dojoin);
1681 SAVEI32(PL_lex_brackets);
1682 SAVEI32(PL_lex_casemods);
1683 SAVEI32(PL_lex_starts);
1684 SAVEI8(PL_lex_state);
1685 SAVEVPTR(PL_lex_inpat);
1686 SAVEI16(PL_lex_inwhat);
1687 SAVECOPLINE(PL_curcop);
1688 SAVEPPTR(PL_bufptr);
1689 SAVEPPTR(PL_bufend);
1690 SAVEPPTR(PL_oldbufptr);
1691 SAVEPPTR(PL_oldoldbufptr);
1692 SAVEPPTR(PL_last_lop);
1693 SAVEPPTR(PL_last_uni);
1694 SAVEPPTR(PL_linestart);
1695 SAVESPTR(PL_linestr);
1696 SAVEGENERICPV(PL_lex_brackstack);
1697 SAVEGENERICPV(PL_lex_casestack);
1699 PL_linestr = PL_lex_stuff;
1700 PL_lex_stuff = NULL;
1702 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1703 = SvPVX(PL_linestr);
1704 PL_bufend += SvCUR(PL_linestr);
1705 PL_last_lop = PL_last_uni = NULL;
1706 SAVEFREESV(PL_linestr);
1708 PL_lex_dojoin = FALSE;
1709 PL_lex_brackets = 0;
1710 Newx(PL_lex_brackstack, 120, char);
1711 Newx(PL_lex_casestack, 12, char);
1712 PL_lex_casemods = 0;
1713 *PL_lex_casestack = '\0';
1715 PL_lex_state = LEX_INTERPCONCAT;
1716 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1718 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1719 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1720 PL_lex_inpat = PL_sublex_info.sub_op;
1722 PL_lex_inpat = NULL;
1729 * Restores lexer state after a S_sublex_push.
1736 if (!PL_lex_starts++) {
1737 SV * const sv = newSVpvs("");
1738 if (SvUTF8(PL_linestr))
1740 PL_expect = XOPERATOR;
1741 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1745 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1746 PL_lex_state = LEX_INTERPCASEMOD;
1750 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1751 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1752 PL_linestr = PL_lex_repl;
1754 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1755 PL_bufend += SvCUR(PL_linestr);
1756 PL_last_lop = PL_last_uni = NULL;
1757 SAVEFREESV(PL_linestr);
1758 PL_lex_dojoin = FALSE;
1759 PL_lex_brackets = 0;
1760 PL_lex_casemods = 0;
1761 *PL_lex_casestack = '\0';
1763 if (SvEVALED(PL_lex_repl)) {
1764 PL_lex_state = LEX_INTERPNORMAL;
1766 /* we don't clear PL_lex_repl here, so that we can check later
1767 whether this is an evalled subst; that means we rely on the
1768 logic to ensure sublex_done() is called again only via the
1769 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1772 PL_lex_state = LEX_INTERPCONCAT;
1782 PL_endwhite = newSVpvs("");
1783 sv_catsv(PL_endwhite, PL_thiswhite);
1787 sv_setpvn(PL_thistoken,"",0);
1789 PL_realtokenstart = -1;
1793 PL_bufend = SvPVX(PL_linestr);
1794 PL_bufend += SvCUR(PL_linestr);
1795 PL_expect = XOPERATOR;
1796 PL_sublex_info.sub_inwhat = 0;
1804 Extracts a pattern, double-quoted string, or transliteration. This
1807 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1808 processing a pattern (PL_lex_inpat is true), a transliteration
1809 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1811 Returns a pointer to the character scanned up to. If this is
1812 advanced from the start pointer supplied (i.e. if anything was
1813 successfully parsed), will leave an OP for the substring scanned
1814 in yylval. Caller must intuit reason for not parsing further
1815 by looking at the next characters herself.
1819 double-quoted style: \r and \n
1820 regexp special ones: \D \s
1823 case and quoting: \U \Q \E
1824 stops on @ and $, but not for $ as tail anchor
1826 In transliterations:
1827 characters are VERY literal, except for - not at the start or end
1828 of the string, which indicates a range. If the range is in bytes,
1829 scan_const expands the range to the full set of intermediate
1830 characters. If the range is in utf8, the hyphen is replaced with
1831 a certain range mark which will be handled by pmtrans() in op.c.
1833 In double-quoted strings:
1835 double-quoted style: \r and \n
1837 deprecated backrefs: \1 (in substitution replacements)
1838 case and quoting: \U \Q \E
1841 scan_const does *not* construct ops to handle interpolated strings.
1842 It stops processing as soon as it finds an embedded $ or @ variable
1843 and leaves it to the caller to work out what's going on.
1845 embedded arrays (whether in pattern or not) could be:
1846 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1848 $ in double-quoted strings must be the symbol of an embedded scalar.
1850 $ in pattern could be $foo or could be tail anchor. Assumption:
1851 it's a tail anchor if $ is the last thing in the string, or if it's
1852 followed by one of "()| \r\n\t"
1854 \1 (backreferences) are turned into $1
1856 The structure of the code is
1857 while (there's a character to process) {
1858 handle transliteration ranges
1859 skip regexp comments /(?#comment)/ and codes /(?{code})/
1860 skip #-initiated comments in //x patterns
1861 check for embedded arrays
1862 check for embedded scalars
1864 leave intact backslashes from leaveit (below)
1865 deprecate \1 in substitution replacements
1866 handle string-changing backslashes \l \U \Q \E, etc.
1867 switch (what was escaped) {
1868 handle \- in a transliteration (becomes a literal -)
1869 handle \132 (octal characters)
1870 handle \x15 and \x{1234} (hex characters)
1871 handle \N{name} (named characters)
1872 handle \cV (control characters)
1873 handle printf-style backslashes (\f, \r, \n, etc)
1875 } (end if backslash)
1876 } (end while character to read)
1881 S_scan_const(pTHX_ char *start)
1884 register char *send = PL_bufend; /* end of the constant */
1885 SV *sv = newSV(send - start); /* sv for the constant */
1886 register char *s = start; /* start of the constant */
1887 register char *d = SvPVX(sv); /* destination for copies */
1888 bool dorange = FALSE; /* are we in a translit range? */
1889 bool didrange = FALSE; /* did we just finish a range? */
1890 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1891 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1894 UV literal_endpoint = 0;
1895 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1898 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1899 /* If we are doing a trans and we know we want UTF8 set expectation */
1900 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1901 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1905 while (s < send || dorange) {
1906 /* get transliterations out of the way (they're most literal) */
1907 if (PL_lex_inwhat == OP_TRANS) {
1908 /* expand a range A-Z to the full set of characters. AIE! */
1910 I32 i; /* current expanded character */
1911 I32 min; /* first character in range */
1912 I32 max; /* last character in range */
1923 char * const c = (char*)utf8_hop((U8*)d, -1);
1927 *c = (char)UTF_TO_NATIVE(0xff);
1928 /* mark the range as done, and continue */
1934 i = d - SvPVX_const(sv); /* remember current offset */
1937 SvLEN(sv) + (has_utf8 ?
1938 (512 - UTF_CONTINUATION_MARK +
1941 /* How many two-byte within 0..255: 128 in UTF-8,
1942 * 96 in UTF-8-mod. */
1944 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1946 d = SvPVX(sv) + i; /* refresh d after realloc */
1950 for (j = 0; j <= 1; j++) {
1951 char * const c = (char*)utf8_hop((U8*)d, -1);
1952 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1958 max = (U8)0xff; /* only to \xff */
1959 uvmax = uv; /* \x{100} to uvmax */
1961 d = c; /* eat endpoint chars */
1966 d -= 2; /* eat the first char and the - */
1967 min = (U8)*d; /* first char in range */
1968 max = (U8)d[1]; /* last char in range */
1975 "Invalid range \"%c-%c\" in transliteration operator",
1976 (char)min, (char)max);
1980 if (literal_endpoint == 2 &&
1981 ((isLOWER(min) && isLOWER(max)) ||
1982 (isUPPER(min) && isUPPER(max)))) {
1984 for (i = min; i <= max; i++)
1986 *d++ = NATIVE_TO_NEED(has_utf8,i);
1988 for (i = min; i <= max; i++)
1990 *d++ = NATIVE_TO_NEED(has_utf8,i);
1995 for (i = min; i <= max; i++)
1998 const U8 ch = (U8)NATIVE_TO_UTF(i);
1999 if (UNI_IS_INVARIANT(ch))
2002 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2003 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2012 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2014 *d++ = (char)UTF_TO_NATIVE(0xff);
2016 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2020 /* mark the range as done, and continue */
2024 literal_endpoint = 0;
2029 /* range begins (ignore - as first or last char) */
2030 else if (*s == '-' && s+1 < send && s != start) {
2032 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2039 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2049 literal_endpoint = 0;
2050 native_range = TRUE;
2055 /* if we get here, we're not doing a transliteration */
2057 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2058 except for the last char, which will be done separately. */
2059 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2061 while (s+1 < send && *s != ')')
2062 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2064 else if (s[2] == '{' /* This should match regcomp.c */
2065 || (s[2] == '?' && s[3] == '{'))
2068 char *regparse = s + (s[2] == '{' ? 3 : 4);
2071 while (count && (c = *regparse)) {
2072 if (c == '\\' && regparse[1])
2080 if (*regparse != ')')
2081 regparse--; /* Leave one char for continuation. */
2082 while (s < regparse)
2083 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2087 /* likewise skip #-initiated comments in //x patterns */
2088 else if (*s == '#' && PL_lex_inpat &&
2089 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2090 while (s+1 < send && *s != '\n')
2091 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2094 /* check for embedded arrays
2095 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2097 else if (*s == '@' && s[1]) {
2098 if (isALNUM_lazy_if(s+1,UTF))
2100 if (strchr(":'{$", s[1]))
2102 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2103 break; /* in regexp, neither @+ nor @- are interpolated */
2106 /* check for embedded scalars. only stop if we're sure it's a
2109 else if (*s == '$') {
2110 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2112 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2113 break; /* in regexp, $ might be tail anchor */
2116 /* End of else if chain - OP_TRANS rejoin rest */
2119 if (*s == '\\' && s+1 < send) {
2122 /* deprecate \1 in strings and substitution replacements */
2123 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2124 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2126 if (ckWARN(WARN_SYNTAX))
2127 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2132 /* string-change backslash escapes */
2133 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2137 /* skip any other backslash escapes in a pattern */
2138 else if (PL_lex_inpat) {
2139 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2140 goto default_action;
2143 /* if we get here, it's either a quoted -, or a digit */
2146 /* quoted - in transliterations */
2148 if (PL_lex_inwhat == OP_TRANS) {
2155 if ((isALPHA(*s) || isDIGIT(*s)) &&
2157 Perl_warner(aTHX_ packWARN(WARN_MISC),
2158 "Unrecognized escape \\%c passed through",
2160 /* default action is to copy the quoted character */
2161 goto default_action;
2164 /* \132 indicates an octal constant */
2165 case '0': case '1': case '2': case '3':
2166 case '4': case '5': case '6': case '7':
2170 uv = grok_oct(s, &len, &flags, NULL);
2173 goto NUM_ESCAPE_INSERT;
2175 /* \x24 indicates a hex constant */
2179 char* const e = strchr(s, '}');
2180 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2181 PERL_SCAN_DISALLOW_PREFIX;
2186 yyerror("Missing right brace on \\x{}");
2190 uv = grok_hex(s, &len, &flags, NULL);
2196 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2197 uv = grok_hex(s, &len, &flags, NULL);
2203 /* Insert oct or hex escaped character.
2204 * There will always enough room in sv since such
2205 * escapes will be longer than any UTF-8 sequence
2206 * they can end up as. */
2208 /* We need to map to chars to ASCII before doing the tests
2211 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2212 if (!has_utf8 && uv > 255) {
2213 /* Might need to recode whatever we have
2214 * accumulated so far if it contains any
2217 * (Can't we keep track of that and avoid
2218 * this rescan? --jhi)
2222 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2223 if (!NATIVE_IS_INVARIANT(*c)) {
2228 const STRLEN offset = d - SvPVX_const(sv);
2230 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2234 while (src >= (const U8 *)SvPVX_const(sv)) {
2235 if (!NATIVE_IS_INVARIANT(*src)) {
2236 const U8 ch = NATIVE_TO_ASCII(*src);
2237 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2238 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2248 if (has_utf8 || uv > 255) {
2249 d = (char*)uvchr_to_utf8((U8*)d, uv);
2251 if (PL_lex_inwhat == OP_TRANS &&
2252 PL_sublex_info.sub_op) {
2253 PL_sublex_info.sub_op->op_private |=
2254 (PL_lex_repl ? OPpTRANS_FROM_UTF
2258 if (uv > 255 && !dorange)
2259 native_range = FALSE;
2271 /* \N{LATIN SMALL LETTER A} is a named character */
2275 char* e = strchr(s, '}');
2281 yyerror("Missing right brace on \\N{}");
2285 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2287 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2288 PERL_SCAN_DISALLOW_PREFIX;
2291 uv = grok_hex(s, &len, &flags, NULL);
2292 if ( e > s && len != (STRLEN)(e - s) ) {
2296 goto NUM_ESCAPE_INSERT;
2298 res = newSVpvn(s + 1, e - s - 1);
2299 res = new_constant( NULL, 0, "charnames",
2300 res, NULL, s - 2, e - s + 3 );
2302 sv_utf8_upgrade(res);
2303 str = SvPV_const(res,len);
2304 #ifdef EBCDIC_NEVER_MIND
2305 /* charnames uses pack U and that has been
2306 * recently changed to do the below uni->native
2307 * mapping, so this would be redundant (and wrong,
2308 * the code point would be doubly converted).
2309 * But leave this in just in case the pack U change
2310 * gets revoked, but the semantics is still
2311 * desireable for charnames. --jhi */
2313 UV uv = utf8_to_uvchr((const U8*)str, 0);
2316 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2318 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2319 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2320 str = SvPV_const(res, len);
2324 if (!has_utf8 && SvUTF8(res)) {
2325 const char * const ostart = SvPVX_const(sv);
2326 SvCUR_set(sv, d - ostart);
2329 sv_utf8_upgrade(sv);
2330 /* this just broke our allocation above... */
2331 SvGROW(sv, (STRLEN)(send - start));
2332 d = SvPVX(sv) + SvCUR(sv);
2335 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2336 const char * const odest = SvPVX_const(sv);
2338 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2339 d = SvPVX(sv) + (d - odest);
2343 native_range = FALSE; /* \N{} is guessed to be Unicode */
2345 Copy(str, d, len, char);
2352 yyerror("Missing braces on \\N{}");
2355 /* \c is a control character */
2364 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2367 yyerror("Missing control char name in \\c");
2371 /* printf-style backslashes, formfeeds, newlines, etc */
2373 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2376 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2379 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2382 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2385 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2388 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2391 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2397 } /* end if (backslash) */
2404 /* If we started with encoded form, or already know we want it
2405 and then encode the next character */
2406 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2408 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2409 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2412 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2413 const STRLEN off = d - SvPVX_const(sv);
2414 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2416 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2419 if (uv > 255 && !dorange)
2420 native_range = FALSE;
2424 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2426 } /* while loop to process each character */
2428 /* terminate the string and set up the sv */
2430 SvCUR_set(sv, d - SvPVX_const(sv));
2431 if (SvCUR(sv) >= SvLEN(sv))
2432 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2435 if (PL_encoding && !has_utf8) {
2436 sv_recode_to_utf8(sv, PL_encoding);
2442 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2443 PL_sublex_info.sub_op->op_private |=
2444 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2448 /* shrink the sv if we allocated more than we used */
2449 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2450 SvPV_shrink_to_cur(sv);
2453 /* return the substring (via yylval) only if we parsed anything */
2454 if (s > PL_bufptr) {
2455 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2456 const char *const key = PL_lex_inpat ? "qr" : "q";
2457 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2461 if (PL_lex_inwhat == OP_TRANS) {
2464 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2472 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2475 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2482 * Returns TRUE if there's more to the expression (e.g., a subscript),
2485 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2487 * ->[ and ->{ return TRUE
2488 * { and [ outside a pattern are always subscripts, so return TRUE
2489 * if we're outside a pattern and it's not { or [, then return FALSE
2490 * if we're in a pattern and the first char is a {
2491 * {4,5} (any digits around the comma) returns FALSE
2492 * if we're in a pattern and the first char is a [
2494 * [SOMETHING] has a funky algorithm to decide whether it's a
2495 * character class or not. It has to deal with things like
2496 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2497 * anything else returns TRUE
2500 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2503 S_intuit_more(pTHX_ register char *s)
2506 if (PL_lex_brackets)
2508 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2510 if (*s != '{' && *s != '[')
2515 /* In a pattern, so maybe we have {n,m}. */
2532 /* On the other hand, maybe we have a character class */
2535 if (*s == ']' || *s == '^')
2538 /* this is terrifying, and it works */
2539 int weight = 2; /* let's weigh the evidence */
2541 unsigned char un_char = 255, last_un_char;
2542 const char * const send = strchr(s,']');
2543 char tmpbuf[sizeof PL_tokenbuf * 4];
2545 if (!send) /* has to be an expression */
2548 Zero(seen,256,char);
2551 else if (isDIGIT(*s)) {
2553 if (isDIGIT(s[1]) && s[2] == ']')
2559 for (; s < send; s++) {
2560 last_un_char = un_char;
2561 un_char = (unsigned char)*s;
2566 weight -= seen[un_char] * 10;
2567 if (isALNUM_lazy_if(s+1,UTF)) {
2569 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2570 len = (int)strlen(tmpbuf);
2571 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2576 else if (*s == '$' && s[1] &&
2577 strchr("[#!%*<>()-=",s[1])) {
2578 if (/*{*/ strchr("])} =",s[2]))
2587 if (strchr("wds]",s[1]))
2589 else if (seen[(U8)'\''] || seen[(U8)'"'])
2591 else if (strchr("rnftbxcav",s[1]))
2593 else if (isDIGIT(s[1])) {
2595 while (s[1] && isDIGIT(s[1]))
2605 if (strchr("aA01! ",last_un_char))
2607 if (strchr("zZ79~",s[1]))
2609 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2610 weight -= 5; /* cope with negative subscript */
2613 if (!isALNUM(last_un_char)
2614 && !(last_un_char == '$' || last_un_char == '@'
2615 || last_un_char == '&')
2616 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2621 if (keyword(tmpbuf, d - tmpbuf, 0))
2624 if (un_char == last_un_char + 1)
2626 weight -= seen[un_char];
2631 if (weight >= 0) /* probably a character class */
2641 * Does all the checking to disambiguate
2643 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2644 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2646 * First argument is the stuff after the first token, e.g. "bar".
2648 * Not a method if bar is a filehandle.
2649 * Not a method if foo is a subroutine prototyped to take a filehandle.
2650 * Not a method if it's really "Foo $bar"
2651 * Method if it's "foo $bar"
2652 * Not a method if it's really "print foo $bar"
2653 * Method if it's really "foo package::" (interpreted as package->foo)
2654 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2655 * Not a method if bar is a filehandle or package, but is quoted with
2660 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2663 char *s = start + (*start == '$');
2664 char tmpbuf[sizeof PL_tokenbuf];
2672 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2676 const char *proto = SvPVX_const(cv);
2687 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2688 /* start is the beginning of the possible filehandle/object,
2689 * and s is the end of it
2690 * tmpbuf is a copy of it
2693 if (*start == '$') {
2694 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2695 isUPPER(*PL_tokenbuf))
2698 len = start - SvPVX(PL_linestr);
2702 start = SvPVX(PL_linestr) + len;
2706 return *s == '(' ? FUNCMETH : METHOD;
2708 if (!keyword(tmpbuf, len, 0)) {
2709 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2713 soff = s - SvPVX(PL_linestr);
2717 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2718 if (indirgv && GvCVu(indirgv))
2720 /* filehandle or package name makes it a method */
2721 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2723 soff = s - SvPVX(PL_linestr);
2726 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2727 return 0; /* no assumptions -- "=>" quotes bearword */
2729 start_force(PL_curforce);
2730 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2731 newSVpvn(tmpbuf,len));
2732 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2734 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2739 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2741 return *s == '(' ? FUNCMETH : METHOD;
2749 * Return a string of Perl code to load the debugger. If PERL5DB
2750 * is set, it will return the contents of that, otherwise a
2751 * compile-time require of perl5db.pl.
2759 const char * const pdb = PerlEnv_getenv("PERL5DB");
2763 SETERRNO(0,SS_NORMAL);
2764 return "BEGIN { require 'perl5db.pl' }";
2770 /* Encoded script support. filter_add() effectively inserts a
2771 * 'pre-processing' function into the current source input stream.
2772 * Note that the filter function only applies to the current source file
2773 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2775 * The datasv parameter (which may be NULL) can be used to pass
2776 * private data to this instance of the filter. The filter function
2777 * can recover the SV using the FILTER_DATA macro and use it to
2778 * store private buffers and state information.
2780 * The supplied datasv parameter is upgraded to a PVIO type
2781 * and the IoDIRP/IoANY field is used to store the function pointer,
2782 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2783 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2784 * private use must be set using malloc'd pointers.
2788 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2797 if (!PL_rsfp_filters)
2798 PL_rsfp_filters = newAV();
2801 SvUPGRADE(datasv, SVt_PVIO);
2802 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2803 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2804 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2805 FPTR2DPTR(void *, IoANY(datasv)),
2806 SvPV_nolen(datasv)));
2807 av_unshift(PL_rsfp_filters, 1);
2808 av_store(PL_rsfp_filters, 0, datasv) ;
2813 /* Delete most recently added instance of this filter function. */
2815 Perl_filter_del(pTHX_ filter_t funcp)
2821 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2822 FPTR2DPTR(void*, funcp)));
2824 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2826 /* if filter is on top of stack (usual case) just pop it off */
2827 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2828 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2829 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2830 IoANY(datasv) = (void *)NULL;
2831 sv_free(av_pop(PL_rsfp_filters));
2835 /* we need to search for the correct entry and clear it */
2836 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2840 /* Invoke the idxth filter function for the current rsfp. */
2841 /* maxlen 0 = read one text line */
2843 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2848 /* This API is bad. It should have been using unsigned int for maxlen.
2849 Not sure if we want to change the API, but if not we should sanity
2850 check the value here. */
2851 const unsigned int correct_length
2860 if (!PL_parser || !PL_rsfp_filters)
2862 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2863 /* Provide a default input filter to make life easy. */
2864 /* Note that we append to the line. This is handy. */
2865 DEBUG_P(PerlIO_printf(Perl_debug_log,
2866 "filter_read %d: from rsfp\n", idx));
2867 if (correct_length) {
2870 const int old_len = SvCUR(buf_sv);
2872 /* ensure buf_sv is large enough */
2873 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2874 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2875 correct_length)) <= 0) {
2876 if (PerlIO_error(PL_rsfp))
2877 return -1; /* error */
2879 return 0 ; /* end of file */
2881 SvCUR_set(buf_sv, old_len + len) ;
2884 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2885 if (PerlIO_error(PL_rsfp))
2886 return -1; /* error */
2888 return 0 ; /* end of file */
2891 return SvCUR(buf_sv);
2893 /* Skip this filter slot if filter has been deleted */
2894 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2895 DEBUG_P(PerlIO_printf(Perl_debug_log,
2896 "filter_read %d: skipped (filter deleted)\n",
2898 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2900 /* Get function pointer hidden within datasv */
2901 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2902 DEBUG_P(PerlIO_printf(Perl_debug_log,
2903 "filter_read %d: via function %p (%s)\n",
2904 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2905 /* Call function. The function is expected to */
2906 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2907 /* Return: <0:error, =0:eof, >0:not eof */
2908 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2912 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2915 #ifdef PERL_CR_FILTER
2916 if (!PL_rsfp_filters) {
2917 filter_add(S_cr_textfilter,NULL);
2920 if (PL_rsfp_filters) {
2922 SvCUR_set(sv, 0); /* start with empty line */
2923 if (FILTER_READ(0, sv, 0) > 0)
2924 return ( SvPVX(sv) ) ;
2929 return (sv_gets(sv, fp, append));
2933 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2938 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2942 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2943 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2945 return GvHV(gv); /* Foo:: */
2948 /* use constant CLASS => 'MyClass' */
2949 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2950 if (gv && GvCV(gv)) {
2951 SV * const sv = cv_const_sv(GvCV(gv));
2953 pkgname = SvPV_nolen_const(sv);
2956 return gv_stashpv(pkgname, 0);
2960 * S_readpipe_override
2961 * Check whether readpipe() is overriden, and generates the appropriate
2962 * optree, provided sublex_start() is called afterwards.
2965 S_readpipe_override(pTHX)
2968 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2969 yylval.ival = OP_BACKTICK;
2971 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2973 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2974 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
2975 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2977 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2978 append_elem(OP_LIST,
2979 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2980 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2987 * The intent of this yylex wrapper is to minimize the changes to the
2988 * tokener when we aren't interested in collecting madprops. It remains
2989 * to be seen how successful this strategy will be...
2996 char *s = PL_bufptr;
2998 /* make sure PL_thiswhite is initialized */
3002 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3003 if (PL_pending_ident)
3004 return S_pending_ident(aTHX);
3006 /* previous token ate up our whitespace? */
3007 if (!PL_lasttoke && PL_nextwhite) {
3008 PL_thiswhite = PL_nextwhite;
3012 /* isolate the token, and figure out where it is without whitespace */
3013 PL_realtokenstart = -1;
3017 assert(PL_curforce < 0);
3019 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3020 if (!PL_thistoken) {
3021 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3022 PL_thistoken = newSVpvs("");
3024 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3025 PL_thistoken = newSVpvn(tstart, s - tstart);
3028 if (PL_thismad) /* install head */
3029 CURMAD('X', PL_thistoken);
3032 /* last whitespace of a sublex? */
3033 if (optype == ')' && PL_endwhite) {
3034 CURMAD('X', PL_endwhite);
3039 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3040 if (!PL_thiswhite && !PL_endwhite && !optype) {
3041 sv_free(PL_thistoken);
3046 /* put off final whitespace till peg */
3047 if (optype == ';' && !PL_rsfp) {
3048 PL_nextwhite = PL_thiswhite;
3051 else if (PL_thisopen) {
3052 CURMAD('q', PL_thisopen);
3054 sv_free(PL_thistoken);
3058 /* Store actual token text as madprop X */
3059 CURMAD('X', PL_thistoken);
3063 /* add preceding whitespace as madprop _ */
3064 CURMAD('_', PL_thiswhite);
3068 /* add quoted material as madprop = */
3069 CURMAD('=', PL_thisstuff);
3073 /* add terminating quote as madprop Q */
3074 CURMAD('Q', PL_thisclose);
3078 /* special processing based on optype */
3082 /* opval doesn't need a TOKEN since it can already store mp */
3093 append_madprops(PL_thismad, yylval.opval, 0);
3101 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3110 /* remember any fake bracket that lexer is about to discard */
3111 if (PL_lex_brackets == 1 &&
3112 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3115 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3118 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3119 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3122 break; /* don't bother looking for trailing comment */
3131 /* attach a trailing comment to its statement instead of next token */
3135 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3137 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3139 if (*s == '\n' || *s == '#') {
3140 while (s < PL_bufend && *s != '\n')
3144 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3145 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3162 /* Create new token struct. Note: opvals return early above. */
3163 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3170 S_tokenize_use(pTHX_ int is_use, char *s) {
3172 if (PL_expect != XSTATE)
3173 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3174 is_use ? "use" : "no"));
3176 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3177 s = force_version(s, TRUE);
3178 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3179 start_force(PL_curforce);
3180 NEXTVAL_NEXTTOKE.opval = NULL;
3183 else if (*s == 'v') {
3184 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3185 s = force_version(s, FALSE);
3189 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3190 s = force_version(s, FALSE);
3192 yylval.ival = is_use;
3196 static const char* const exp_name[] =
3197 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3198 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3205 Works out what to call the token just pulled out of the input
3206 stream. The yacc parser takes care of taking the ops we return and
3207 stitching them into a tree.
3213 if read an identifier
3214 if we're in a my declaration
3215 croak if they tried to say my($foo::bar)
3216 build the ops for a my() declaration
3217 if it's an access to a my() variable
3218 are we in a sort block?
3219 croak if my($a); $a <=> $b
3220 build ops for access to a my() variable
3221 if in a dq string, and they've said @foo and we can't find @foo
3223 build ops for a bareword
3224 if we already built the token before, use it.
3229 #pragma segment Perl_yylex
3235 register char *s = PL_bufptr;
3240 /* orig_keyword, gvp, and gv are initialized here because
3241 * jump to the label just_a_word_zero can bypass their
3242 * initialization later. */
3243 I32 orig_keyword = 0;
3248 SV* tmp = newSVpvs("");
3249 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3250 (IV)CopLINE(PL_curcop),
3251 lex_state_names[PL_lex_state],
3252 exp_name[PL_expect],
3253 pv_display(tmp, s, strlen(s), 0, 60));
3256 /* check if there's an identifier for us to look at */
3257 if (PL_pending_ident)
3258 return REPORT(S_pending_ident(aTHX));
3260 /* no identifier pending identification */
3262 switch (PL_lex_state) {
3264 case LEX_NORMAL: /* Some compilers will produce faster */
3265 case LEX_INTERPNORMAL: /* code if we comment these out. */
3269 /* when we've already built the next token, just pull it out of the queue */
3273 yylval = PL_nexttoke[PL_lasttoke].next_val;
3275 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3276 PL_nexttoke[PL_lasttoke].next_mad = 0;
3277 if (PL_thismad && PL_thismad->mad_key == '_') {
3278 PL_thiswhite = (SV*)PL_thismad->mad_val;
3279 PL_thismad->mad_val = 0;
3280 mad_free(PL_thismad);
3285 PL_lex_state = PL_lex_defer;
3286 PL_expect = PL_lex_expect;
3287 PL_lex_defer = LEX_NORMAL;
3288 if (!PL_nexttoke[PL_lasttoke].next_type)
3293 yylval = PL_nextval[PL_nexttoke];
3295 PL_lex_state = PL_lex_defer;
3296 PL_expect = PL_lex_expect;
3297 PL_lex_defer = LEX_NORMAL;
3301 /* FIXME - can these be merged? */
3302 return(PL_nexttoke[PL_lasttoke].next_type);
3304 return REPORT(PL_nexttype[PL_nexttoke]);
3307 /* interpolated case modifiers like \L \U, including \Q and \E.
3308 when we get here, PL_bufptr is at the \
3310 case LEX_INTERPCASEMOD:
3312 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3313 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3315 /* handle \E or end of string */
3316 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3318 if (PL_lex_casemods) {
3319 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3320 PL_lex_casestack[PL_lex_casemods] = '\0';
3322 if (PL_bufptr != PL_bufend
3323 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3325 PL_lex_state = LEX_INTERPCONCAT;
3328 PL_thistoken = newSVpvs("\\E");
3334 while (PL_bufptr != PL_bufend &&
3335 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3337 PL_thiswhite = newSVpvs("");
3338 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3342 if (PL_bufptr != PL_bufend)
3345 PL_lex_state = LEX_INTERPCONCAT;
3349 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3350 "### Saw case modifier\n"); });
3352 if (s[1] == '\\' && s[2] == 'E') {
3355 PL_thiswhite = newSVpvs("");
3356 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3359 PL_lex_state = LEX_INTERPCONCAT;
3364 if (!PL_madskills) /* when just compiling don't need correct */
3365 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3366 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3367 if ((*s == 'L' || *s == 'U') &&
3368 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3369 PL_lex_casestack[--PL_lex_casemods] = '\0';
3372 if (PL_lex_casemods > 10)
3373 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3374 PL_lex_casestack[PL_lex_casemods++] = *s;
3375 PL_lex_casestack[PL_lex_casemods] = '\0';
3376 PL_lex_state = LEX_INTERPCONCAT;
3377 start_force(PL_curforce);
3378 NEXTVAL_NEXTTOKE.ival = 0;
3380 start_force(PL_curforce);
3382 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3384 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3386 NEXTVAL_NEXTTOKE.ival = OP_LC;
3388 NEXTVAL_NEXTTOKE.ival = OP_UC;
3390 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3392 Perl_croak(aTHX_ "panic: yylex");
3394 SV* const tmpsv = newSVpvs("\\ ");
3395 /* replace the space with the character we want to escape
3397 SvPVX(tmpsv)[1] = *s;
3403 if (PL_lex_starts) {
3409 sv_free(PL_thistoken);
3410 PL_thistoken = newSVpvs("");
3413 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3414 if (PL_lex_casemods == 1 && PL_lex_inpat)
3423 case LEX_INTERPPUSH:
3424 return REPORT(sublex_push());
3426 case LEX_INTERPSTART:
3427 if (PL_bufptr == PL_bufend)
3428 return REPORT(sublex_done());
3429 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3430 "### Interpolated variable\n"); });
3432 PL_lex_dojoin = (*PL_bufptr == '@');
3433 PL_lex_state = LEX_INTERPNORMAL;
3434 if (PL_lex_dojoin) {
3435 start_force(PL_curforce);
3436 NEXTVAL_NEXTTOKE.ival = 0;
3438 start_force(PL_curforce);
3439 force_ident("\"", '$');
3440 start_force(PL_curforce);
3441 NEXTVAL_NEXTTOKE.ival = 0;
3443 start_force(PL_curforce);
3444 NEXTVAL_NEXTTOKE.ival = 0;
3446 start_force(PL_curforce);
3447 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3450 if (PL_lex_starts++) {
3455 sv_free(PL_thistoken);
3456 PL_thistoken = newSVpvs("");
3459 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3460 if (!PL_lex_casemods && PL_lex_inpat)
3467 case LEX_INTERPENDMAYBE:
3468 if (intuit_more(PL_bufptr)) {
3469 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3475 if (PL_lex_dojoin) {
3476 PL_lex_dojoin = FALSE;
3477 PL_lex_state = LEX_INTERPCONCAT;
3481 sv_free(PL_thistoken);
3482 PL_thistoken = newSVpvs("");
3487 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3488 && SvEVALED(PL_lex_repl))
3490 if (PL_bufptr != PL_bufend)
3491 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3495 case LEX_INTERPCONCAT:
3497 if (PL_lex_brackets)
3498 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3500 if (PL_bufptr == PL_bufend)
3501 return REPORT(sublex_done());
3503 if (SvIVX(PL_linestr) == '\'') {
3504 SV *sv = newSVsv(PL_linestr);
3507 else if ( PL_hints & HINT_NEW_RE )
3508 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3509 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3513 s = scan_const(PL_bufptr);
3515 PL_lex_state = LEX_INTERPCASEMOD;
3517 PL_lex_state = LEX_INTERPSTART;
3520 if (s != PL_bufptr) {
3521 start_force(PL_curforce);
3523 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3525 NEXTVAL_NEXTTOKE = yylval;
3528 if (PL_lex_starts++) {
3532 sv_free(PL_thistoken);
3533 PL_thistoken = newSVpvs("");
3536 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3537 if (!PL_lex_casemods && PL_lex_inpat)
3550 PL_lex_state = LEX_NORMAL;
3551 s = scan_formline(PL_bufptr);
3552 if (!PL_lex_formbrack)
3558 PL_oldoldbufptr = PL_oldbufptr;
3564 sv_free(PL_thistoken);
3567 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3571 if (isIDFIRST_lazy_if(s,UTF))
3573 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3574 Perl_croak(aTHX_ "Unrecognized character \\x%02X in column %d", *s & 255, (int) len + 1);
3577 goto fake_eof; /* emulate EOF on ^D or ^Z */
3586 if (PL_lex_brackets) {
3587 yyerror((const char *)
3589 ? "Format not terminated"
3590 : "Missing right curly or square bracket"));
3592 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3593 "### Tokener got EOF\n");
3597 if (s++ < PL_bufend)
3598 goto retry; /* ignore stray nulls */
3601 if (!PL_in_eval && !PL_preambled) {
3602 PL_preambled = TRUE;
3607 sv_setpv(PL_linestr,incl_perldb());
3608 if (SvCUR(PL_linestr))
3609 sv_catpvs(PL_linestr,";");
3611 while(AvFILLp(PL_preambleav) >= 0) {
3612 SV *tmpsv = av_shift(PL_preambleav);
3613 sv_catsv(PL_linestr, tmpsv);
3614 sv_catpvs(PL_linestr, ";");
3617 sv_free((SV*)PL_preambleav);
3618 PL_preambleav = NULL;
3620 if (PL_minus_n || PL_minus_p) {
3621 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3623 sv_catpvs(PL_linestr,"chomp;");
3626 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3627 || *PL_splitstr == '"')
3628 && strchr(PL_splitstr + 1, *PL_splitstr))
3629 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3631 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3632 bytes can be used as quoting characters. :-) */
3633 const char *splits = PL_splitstr;
3634 sv_catpvs(PL_linestr, "our @F=split(q\0");
3637 if (*splits == '\\')
3638 sv_catpvn(PL_linestr, splits, 1);
3639 sv_catpvn(PL_linestr, splits, 1);
3640 } while (*splits++);
3641 /* This loop will embed the trailing NUL of
3642 PL_linestr as the last thing it does before
3644 sv_catpvs(PL_linestr, ");");
3648 sv_catpvs(PL_linestr,"our @F=split(' ');");
3652 sv_catpvs(PL_linestr,"use feature ':5.10';");
3653 sv_catpvs(PL_linestr, "\n");
3654 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3655 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3656 PL_last_lop = PL_last_uni = NULL;
3657 if (PERLDB_LINE && PL_curstash != PL_debstash)
3658 update_debugger_info(PL_linestr, NULL, 0);
3662 bof = PL_rsfp ? TRUE : FALSE;
3663 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3666 PL_realtokenstart = -1;
3669 if (PL_preprocess && !PL_in_eval)
3670 (void)PerlProc_pclose(PL_rsfp);
3671 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3672 PerlIO_clearerr(PL_rsfp);
3674 (void)PerlIO_close(PL_rsfp);
3676 PL_doextract = FALSE;
3678 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3683 sv_setpv(PL_linestr,
3686 ? ";}continue{print;}" : ";}"));
3687 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3688 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3689 PL_last_lop = PL_last_uni = NULL;
3690 PL_minus_n = PL_minus_p = 0;
3693 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3694 PL_last_lop = PL_last_uni = NULL;
3695 sv_setpvn(PL_linestr,"",0);
3696 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3698 /* If it looks like the start of a BOM or raw UTF-16,
3699 * check if it in fact is. */
3705 #ifdef PERLIO_IS_STDIO
3706 # ifdef __GNU_LIBRARY__
3707 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3708 # define FTELL_FOR_PIPE_IS_BROKEN
3712 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3713 # define FTELL_FOR_PIPE_IS_BROKEN
3718 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3719 /* This loses the possibility to detect the bof
3720 * situation on perl -P when the libc5 is being used.
3721 * Workaround? Maybe attach some extra state to PL_rsfp?
3724 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3726 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3729 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3730 s = swallow_bom((U8*)s);
3734 /* Incest with pod. */
3737 sv_catsv(PL_thiswhite, PL_linestr);
3739 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3740 sv_setpvn(PL_linestr, "", 0);
3741 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3742 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3743 PL_last_lop = PL_last_uni = NULL;
3744 PL_doextract = FALSE;
3748 } while (PL_doextract);
3749 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3750 if (PERLDB_LINE && PL_curstash != PL_debstash)
3751 update_debugger_info(PL_linestr, NULL, 0);
3752 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3753 PL_last_lop = PL_last_uni = NULL;
3754 if (CopLINE(PL_curcop) == 1) {
3755 while (s < PL_bufend && isSPACE(*s))
3757 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3761 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3765 if (*s == '#' && *(s+1) == '!')
3767 #ifdef ALTERNATE_SHEBANG
3769 static char const as[] = ALTERNATE_SHEBANG;
3770 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3771 d = s + (sizeof(as) - 1);
3773 #endif /* ALTERNATE_SHEBANG */
3782 while (*d && !isSPACE(*d))
3786 #ifdef ARG_ZERO_IS_SCRIPT
3787 if (ipathend > ipath) {
3789 * HP-UX (at least) sets argv[0] to the script name,
3790 * which makes $^X incorrect. And Digital UNIX and Linux,
3791 * at least, set argv[0] to the basename of the Perl
3792 * interpreter. So, having found "#!", we'll set it right.
3794 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3796 assert(SvPOK(x) || SvGMAGICAL(x));
3797 if (sv_eq(x, CopFILESV(PL_curcop))) {
3798 sv_setpvn(x, ipath, ipathend - ipath);
3804 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3805 const char * const lstart = SvPV_const(x,llen);
3807 bstart += blen - llen;
3808 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3809 sv_setpvn(x, ipath, ipathend - ipath);
3814 TAINT_NOT; /* $^X is always tainted, but that's OK */
3816 #endif /* ARG_ZERO_IS_SCRIPT */
3821 d = instr(s,"perl -");
3823 d = instr(s,"perl");
3825 /* avoid getting into infinite loops when shebang
3826 * line contains "Perl" rather than "perl" */
3828 for (d = ipathend-4; d >= ipath; --d) {
3829 if ((*d == 'p' || *d == 'P')
3830 && !ibcmp(d, "perl", 4))
3840 #ifdef ALTERNATE_SHEBANG
3842 * If the ALTERNATE_SHEBANG on this system starts with a
3843 * character that can be part of a Perl expression, then if
3844 * we see it but not "perl", we're probably looking at the
3845 * start of Perl code, not a request to hand off to some
3846 * other interpreter. Similarly, if "perl" is there, but
3847 * not in the first 'word' of the line, we assume the line
3848 * contains the start of the Perl program.
3850 if (d && *s != '#') {
3851 const char *c = ipath;
3852 while (*c && !strchr("; \t\r\n\f\v#", *c))
3855 d = NULL; /* "perl" not in first word; ignore */
3857 *s = '#'; /* Don't try to parse shebang line */
3859 #endif /* ALTERNATE_SHEBANG */
3860 #ifndef MACOS_TRADITIONAL
3865 !instr(s,"indir") &&
3866 instr(PL_origargv[0],"perl"))
3873 while (s < PL_bufend && isSPACE(*s))
3875 if (s < PL_bufend) {
3876 Newxz(newargv,PL_origargc+3,char*);
3878 while (s < PL_bufend && !isSPACE(*s))
3881 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3884 newargv = PL_origargv;
3887 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3889 Perl_croak(aTHX_ "Can't exec %s", ipath);
3893 while (*d && !isSPACE(*d))
3895 while (SPACE_OR_TAB(*d))
3899 const bool switches_done = PL_doswitches;
3900 const U32 oldpdb = PL_perldb;
3901 const bool oldn = PL_minus_n;
3902 const bool oldp = PL_minus_p;
3906 if (*d1 == 'M' || *d1 == 'm' || *d1 == 'C') {
3907 const char * const m = d1;
3908 while (*d1 && !isSPACE(*d1))
3910 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3913 d1 = moreswitches(d1);
3915 if (PL_doswitches && !switches_done) {
3916 int argc = PL_origargc;
3917 char **argv = PL_origargv;
3920 } while (argc && argv[0][0] == '-' && argv[0][1]);
3921 init_argv_symbols(argc,argv);
3923 if ((PERLDB_LINE && !oldpdb) ||
3924 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3925 /* if we have already added "LINE: while (<>) {",
3926 we must not do it again */
3928 sv_setpvn(PL_linestr, "", 0);
3929 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3930 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3931 PL_last_lop = PL_last_uni = NULL;
3932 PL_preambled = FALSE;
3934 (void)gv_fetchfile(PL_origfilename);
3941 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3943 PL_lex_state = LEX_FORMLINE;
3948 #ifdef PERL_STRICT_CR
3949 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3951 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3953 case ' ': case '\t': case '\f': case 013:
3954 #ifdef MACOS_TRADITIONAL
3958 PL_realtokenstart = -1;
3960 PL_thiswhite = newSVpvs("");
3961 sv_catpvn(PL_thiswhite, s, 1);
3968 PL_realtokenstart = -1;
3972 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3973 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3974 /* handle eval qq[#line 1 "foo"\n ...] */
3975 CopLINE_dec(PL_curcop);
3978 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3980 if (!PL_in_eval || PL_rsfp)
3985 while (d < PL_bufend && *d != '\n')
3989 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3990 Perl_croak(aTHX_ "panic: input overflow");
3993 PL_thiswhite = newSVpvn(s, d - s);
3998 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4000 PL_lex_state = LEX_FORMLINE;
4006 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4007 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4010 TOKEN(PEG); /* make sure any #! line is accessible */
4015 /* if (PL_madskills && PL_lex_formbrack) { */
4017 while (d < PL_bufend && *d != '\n')
4021 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4022 Perl_croak(aTHX_ "panic: input overflow");
4023 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4025 PL_thiswhite = newSVpvs("");
4026 if (CopLINE(PL_curcop) == 1) {
4027 sv_setpvn(PL_thiswhite, "", 0);
4030 sv_catpvn(PL_thiswhite, s, d - s);
4044 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4052 while (s < PL_bufend && SPACE_OR_TAB(*s))
4055 if (strnEQ(s,"=>",2)) {
4056 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4057 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4058 OPERATOR('-'); /* unary minus */
4060 PL_last_uni = PL_oldbufptr;
4062 case 'r': ftst = OP_FTEREAD; break;
4063 case 'w': ftst = OP_FTEWRITE; break;
4064 case 'x': ftst = OP_FTEEXEC; break;
4065 case 'o': ftst = OP_FTEOWNED; break;
4066 case 'R': ftst = OP_FTRREAD; break;
4067 case 'W': ftst = OP_FTRWRITE; break;
4068 case 'X': ftst = OP_FTREXEC; break;
4069 case 'O': ftst = OP_FTROWNED; break;
4070 case 'e': ftst = OP_FTIS; break;
4071 case 'z': ftst = OP_FTZERO; break;
4072 case 's': ftst = OP_FTSIZE; break;
4073 case 'f': ftst = OP_FTFILE; break;
4074 case 'd': ftst = OP_FTDIR; break;
4075 case 'l': ftst = OP_FTLINK; break;
4076 case 'p': ftst = OP_FTPIPE; break;
4077 case 'S': ftst = OP_FTSOCK; break;
4078 case 'u': ftst = OP_FTSUID; break;
4079 case 'g': ftst = OP_FTSGID; break;
4080 case 'k': ftst = OP_FTSVTX; break;
4081 case 'b': ftst = OP_FTBLK; break;
4082 case 'c': ftst = OP_FTCHR; break;
4083 case 't': ftst = OP_FTTTY; break;
4084 case 'T': ftst = OP_FTTEXT; break;
4085 case 'B': ftst = OP_FTBINARY; break;
4086 case 'M': case 'A': case 'C':
4087 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4089 case 'M': ftst = OP_FTMTIME; break;
4090 case 'A': ftst = OP_FTATIME; break;
4091 case 'C': ftst = OP_FTCTIME; break;
4099 PL_last_lop_op = (OPCODE)ftst;
4100 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4101 "### Saw file test %c\n", (int)tmp);
4106 /* Assume it was a minus followed by a one-letter named
4107 * subroutine call (or a -bareword), then. */
4108 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4109 "### '-%c' looked like a file test but was not\n",
4116 const char tmp = *s++;
4119 if (PL_expect == XOPERATOR)
4124 else if (*s == '>') {
4127 if (isIDFIRST_lazy_if(s,UTF)) {
4128 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4136 if (PL_expect == XOPERATOR)
4139 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4141 OPERATOR('-'); /* unary minus */
4147 const char tmp = *s++;
4150 if (PL_expect == XOPERATOR)
4155 if (PL_expect == XOPERATOR)
4158 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4165 if (PL_expect != XOPERATOR) {
4166 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4167 PL_expect = XOPERATOR;
4168 force_ident(PL_tokenbuf, '*');
4181 if (PL_expect == XOPERATOR) {
4185 PL_tokenbuf[0] = '%';
4186 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4187 sizeof PL_tokenbuf - 1, FALSE);
4188 if (!PL_tokenbuf[1]) {
4191 PL_pending_ident = '%';
4202 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4209 const char tmp = *s++;
4215 goto just_a_word_zero_gv;
4218 switch (PL_expect) {
4224 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4226 PL_bufptr = s; /* update in case we back off */
4232 PL_expect = XTERMBLOCK;
4235 stuffstart = s - SvPVX(PL_linestr) - 1;
4239 while (isIDFIRST_lazy_if(s,UTF)) {
4242 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4243 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4244 if (tmp < 0) tmp = -tmp;
4258 sv = newSVpvn(s, len);
4260 d = scan_str(d,TRUE,TRUE);
4262 /* MUST advance bufptr here to avoid bogus
4263 "at end of line" context messages from yyerror().
4265 PL_bufptr = s + len;
4266 yyerror("Unterminated attribute parameter in attribute list");
4270 return REPORT(0); /* EOF indicator */
4274 sv_catsv(sv, PL_lex_stuff);
4275 attrs = append_elem(OP_LIST, attrs,
4276 newSVOP(OP_CONST, 0, sv));
4277 SvREFCNT_dec(PL_lex_stuff);
4278 PL_lex_stuff = NULL;
4281 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4283 if (PL_in_my == KEY_our) {
4285 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4287 /* skip to avoid loading attributes.pm */
4289 deprecate(":unique");
4292 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4295 /* NOTE: any CV attrs applied here need to be part of
4296 the CVf_BUILTIN_ATTRS define in cv.h! */
4297 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4299 CvLVALUE_on(PL_compcv);
4301 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4303 CvLOCKED_on(PL_compcv);
4305 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4307 CvMETHOD_on(PL_compcv);
4309 /* After we've set the flags, it could be argued that
4310 we don't need to do the attributes.pm-based setting
4311 process, and shouldn't bother appending recognized
4312 flags. To experiment with that, uncomment the
4313 following "else". (Note that's already been
4314 uncommented. That keeps the above-applied built-in
4315 attributes from being intercepted (and possibly
4316 rejected) by a package's attribute routines, but is
4317 justified by the performance win for the common case
4318 of applying only built-in attributes.) */
4320 attrs = append_elem(OP_LIST, attrs,
4321 newSVOP(OP_CONST, 0,
4325 if (*s == ':' && s[1] != ':')
4328 break; /* require real whitespace or :'s */
4329 /* XXX losing whitespace on sequential attributes here */
4333 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4334 if (*s != ';' && *s != '}' && *s != tmp
4335 && (tmp != '=' || *s != ')')) {
4336 const char q = ((*s == '\'') ? '"' : '\'');
4337 /* If here for an expression, and parsed no attrs, back
4339 if (tmp == '=' && !attrs) {
4343 /* MUST advance bufptr here to avoid bogus "at end of line"
4344 context messages from yyerror().
4347 yyerror( (const char *)
4349 ? Perl_form(aTHX_ "Invalid separator character "
4350 "%c%c%c in attribute list", q, *s, q)
4351 : "Unterminated attribute list" ) );
4359 start_force(PL_curforce);
4360 NEXTVAL_NEXTTOKE.opval = attrs;
4361 CURMAD('_', PL_nextwhite);
4366 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4367 (s - SvPVX(PL_linestr)) - stuffstart);
4375 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4376 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4384 const char tmp = *s++;
4389 const char tmp = *s++;
4397 if (PL_lex_brackets <= 0)
4398 yyerror("Unmatched right square bracket");
4401 if (PL_lex_state == LEX_INTERPNORMAL) {
4402 if (PL_lex_brackets == 0) {
4403 if (*s == '-' && s[1] == '>')
4404 PL_lex_state = LEX_INTERPENDMAYBE;
4405 else if (*s != '[' && *s != '{')
4406 PL_lex_state = LEX_INTERPEND;
4413 if (PL_lex_brackets > 100) {
4414 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4416 switch (PL_expect) {
4418 if (PL_lex_formbrack) {
4422 if (PL_oldoldbufptr == PL_last_lop)
4423 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4425 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4426 OPERATOR(HASHBRACK);
4428 while (s < PL_bufend && SPACE_OR_TAB(*s))
4431 PL_tokenbuf[0] = '\0';
4432 if (d < PL_bufend && *d == '-') {
4433 PL_tokenbuf[0] = '-';
4435 while (d < PL_bufend && SPACE_OR_TAB(*d))
4438 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4439 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4441 while (d < PL_bufend && SPACE_OR_TAB(*d))
4444 const char minus = (PL_tokenbuf[0] == '-');
4445 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4453 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4458 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4463 if (PL_oldoldbufptr == PL_last_lop)
4464 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4466 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4469 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4471 /* This hack is to get the ${} in the message. */
4473 yyerror("syntax error");
4476 OPERATOR(HASHBRACK);
4478 /* This hack serves to disambiguate a pair of curlies
4479 * as being a block or an anon hash. Normally, expectation
4480 * determines that, but in cases where we're not in a
4481 * position to expect anything in particular (like inside
4482 * eval"") we have to resolve the ambiguity. This code
4483 * covers the case where the first term in the curlies is a
4484 * quoted string. Most other cases need to be explicitly
4485 * disambiguated by prepending a "+" before the opening
4486 * curly in order to force resolution as an anon hash.
4488 * XXX should probably propagate the outer expectation
4489 * into eval"" to rely less on this hack, but that could
4490 * potentially break current behavior of eval"".
4494 if (*s == '\'' || *s == '"' || *s == '`') {
4495 /* common case: get past first string, handling escapes */
4496 for (t++; t < PL_bufend && *t != *s;)
4497 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4501 else if (*s == 'q') {
4504 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4507 /* skip q//-like construct */
4509 char open, close, term;
4512 while (t < PL_bufend && isSPACE(*t))
4514 /* check for q => */
4515 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4516 OPERATOR(HASHBRACK);
4520 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4524 for (t++; t < PL_bufend; t++) {
4525 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4527 else if (*t == open)
4531 for (t++; t < PL_bufend; t++) {
4532 if (*t == '\\' && t+1 < PL_bufend)
4534 else if (*t == close && --brackets <= 0)
4536 else if (*t == open)
4543 /* skip plain q word */
4544 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4547 else if (isALNUM_lazy_if(t,UTF)) {
4549 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4552 while (t < PL_bufend && isSPACE(*t))
4554 /* if comma follows first term, call it an anon hash */
4555 /* XXX it could be a comma expression with loop modifiers */
4556 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4557 || (*t == '=' && t[1] == '>')))
4558 OPERATOR(HASHBRACK);
4559 if (PL_expect == XREF)
4562 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4568 yylval.ival = CopLINE(PL_curcop);
4569 if (isSPACE(*s) || *s == '#')
4570 PL_copline = NOLINE; /* invalidate current command line number */
4575 if (PL_lex_brackets <= 0)
4576 yyerror("Unmatched right curly bracket");
4578 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4579 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4580 PL_lex_formbrack = 0;
4581 if (PL_lex_state == LEX_INTERPNORMAL) {
4582 if (PL_lex_brackets == 0) {
4583 if (PL_expect & XFAKEBRACK) {
4584 PL_expect &= XENUMMASK;
4585 PL_lex_state = LEX_INTERPEND;
4590 PL_thiswhite = newSVpvs("");
4591 sv_catpvn(PL_thiswhite,"}",1);
4594 return yylex(); /* ignore fake brackets */
4596 if (*s == '-' && s[1] == '>')
4597 PL_lex_state = LEX_INTERPENDMAYBE;
4598 else if (*s != '[' && *s != '{')
4599 PL_lex_state = LEX_INTERPEND;
4602 if (PL_expect & XFAKEBRACK) {
4603 PL_expect &= XENUMMASK;
4605 return yylex(); /* ignore fake brackets */
4607 start_force(PL_curforce);
4609 curmad('X', newSVpvn(s-1,1));
4610 CURMAD('_', PL_thiswhite);
4615 PL_thistoken = newSVpvs("");
4623 if (PL_expect == XOPERATOR) {
4624 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4625 && isIDFIRST_lazy_if(s,UTF))
4627 CopLINE_dec(PL_curcop);
4628 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4629 CopLINE_inc(PL_curcop);
4634 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4636 PL_expect = XOPERATOR;
4637 force_ident(PL_tokenbuf, '&');
4641 yylval.ival = (OPpENTERSUB_AMPER<<8);
4653 const char tmp = *s++;
4660 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4661 && strchr("+-*/%.^&|<",tmp))
4662 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4663 "Reversed %c= operator",(int)tmp);
4665 if (PL_expect == XSTATE && isALPHA(tmp) &&
4666 (s == PL_linestart+1 || s[-2] == '\n') )
4668 if (PL_in_eval && !PL_rsfp) {
4673 if (strnEQ(s,"=cut",4)) {
4689 PL_thiswhite = newSVpvs("");
4690 sv_catpvn(PL_thiswhite, PL_linestart,
4691 PL_bufend - PL_linestart);
4695 PL_doextract = TRUE;
4699 if (PL_lex_brackets < PL_lex_formbrack) {
4701 #ifdef PERL_STRICT_CR
4702 while (SPACE_OR_TAB(*t))
4704 while (SPACE_OR_TAB(*t) || *t == '\r')
4707 if (*t == '\n' || *t == '#') {
4718 const char tmp = *s++;
4720 /* was this !=~ where !~ was meant?
4721 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4723 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4724 const char *t = s+1;
4726 while (t < PL_bufend && isSPACE(*t))
4729 if (*t == '/' || *t == '?' ||
4730 ((*t == 'm' || *t == 's' || *t == 'y')
4731 && !isALNUM(t[1])) ||
4732 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4733 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4734 "!=~ should be !~");
4744 if (PL_expect != XOPERATOR) {
4745 if (s[1] != '<' && !strchr(s,'>'))
4748 s = scan_heredoc(s);
4750 s = scan_inputsymbol(s);
4751 TERM(sublex_start());
4757 SHop(OP_LEFT_SHIFT);
4771 const char tmp = *s++;
4773 SHop(OP_RIGHT_SHIFT);
4774 else if (tmp == '=')
4783 if (PL_expect == XOPERATOR) {
4784 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4786 deprecate_old(commaless_variable_list);
4787 return REPORT(','); /* grandfather non-comma-format format */
4791 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4792 PL_tokenbuf[0] = '@';
4793 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4794 sizeof PL_tokenbuf - 1, FALSE);
4795 if (PL_expect == XOPERATOR)
4796 no_op("Array length", s);
4797 if (!PL_tokenbuf[1])
4799 PL_expect = XOPERATOR;
4800 PL_pending_ident = '#';
4804 PL_tokenbuf[0] = '$';
4805 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4806 sizeof PL_tokenbuf - 1, FALSE);
4807 if (PL_expect == XOPERATOR)
4809 if (!PL_tokenbuf[1]) {
4811 yyerror("Final $ should be \\$ or $name");
4815 /* This kludge not intended to be bulletproof. */
4816 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4817 yylval.opval = newSVOP(OP_CONST, 0,
4818 newSViv(CopARYBASE_get(&PL_compiling)));
4819 yylval.opval->op_private = OPpCONST_ARYBASE;
4825 const char tmp = *s;
4826 if (PL_lex_state == LEX_NORMAL)
4829 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4830 && intuit_more(s)) {
4832 PL_tokenbuf[0] = '@';
4833 if (ckWARN(WARN_SYNTAX)) {
4836 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4839 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4840 while (t < PL_bufend && *t != ']')
4842 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4843 "Multidimensional syntax %.*s not supported",
4844 (int)((t - PL_bufptr) + 1), PL_bufptr);
4848 else if (*s == '{') {
4850 PL_tokenbuf[0] = '%';
4851 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4852 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4854 char tmpbuf[sizeof PL_tokenbuf];
4857 } while (isSPACE(*t));
4858 if (isIDFIRST_lazy_if(t,UTF)) {
4860 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4864 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4865 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4866 "You need to quote \"%s\"",
4873 PL_expect = XOPERATOR;
4874 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4875 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4876 if (!islop || PL_last_lop_op == OP_GREPSTART)
4877 PL_expect = XOPERATOR;
4878 else if (strchr("$@\"'`q", *s))
4879 PL_expect = XTERM; /* e.g. print $fh "foo" */
4880 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4881 PL_expect = XTERM; /* e.g. print $fh &sub */
4882 else if (isIDFIRST_lazy_if(s,UTF)) {
4883 char tmpbuf[sizeof PL_tokenbuf];
4885 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4886 if ((t2 = keyword(tmpbuf, len, 0))) {
4887 /* binary operators exclude handle interpretations */
4899 PL_expect = XTERM; /* e.g. print $fh length() */
4904 PL_expect = XTERM; /* e.g. print $fh subr() */
4907 else if (isDIGIT(*s))
4908 PL_expect = XTERM; /* e.g. print $fh 3 */
4909 else if (*s == '.' && isDIGIT(s[1]))
4910 PL_expect = XTERM; /* e.g. print $fh .3 */
4911 else if ((*s == '?' || *s == '-' || *s == '+')
4912 && !isSPACE(s[1]) && s[1] != '=')
4913 PL_expect = XTERM; /* e.g. print $fh -1 */
4914 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4916 PL_expect = XTERM; /* e.g. print $fh /.../
4917 XXX except DORDOR operator
4919 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4921 PL_expect = XTERM; /* print $fh <<"EOF" */
4924 PL_pending_ident = '$';
4928 if (PL_expect == XOPERATOR)
4930 PL_tokenbuf[0] = '@';
4931 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4932 if (!PL_tokenbuf[1]) {
4935 if (PL_lex_state == LEX_NORMAL)
4937 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4939 PL_tokenbuf[0] = '%';
4941 /* Warn about @ where they meant $. */
4942 if (*s == '[' || *s == '{') {
4943 if (ckWARN(WARN_SYNTAX)) {
4944 const char *t = s + 1;
4945 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4947 if (*t == '}' || *t == ']') {
4949 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4950 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4951 "Scalar value %.*s better written as $%.*s",
4952 (int)(t-PL_bufptr), PL_bufptr,
4953 (int)(t-PL_bufptr-1), PL_bufptr+1);
4958 PL_pending_ident = '@';
4961 case '/': /* may be division, defined-or, or pattern */
4962 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4966 case '?': /* may either be conditional or pattern */
4967 if(PL_expect == XOPERATOR) {
4975 /* A // operator. */
4985 /* Disable warning on "study /blah/" */
4986 if (PL_oldoldbufptr == PL_last_uni
4987 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4988 || memNE(PL_last_uni, "study", 5)
4989 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4992 s = scan_pat(s,OP_MATCH);
4993 TERM(sublex_start());
4997 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4998 #ifdef PERL_STRICT_CR
5001 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5003 && (s == PL_linestart || s[-1] == '\n') )
5005 PL_lex_formbrack = 0;
5009 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5015 yylval.ival = OPf_SPECIAL;
5021 if (PL_expect != XOPERATOR)
5026 case '0': case '1': case '2': case '3': case '4':
5027 case '5': case '6': case '7': case '8': case '9':
5028 s = scan_num(s, &yylval);
5029 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5030 if (PL_expect == XOPERATOR)
5035 s = scan_str(s,!!PL_madskills,FALSE);
5036 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5037 if (PL_expect == XOPERATOR) {
5038 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5040 deprecate_old(commaless_variable_list);
5041 return REPORT(','); /* grandfather non-comma-format format */
5048 yylval.ival = OP_CONST;
5049 TERM(sublex_start());
5052 s = scan_str(s,!!PL_madskills,FALSE);
5053 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5054 if (PL_expect == XOPERATOR) {
5055 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5057 deprecate_old(commaless_variable_list);
5058 return REPORT(','); /* grandfather non-comma-format format */
5065 yylval.ival = OP_CONST;
5066 /* FIXME. I think that this can be const if char *d is replaced by
5067 more localised variables. */
5068 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5069 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5070 yylval.ival = OP_STRINGIFY;
5074 TERM(sublex_start());
5077 s = scan_str(s,!!PL_madskills,FALSE);
5078 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5079 if (PL_expect == XOPERATOR)
5080 no_op("Backticks",s);
5083 readpipe_override();
5084 TERM(sublex_start());
5088 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
5089 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5091 if (PL_expect == XOPERATOR)
5092 no_op("Backslash",s);
5096 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5097 char *start = s + 2;
5098 while (isDIGIT(*start) || *start == '_')
5100 if (*start == '.' && isDIGIT(start[1])) {
5101 s = scan_num(s, &yylval);
5104 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5105 else if (!isALPHA(*start) && (PL_expect == XTERM
5106 || PL_expect == XREF || PL_expect == XSTATE
5107 || PL_expect == XTERMORDORDOR)) {
5108 /* XXX Use gv_fetchpvn rather than stomping on a const string */
5109 const char c = *start;
5112 gv = gv_fetchpv(s, 0, SVt_PVCV);
5115 s = scan_num(s, &yylval);
5122 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5164 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5166 /* Some keywords can be followed by any delimiter, including ':' */
5167 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5168 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5169 (PL_tokenbuf[0] == 'q' &&
5170 strchr("qwxr", PL_tokenbuf[1])))));
5172 /* x::* is just a word, unless x is "CORE" */
5173 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5177 while (d < PL_bufend && isSPACE(*d))
5178 d++; /* no comments skipped here, or s### is misparsed */
5180 /* Is this a label? */
5181 if (!tmp && PL_expect == XSTATE
5182 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5184 yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5189 /* Check for keywords */
5190 tmp = keyword(PL_tokenbuf, len, 0);
5192 /* Is this a word before a => operator? */
5193 if (*d == '=' && d[1] == '>') {
5196 = (OP*)newSVOP(OP_CONST, 0,
5197 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5198 yylval.opval->op_private = OPpCONST_BARE;
5202 if (tmp < 0) { /* second-class keyword? */
5203 GV *ogv = NULL; /* override (winner) */
5204 GV *hgv = NULL; /* hidden (loser) */
5205 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5207 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5210 if (GvIMPORTED_CV(gv))
5212 else if (! CvMETHOD(cv))
5216 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5217 (gv = *gvp) && isGV_with_GP(gv) &&
5218 GvCVu(gv) && GvIMPORTED_CV(gv))
5225 tmp = 0; /* overridden by import or by GLOBAL */
5228 && -tmp==KEY_lock /* XXX generalizable kludge */
5231 tmp = 0; /* any sub overrides "weak" keyword */
5233 else { /* no override */
5235 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5236 Perl_warner(aTHX_ packWARN(WARN_MISC),
5237 "dump() better written as CORE::dump()");
5241 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5242 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5243 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5244 "Ambiguous call resolved as CORE::%s(), %s",
5245 GvENAME(hgv), "qualify as such or use &");
5252 default: /* not a keyword */
5253 /* Trade off - by using this evil construction we can pull the
5254 variable gv into the block labelled keylookup. If not, then
5255 we have to give it function scope so that the goto from the
5256 earlier ':' case doesn't bypass the initialisation. */
5258 just_a_word_zero_gv:
5266 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5269 SV *nextPL_nextwhite = 0;
5273 /* Get the rest if it looks like a package qualifier */
5275 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5277 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5280 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5281 *s == '\'' ? "'" : "::");
5286 if (PL_expect == XOPERATOR) {
5287 if (PL_bufptr == PL_linestart) {
5288 CopLINE_dec(PL_curcop);
5289 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5290 CopLINE_inc(PL_curcop);
5293 no_op("Bareword",s);
5296 /* Look for a subroutine with this name in current package,
5297 unless name is "Foo::", in which case Foo is a bearword
5298 (and a package name). */
5300 if (len > 2 && !PL_madskills &&
5301 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5303 if (ckWARN(WARN_BAREWORD)
5304 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5305 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5306 "Bareword \"%s\" refers to nonexistent package",
5309 PL_tokenbuf[len] = '\0';
5315 /* Mustn't actually add anything to a symbol table.
5316 But also don't want to "initialise" any placeholder
5317 constants that might already be there into full
5318 blown PVGVs with attached PVCV. */
5319 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5320 GV_NOADD_NOINIT, SVt_PVCV);
5325 /* if we saw a global override before, get the right name */
5328 sv = newSVpvs("CORE::GLOBAL::");
5329 sv_catpv(sv,PL_tokenbuf);
5332 /* If len is 0, newSVpv does strlen(), which is correct.
5333 If len is non-zero, then it will be the true length,
5334 and so the scalar will be created correctly. */
5335 sv = newSVpv(PL_tokenbuf,len);
5338 if (PL_madskills && !PL_thistoken) {
5339 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5340 PL_thistoken = newSVpv(start,s - start);
5341 PL_realtokenstart = s - SvPVX(PL_linestr);
5345 /* Presume this is going to be a bareword of some sort. */
5348 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5349 yylval.opval->op_private = OPpCONST_BARE;
5350 /* UTF-8 package name? */
5351 if (UTF && !IN_BYTES &&
5352 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5355 /* And if "Foo::", then that's what it certainly is. */
5360 /* Do the explicit type check so that we don't need to force
5361 the initialisation of the symbol table to have a real GV.
5362 Beware - gv may not really be a PVGV, cv may not really be
5363 a PVCV, (because of the space optimisations that gv_init
5364 understands) But they're true if for this symbol there is
5365 respectively a typeglob and a subroutine.
5367 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5368 /* Real typeglob, so get the real subroutine: */
5370 /* A proxy for a subroutine in this package? */
5371 : SvOK(gv) ? (CV *) gv : NULL)
5374 /* See if it's the indirect object for a list operator. */
5376 if (PL_oldoldbufptr &&
5377 PL_oldoldbufptr < PL_bufptr &&
5378 (PL_oldoldbufptr == PL_last_lop
5379 || PL_oldoldbufptr == PL_last_uni) &&
5380 /* NO SKIPSPACE BEFORE HERE! */
5381 (PL_expect == XREF ||
5382 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5384 bool immediate_paren = *s == '(';
5386 /* (Now we can afford to cross potential line boundary.) */
5387 s = SKIPSPACE2(s,nextPL_nextwhite);
5389 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5392 /* Two barewords in a row may indicate method call. */
5394 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5395 (tmp = intuit_method(s, gv, cv)))
5398 /* If not a declared subroutine, it's an indirect object. */
5399 /* (But it's an indir obj regardless for sort.) */
5400 /* Also, if "_" follows a filetest operator, it's a bareword */
5403 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5405 (PL_last_lop_op != OP_MAPSTART &&
5406 PL_last_lop_op != OP_GREPSTART))))
5407 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5408 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5411 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5416 PL_expect = XOPERATOR;
5419 s = SKIPSPACE2(s,nextPL_nextwhite);
5420 PL_nextwhite = nextPL_nextwhite;
5425 /* Is this a word before a => operator? */
5426 if (*s == '=' && s[1] == '>' && !pkgname) {
5428 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5429 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5430 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5434 /* If followed by a paren, it's certainly a subroutine. */
5439 while (SPACE_OR_TAB(*d))
5441 if (*d == ')' && (sv = gv_const_sv(gv))) {
5448 PL_nextwhite = PL_thiswhite;
5451 start_force(PL_curforce);
5453 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5454 PL_expect = XOPERATOR;
5457 PL_nextwhite = nextPL_nextwhite;
5458 curmad('X', PL_thistoken);
5459 PL_thistoken = newSVpvs("");
5467 /* If followed by var or block, call it a method (unless sub) */
5469 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5470 PL_last_lop = PL_oldbufptr;
5471 PL_last_lop_op = OP_METHOD;
5475 /* If followed by a bareword, see if it looks like indir obj. */
5478 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5479 && (tmp = intuit_method(s, gv, cv)))
5482 /* Not a method, so call it a subroutine (if defined) */
5485 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5486 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5487 "Ambiguous use of -%s resolved as -&%s()",
5488 PL_tokenbuf, PL_tokenbuf);
5489 /* Check for a constant sub */
5490 if ((sv = gv_const_sv(gv))) {
5492 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5493 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5494 yylval.opval->op_private = 0;
5498 /* Resolve to GV now. */
5499 if (SvTYPE(gv) != SVt_PVGV) {
5500 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5501 assert (SvTYPE(gv) == SVt_PVGV);
5502 /* cv must have been some sort of placeholder, so
5503 now needs replacing with a real code reference. */
5507 op_free(yylval.opval);
5508 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5509 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5510 PL_last_lop = PL_oldbufptr;
5511 PL_last_lop_op = OP_ENTERSUB;
5512 /* Is there a prototype? */
5520 const char *proto = SvPV_const((SV*)cv, protolen);
5523 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5525 while (*proto == ';')
5527 if (*proto == '&' && *s == '{') {
5528 sv_setpv(PL_subname,
5531 "__ANON__" : "__ANON__::__ANON__"));
5538 PL_nextwhite = PL_thiswhite;
5541 start_force(PL_curforce);
5542 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5545 PL_nextwhite = nextPL_nextwhite;
5546 curmad('X', PL_thistoken);
5547 PL_thistoken = newSVpvs("");
5554 /* Guess harder when madskills require "best effort". */
5555 if (PL_madskills && (!gv || !GvCVu(gv))) {
5556 int probable_sub = 0;
5557 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5559 else if (isALPHA(*s)) {
5563 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5564 if (!keyword(tmpbuf, tmplen, 0))
5567 while (d < PL_bufend && isSPACE(*d))
5569 if (*d == '=' && d[1] == '>')
5574 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5575 op_free(yylval.opval);
5576 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5577 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5578 PL_last_lop = PL_oldbufptr;
5579 PL_last_lop_op = OP_ENTERSUB;
5580 PL_nextwhite = PL_thiswhite;
5582 start_force(PL_curforce);
5583 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5585 PL_nextwhite = nextPL_nextwhite;
5586 curmad('X', PL_thistoken);
5587 PL_thistoken = newSVpvs("");
5592 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5599 /* Call it a bare word */
5601 if (PL_hints & HINT_STRICT_SUBS)
5602 yylval.opval->op_private |= OPpCONST_STRICT;
5605 if (lastchar != '-') {
5606 if (ckWARN(WARN_RESERVED)) {
5610 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5611 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5618 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5619 && ckWARN_d(WARN_AMBIGUOUS)) {
5620 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5621 "Operator or semicolon missing before %c%s",
5622 lastchar, PL_tokenbuf);
5623 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5624 "Ambiguous use of %c resolved as operator %c",
5625 lastchar, lastchar);
5631 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5632 newSVpv(CopFILE(PL_curcop),0));
5636 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5637 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5640 case KEY___PACKAGE__:
5641 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5643 ? newSVhek(HvNAME_HEK(PL_curstash))
5650 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5651 const char *pname = "main";
5652 if (PL_tokenbuf[2] == 'D')
5653 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5654 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5658 GvIOp(gv) = newIO();
5659 IoIFP(GvIOp(gv)) = PL_rsfp;
5660 #if defined(HAS_FCNTL) && defined(F_SETFD)
5662 const int fd = PerlIO_fileno(PL_rsfp);
5663 fcntl(fd,F_SETFD,fd >= 3);
5666 /* Mark this internal pseudo-handle as clean */
5667 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5669 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5670 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5671 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5673 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5674 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5675 /* if the script was opened in binmode, we need to revert
5676 * it to text mode for compatibility; but only iff it has CRs
5677 * XXX this is a questionable hack at best. */
5678 if (PL_bufend-PL_bufptr > 2
5679 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5682 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5683 loc = PerlIO_tell(PL_rsfp);
5684 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5687 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5689 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5690 #endif /* NETWARE */
5691 #ifdef PERLIO_IS_STDIO /* really? */
5692 # if defined(__BORLANDC__)
5693 /* XXX see note in do_binmode() */
5694 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5698 PerlIO_seek(PL_rsfp, loc, 0);
5702 #ifdef PERLIO_LAYERS
5705 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5706 else if (PL_encoding) {
5713 XPUSHs(PL_encoding);
5715 call_method("name", G_SCALAR);
5719 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5720 Perl_form(aTHX_ ":encoding(%"SVf")",
5729 if (PL_realtokenstart >= 0) {
5730 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5732 PL_endwhite = newSVpvs("");
5733 sv_catsv(PL_endwhite, PL_thiswhite);
5735 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5736 PL_realtokenstart = -1;
5738 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5739 SvCUR(PL_endwhite))) != NULL) ;
5754 if (PL_expect == XSTATE) {
5761 if (*s == ':' && s[1] == ':') {
5764 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5765 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5766 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5769 else if (tmp == KEY_require || tmp == KEY_do)
5770 /* that's a way to remember we saw "CORE::" */
5783 LOP(OP_ACCEPT,XTERM);
5789 LOP(OP_ATAN2,XTERM);
5795 LOP(OP_BINMODE,XTERM);
5798 LOP(OP_BLESS,XTERM);
5807 /* When 'use switch' is in effect, continue has a dual
5808 life as a control operator. */
5810 if (!FEATURE_IS_ENABLED("switch"))
5813 /* We have to disambiguate the two senses of
5814 "continue". If the next token is a '{' then
5815 treat it as the start of a continue block;
5816 otherwise treat it as a control operator.
5828 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5845 if (!PL_cryptseen) {
5846 PL_cryptseen = TRUE;
5850 LOP(OP_CRYPT,XTERM);
5853 LOP(OP_CHMOD,XTERM);
5856 LOP(OP_CHOWN,XTERM);
5859 LOP(OP_CONNECT,XTERM);
5878 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5879 if (orig_keyword == KEY_do) {
5888 PL_hints |= HINT_BLOCK_SCOPE;
5898 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5899 LOP(OP_DBMOPEN,XTERM);
5905 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5912 yylval.ival = CopLINE(PL_curcop);
5928 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5929 UNIBRACK(OP_ENTEREVAL);
5943 case KEY_endhostent:
5949 case KEY_endservent:
5952 case KEY_endprotoent:
5963 yylval.ival = CopLINE(PL_curcop);
5965 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5968 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5971 if ((PL_bufend - p) >= 3 &&
5972 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5974 else if ((PL_bufend - p) >= 4 &&
5975 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5978 if (isIDFIRST_lazy_if(p,UTF)) {
5979 p = scan_ident(p, PL_bufend,
5980 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5984 Perl_croak(aTHX_ "Missing $ on loop variable");
5986 s = SvPVX(PL_linestr) + soff;
5992 LOP(OP_FORMLINE,XTERM);
5998 LOP(OP_FCNTL,XTERM);
6004 LOP(OP_FLOCK,XTERM);
6013 LOP(OP_GREPSTART, XREF);
6016 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6031 case KEY_getpriority:
6032 LOP(OP_GETPRIORITY,XTERM);
6034 case KEY_getprotobyname:
6037 case KEY_getprotobynumber:
6038 LOP(OP_GPBYNUMBER,XTERM);
6040 case KEY_getprotoent:
6052 case KEY_getpeername:
6053 UNI(OP_GETPEERNAME);
6055 case KEY_gethostbyname:
6058 case KEY_gethostbyaddr:
6059 LOP(OP_GHBYADDR,XTERM);
6061 case KEY_gethostent:
6064 case KEY_getnetbyname:
6067 case KEY_getnetbyaddr:
6068 LOP(OP_GNBYADDR,XTERM);
6073 case KEY_getservbyname:
6074 LOP(OP_GSBYNAME,XTERM);
6076 case KEY_getservbyport:
6077 LOP(OP_GSBYPORT,XTERM);
6079 case KEY_getservent:
6082 case KEY_getsockname:
6083 UNI(OP_GETSOCKNAME);
6085 case KEY_getsockopt:
6086 LOP(OP_GSOCKOPT,XTERM);
6101 yylval.ival = CopLINE(PL_curcop);
6111 yylval.ival = CopLINE(PL_curcop);
6115 LOP(OP_INDEX,XTERM);
6121 LOP(OP_IOCTL,XTERM);
6133 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6165 LOP(OP_LISTEN,XTERM);
6174 s = scan_pat(s,OP_MATCH);
6175 TERM(sublex_start());
6178 LOP(OP_MAPSTART, XREF);
6181 LOP(OP_MKDIR,XTERM);
6184 LOP(OP_MSGCTL,XTERM);
6187 LOP(OP_MSGGET,XTERM);
6190 LOP(OP_MSGRCV,XTERM);
6193 LOP(OP_MSGSND,XTERM);
6198 PL_in_my = (U16)tmp;
6200 if (isIDFIRST_lazy_if(s,UTF)) {
6204 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6205 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6207 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6208 if (!PL_in_my_stash) {
6211 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6215 if (PL_madskills) { /* just add type to declarator token */
6216 sv_catsv(PL_thistoken, PL_nextwhite);
6218 sv_catpvn(PL_thistoken, start, s - start);
6226 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6233 s = tokenize_use(0, s);
6237 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6244 if (isIDFIRST_lazy_if(s,UTF)) {
6246 for (d = s; isALNUM_lazy_if(d,UTF);)
6248 for (t=d; isSPACE(*t);)
6250 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6252 && !(t[0] == '=' && t[1] == '>')
6254 int parms_len = (int)(d-s);
6255 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6256 "Precedence problem: open %.*s should be open(%.*s)",
6257 parms_len, s, parms_len, s);
6263 yylval.ival = OP_OR;
6273 LOP(OP_OPEN_DIR,XTERM);
6276 checkcomma(s,PL_tokenbuf,"filehandle");
6280 checkcomma(s,PL_tokenbuf,"filehandle");
6299 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6303 LOP(OP_PIPE_OP,XTERM);
6306 s = scan_str(s,!!PL_madskills,FALSE);
6309 yylval.ival = OP_CONST;
6310 TERM(sublex_start());
6316 s = scan_str(s,!!PL_madskills,FALSE);
6319 PL_expect = XOPERATOR;
6321 if (SvCUR(PL_lex_stuff)) {
6324 d = SvPV_force(PL_lex_stuff, len);
6326 for (; isSPACE(*d) && len; --len, ++d)
6331 if (!warned && ckWARN(WARN_QW)) {
6332 for (; !isSPACE(*d) && len; --len, ++d) {
6334 Perl_warner(aTHX_ packWARN(WARN_QW),
6335 "Possible attempt to separate words with commas");
6338 else if (*d == '#') {
6339 Perl_warner(aTHX_ packWARN(WARN_QW),
6340 "Possible attempt to put comments in qw() list");
6346 for (; !isSPACE(*d) && len; --len, ++d)
6349 sv = newSVpvn(b, d-b);
6350 if (DO_UTF8(PL_lex_stuff))
6352 words = append_elem(OP_LIST, words,
6353 newSVOP(OP_CONST, 0, tokeq(sv)));
6357 start_force(PL_curforce);
6358 NEXTVAL_NEXTTOKE.opval = words;
6363 SvREFCNT_dec(PL_lex_stuff);
6364 PL_lex_stuff = NULL;
6370 s = scan_str(s,!!PL_madskills,FALSE);
6373 yylval.ival = OP_STRINGIFY;
6374 if (SvIVX(PL_lex_stuff) == '\'')
6375 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6376 TERM(sublex_start());
6379 s = scan_pat(s,OP_QR);
6380 TERM(sublex_start());
6383 s = scan_str(s,!!PL_madskills,FALSE);
6386 readpipe_override();
6387 TERM(sublex_start());
6395 s = force_version(s, FALSE);
6397 else if (*s != 'v' || !isDIGIT(s[1])
6398 || (s = force_version(s, TRUE), *s == 'v'))
6400 *PL_tokenbuf = '\0';
6401 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6402 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6403 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6405 yyerror("<> should be quotes");
6407 if (orig_keyword == KEY_require) {
6415 PL_last_uni = PL_oldbufptr;
6416 PL_last_lop_op = OP_REQUIRE;
6418 return REPORT( (int)REQUIRE );
6424 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6428 LOP(OP_RENAME,XTERM);
6437 LOP(OP_RINDEX,XTERM);
6446 UNIDOR(OP_READLINE);
6449 UNIDOR(OP_BACKTICK);
6458 LOP(OP_REVERSE,XTERM);
6461 UNIDOR(OP_READLINK);
6469 TERM(sublex_start());
6471 TOKEN(1); /* force error */
6474 checkcomma(s,PL_tokenbuf,"filehandle");
6484 LOP(OP_SELECT,XTERM);
6490 LOP(OP_SEMCTL,XTERM);
6493 LOP(OP_SEMGET,XTERM);
6496 LOP(OP_SEMOP,XTERM);
6502 LOP(OP_SETPGRP,XTERM);
6504 case KEY_setpriority:
6505 LOP(OP_SETPRIORITY,XTERM);
6507 case KEY_sethostent:
6513 case KEY_setservent:
6516 case KEY_setprotoent:
6526 LOP(OP_SEEKDIR,XTERM);
6528 case KEY_setsockopt:
6529 LOP(OP_SSOCKOPT,XTERM);
6535 LOP(OP_SHMCTL,XTERM);
6538 LOP(OP_SHMGET,XTERM);
6541 LOP(OP_SHMREAD,XTERM);
6544 LOP(OP_SHMWRITE,XTERM);
6547 LOP(OP_SHUTDOWN,XTERM);
6556 LOP(OP_SOCKET,XTERM);
6558 case KEY_socketpair:
6559 LOP(OP_SOCKPAIR,XTERM);
6562 checkcomma(s,PL_tokenbuf,"subroutine name");
6564 if (*s == ';' || *s == ')') /* probably a close */
6565 Perl_croak(aTHX_ "sort is now a reserved word");
6567 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6571 LOP(OP_SPLIT,XTERM);
6574 LOP(OP_SPRINTF,XTERM);
6577 LOP(OP_SPLICE,XTERM);
6592 LOP(OP_SUBSTR,XTERM);
6598 char tmpbuf[sizeof PL_tokenbuf];
6599 SSize_t tboffset = 0;
6600 expectation attrful;
6601 bool have_name, have_proto;
6602 const int key = tmp;
6607 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6608 SV *subtoken = newSVpvn(tstart, s - tstart);
6612 s = SKIPSPACE2(s,tmpwhite);
6617 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6618 (*s == ':' && s[1] == ':'))
6625 attrful = XATTRBLOCK;
6626 /* remember buffer pos'n for later force_word */
6627 tboffset = s - PL_oldbufptr;
6628 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6631 nametoke = newSVpvn(s, d - s);
6633 if (memchr(tmpbuf, ':', len))
6634 sv_setpvn(PL_subname, tmpbuf, len);
6636 sv_setsv(PL_subname,PL_curstname);
6637 sv_catpvs(PL_subname,"::");
6638 sv_catpvn(PL_subname,tmpbuf,len);
6645 CURMAD('X', nametoke);
6646 CURMAD('_', tmpwhite);
6647 (void) force_word(PL_oldbufptr + tboffset, WORD,
6650 s = SKIPSPACE2(d,tmpwhite);
6657 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6658 PL_expect = XTERMBLOCK;
6659 attrful = XATTRTERM;
6660 sv_setpvn(PL_subname,"?",1);
6664 if (key == KEY_format) {
6666 PL_lex_formbrack = PL_lex_brackets + 1;
6668 PL_thistoken = subtoken;
6672 (void) force_word(PL_oldbufptr + tboffset, WORD,
6678 /* Look for a prototype */
6681 bool bad_proto = FALSE;
6682 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6684 s = scan_str(s,!!PL_madskills,FALSE);
6686 Perl_croak(aTHX_ "Prototype not terminated");
6687 /* strip spaces and check for bad characters */
6688 d = SvPVX(PL_lex_stuff);
6690 for (p = d; *p; ++p) {
6693 if (warnsyntax && !strchr("$@%*;[]&\\_", *p))
6699 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6700 "Illegal character in prototype for %"SVf" : %s",
6701 SVfARG(PL_subname), d);
6702 SvCUR_set(PL_lex_stuff, tmp);
6707 CURMAD('q', PL_thisopen);
6708 CURMAD('_', tmpwhite);
6709 CURMAD('=', PL_thisstuff);
6710 CURMAD('Q', PL_thisclose);
6711 NEXTVAL_NEXTTOKE.opval =
6712 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6713 PL_lex_stuff = NULL;
6716 s = SKIPSPACE2(s,tmpwhite);
6724 if (*s == ':' && s[1] != ':')
6725 PL_expect = attrful;
6726 else if (*s != '{' && key == KEY_sub) {
6728 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6730 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6737 curmad('^', newSVpvs(""));
6738 CURMAD('_', tmpwhite);
6742 PL_thistoken = subtoken;
6745 NEXTVAL_NEXTTOKE.opval =
6746 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6747 PL_lex_stuff = NULL;
6752 sv_setpv(PL_subname,
6754 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
6758 (void) force_word(PL_oldbufptr + tboffset, WORD,
6767 LOP(OP_SYSTEM,XREF);
6770 LOP(OP_SYMLINK,XTERM);
6773 LOP(OP_SYSCALL,XTERM);
6776 LOP(OP_SYSOPEN,XTERM);
6779 LOP(OP_SYSSEEK,XTERM);
6782 LOP(OP_SYSREAD,XTERM);
6785 LOP(OP_SYSWRITE,XTERM);
6789 TERM(sublex_start());
6810 LOP(OP_TRUNCATE,XTERM);
6822 yylval.ival = CopLINE(PL_curcop);
6826 yylval.ival = CopLINE(PL_curcop);
6830 LOP(OP_UNLINK,XTERM);
6836 LOP(OP_UNPACK,XTERM);
6839 LOP(OP_UTIME,XTERM);
6845 LOP(OP_UNSHIFT,XTERM);
6848 s = tokenize_use(1, s);
6858 yylval.ival = CopLINE(PL_curcop);
6862 yylval.ival = CopLINE(PL_curcop);
6866 PL_hints |= HINT_BLOCK_SCOPE;
6873 LOP(OP_WAITPID,XTERM);
6882 ctl_l[0] = toCTRL('L');
6884 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6887 /* Make sure $^L is defined */
6888 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6893 if (PL_expect == XOPERATOR)
6899 yylval.ival = OP_XOR;
6904 TERM(sublex_start());
6909 #pragma segment Main
6913 S_pending_ident(pTHX)
6918 /* pit holds the identifier we read and pending_ident is reset */
6919 char pit = PL_pending_ident;
6920 PL_pending_ident = 0;
6922 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6923 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6924 "### Pending identifier '%s'\n", PL_tokenbuf); });
6926 /* if we're in a my(), we can't allow dynamics here.
6927 $foo'bar has already been turned into $foo::bar, so
6928 just check for colons.
6930 if it's a legal name, the OP is a PADANY.
6933 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6934 if (strchr(PL_tokenbuf,':'))
6935 yyerror(Perl_form(aTHX_ "No package name allowed for "
6936 "variable %s in \"our\"",
6938 tmp = allocmy(PL_tokenbuf);
6941 if (strchr(PL_tokenbuf,':'))
6942 yyerror(Perl_form(aTHX_ PL_no_myglob,
6943 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6945 yylval.opval = newOP(OP_PADANY, 0);
6946 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6952 build the ops for accesses to a my() variable.
6954 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6955 then used in a comparison. This catches most, but not
6956 all cases. For instance, it catches
6957 sort { my($a); $a <=> $b }
6959 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6960 (although why you'd do that is anyone's guess).
6963 if (!strchr(PL_tokenbuf,':')) {
6965 tmp = pad_findmy(PL_tokenbuf);
6966 if (tmp != NOT_IN_PAD) {
6967 /* might be an "our" variable" */
6968 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6969 /* build ops for a bareword */
6970 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6971 HEK * const stashname = HvNAME_HEK(stash);
6972 SV * const sym = newSVhek(stashname);
6973 sv_catpvs(sym, "::");
6974 sv_catpv(sym, PL_tokenbuf+1);
6975 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6976 yylval.opval->op_private = OPpCONST_ENTERED;
6979 ? (GV_ADDMULTI | GV_ADDINEVAL)
6982 ((PL_tokenbuf[0] == '$') ? SVt_PV
6983 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6988 /* if it's a sort block and they're naming $a or $b */
6989 if (PL_last_lop_op == OP_SORT &&
6990 PL_tokenbuf[0] == '$' &&
6991 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6994 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6995 d < PL_bufend && *d != '\n';
6998 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6999 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7005 yylval.opval = newOP(OP_PADANY, 0);
7006 yylval.opval->op_targ = tmp;
7012 Whine if they've said @foo in a doublequoted string,
7013 and @foo isn't a variable we can find in the symbol
7016 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7017 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
7018 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7019 && ckWARN(WARN_AMBIGUOUS)
7020 /* DO NOT warn for @- and @+ */
7021 && !( PL_tokenbuf[2] == '\0' &&
7022 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7025 /* Downgraded from fatal to warning 20000522 mjd */
7026 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7027 "Possible unintended interpolation of %s in string",
7032 /* build ops for a bareword */
7033 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
7034 yylval.opval->op_private = OPpCONST_ENTERED;
7037 /* If the identifier refers to a stash, don't autovivify it.
7038 * Change 24660 had the side effect of causing symbol table
7039 * hashes to always be defined, even if they were freshly
7040 * created and the only reference in the entire program was
7041 * the single statement with the defined %foo::bar:: test.
7042 * It appears that all code in the wild doing this actually
7043 * wants to know whether sub-packages have been loaded, so
7044 * by avoiding auto-vivifying symbol tables, we ensure that
7045 * defined %foo::bar:: continues to be false, and the existing
7046 * tests still give the expected answers, even though what
7047 * they're actually testing has now changed subtly.
7049 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
7051 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7052 ((PL_tokenbuf[0] == '$') ? SVt_PV
7053 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7059 * The following code was generated by perl_keyword.pl.
7063 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7068 case 1: /* 5 tokens of length 1 */
7100 case 2: /* 18 tokens of length 2 */
7246 case 3: /* 29 tokens of length 3 */
7250 if (name[1] == 'N' &&
7313 if (name[1] == 'i' &&
7345 if (name[1] == 'o' &&
7354 if (name[1] == 'e' &&
7363 if (name[1] == 'n' &&
7372 if (name[1] == 'o' &&
7381 if (name[1] == 'a' &&
7390 if (name[1] == 'o' &&
7452 if (name[1] == 'e' &&
7466 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7492 if (name[1] == 'i' &&
7501 if (name[1] == 's' &&
7510 if (name[1] == 'e' &&
7519 if (name[1] == 'o' &&
7531 case 4: /* 41 tokens of length 4 */
7535 if (name[1] == 'O' &&
7545 if (name[1] == 'N' &&
7555 if (name[1] == 'i' &&
7565 if (name[1] == 'h' &&
7575 if (name[1] == 'u' &&
7588 if (name[2] == 'c' &&
7597 if (name[2] == 's' &&
7606 if (name[2] == 'a' &&
7642 if (name[1] == 'o' &&
7655 if (name[2] == 't' &&
7664 if (name[2] == 'o' &&
7673 if (name[2] == 't' &&
7682 if (name[2] == 'e' &&
7695 if (name[1] == 'o' &&
7708 if (name[2] == 'y' &&
7717 if (name[2] == 'l' &&
7733 if (name[2] == 's' &&
7742 if (name[2] == 'n' &&
7751 if (name[2] == 'c' &&
7764 if (name[1] == 'e' &&
7774 if (name[1] == 'p' &&
7787 if (name[2] == 'c' &&
7796 if (name[2] == 'p' &&
7805 if (name[2] == 's' &&
7821 if (name[2] == 'n' &&
7891 if (name[2] == 'r' &&
7900 if (name[2] == 'r' &&
7909 if (name[2] == 'a' &&
7925 if (name[2] == 'l' &&
7987 if (name[2] == 'e' &&
7990 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8003 case 5: /* 39 tokens of length 5 */
8007 if (name[1] == 'E' &&
8018 if (name[1] == 'H' &&
8032 if (name[2] == 'a' &&
8042 if (name[2] == 'a' &&
8059 if (name[2] == 'e' &&
8069 if (name[2] == 'e' &&
8073 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8089 if (name[3] == 'i' &&
8098 if (name[3] == 'o' &&
8134 if (name[2] == 'o' &&
8144 if (name[2] == 'y' &&
8158 if (name[1] == 'l' &&
8172 if (name[2] == 'n' &&
8182 if (name[2] == 'o' &&
8196 if (name[1] == 'i' &&
8201 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8210 if (name[2] == 'd' &&
8220 if (name[2] == 'c' &&
8237 if (name[2] == 'c' &&
8247 if (name[2] == 't' &&
8261 if (name[1] == 'k' &&
8272 if (name[1] == 'r' &&
8286 if (name[2] == 's' &&
8296 if (name[2] == 'd' &&
8313 if (name[2] == 'm' &&
8323 if (name[2] == 'i' &&
8333 if (name[2] == 'e' &&
8343 if (name[2] == 'l' &&
8353 if (name[2] == 'a' &&
8366 if (name[3] == 't' &&
8369 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8375 if (name[3] == 'd' &&
8392 if (name[1] == 'i' &&
8406 if (name[2] == 'a' &&
8419 if (name[3] == 'e' &&
8454 if (name[2] == 'i' &&
8471 if (name[2] == 'i' &&
8481 if (name[2] == 'i' &&
8498 case 6: /* 33 tokens of length 6 */
8502 if (name[1] == 'c' &&
8517 if (name[2] == 'l' &&
8528 if (name[2] == 'r' &&
8543 if (name[1] == 'e' &&
8558 if (name[2] == 's' &&
8563 if(ckWARN_d(WARN_SYNTAX))
8564 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8570 if (name[2] == 'i' &&
8588 if (name[2] == 'l' &&
8599 if (name[2] == 'r' &&
8614 if (name[1] == 'm' &&
8629 if (name[2] == 'n' &&
8640 if (name[2] == 's' &&
8655 if (name[1] == 's' &&
8661 if (name[4] == 't' &&
8670 if (name[4] == 'e' &&
8679 if (name[4] == 'c' &&
8688 if (name[4] == 'n' &&
8704 if (name[1] == 'r' &&
8722 if (name[3] == 'a' &&
8732 if (name[3] == 'u' &&
8746 if (name[2] == 'n' &&
8764 if (name[2] == 'a' &&
8778 if (name[3] == 'e' &&
8791 if (name[4] == 't' &&
8800 if (name[4] == 'e' &&
8822 if (name[4] == 't' &&
8831 if (name[4] == 'e' &&
8847 if (name[2] == 'c' &&
8858 if (name[2] == 'l' &&
8869 if (name[2] == 'b' &&
8880 if (name[2] == 's' &&
8903 if (name[4] == 's' &&
8912 if (name[4] == 'n' &&
8925 if (name[3] == 'a' &&
8942 if (name[1] == 'a' &&
8957 case 7: /* 29 tokens of length 7 */
8961 if (name[1] == 'E' &&
8974 if (name[1] == '_' &&
8987 if (name[1] == 'i' &&
8994 return -KEY_binmode;
9000 if (name[1] == 'o' &&
9007 return -KEY_connect;
9016 if (name[2] == 'm' &&
9022 return -KEY_dbmopen;
9033 if (name[4] == 'u' &&
9037 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9043 if (name[4] == 'n' &&
9064 if (name[1] == 'o' &&
9077 if (name[1] == 'e' &&
9084 if (name[5] == 'r' &&
9087 return -KEY_getpgrp;
9093 if (name[5] == 'i' &&
9096 return -KEY_getppid;
9109 if (name[1] == 'c' &&
9116 return -KEY_lcfirst;
9122 if (name[1] == 'p' &&
9129 return -KEY_opendir;
9135 if (name[1] == 'a' &&
9153 if (name[3] == 'd' &&
9158 return -KEY_readdir;
9164 if (name[3] == 'u' &&
9175 if (name[3] == 'e' &&
9180 return -KEY_reverse;
9199 if (name[3] == 'k' &&
9204 return -KEY_seekdir;
9210 if (name[3] == 'p' &&
9215 return -KEY_setpgrp;
9225 if (name[2] == 'm' &&
9231 return -KEY_shmread;
9237 if (name[2] == 'r' &&
9243 return -KEY_sprintf;
9252 if (name[3] == 'l' &&
9257 return -KEY_symlink;
9266 if (name[4] == 'a' &&
9270 return -KEY_syscall;
9276 if (name[4] == 'p' &&
9280 return -KEY_sysopen;
9286 if (name[4] == 'e' &&
9290 return -KEY_sysread;
9296 if (name[4] == 'e' &&
9300 return -KEY_sysseek;
9318 if (name[1] == 'e' &&
9325 return -KEY_telldir;
9334 if (name[2] == 'f' &&
9340 return -KEY_ucfirst;
9346 if (name[2] == 's' &&
9352 return -KEY_unshift;
9362 if (name[1] == 'a' &&
9369 return -KEY_waitpid;
9378 case 8: /* 26 tokens of length 8 */
9382 if (name[1] == 'U' &&
9390 return KEY_AUTOLOAD;
9401 if (name[3] == 'A' &&
9407 return KEY___DATA__;
9413 if (name[3] == 'I' &&
9419 return -KEY___FILE__;
9425 if (name[3] == 'I' &&
9431 return -KEY___LINE__;
9447 if (name[2] == 'o' &&
9454 return -KEY_closedir;
9460 if (name[2] == 'n' &&
9467 return -KEY_continue;
9477 if (name[1] == 'b' &&
9485 return -KEY_dbmclose;
9491 if (name[1] == 'n' &&
9497 if (name[4] == 'r' &&
9502 return -KEY_endgrent;
9508 if (name[4] == 'w' &&
9513 return -KEY_endpwent;
9526 if (name[1] == 'o' &&
9534 return -KEY_formline;
9540 if (name[1] == 'e' &&
9551 if (name[6] == 'n' &&
9554 return -KEY_getgrent;
9560 if (name[6] == 'i' &&
9563 return -KEY_getgrgid;
9569 if (name[6] == 'a' &&
9572 return -KEY_getgrnam;
9585 if (name[4] == 'o' &&
9590 return -KEY_getlogin;
9601 if (name[6] == 'n' &&
9604 return -KEY_getpwent;
9610 if (name[6] == 'a' &&
9613 return -KEY_getpwnam;
9619 if (name[6] == 'i' &&
9622 return -KEY_getpwuid;
9642 if (name[1] == 'e' &&
9649 if (name[5] == 'i' &&
9656 return -KEY_readline;
9661 return -KEY_readlink;
9672 if (name[5] == 'i' &&
9676 return -KEY_readpipe;
9697 if (name[4] == 'r' &&
9702 return -KEY_setgrent;
9708 if (name[4] == 'w' &&
9713 return -KEY_setpwent;
9729 if (name[3] == 'w' &&
9735 return -KEY_shmwrite;
9741 if (name[3] == 't' &&
9747 return -KEY_shutdown;
9757 if (name[2] == 's' &&
9764 return -KEY_syswrite;
9774 if (name[1] == 'r' &&
9782 return -KEY_truncate;
9791 case 9: /* 9 tokens of length 9 */
9795 if (name[1] == 'N' &&
9804 return KEY_UNITCHECK;
9810 if (name[1] == 'n' &&
9819 return -KEY_endnetent;
9825 if (name[1] == 'e' &&
9834 return -KEY_getnetent;
9840 if (name[1] == 'o' &&
9849 return -KEY_localtime;
9855 if (name[1] == 'r' &&
9864 return KEY_prototype;
9870 if (name[1] == 'u' &&
9879 return -KEY_quotemeta;
9885 if (name[1] == 'e' &&
9894 return -KEY_rewinddir;
9900 if (name[1] == 'e' &&
9909 return -KEY_setnetent;
9915 if (name[1] == 'a' &&
9924 return -KEY_wantarray;
9933 case 10: /* 9 tokens of length 10 */
9937 if (name[1] == 'n' &&
9943 if (name[4] == 'o' &&
9950 return -KEY_endhostent;
9956 if (name[4] == 'e' &&
9963 return -KEY_endservent;
9976 if (name[1] == 'e' &&
9982 if (name[4] == 'o' &&
9989 return -KEY_gethostent;
9998 if (name[5] == 'r' &&
10004 return -KEY_getservent;
10010 if (name[5] == 'c' &&
10016 return -KEY_getsockopt;
10036 if (name[2] == 't')
10041 if (name[4] == 'o' &&
10048 return -KEY_sethostent;
10057 if (name[5] == 'r' &&
10063 return -KEY_setservent;
10069 if (name[5] == 'c' &&
10075 return -KEY_setsockopt;
10092 if (name[2] == 'c' &&
10101 return -KEY_socketpair;
10114 case 11: /* 8 tokens of length 11 */
10118 if (name[1] == '_' &&
10128 { /* __PACKAGE__ */
10129 return -KEY___PACKAGE__;
10135 if (name[1] == 'n' &&
10145 { /* endprotoent */
10146 return -KEY_endprotoent;
10152 if (name[1] == 'e' &&
10161 if (name[5] == 'e' &&
10167 { /* getpeername */
10168 return -KEY_getpeername;
10177 if (name[6] == 'o' &&
10182 { /* getpriority */
10183 return -KEY_getpriority;
10189 if (name[6] == 't' &&
10194 { /* getprotoent */
10195 return -KEY_getprotoent;
10209 if (name[4] == 'o' &&
10216 { /* getsockname */
10217 return -KEY_getsockname;
10230 if (name[1] == 'e' &&
10238 if (name[6] == 'o' &&
10243 { /* setpriority */
10244 return -KEY_setpriority;
10250 if (name[6] == 't' &&
10255 { /* setprotoent */
10256 return -KEY_setprotoent;
10272 case 12: /* 2 tokens of length 12 */
10273 if (name[0] == 'g' &&
10285 if (name[9] == 'd' &&
10288 { /* getnetbyaddr */
10289 return -KEY_getnetbyaddr;
10295 if (name[9] == 'a' &&
10298 { /* getnetbyname */
10299 return -KEY_getnetbyname;
10311 case 13: /* 4 tokens of length 13 */
10312 if (name[0] == 'g' &&
10319 if (name[4] == 'o' &&
10328 if (name[10] == 'd' &&
10331 { /* gethostbyaddr */
10332 return -KEY_gethostbyaddr;
10338 if (name[10] == 'a' &&
10341 { /* gethostbyname */
10342 return -KEY_gethostbyname;
10355 if (name[4] == 'e' &&
10364 if (name[10] == 'a' &&
10367 { /* getservbyname */
10368 return -KEY_getservbyname;
10374 if (name[10] == 'o' &&
10377 { /* getservbyport */
10378 return -KEY_getservbyport;
10397 case 14: /* 1 tokens of length 14 */
10398 if (name[0] == 'g' &&
10412 { /* getprotobyname */
10413 return -KEY_getprotobyname;
10418 case 16: /* 1 tokens of length 16 */
10419 if (name[0] == 'g' &&
10435 { /* getprotobynumber */
10436 return -KEY_getprotobynumber;
10450 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10454 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10455 if (ckWARN(WARN_SYNTAX)) {
10458 for (w = s+2; *w && level; w++) {
10461 else if (*w == ')')
10464 while (isSPACE(*w))
10466 /* the list of chars below is for end of statements or
10467 * block / parens, boolean operators (&&, ||, //) and branch
10468 * constructs (or, and, if, until, unless, while, err, for).
10469 * Not a very solid hack... */
10470 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10471 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10472 "%s (...) interpreted as function",name);
10475 while (s < PL_bufend && isSPACE(*s))
10479 while (s < PL_bufend && isSPACE(*s))
10481 if (isIDFIRST_lazy_if(s,UTF)) {
10482 const char * const w = s++;
10483 while (isALNUM_lazy_if(s,UTF))
10485 while (s < PL_bufend && isSPACE(*s))
10489 if (keyword(w, s - w, 0))
10492 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10493 if (gv && GvCVu(gv))
10495 Perl_croak(aTHX_ "No comma allowed after %s", what);
10500 /* Either returns sv, or mortalizes sv and returns a new SV*.
10501 Best used as sv=new_constant(..., sv, ...).
10502 If s, pv are NULL, calls subroutine with one argument,
10503 and type is used with error messages only. */
10506 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10507 SV *sv, SV *pv, const char *type, STRLEN typelen)
10510 HV * const table = GvHV(PL_hintgv); /* ^H */
10514 const char *why1 = "", *why2 = "", *why3 = "";
10516 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10519 why2 = (const char *)
10520 (strEQ(key,"charnames")
10521 ? "(possibly a missing \"use charnames ...\")"
10523 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10524 (type ? type: "undef"), why2);
10526 /* This is convoluted and evil ("goto considered harmful")
10527 * but I do not understand the intricacies of all the different
10528 * failure modes of %^H in here. The goal here is to make
10529 * the most probable error message user-friendly. --jhi */
10534 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10535 (type ? type: "undef"), why1, why2, why3);
10537 yyerror(SvPVX_const(msg));
10541 cvp = hv_fetch(table, key, keylen, FALSE);
10542 if (!cvp || !SvOK(*cvp)) {
10545 why3 = "} is not defined";
10548 sv_2mortal(sv); /* Parent created it permanently */
10551 pv = sv_2mortal(newSVpvn(s, len));
10553 typesv = sv_2mortal(newSVpvn(type, typelen));
10555 typesv = &PL_sv_undef;
10557 PUSHSTACKi(PERLSI_OVERLOAD);
10569 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10573 /* Check the eval first */
10574 if (!PL_in_eval && SvTRUE(ERRSV)) {
10575 sv_catpvs(ERRSV, "Propagated");
10576 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10578 res = SvREFCNT_inc_simple(sv);
10582 SvREFCNT_inc_simple_void(res);
10591 why1 = "Call to &{$^H{";
10593 why3 = "}} did not return a defined value";
10601 /* Returns a NUL terminated string, with the length of the string written to
10605 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10608 register char *d = dest;
10609 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10612 Perl_croak(aTHX_ ident_too_long);
10613 if (isALNUM(*s)) /* UTF handled below */
10615 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10620 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10624 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10625 char *t = s + UTF8SKIP(s);
10627 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10631 Perl_croak(aTHX_ ident_too_long);
10632 Copy(s, d, len, char);
10645 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10648 char *bracket = NULL;
10650 register char *d = dest;
10651 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10656 while (isDIGIT(*s)) {
10658 Perl_croak(aTHX_ ident_too_long);
10665 Perl_croak(aTHX_ ident_too_long);
10666 if (isALNUM(*s)) /* UTF handled below */
10668 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10673 else if (*s == ':' && s[1] == ':') {
10677 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10678 char *t = s + UTF8SKIP(s);
10679 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10681 if (d + (t - s) > e)
10682 Perl_croak(aTHX_ ident_too_long);
10683 Copy(s, d, t - s, char);
10694 if (PL_lex_state != LEX_NORMAL)
10695 PL_lex_state = LEX_INTERPENDMAYBE;
10698 if (*s == '$' && s[1] &&
10699 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10712 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10717 if (isSPACE(s[-1])) {
10719 const char ch = *s++;
10720 if (!SPACE_OR_TAB(ch)) {
10726 if (isIDFIRST_lazy_if(d,UTF)) {
10730 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10731 end += UTF8SKIP(end);
10732 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10733 end += UTF8SKIP(end);
10735 Copy(s, d, end - s, char);
10740 while ((isALNUM(*s) || *s == ':') && d < e)
10743 Perl_croak(aTHX_ ident_too_long);
10746 while (s < send && SPACE_OR_TAB(*s))
10748 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10749 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10750 const char * const brack =
10752 ((*s == '[') ? "[...]" : "{...}");
10753 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10754 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10755 funny, dest, brack, funny, dest, brack);
10758 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10762 /* Handle extended ${^Foo} variables
10763 * 1999-02-27 mjd-perl-patch@plover.com */
10764 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10768 while (isALNUM(*s) && d < e) {
10772 Perl_croak(aTHX_ ident_too_long);
10777 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10778 PL_lex_state = LEX_INTERPEND;
10781 if (PL_lex_state == LEX_NORMAL) {
10782 if (ckWARN(WARN_AMBIGUOUS) &&
10783 (keyword(dest, d - dest, 0)
10784 || get_cvn_flags(dest, d - dest, 0)))
10788 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10789 "Ambiguous use of %c{%s} resolved to %c%s",
10790 funny, dest, funny, dest);
10795 s = bracket; /* let the parser handle it */
10799 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10800 PL_lex_state = LEX_INTERPEND;
10805 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10807 PERL_UNUSED_CONTEXT;
10811 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10812 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10813 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10814 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10815 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10821 S_scan_pat(pTHX_ char *start, I32 type)
10825 char *s = scan_str(start,!!PL_madskills,FALSE);
10826 const char * const valid_flags =
10827 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10834 const char * const delimiter = skipspace(start);
10838 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10839 : "Search pattern not terminated" ));
10842 pm = (PMOP*)newPMOP(type, 0);
10843 if (PL_multi_open == '?') {
10844 /* This is the only point in the code that sets PMf_ONCE: */
10845 pm->op_pmflags |= PMf_ONCE;
10847 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10848 allows us to restrict the list needed by reset to just the ??
10850 assert(type != OP_TRANS);
10852 MAGIC *mg = mg_find((SV*)PL_curstash, PERL_MAGIC_symtab);
10855 mg = sv_magicext((SV*)PL_curstash, 0, PERL_MAGIC_symtab, 0, 0,
10858 elements = mg->mg_len / sizeof(PMOP**);
10859 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10860 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10861 mg->mg_len = elements * sizeof(PMOP**);
10862 PmopSTASH_set(pm,PL_curstash);
10868 while (*s && strchr(valid_flags, *s))
10869 pmflag(&pm->op_pmflags,*s++);
10871 if (PL_madskills && modstart != s) {
10872 SV* tmptoken = newSVpvn(modstart, s - modstart);
10873 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10876 /* issue a warning if /c is specified,but /g is not */
10877 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10878 && ckWARN(WARN_REGEXP))
10880 Perl_warner(aTHX_ packWARN(WARN_REGEXP),
10881 "Use of /c modifier is meaningless without /g" );
10884 PL_lex_op = (OP*)pm;
10885 yylval.ival = OP_MATCH;
10890 S_scan_subst(pTHX_ char *start)
10901 yylval.ival = OP_NULL;
10903 s = scan_str(start,!!PL_madskills,FALSE);
10906 Perl_croak(aTHX_ "Substitution pattern not terminated");
10908 if (s[-1] == PL_multi_open)
10911 if (PL_madskills) {
10912 CURMAD('q', PL_thisopen);
10913 CURMAD('_', PL_thiswhite);
10914 CURMAD('E', PL_thisstuff);
10915 CURMAD('Q', PL_thisclose);
10916 PL_realtokenstart = s - SvPVX(PL_linestr);
10920 first_start = PL_multi_start;
10921 s = scan_str(s,!!PL_madskills,FALSE);
10923 if (PL_lex_stuff) {
10924 SvREFCNT_dec(PL_lex_stuff);
10925 PL_lex_stuff = NULL;
10927 Perl_croak(aTHX_ "Substitution replacement not terminated");
10929 PL_multi_start = first_start; /* so whole substitution is taken together */
10931 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10934 if (PL_madskills) {
10935 CURMAD('z', PL_thisopen);
10936 CURMAD('R', PL_thisstuff);
10937 CURMAD('Z', PL_thisclose);
10943 if (*s == EXEC_PAT_MOD) {
10947 else if (strchr(S_PAT_MODS, *s))
10948 pmflag(&pm->op_pmflags,*s++);
10954 if (PL_madskills) {
10956 curmad('m', newSVpvn(modstart, s - modstart));
10957 append_madprops(PL_thismad, (OP*)pm, 0);
10961 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10962 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10966 SV * const repl = newSVpvs("");
10968 PL_sublex_info.super_bufptr = s;
10969 PL_sublex_info.super_bufend = PL_bufend;
10971 pm->op_pmflags |= PMf_EVAL;
10974 sv_catpvs(repl, "eval ");
10976 sv_catpvs(repl, "do ");
10978 sv_catpvs(repl, "{");
10979 sv_catsv(repl, PL_lex_repl);
10980 if (strchr(SvPVX(PL_lex_repl), '#'))
10981 sv_catpvs(repl, "\n");
10982 sv_catpvs(repl, "}");
10984 SvREFCNT_dec(PL_lex_repl);
10985 PL_lex_repl = repl;
10988 PL_lex_op = (OP*)pm;
10989 yylval.ival = OP_SUBST;
10994 S_scan_trans(pTHX_ char *start)
11007 yylval.ival = OP_NULL;
11009 s = scan_str(start,!!PL_madskills,FALSE);
11011 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11013 if (s[-1] == PL_multi_open)
11016 if (PL_madskills) {
11017 CURMAD('q', PL_thisopen);
11018 CURMAD('_', PL_thiswhite);
11019 CURMAD('E', PL_thisstuff);
11020 CURMAD('Q', PL_thisclose);
11021 PL_realtokenstart = s - SvPVX(PL_linestr);
11025 s = scan_str(s,!!PL_madskills,FALSE);
11027 if (PL_lex_stuff) {
11028 SvREFCNT_dec(PL_lex_stuff);
11029 PL_lex_stuff = NULL;
11031 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11033 if (PL_madskills) {
11034 CURMAD('z', PL_thisopen);
11035 CURMAD('R', PL_thisstuff);
11036 CURMAD('Z', PL_thisclose);
11039 complement = del = squash = 0;
11046 complement = OPpTRANS_COMPLEMENT;
11049 del = OPpTRANS_DELETE;
11052 squash = OPpTRANS_SQUASH;
11061 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11062 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11063 o->op_private &= ~OPpTRANS_ALL;
11064 o->op_private |= del|squash|complement|
11065 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11066 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11069 yylval.ival = OP_TRANS;
11072 if (PL_madskills) {
11074 curmad('m', newSVpvn(modstart, s - modstart));
11075 append_madprops(PL_thismad, o, 0);
11084 S_scan_heredoc(pTHX_ register char *s)
11088 I32 op_type = OP_SCALAR;
11092 const char *found_newline;
11096 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11098 I32 stuffstart = s - SvPVX(PL_linestr);
11101 PL_realtokenstart = -1;
11106 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11110 while (SPACE_OR_TAB(*peek))
11112 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11115 s = delimcpy(d, e, s, PL_bufend, term, &len);
11125 if (!isALNUM_lazy_if(s,UTF))
11126 deprecate_old("bare << to mean <<\"\"");
11127 for (; isALNUM_lazy_if(s,UTF); s++) {
11132 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11133 Perl_croak(aTHX_ "Delimiter for here document is too long");
11136 len = d - PL_tokenbuf;
11139 if (PL_madskills) {
11140 tstart = PL_tokenbuf + !outer;
11141 PL_thisclose = newSVpvn(tstart, len - !outer);
11142 tstart = SvPVX(PL_linestr) + stuffstart;
11143 PL_thisopen = newSVpvn(tstart, s - tstart);
11144 stuffstart = s - SvPVX(PL_linestr);
11147 #ifndef PERL_STRICT_CR
11148 d = strchr(s, '\r');
11150 char * const olds = s;
11152 while (s < PL_bufend) {
11158 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11167 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11174 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11175 herewas = newSVpvn(s,PL_bufend-s);
11179 herewas = newSVpvn(s-1,found_newline-s+1);
11182 herewas = newSVpvn(s,found_newline-s);
11186 if (PL_madskills) {
11187 tstart = SvPVX(PL_linestr) + stuffstart;
11189 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11191 PL_thisstuff = newSVpvn(tstart, s - tstart);
11194 s += SvCUR(herewas);
11197 stuffstart = s - SvPVX(PL_linestr);
11203 tmpstr = newSV_type(SVt_PVIV);
11204 SvGROW(tmpstr, 80);
11205 if (term == '\'') {
11206 op_type = OP_CONST;
11207 SvIV_set(tmpstr, -1);
11209 else if (term == '`') {
11210 op_type = OP_BACKTICK;
11211 SvIV_set(tmpstr, '\\');
11215 PL_multi_start = CopLINE(PL_curcop);
11216 PL_multi_open = PL_multi_close = '<';
11217 term = *PL_tokenbuf;
11218 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11219 char * const bufptr = PL_sublex_info.super_bufptr;
11220 char * const bufend = PL_sublex_info.super_bufend;
11221 char * const olds = s - SvCUR(herewas);
11222 s = strchr(bufptr, '\n');
11226 while (s < bufend &&
11227 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11229 CopLINE_inc(PL_curcop);
11232 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11233 missingterm(PL_tokenbuf);
11235 sv_setpvn(herewas,bufptr,d-bufptr+1);
11236 sv_setpvn(tmpstr,d+1,s-d);
11238 sv_catpvn(herewas,s,bufend-s);
11239 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11246 while (s < PL_bufend &&
11247 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11249 CopLINE_inc(PL_curcop);
11251 if (s >= PL_bufend) {
11252 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11253 missingterm(PL_tokenbuf);
11255 sv_setpvn(tmpstr,d+1,s-d);
11257 if (PL_madskills) {
11259 sv_catpvn(PL_thisstuff, d + 1, s - d);
11261 PL_thisstuff = newSVpvn(d + 1, s - d);
11262 stuffstart = s - SvPVX(PL_linestr);
11266 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11268 sv_catpvn(herewas,s,PL_bufend-s);
11269 sv_setsv(PL_linestr,herewas);
11270 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11271 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11272 PL_last_lop = PL_last_uni = NULL;
11275 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11276 while (s >= PL_bufend) { /* multiple line string? */
11278 if (PL_madskills) {
11279 tstart = SvPVX(PL_linestr) + stuffstart;
11281 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11283 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11287 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11288 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11289 missingterm(PL_tokenbuf);
11292 stuffstart = s - SvPVX(PL_linestr);
11294 CopLINE_inc(PL_curcop);
11295 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11296 PL_last_lop = PL_last_uni = NULL;
11297 #ifndef PERL_STRICT_CR
11298 if (PL_bufend - PL_linestart >= 2) {
11299 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11300 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11302 PL_bufend[-2] = '\n';
11304 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11306 else if (PL_bufend[-1] == '\r')
11307 PL_bufend[-1] = '\n';
11309 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11310 PL_bufend[-1] = '\n';
11312 if (PERLDB_LINE && PL_curstash != PL_debstash)
11313 update_debugger_info(PL_linestr, NULL, 0);
11314 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11315 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11316 *(SvPVX(PL_linestr) + off ) = ' ';
11317 sv_catsv(PL_linestr,herewas);
11318 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11319 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11323 sv_catsv(tmpstr,PL_linestr);
11328 PL_multi_end = CopLINE(PL_curcop);
11329 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11330 SvPV_shrink_to_cur(tmpstr);
11332 SvREFCNT_dec(herewas);
11334 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11336 else if (PL_encoding)
11337 sv_recode_to_utf8(tmpstr, PL_encoding);
11339 PL_lex_stuff = tmpstr;
11340 yylval.ival = op_type;
11344 /* scan_inputsymbol
11345 takes: current position in input buffer
11346 returns: new position in input buffer
11347 side-effects: yylval and lex_op are set.
11352 <FH> read from filehandle
11353 <pkg::FH> read from package qualified filehandle
11354 <pkg'FH> read from package qualified filehandle
11355 <$fh> read from filehandle in $fh
11356 <*.h> filename glob
11361 S_scan_inputsymbol(pTHX_ char *start)
11364 register char *s = start; /* current position in buffer */
11368 char *d = PL_tokenbuf; /* start of temp holding space */
11369 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11371 end = strchr(s, '\n');
11374 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11376 /* die if we didn't have space for the contents of the <>,
11377 or if it didn't end, or if we see a newline
11380 if (len >= (I32)sizeof PL_tokenbuf)
11381 Perl_croak(aTHX_ "Excessively long <> operator");
11383 Perl_croak(aTHX_ "Unterminated <> operator");
11388 Remember, only scalar variables are interpreted as filehandles by
11389 this code. Anything more complex (e.g., <$fh{$num}>) will be
11390 treated as a glob() call.
11391 This code makes use of the fact that except for the $ at the front,
11392 a scalar variable and a filehandle look the same.
11394 if (*d == '$' && d[1]) d++;
11396 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11397 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11400 /* If we've tried to read what we allow filehandles to look like, and
11401 there's still text left, then it must be a glob() and not a getline.
11402 Use scan_str to pull out the stuff between the <> and treat it
11403 as nothing more than a string.
11406 if (d - PL_tokenbuf != len) {
11407 yylval.ival = OP_GLOB;
11408 s = scan_str(start,!!PL_madskills,FALSE);
11410 Perl_croak(aTHX_ "Glob not terminated");
11414 bool readline_overriden = FALSE;
11417 /* we're in a filehandle read situation */
11420 /* turn <> into <ARGV> */
11422 Copy("ARGV",d,5,char);
11424 /* Check whether readline() is overriden */
11425 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11427 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11429 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11430 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11431 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11432 readline_overriden = TRUE;
11434 /* if <$fh>, create the ops to turn the variable into a
11438 /* try to find it in the pad for this block, otherwise find
11439 add symbol table ops
11441 const PADOFFSET tmp = pad_findmy(d);
11442 if (tmp != NOT_IN_PAD) {
11443 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11444 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11445 HEK * const stashname = HvNAME_HEK(stash);
11446 SV * const sym = sv_2mortal(newSVhek(stashname));
11447 sv_catpvs(sym, "::");
11448 sv_catpv(sym, d+1);
11453 OP * const o = newOP(OP_PADSV, 0);
11455 PL_lex_op = readline_overriden
11456 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11457 append_elem(OP_LIST, o,
11458 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11459 : (OP*)newUNOP(OP_READLINE, 0, o);
11468 ? (GV_ADDMULTI | GV_ADDINEVAL)
11471 PL_lex_op = readline_overriden
11472 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11473 append_elem(OP_LIST,
11474 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11475 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11476 : (OP*)newUNOP(OP_READLINE, 0,
11477 newUNOP(OP_RV2SV, 0,
11478 newGVOP(OP_GV, 0, gv)));
11480 if (!readline_overriden)
11481 PL_lex_op->op_flags |= OPf_SPECIAL;
11482 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11483 yylval.ival = OP_NULL;
11486 /* If it's none of the above, it must be a literal filehandle
11487 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11489 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11490 PL_lex_op = readline_overriden
11491 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11492 append_elem(OP_LIST,
11493 newGVOP(OP_GV, 0, gv),
11494 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11495 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11496 yylval.ival = OP_NULL;
11505 takes: start position in buffer
11506 keep_quoted preserve \ on the embedded delimiter(s)
11507 keep_delims preserve the delimiters around the string
11508 returns: position to continue reading from buffer
11509 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11510 updates the read buffer.
11512 This subroutine pulls a string out of the input. It is called for:
11513 q single quotes q(literal text)
11514 ' single quotes 'literal text'
11515 qq double quotes qq(interpolate $here please)
11516 " double quotes "interpolate $here please"
11517 qx backticks qx(/bin/ls -l)
11518 ` backticks `/bin/ls -l`
11519 qw quote words @EXPORT_OK = qw( func() $spam )
11520 m// regexp match m/this/
11521 s/// regexp substitute s/this/that/
11522 tr/// string transliterate tr/this/that/
11523 y/// string transliterate y/this/that/
11524 ($*@) sub prototypes sub foo ($)
11525 (stuff) sub attr parameters sub foo : attr(stuff)
11526 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11528 In most of these cases (all but <>, patterns and transliterate)
11529 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11530 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11531 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11534 It skips whitespace before the string starts, and treats the first
11535 character as the delimiter. If the delimiter is one of ([{< then
11536 the corresponding "close" character )]}> is used as the closing
11537 delimiter. It allows quoting of delimiters, and if the string has
11538 balanced delimiters ([{<>}]) it allows nesting.
11540 On success, the SV with the resulting string is put into lex_stuff or,
11541 if that is already non-NULL, into lex_repl. The second case occurs only
11542 when parsing the RHS of the special constructs s/// and tr/// (y///).
11543 For convenience, the terminating delimiter character is stuffed into
11548 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11551 SV *sv; /* scalar value: string */
11552 const char *tmps; /* temp string, used for delimiter matching */
11553 register char *s = start; /* current position in the buffer */
11554 register char term; /* terminating character */
11555 register char *to; /* current position in the sv's data */
11556 I32 brackets = 1; /* bracket nesting level */
11557 bool has_utf8 = FALSE; /* is there any utf8 content? */
11558 I32 termcode; /* terminating char. code */
11559 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11560 STRLEN termlen; /* length of terminating string */
11561 int last_off = 0; /* last position for nesting bracket */
11567 /* skip space before the delimiter */
11573 if (PL_realtokenstart >= 0) {
11574 stuffstart = PL_realtokenstart;
11575 PL_realtokenstart = -1;
11578 stuffstart = start - SvPVX(PL_linestr);
11580 /* mark where we are, in case we need to report errors */
11583 /* after skipping whitespace, the next character is the terminator */
11586 termcode = termstr[0] = term;
11590 termcode = utf8_to_uvchr((U8*)s, &termlen);
11591 Copy(s, termstr, termlen, U8);
11592 if (!UTF8_IS_INVARIANT(term))
11596 /* mark where we are */
11597 PL_multi_start = CopLINE(PL_curcop);
11598 PL_multi_open = term;
11600 /* find corresponding closing delimiter */
11601 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11602 termcode = termstr[0] = term = tmps[5];
11604 PL_multi_close = term;
11606 /* create a new SV to hold the contents. 79 is the SV's initial length.
11607 What a random number. */
11608 sv = newSV_type(SVt_PVIV);
11610 SvIV_set(sv, termcode);
11611 (void)SvPOK_only(sv); /* validate pointer */
11613 /* move past delimiter and try to read a complete string */
11615 sv_catpvn(sv, s, termlen);
11618 tstart = SvPVX(PL_linestr) + stuffstart;
11619 if (!PL_thisopen && !keep_delims) {
11620 PL_thisopen = newSVpvn(tstart, s - tstart);
11621 stuffstart = s - SvPVX(PL_linestr);
11625 if (PL_encoding && !UTF) {
11629 int offset = s - SvPVX_const(PL_linestr);
11630 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11631 &offset, (char*)termstr, termlen);
11632 const char * const ns = SvPVX_const(PL_linestr) + offset;
11633 char * const svlast = SvEND(sv) - 1;
11635 for (; s < ns; s++) {
11636 if (*s == '\n' && !PL_rsfp)
11637 CopLINE_inc(PL_curcop);
11640 goto read_more_line;
11642 /* handle quoted delimiters */
11643 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11645 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11647 if ((svlast-1 - t) % 2) {
11648 if (!keep_quoted) {
11649 *(svlast-1) = term;
11651 SvCUR_set(sv, SvCUR(sv) - 1);
11656 if (PL_multi_open == PL_multi_close) {
11662 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11663 /* At here, all closes are "was quoted" one,
11664 so we don't check PL_multi_close. */
11666 if (!keep_quoted && *(t+1) == PL_multi_open)
11671 else if (*t == PL_multi_open)
11679 SvCUR_set(sv, w - SvPVX_const(sv));
11681 last_off = w - SvPVX(sv);
11682 if (--brackets <= 0)
11687 if (!keep_delims) {
11688 SvCUR_set(sv, SvCUR(sv) - 1);
11694 /* extend sv if need be */
11695 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11696 /* set 'to' to the next character in the sv's string */
11697 to = SvPVX(sv)+SvCUR(sv);
11699 /* if open delimiter is the close delimiter read unbridle */
11700 if (PL_multi_open == PL_multi_close) {
11701 for (; s < PL_bufend; s++,to++) {
11702 /* embedded newlines increment the current line number */
11703 if (*s == '\n' && !PL_rsfp)
11704 CopLINE_inc(PL_curcop);
11705 /* handle quoted delimiters */
11706 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11707 if (!keep_quoted && s[1] == term)
11709 /* any other quotes are simply copied straight through */
11713 /* terminate when run out of buffer (the for() condition), or
11714 have found the terminator */
11715 else if (*s == term) {
11718 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11721 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11727 /* if the terminator isn't the same as the start character (e.g.,
11728 matched brackets), we have to allow more in the quoting, and
11729 be prepared for nested brackets.
11732 /* read until we run out of string, or we find the terminator */
11733 for (; s < PL_bufend; s++,to++) {
11734 /* embedded newlines increment the line count */
11735 if (*s == '\n' && !PL_rsfp)
11736 CopLINE_inc(PL_curcop);
11737 /* backslashes can escape the open or closing characters */
11738 if (*s == '\\' && s+1 < PL_bufend) {
11739 if (!keep_quoted &&
11740 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11745 /* allow nested opens and closes */
11746 else if (*s == PL_multi_close && --brackets <= 0)
11748 else if (*s == PL_multi_open)
11750 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11755 /* terminate the copied string and update the sv's end-of-string */
11757 SvCUR_set(sv, to - SvPVX_const(sv));
11760 * this next chunk reads more into the buffer if we're not done yet
11764 break; /* handle case where we are done yet :-) */
11766 #ifndef PERL_STRICT_CR
11767 if (to - SvPVX_const(sv) >= 2) {
11768 if ((to[-2] == '\r' && to[-1] == '\n') ||
11769 (to[-2] == '\n' && to[-1] == '\r'))
11773 SvCUR_set(sv, to - SvPVX_const(sv));
11775 else if (to[-1] == '\r')
11778 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11783 /* if we're out of file, or a read fails, bail and reset the current
11784 line marker so we can report where the unterminated string began
11787 if (PL_madskills) {
11788 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11790 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11792 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11796 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11798 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11804 /* we read a line, so increment our line counter */
11805 CopLINE_inc(PL_curcop);
11807 /* update debugger info */
11808 if (PERLDB_LINE && PL_curstash != PL_debstash)
11809 update_debugger_info(PL_linestr, NULL, 0);
11811 /* having changed the buffer, we must update PL_bufend */
11812 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11813 PL_last_lop = PL_last_uni = NULL;
11816 /* at this point, we have successfully read the delimited string */
11818 if (!PL_encoding || UTF) {
11820 if (PL_madskills) {
11821 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11822 const int len = s - tstart;
11824 sv_catpvn(PL_thisstuff, tstart, len);
11826 PL_thisstuff = newSVpvn(tstart, len);
11827 if (!PL_thisclose && !keep_delims)
11828 PL_thisclose = newSVpvn(s,termlen);
11833 sv_catpvn(sv, s, termlen);
11838 if (PL_madskills) {
11839 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11840 const int len = s - tstart - termlen;
11842 sv_catpvn(PL_thisstuff, tstart, len);
11844 PL_thisstuff = newSVpvn(tstart, len);
11845 if (!PL_thisclose && !keep_delims)
11846 PL_thisclose = newSVpvn(s - termlen,termlen);
11850 if (has_utf8 || PL_encoding)
11853 PL_multi_end = CopLINE(PL_curcop);
11855 /* if we allocated too much space, give some back */
11856 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11857 SvLEN_set(sv, SvCUR(sv) + 1);
11858 SvPV_renew(sv, SvLEN(sv));
11861 /* decide whether this is the first or second quoted string we've read
11874 takes: pointer to position in buffer
11875 returns: pointer to new position in buffer
11876 side-effects: builds ops for the constant in yylval.op
11878 Read a number in any of the formats that Perl accepts:
11880 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11881 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11884 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11886 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11889 If it reads a number without a decimal point or an exponent, it will
11890 try converting the number to an integer and see if it can do so
11891 without loss of precision.
11895 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11898 register const char *s = start; /* current position in buffer */
11899 register char *d; /* destination in temp buffer */
11900 register char *e; /* end of temp buffer */
11901 NV nv; /* number read, as a double */
11902 SV *sv = NULL; /* place to put the converted number */
11903 bool floatit; /* boolean: int or float? */
11904 const char *lastub = NULL; /* position of last underbar */
11905 static char const number_too_long[] = "Number too long";
11907 /* We use the first character to decide what type of number this is */
11911 Perl_croak(aTHX_ "panic: scan_num");
11913 /* if it starts with a 0, it could be an octal number, a decimal in
11914 0.13 disguise, or a hexadecimal number, or a binary number. */
11918 u holds the "number so far"
11919 shift the power of 2 of the base
11920 (hex == 4, octal == 3, binary == 1)
11921 overflowed was the number more than we can hold?
11923 Shift is used when we add a digit. It also serves as an "are
11924 we in octal/hex/binary?" indicator to disallow hex characters
11925 when in octal mode.
11930 bool overflowed = FALSE;
11931 bool just_zero = TRUE; /* just plain 0 or binary number? */
11932 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11933 static const char* const bases[5] =
11934 { "", "binary", "", "octal", "hexadecimal" };
11935 static const char* const Bases[5] =
11936 { "", "Binary", "", "Octal", "Hexadecimal" };
11937 static const char* const maxima[5] =
11939 "0b11111111111111111111111111111111",
11943 const char *base, *Base, *max;
11945 /* check for hex */
11950 } else if (s[1] == 'b') {
11955 /* check for a decimal in disguise */
11956 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11958 /* so it must be octal */
11965 if (ckWARN(WARN_SYNTAX))
11966 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11967 "Misplaced _ in number");
11971 base = bases[shift];
11972 Base = Bases[shift];
11973 max = maxima[shift];
11975 /* read the rest of the number */
11977 /* x is used in the overflow test,
11978 b is the digit we're adding on. */
11983 /* if we don't mention it, we're done */
11987 /* _ are ignored -- but warned about if consecutive */
11989 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11990 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11991 "Misplaced _ in number");
11995 /* 8 and 9 are not octal */
11996 case '8': case '9':
11998 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12002 case '2': case '3': case '4':
12003 case '5': case '6': case '7':
12005 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12008 case '0': case '1':
12009 b = *s++ & 15; /* ASCII digit -> value of digit */
12013 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12014 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12015 /* make sure they said 0x */
12018 b = (*s++ & 7) + 9;
12020 /* Prepare to put the digit we have onto the end
12021 of the number so far. We check for overflows.
12027 x = u << shift; /* make room for the digit */
12029 if ((x >> shift) != u
12030 && !(PL_hints & HINT_NEW_BINARY)) {
12033 if (ckWARN_d(WARN_OVERFLOW))
12034 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12035 "Integer overflow in %s number",
12038 u = x | b; /* add the digit to the end */
12041 n *= nvshift[shift];
12042 /* If an NV has not enough bits in its
12043 * mantissa to represent an UV this summing of
12044 * small low-order numbers is a waste of time
12045 * (because the NV cannot preserve the
12046 * low-order bits anyway): we could just
12047 * remember when did we overflow and in the
12048 * end just multiply n by the right
12056 /* if we get here, we had success: make a scalar value from
12061 /* final misplaced underbar check */
12062 if (s[-1] == '_') {
12063 if (ckWARN(WARN_SYNTAX))
12064 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12069 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
12070 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12071 "%s number > %s non-portable",
12077 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
12078 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
12079 "%s number > %s non-portable",
12084 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12085 sv = new_constant(start, s - start, "integer",
12086 sv, NULL, NULL, 0);
12087 else if (PL_hints & HINT_NEW_BINARY)
12088 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12093 handle decimal numbers.
12094 we're also sent here when we read a 0 as the first digit
12096 case '1': case '2': case '3': case '4': case '5':
12097 case '6': case '7': case '8': case '9': case '.':
12100 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12103 /* read next group of digits and _ and copy into d */
12104 while (isDIGIT(*s) || *s == '_') {
12105 /* skip underscores, checking for misplaced ones
12109 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12110 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12111 "Misplaced _ in number");
12115 /* check for end of fixed-length buffer */
12117 Perl_croak(aTHX_ number_too_long);
12118 /* if we're ok, copy the character */
12123 /* final misplaced underbar check */
12124 if (lastub && s == lastub + 1) {
12125 if (ckWARN(WARN_SYNTAX))
12126 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12129 /* read a decimal portion if there is one. avoid
12130 3..5 being interpreted as the number 3. followed
12133 if (*s == '.' && s[1] != '.') {
12138 if (ckWARN(WARN_SYNTAX))
12139 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12140 "Misplaced _ in number");
12144 /* copy, ignoring underbars, until we run out of digits.
12146 for (; isDIGIT(*s) || *s == '_'; s++) {
12147 /* fixed length buffer check */
12149 Perl_croak(aTHX_ number_too_long);
12151 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12152 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12153 "Misplaced _ in number");
12159 /* fractional part ending in underbar? */
12160 if (s[-1] == '_') {
12161 if (ckWARN(WARN_SYNTAX))
12162 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12163 "Misplaced _ in number");
12165 if (*s == '.' && isDIGIT(s[1])) {
12166 /* oops, it's really a v-string, but without the "v" */
12172 /* read exponent part, if present */
12173 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12177 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12178 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12180 /* stray preinitial _ */
12182 if (ckWARN(WARN_SYNTAX))
12183 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12184 "Misplaced _ in number");
12188 /* allow positive or negative exponent */
12189 if (*s == '+' || *s == '-')
12192 /* stray initial _ */
12194 if (ckWARN(WARN_SYNTAX))
12195 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12196 "Misplaced _ in number");
12200 /* read digits of exponent */
12201 while (isDIGIT(*s) || *s == '_') {
12204 Perl_croak(aTHX_ number_too_long);
12208 if (((lastub && s == lastub + 1) ||
12209 (!isDIGIT(s[1]) && s[1] != '_'))
12210 && ckWARN(WARN_SYNTAX))
12211 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12212 "Misplaced _ in number");
12219 /* make an sv from the string */
12223 We try to do an integer conversion first if no characters
12224 indicating "float" have been found.
12229 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12231 if (flags == IS_NUMBER_IN_UV) {
12233 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12236 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12237 if (uv <= (UV) IV_MIN)
12238 sv_setiv(sv, -(IV)uv);
12245 /* terminate the string */
12247 nv = Atof(PL_tokenbuf);
12252 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12253 const char *const key = floatit ? "float" : "integer";
12254 const STRLEN keylen = floatit ? 5 : 7;
12255 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12256 key, keylen, sv, NULL, NULL, 0);
12260 /* if it starts with a v, it could be a v-string */
12263 sv = newSV(5); /* preallocate storage space */
12264 s = scan_vstring(s, PL_bufend, sv);
12268 /* make the op for the constant and return */
12271 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12273 lvalp->opval = NULL;
12279 S_scan_formline(pTHX_ register char *s)
12282 register char *eol;
12284 SV * const stuff = newSVpvs("");
12285 bool needargs = FALSE;
12286 bool eofmt = FALSE;
12288 char *tokenstart = s;
12291 if (PL_madskills) {
12292 savewhite = PL_thiswhite;
12297 while (!needargs) {
12300 #ifdef PERL_STRICT_CR
12301 while (SPACE_OR_TAB(*t))
12304 while (SPACE_OR_TAB(*t) || *t == '\r')
12307 if (*t == '\n' || t == PL_bufend) {
12312 if (PL_in_eval && !PL_rsfp) {
12313 eol = (char *) memchr(s,'\n',PL_bufend-s);
12318 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12320 for (t = s; t < eol; t++) {
12321 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12323 goto enough; /* ~~ must be first line in formline */
12325 if (*t == '@' || *t == '^')
12329 sv_catpvn(stuff, s, eol-s);
12330 #ifndef PERL_STRICT_CR
12331 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12332 char *end = SvPVX(stuff) + SvCUR(stuff);
12335 SvCUR_set(stuff, SvCUR(stuff) - 1);
12345 if (PL_madskills) {
12347 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12349 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12352 s = filter_gets(PL_linestr, PL_rsfp, 0);
12354 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12356 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12358 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12359 PL_last_lop = PL_last_uni = NULL;
12368 if (SvCUR(stuff)) {
12371 PL_lex_state = LEX_NORMAL;
12372 start_force(PL_curforce);
12373 NEXTVAL_NEXTTOKE.ival = 0;
12377 PL_lex_state = LEX_FORMLINE;
12379 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12381 else if (PL_encoding)
12382 sv_recode_to_utf8(stuff, PL_encoding);
12384 start_force(PL_curforce);
12385 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12387 start_force(PL_curforce);
12388 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12392 SvREFCNT_dec(stuff);
12394 PL_lex_formbrack = 0;
12398 if (PL_madskills) {
12400 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12402 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12403 PL_thiswhite = savewhite;
12410 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12413 const I32 oldsavestack_ix = PL_savestack_ix;
12414 CV* const outsidecv = PL_compcv;
12417 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12419 SAVEI32(PL_subline);
12420 save_item(PL_subname);
12421 SAVESPTR(PL_compcv);
12423 PL_compcv = (CV*)newSV_type(is_format ? SVt_PVFM : SVt_PVCV);
12424 CvFLAGS(PL_compcv) |= flags;
12426 PL_subline = CopLINE(PL_curcop);
12427 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12428 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12429 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12431 return oldsavestack_ix;
12435 #pragma segment Perl_yylex
12438 Perl_yywarn(pTHX_ const char *s)
12441 PL_in_eval |= EVAL_WARNONLY;
12443 PL_in_eval &= ~EVAL_WARNONLY;
12448 Perl_yyerror(pTHX_ const char *s)
12451 const char *where = NULL;
12452 const char *context = NULL;
12455 int yychar = PL_parser->yychar;
12457 if (!yychar || (yychar == ';' && !PL_rsfp))
12459 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12460 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12461 PL_oldbufptr != PL_bufptr) {
12464 The code below is removed for NetWare because it abends/crashes on NetWare
12465 when the script has error such as not having the closing quotes like:
12466 if ($var eq "value)
12467 Checking of white spaces is anyway done in NetWare code.
12470 while (isSPACE(*PL_oldoldbufptr))
12473 context = PL_oldoldbufptr;
12474 contlen = PL_bufptr - PL_oldoldbufptr;
12476 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12477 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12480 The code below is removed for NetWare because it abends/crashes on NetWare
12481 when the script has error such as not having the closing quotes like:
12482 if ($var eq "value)
12483 Checking of white spaces is anyway done in NetWare code.
12486 while (isSPACE(*PL_oldbufptr))
12489 context = PL_oldbufptr;
12490 contlen = PL_bufptr - PL_oldbufptr;
12492 else if (yychar > 255)
12493 where = "next token ???";
12494 else if (yychar == -2) { /* YYEMPTY */
12495 if (PL_lex_state == LEX_NORMAL ||
12496 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12497 where = "at end of line";
12498 else if (PL_lex_inpat)
12499 where = "within pattern";
12501 where = "within string";
12504 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12506 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12507 else if (isPRINT_LC(yychar)) {
12508 const char string = yychar;
12509 sv_catpvn(where_sv, &string, 1);
12512 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12513 where = SvPVX_const(where_sv);
12515 msg = sv_2mortal(newSVpv(s, 0));
12516 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12517 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12519 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12521 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12522 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12523 Perl_sv_catpvf(aTHX_ msg,
12524 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12525 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12528 if (PL_in_eval & EVAL_WARNONLY) {
12529 if (ckWARN_d(WARN_SYNTAX))
12530 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12534 if (PL_error_count >= 10) {
12535 if (PL_in_eval && SvCUR(ERRSV))
12536 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12537 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12539 Perl_croak(aTHX_ "%s has too many errors.\n",
12540 OutCopFILE(PL_curcop));
12543 PL_in_my_stash = NULL;
12547 #pragma segment Main
12551 S_swallow_bom(pTHX_ U8 *s)
12554 const STRLEN slen = SvCUR(PL_linestr);
12557 if (s[1] == 0xFE) {
12558 /* UTF-16 little-endian? (or UTF32-LE?) */
12559 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12560 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12561 #ifndef PERL_NO_UTF16_FILTER
12562 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12565 if (PL_bufend > (char*)s) {
12569 filter_add(utf16rev_textfilter, NULL);
12570 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12571 utf16_to_utf8_reversed(s, news,
12572 PL_bufend - (char*)s - 1,
12574 sv_setpvn(PL_linestr, (const char*)news, newlen);
12576 s = (U8*)SvPVX(PL_linestr);
12577 Copy(news, s, newlen, U8);
12581 SvUTF8_on(PL_linestr);
12582 s = (U8*)SvPVX(PL_linestr);
12584 /* FIXME - is this a general bug fix? */
12587 PL_bufend = SvPVX(PL_linestr) + newlen;
12590 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12595 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12596 #ifndef PERL_NO_UTF16_FILTER
12597 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12600 if (PL_bufend > (char *)s) {
12604 filter_add(utf16_textfilter, NULL);
12605 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12606 utf16_to_utf8(s, news,
12607 PL_bufend - (char*)s,
12609 sv_setpvn(PL_linestr, (const char*)news, newlen);
12611 SvUTF8_on(PL_linestr);
12612 s = (U8*)SvPVX(PL_linestr);
12613 PL_bufend = SvPVX(PL_linestr) + newlen;
12616 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12621 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12622 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12623 s += 3; /* UTF-8 */
12629 if (s[2] == 0xFE && s[3] == 0xFF) {
12630 /* UTF-32 big-endian */
12631 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12634 else if (s[2] == 0 && s[3] != 0) {
12637 * are a good indicator of UTF-16BE. */
12638 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12644 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12645 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12646 s += 4; /* UTF-8 */
12652 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12655 * are a good indicator of UTF-16LE. */
12656 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12664 #ifndef PERL_NO_UTF16_FILTER
12666 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12669 const STRLEN old = SvCUR(sv);
12670 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12671 DEBUG_P(PerlIO_printf(Perl_debug_log,
12672 "utf16_textfilter(%p): %d %d (%d)\n",
12673 FPTR2DPTR(void *, utf16_textfilter),
12674 idx, maxlen, (int) count));
12678 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12679 Copy(SvPVX_const(sv), tmps, old, char);
12680 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12681 SvCUR(sv) - old, &newlen);
12682 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12684 DEBUG_P({sv_dump(sv);});
12689 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12692 const STRLEN old = SvCUR(sv);
12693 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12694 DEBUG_P(PerlIO_printf(Perl_debug_log,
12695 "utf16rev_textfilter(%p): %d %d (%d)\n",
12696 FPTR2DPTR(void *, utf16rev_textfilter),
12697 idx, maxlen, (int) count));
12701 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12702 Copy(SvPVX_const(sv), tmps, old, char);
12703 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12704 SvCUR(sv) - old, &newlen);
12705 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12707 DEBUG_P({ sv_dump(sv); });
12713 Returns a pointer to the next character after the parsed
12714 vstring, as well as updating the passed in sv.
12716 Function must be called like
12719 s = scan_vstring(s,e,sv);
12721 where s and e are the start and end of the string.
12722 The sv should already be large enough to store the vstring
12723 passed in, for performance reasons.
12728 Perl_scan_vstring(pTHX_ const char *s, const char *e, SV *sv)
12731 const char *pos = s;
12732 const char *start = s;
12733 if (*pos == 'v') pos++; /* get past 'v' */
12734 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12736 if ( *pos != '.') {
12737 /* this may not be a v-string if followed by => */
12738 const char *next = pos;
12739 while (next < e && isSPACE(*next))
12741 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12742 /* return string not v-string */
12743 sv_setpvn(sv,(char *)s,pos-s);
12744 return (char *)pos;
12748 if (!isALPHA(*pos)) {
12749 U8 tmpbuf[UTF8_MAXBYTES+1];
12752 s++; /* get past 'v' */
12754 sv_setpvn(sv, "", 0);
12757 /* this is atoi() that tolerates underscores */
12760 const char *end = pos;
12762 while (--end >= s) {
12764 const UV orev = rev;
12765 rev += (*end - '0') * mult;
12767 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12768 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12769 "Integer overflow in decimal number");
12773 if (rev > 0x7FFFFFFF)
12774 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12776 /* Append native character for the rev point */
12777 tmpend = uvchr_to_utf8(tmpbuf, rev);
12778 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12779 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12781 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12787 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12791 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12799 * c-indentation-style: bsd
12800 * c-basic-offset: 4
12801 * indent-tabs-mode: t
12804 * ex: set ts=8 sts=4 sw=4 noet: