3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 #define PERL_IN_TOKE_C
28 #define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
31 #define pl_yylval (PL_parser->yylval)
33 /* YYINITDEPTH -- initial size of the parser's stacks. */
34 #define YYINITDEPTH 200
36 /* XXX temporary backwards compatibility */
37 #define PL_lex_brackets (PL_parser->lex_brackets)
38 #define PL_lex_brackstack (PL_parser->lex_brackstack)
39 #define PL_lex_casemods (PL_parser->lex_casemods)
40 #define PL_lex_casestack (PL_parser->lex_casestack)
41 #define PL_lex_defer (PL_parser->lex_defer)
42 #define PL_lex_dojoin (PL_parser->lex_dojoin)
43 #define PL_lex_expect (PL_parser->lex_expect)
44 #define PL_lex_formbrack (PL_parser->lex_formbrack)
45 #define PL_lex_inpat (PL_parser->lex_inpat)
46 #define PL_lex_inwhat (PL_parser->lex_inwhat)
47 #define PL_lex_op (PL_parser->lex_op)
48 #define PL_lex_repl (PL_parser->lex_repl)
49 #define PL_lex_starts (PL_parser->lex_starts)
50 #define PL_lex_stuff (PL_parser->lex_stuff)
51 #define PL_multi_start (PL_parser->multi_start)
52 #define PL_multi_open (PL_parser->multi_open)
53 #define PL_multi_close (PL_parser->multi_close)
54 #define PL_pending_ident (PL_parser->pending_ident)
55 #define PL_preambled (PL_parser->preambled)
56 #define PL_sublex_info (PL_parser->sublex_info)
57 #define PL_linestr (PL_parser->linestr)
58 #define PL_expect (PL_parser->expect)
59 #define PL_copline (PL_parser->copline)
60 #define PL_bufptr (PL_parser->bufptr)
61 #define PL_oldbufptr (PL_parser->oldbufptr)
62 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63 #define PL_linestart (PL_parser->linestart)
64 #define PL_bufend (PL_parser->bufend)
65 #define PL_last_uni (PL_parser->last_uni)
66 #define PL_last_lop (PL_parser->last_lop)
67 #define PL_last_lop_op (PL_parser->last_lop_op)
68 #define PL_lex_state (PL_parser->lex_state)
69 #define PL_rsfp (PL_parser->rsfp)
70 #define PL_rsfp_filters (PL_parser->rsfp_filters)
71 #define PL_in_my (PL_parser->in_my)
72 #define PL_in_my_stash (PL_parser->in_my_stash)
73 #define PL_tokenbuf (PL_parser->tokenbuf)
74 #define PL_multi_end (PL_parser->multi_end)
75 #define PL_error_count (PL_parser->error_count)
78 # define PL_endwhite (PL_parser->endwhite)
79 # define PL_faketokens (PL_parser->faketokens)
80 # define PL_lasttoke (PL_parser->lasttoke)
81 # define PL_nextwhite (PL_parser->nextwhite)
82 # define PL_realtokenstart (PL_parser->realtokenstart)
83 # define PL_skipwhite (PL_parser->skipwhite)
84 # define PL_thisclose (PL_parser->thisclose)
85 # define PL_thismad (PL_parser->thismad)
86 # define PL_thisopen (PL_parser->thisopen)
87 # define PL_thisstuff (PL_parser->thisstuff)
88 # define PL_thistoken (PL_parser->thistoken)
89 # define PL_thiswhite (PL_parser->thiswhite)
90 # define PL_thiswhite (PL_parser->thiswhite)
91 # define PL_nexttoke (PL_parser->nexttoke)
92 # define PL_curforce (PL_parser->curforce)
94 # define PL_nexttoke (PL_parser->nexttoke)
95 # define PL_nexttype (PL_parser->nexttype)
96 # define PL_nextval (PL_parser->nextval)
99 /* This can't be done with embed.fnc, because struct yy_parser contains a
100 member named pending_ident, which clashes with the generated #define */
102 S_pending_ident(pTHX);
104 static const char ident_too_long[] = "Identifier too long";
107 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
108 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
110 # define CURMAD(slot,sv)
111 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
114 #define XFAKEBRACK 128
115 #define XENUMMASK 127
117 #ifdef USE_UTF8_SCRIPTS
118 # define UTF (!IN_BYTES)
120 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
123 /* The maximum number of characters preceding the unrecognized one to display */
124 #define UNRECOGNIZED_PRECEDE_COUNT 10
126 /* In variables named $^X, these are the legal values for X.
127 * 1999-02-27 mjd-perl-patch@plover.com */
128 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
130 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
132 /* LEX_* are values for PL_lex_state, the state of the lexer.
133 * They are arranged oddly so that the guard on the switch statement
134 * can get by with a single comparison (if the compiler is smart enough).
137 /* #define LEX_NOTPARSING 11 is done in perl.h. */
139 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
140 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
141 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
142 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
143 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
145 /* at end of code, eg "$x" followed by: */
146 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
147 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
149 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
150 string or after \E, $foo, etc */
151 #define LEX_INTERPCONST 2 /* NOT USED */
152 #define LEX_FORMLINE 1 /* expecting a format line */
153 #define LEX_KNOWNEXT 0 /* next token known; just return it */
157 static const char* const lex_state_names[] = {
176 #include "keywords.h"
178 /* CLINE is a macro that ensures PL_copline has a sane value */
183 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
186 # define SKIPSPACE0(s) skipspace0(s)
187 # define SKIPSPACE1(s) skipspace1(s)
188 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
189 # define PEEKSPACE(s) skipspace2(s,0)
191 # define SKIPSPACE0(s) skipspace(s)
192 # define SKIPSPACE1(s) skipspace(s)
193 # define SKIPSPACE2(s,tsv) skipspace(s)
194 # define PEEKSPACE(s) skipspace(s)
198 * Convenience functions to return different tokens and prime the
199 * lexer for the next token. They all take an argument.
201 * TOKEN : generic token (used for '(', DOLSHARP, etc)
202 * OPERATOR : generic operator
203 * AOPERATOR : assignment operator
204 * PREBLOCK : beginning the block after an if, while, foreach, ...
205 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
206 * PREREF : *EXPR where EXPR is not a simple identifier
207 * TERM : expression term
208 * LOOPX : loop exiting command (goto, last, dump, etc)
209 * FTST : file test operator
210 * FUN0 : zero-argument function
211 * FUN1 : not used, except for not, which isn't a UNIOP
212 * BOop : bitwise or or xor
214 * SHop : shift operator
215 * PWop : power operator
216 * PMop : pattern-matching operator
217 * Aop : addition-level operator
218 * Mop : multiplication-level operator
219 * Eop : equality-testing operator
220 * Rop : relational operator <= != gt
222 * Also see LOP and lop() below.
225 #ifdef DEBUGGING /* Serve -DT. */
226 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
228 # define REPORT(retval) (retval)
231 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
232 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
233 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
234 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
235 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
236 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
237 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
238 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
239 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
240 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
241 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
242 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
243 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
244 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
245 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
246 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
247 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
248 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
249 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
250 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
252 /* This bit of chicanery makes a unary function followed by
253 * a parenthesis into a function with one argument, highest precedence.
254 * The UNIDOR macro is for unary functions that can be followed by the //
255 * operator (such as C<shift // 0>).
257 #define UNI2(f,x) { \
258 pl_yylval.ival = f; \
261 PL_last_uni = PL_oldbufptr; \
262 PL_last_lop_op = f; \
264 return REPORT( (int)FUNC1 ); \
266 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
268 #define UNI(f) UNI2(f,XTERM)
269 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
271 #define UNIBRACK(f) { \
272 pl_yylval.ival = f; \
274 PL_last_uni = PL_oldbufptr; \
276 return REPORT( (int)FUNC1 ); \
278 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
281 /* grandfather return to old style */
282 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
286 /* how to interpret the pl_yylval associated with the token */
290 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
296 static struct debug_tokens {
298 enum token_type type;
300 } const debug_tokens[] =
302 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
303 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
304 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
305 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
306 { ARROW, TOKENTYPE_NONE, "ARROW" },
307 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
308 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
309 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
310 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
311 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
312 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
313 { DO, TOKENTYPE_NONE, "DO" },
314 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
315 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
316 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
317 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
318 { ELSE, TOKENTYPE_NONE, "ELSE" },
319 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
320 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
321 { FOR, TOKENTYPE_IVAL, "FOR" },
322 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
323 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
324 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
325 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
326 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
327 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
328 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
329 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
330 { IF, TOKENTYPE_IVAL, "IF" },
331 { LABEL, TOKENTYPE_PVAL, "LABEL" },
332 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
333 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
334 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
335 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
336 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
337 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
338 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
339 { MY, TOKENTYPE_IVAL, "MY" },
340 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
341 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
342 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
343 { OROP, TOKENTYPE_IVAL, "OROP" },
344 { OROR, TOKENTYPE_NONE, "OROR" },
345 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
346 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
347 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
348 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
349 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
350 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
351 { PREINC, TOKENTYPE_NONE, "PREINC" },
352 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
353 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
354 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
355 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
356 { SUB, TOKENTYPE_NONE, "SUB" },
357 { THING, TOKENTYPE_OPVAL, "THING" },
358 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
359 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
360 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
361 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
362 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
363 { USE, TOKENTYPE_IVAL, "USE" },
364 { WHEN, TOKENTYPE_IVAL, "WHEN" },
365 { WHILE, TOKENTYPE_IVAL, "WHILE" },
366 { WORD, TOKENTYPE_OPVAL, "WORD" },
367 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
368 { 0, TOKENTYPE_NONE, NULL }
371 /* dump the returned token in rv, plus any optional arg in pl_yylval */
374 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
378 PERL_ARGS_ASSERT_TOKEREPORT;
381 const char *name = NULL;
382 enum token_type type = TOKENTYPE_NONE;
383 const struct debug_tokens *p;
384 SV* const report = newSVpvs("<== ");
386 for (p = debug_tokens; p->token; p++) {
387 if (p->token == (int)rv) {
394 Perl_sv_catpv(aTHX_ report, name);
395 else if ((char)rv > ' ' && (char)rv < '~')
396 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
398 sv_catpvs(report, "EOF");
400 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
403 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
406 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
408 case TOKENTYPE_OPNUM:
409 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
410 PL_op_name[lvalp->ival]);
413 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
415 case TOKENTYPE_OPVAL:
417 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
418 PL_op_name[lvalp->opval->op_type]);
419 if (lvalp->opval->op_type == OP_CONST) {
420 Perl_sv_catpvf(aTHX_ report, " %s",
421 SvPEEK(cSVOPx_sv(lvalp->opval)));
426 sv_catpvs(report, "(opval=null)");
429 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
435 /* print the buffer with suitable escapes */
438 S_printbuf(pTHX_ const char *const fmt, const char *const s)
440 SV* const tmp = newSVpvs("");
442 PERL_ARGS_ASSERT_PRINTBUF;
444 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
451 S_deprecate_commaless_var_list(pTHX) {
453 deprecate("comma-less variable list");
454 return REPORT(','); /* grandfather non-comma-format format */
460 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
461 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
465 S_ao(pTHX_ int toketype)
468 if (*PL_bufptr == '=') {
470 if (toketype == ANDAND)
471 pl_yylval.ival = OP_ANDASSIGN;
472 else if (toketype == OROR)
473 pl_yylval.ival = OP_ORASSIGN;
474 else if (toketype == DORDOR)
475 pl_yylval.ival = OP_DORASSIGN;
483 * When Perl expects an operator and finds something else, no_op
484 * prints the warning. It always prints "<something> found where
485 * operator expected. It prints "Missing semicolon on previous line?"
486 * if the surprise occurs at the start of the line. "do you need to
487 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
488 * where the compiler doesn't know if foo is a method call or a function.
489 * It prints "Missing operator before end of line" if there's nothing
490 * after the missing operator, or "... before <...>" if there is something
491 * after the missing operator.
495 S_no_op(pTHX_ const char *const what, char *s)
498 char * const oldbp = PL_bufptr;
499 const bool is_first = (PL_oldbufptr == PL_linestart);
501 PERL_ARGS_ASSERT_NO_OP;
507 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
508 if (ckWARN_d(WARN_SYNTAX)) {
510 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
511 "\t(Missing semicolon on previous line?)\n");
512 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
514 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
516 if (t < PL_bufptr && isSPACE(*t))
517 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
518 "\t(Do you need to predeclare %.*s?)\n",
519 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
523 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
524 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
532 * Complain about missing quote/regexp/heredoc terminator.
533 * If it's called with NULL then it cauterizes the line buffer.
534 * If we're in a delimited string and the delimiter is a control
535 * character, it's reformatted into a two-char sequence like ^C.
540 S_missingterm(pTHX_ char *s)
546 char * const nl = strrchr(s,'\n');
550 else if (isCNTRL(PL_multi_close)) {
552 tmpbuf[1] = (char)toCTRL(PL_multi_close);
557 *tmpbuf = (char)PL_multi_close;
561 q = strchr(s,'"') ? '\'' : '"';
562 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
565 #define FEATURE_IS_ENABLED(name) \
566 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
567 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
568 /* The longest string we pass in. */
569 #define MAX_FEATURE_LEN (sizeof("switch")-1)
572 * S_feature_is_enabled
573 * Check whether the named feature is enabled.
576 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
579 HV * const hinthv = GvHV(PL_hintgv);
580 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
582 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
584 assert(namelen <= MAX_FEATURE_LEN);
585 memcpy(&he_name[8], name, namelen);
587 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
591 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
592 * utf16-to-utf8-reversed.
595 #ifdef PERL_CR_FILTER
599 register const char *s = SvPVX_const(sv);
600 register const char * const e = s + SvCUR(sv);
602 PERL_ARGS_ASSERT_STRIP_RETURN;
604 /* outer loop optimized to do nothing if there are no CR-LFs */
606 if (*s++ == '\r' && *s == '\n') {
607 /* hit a CR-LF, need to copy the rest */
608 register char *d = s - 1;
611 if (*s == '\r' && s[1] == '\n')
622 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
624 const I32 count = FILTER_READ(idx+1, sv, maxlen);
625 if (count > 0 && !maxlen)
636 * Create a parser object and initialise its parser and lexer fields
638 * rsfp is the opened file handle to read from (if any),
640 * line holds any initial content already read from the file (or in
641 * the case of no file, such as an eval, the whole contents);
643 * new_filter indicates that this is a new file and it shouldn't inherit
644 * the filters from the current parser (ie require).
648 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
651 const char *s = NULL;
653 yy_parser *parser, *oparser;
655 /* create and initialise a parser */
657 Newxz(parser, 1, yy_parser);
658 parser->old_parser = oparser = PL_parser;
661 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
662 parser->ps = parser->stack;
663 parser->stack_size = YYINITDEPTH;
665 parser->stack->state = 0;
666 parser->yyerrstatus = 0;
667 parser->yychar = YYEMPTY; /* Cause a token to be read. */
669 /* on scope exit, free this parser and restore any outer one */
671 parser->saved_curcop = PL_curcop;
673 /* initialise lexer state */
676 parser->curforce = -1;
678 parser->nexttoke = 0;
680 parser->error_count = oparser ? oparser->error_count : 0;
681 parser->copline = NOLINE;
682 parser->lex_state = LEX_NORMAL;
683 parser->expect = XSTATE;
685 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
686 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
688 Newx(parser->lex_brackstack, 120, char);
689 Newx(parser->lex_casestack, 12, char);
690 *parser->lex_casestack = '\0';
693 s = SvPV_const(line, len);
699 parser->linestr = newSVpvs("\n;");
700 } else if (SvREADONLY(line) || s[len-1] != ';') {
701 parser->linestr = newSVsv(line);
703 sv_catpvs(parser->linestr, "\n;");
706 SvREFCNT_inc_simple_void_NN(line);
707 parser->linestr = line;
709 parser->oldoldbufptr =
712 parser->linestart = SvPVX(parser->linestr);
713 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
714 parser->last_lop = parser->last_uni = NULL;
718 /* delete a parser object */
721 Perl_parser_free(pTHX_ const yy_parser *parser)
723 PERL_ARGS_ASSERT_PARSER_FREE;
725 PL_curcop = parser->saved_curcop;
726 SvREFCNT_dec(parser->linestr);
728 if (parser->rsfp == PerlIO_stdin())
729 PerlIO_clearerr(parser->rsfp);
730 else if (parser->rsfp && (!parser->old_parser ||
731 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
732 PerlIO_close(parser->rsfp);
733 SvREFCNT_dec(parser->rsfp_filters);
735 Safefree(parser->stack);
736 Safefree(parser->lex_brackstack);
737 Safefree(parser->lex_casestack);
738 PL_parser = parser->old_parser;
745 * Finalizer for lexing operations. Must be called when the parser is
746 * done with the lexer.
753 PL_doextract = FALSE;
758 * This subroutine has nothing to do with tilting, whether at windmills
759 * or pinball tables. Its name is short for "increment line". It
760 * increments the current line number in CopLINE(PL_curcop) and checks
761 * to see whether the line starts with a comment of the form
762 * # line 500 "foo.pm"
763 * If so, it sets the current line number and file to the values in the comment.
767 S_incline(pTHX_ const char *s)
774 PERL_ARGS_ASSERT_INCLINE;
776 CopLINE_inc(PL_curcop);
779 while (SPACE_OR_TAB(*s))
781 if (strnEQ(s, "line", 4))
785 if (SPACE_OR_TAB(*s))
789 while (SPACE_OR_TAB(*s))
797 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
799 while (SPACE_OR_TAB(*s))
801 if (*s == '"' && (t = strchr(s+1, '"'))) {
811 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
813 if (*e != '\n' && *e != '\0')
814 return; /* false alarm */
817 const STRLEN len = t - s;
819 SV *const temp_sv = CopFILESV(PL_curcop);
825 tmplen = SvCUR(temp_sv);
831 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
832 /* must copy *{"::_<(eval N)[oldfilename:L]"}
833 * to *{"::_<newfilename"} */
834 /* However, the long form of evals is only turned on by the
835 debugger - usually they're "(eval %lu)" */
839 STRLEN tmplen2 = len;
840 if (tmplen + 2 <= sizeof smallbuf)
843 Newx(tmpbuf, tmplen + 2, char);
846 memcpy(tmpbuf + 2, cf, tmplen);
848 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
853 if (tmplen2 + 2 <= sizeof smallbuf)
856 Newx(tmpbuf2, tmplen2 + 2, char);
858 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
859 /* Either they malloc'd it, or we malloc'd it,
860 so no prefix is present in ours. */
865 memcpy(tmpbuf2 + 2, s, tmplen2);
868 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
870 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
871 /* adjust ${"::_<newfilename"} to store the new file name */
872 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
873 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
874 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
877 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
879 if (tmpbuf != smallbuf) Safefree(tmpbuf);
882 CopFILE_free(PL_curcop);
883 CopFILE_setn(PL_curcop, s, len);
885 CopLINE_set(PL_curcop, atoi(n)-1);
889 /* skip space before PL_thistoken */
892 S_skipspace0(pTHX_ register char *s)
894 PERL_ARGS_ASSERT_SKIPSPACE0;
901 PL_thiswhite = newSVpvs("");
902 sv_catsv(PL_thiswhite, PL_skipwhite);
903 sv_free(PL_skipwhite);
906 PL_realtokenstart = s - SvPVX(PL_linestr);
910 /* skip space after PL_thistoken */
913 S_skipspace1(pTHX_ register char *s)
915 const char *start = s;
916 I32 startoff = start - SvPVX(PL_linestr);
918 PERL_ARGS_ASSERT_SKIPSPACE1;
923 start = SvPVX(PL_linestr) + startoff;
924 if (!PL_thistoken && PL_realtokenstart >= 0) {
925 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
926 PL_thistoken = newSVpvn(tstart, start - tstart);
928 PL_realtokenstart = -1;
931 PL_nextwhite = newSVpvs("");
932 sv_catsv(PL_nextwhite, PL_skipwhite);
933 sv_free(PL_skipwhite);
940 S_skipspace2(pTHX_ register char *s, SV **svp)
943 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
944 const I32 startoff = s - SvPVX(PL_linestr);
946 PERL_ARGS_ASSERT_SKIPSPACE2;
949 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
950 if (!PL_madskills || !svp)
952 start = SvPVX(PL_linestr) + startoff;
953 if (!PL_thistoken && PL_realtokenstart >= 0) {
954 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
955 PL_thistoken = newSVpvn(tstart, start - tstart);
956 PL_realtokenstart = -1;
961 sv_setsv(*svp, PL_skipwhite);
962 sv_free(PL_skipwhite);
971 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
973 AV *av = CopFILEAVx(PL_curcop);
975 SV * const sv = newSV_type(SVt_PVMG);
977 sv_setsv(sv, orig_sv);
979 sv_setpvn(sv, buf, len);
982 av_store(av, (I32)CopLINE(PL_curcop), sv);
988 * Called to gobble the appropriate amount and type of whitespace.
989 * Skips comments as well.
993 S_skipspace(pTHX_ register char *s)
998 int startoff = s - SvPVX(PL_linestr);
1000 PERL_ARGS_ASSERT_SKIPSPACE;
1003 sv_free(PL_skipwhite);
1007 PERL_ARGS_ASSERT_SKIPSPACE;
1009 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1010 while (s < PL_bufend && SPACE_OR_TAB(*s))
1020 SSize_t oldprevlen, oldoldprevlen;
1021 SSize_t oldloplen = 0, oldunilen = 0;
1022 while (s < PL_bufend && isSPACE(*s)) {
1023 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1028 if (s < PL_bufend && *s == '#') {
1029 while (s < PL_bufend && *s != '\n')
1031 if (s < PL_bufend) {
1033 if (PL_in_eval && !PL_rsfp) {
1040 /* only continue to recharge the buffer if we're at the end
1041 * of the buffer, we're not reading from a source filter, and
1042 * we're in normal lexing mode
1044 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1045 PL_lex_state == LEX_FORMLINE)
1052 /* try to recharge the buffer */
1054 curoff = s - SvPVX(PL_linestr);
1057 if ((s = filter_gets(PL_linestr, (prevlen = SvCUR(PL_linestr))))
1061 if (PL_madskills && curoff != startoff) {
1063 PL_skipwhite = newSVpvs("");
1064 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1068 /* mustn't throw out old stuff yet if madpropping */
1069 SvCUR(PL_linestr) = curoff;
1070 s = SvPVX(PL_linestr) + curoff;
1072 if (curoff && s[-1] == '\n')
1076 /* end of file. Add on the -p or -n magic */
1077 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1080 sv_catpvs(PL_linestr,
1081 ";}continue{print or die qq(-p destination: $!\\n);}");
1083 sv_setpvs(PL_linestr,
1084 ";}continue{print or die qq(-p destination: $!\\n);}");
1086 PL_minus_n = PL_minus_p = 0;
1088 else if (PL_minus_n) {
1090 sv_catpvs(PL_linestr, ";}");
1092 sv_setpvs(PL_linestr, ";}");
1098 sv_catpvs(PL_linestr,";");
1100 sv_setpvs(PL_linestr,";");
1103 /* reset variables for next time we lex */
1104 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1110 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1111 PL_last_lop = PL_last_uni = NULL;
1113 /* Close the filehandle. Could be from
1114 * STDIN, or a regular file. If we were reading code from
1115 * STDIN (because the commandline held no -e or filename)
1116 * then we don't close it, we reset it so the code can
1117 * read from STDIN too.
1120 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1121 PerlIO_clearerr(PL_rsfp);
1123 (void)PerlIO_close(PL_rsfp);
1128 /* not at end of file, so we only read another line */
1129 /* make corresponding updates to old pointers, for yyerror() */
1130 oldprevlen = PL_oldbufptr - PL_bufend;
1131 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1133 oldunilen = PL_last_uni - PL_bufend;
1135 oldloplen = PL_last_lop - PL_bufend;
1136 PL_linestart = PL_bufptr = s + prevlen;
1137 PL_bufend = s + SvCUR(PL_linestr);
1139 PL_oldbufptr = s + oldprevlen;
1140 PL_oldoldbufptr = s + oldoldprevlen;
1142 PL_last_uni = s + oldunilen;
1144 PL_last_lop = s + oldloplen;
1147 /* debugger active and we're not compiling the debugger code,
1148 * so store the line into the debugger's array of lines
1150 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
1151 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1158 PL_skipwhite = newSVpvs("");
1159 curoff = s - SvPVX(PL_linestr);
1160 if (curoff - startoff)
1161 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1170 * Check the unary operators to ensure there's no ambiguity in how they're
1171 * used. An ambiguous piece of code would be:
1173 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1174 * the +5 is its argument.
1184 if (PL_oldoldbufptr != PL_last_uni)
1186 while (isSPACE(*PL_last_uni))
1189 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1191 if ((t = strchr(s, '(')) && t < PL_bufptr)
1194 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1195 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1196 (int)(s - PL_last_uni), PL_last_uni);
1200 * LOP : macro to build a list operator. Its behaviour has been replaced
1201 * with a subroutine, S_lop() for which LOP is just another name.
1204 #define LOP(f,x) return lop(f,x,s)
1208 * Build a list operator (or something that might be one). The rules:
1209 * - if we have a next token, then it's a list operator [why?]
1210 * - if the next thing is an opening paren, then it's a function
1211 * - else it's a list operator
1215 S_lop(pTHX_ I32 f, int x, char *s)
1219 PERL_ARGS_ASSERT_LOP;
1225 PL_last_lop = PL_oldbufptr;
1226 PL_last_lop_op = (OPCODE)f;
1229 return REPORT(LSTOP);
1232 return REPORT(LSTOP);
1235 return REPORT(FUNC);
1238 return REPORT(FUNC);
1240 return REPORT(LSTOP);
1246 * Sets up for an eventual force_next(). start_force(0) basically does
1247 * an unshift, while start_force(-1) does a push. yylex removes items
1252 S_start_force(pTHX_ int where)
1256 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1257 where = PL_lasttoke;
1258 assert(PL_curforce < 0 || PL_curforce == where);
1259 if (PL_curforce != where) {
1260 for (i = PL_lasttoke; i > where; --i) {
1261 PL_nexttoke[i] = PL_nexttoke[i-1];
1265 if (PL_curforce < 0) /* in case of duplicate start_force() */
1266 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1267 PL_curforce = where;
1270 curmad('^', newSVpvs(""));
1271 CURMAD('_', PL_nextwhite);
1276 S_curmad(pTHX_ char slot, SV *sv)
1282 if (PL_curforce < 0)
1283 where = &PL_thismad;
1285 where = &PL_nexttoke[PL_curforce].next_mad;
1291 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1293 else if (PL_encoding) {
1294 sv_recode_to_utf8(sv, PL_encoding);
1299 /* keep a slot open for the head of the list? */
1300 if (slot != '_' && *where && (*where)->mad_key == '^') {
1301 (*where)->mad_key = slot;
1302 sv_free(MUTABLE_SV(((*where)->mad_val)));
1303 (*where)->mad_val = (void*)sv;
1306 addmad(newMADsv(slot, sv), where, 0);
1309 # define start_force(where) NOOP
1310 # define curmad(slot, sv) NOOP
1315 * When the lexer realizes it knows the next token (for instance,
1316 * it is reordering tokens for the parser) then it can call S_force_next
1317 * to know what token to return the next time the lexer is called. Caller
1318 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1319 * and possibly PL_expect to ensure the lexer handles the token correctly.
1323 S_force_next(pTHX_ I32 type)
1328 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1329 tokereport(type, &NEXTVAL_NEXTTOKE);
1333 if (PL_curforce < 0)
1334 start_force(PL_lasttoke);
1335 PL_nexttoke[PL_curforce].next_type = type;
1336 if (PL_lex_state != LEX_KNOWNEXT)
1337 PL_lex_defer = PL_lex_state;
1338 PL_lex_state = LEX_KNOWNEXT;
1339 PL_lex_expect = PL_expect;
1342 PL_nexttype[PL_nexttoke] = type;
1344 if (PL_lex_state != LEX_KNOWNEXT) {
1345 PL_lex_defer = PL_lex_state;
1346 PL_lex_expect = PL_expect;
1347 PL_lex_state = LEX_KNOWNEXT;
1353 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1356 SV * const sv = newSVpvn_utf8(start, len,
1359 && !is_ascii_string((const U8*)start, len)
1360 && is_utf8_string((const U8*)start, len));
1366 * When the lexer knows the next thing is a word (for instance, it has
1367 * just seen -> and it knows that the next char is a word char, then
1368 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1372 * char *start : buffer position (must be within PL_linestr)
1373 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1374 * int check_keyword : if true, Perl checks to make sure the word isn't
1375 * a keyword (do this if the word is a label, e.g. goto FOO)
1376 * int allow_pack : if true, : characters will also be allowed (require,
1377 * use, etc. do this)
1378 * int allow_initial_tick : used by the "sub" lexer only.
1382 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1388 PERL_ARGS_ASSERT_FORCE_WORD;
1390 start = SKIPSPACE1(start);
1392 if (isIDFIRST_lazy_if(s,UTF) ||
1393 (allow_pack && *s == ':') ||
1394 (allow_initial_tick && *s == '\'') )
1396 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1397 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1399 start_force(PL_curforce);
1401 curmad('X', newSVpvn(start,s-start));
1402 if (token == METHOD) {
1407 PL_expect = XOPERATOR;
1411 curmad('g', newSVpvs( "forced" ));
1412 NEXTVAL_NEXTTOKE.opval
1413 = (OP*)newSVOP(OP_CONST,0,
1414 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1415 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1423 * Called when the lexer wants $foo *foo &foo etc, but the program
1424 * text only contains the "foo" portion. The first argument is a pointer
1425 * to the "foo", and the second argument is the type symbol to prefix.
1426 * Forces the next token to be a "WORD".
1427 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1431 S_force_ident(pTHX_ register const char *s, int kind)
1435 PERL_ARGS_ASSERT_FORCE_IDENT;
1438 const STRLEN len = strlen(s);
1439 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1440 start_force(PL_curforce);
1441 NEXTVAL_NEXTTOKE.opval = o;
1444 o->op_private = OPpCONST_ENTERED;
1445 /* XXX see note in pp_entereval() for why we forgo typo
1446 warnings if the symbol must be introduced in an eval.
1448 gv_fetchpvn_flags(s, len,
1449 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1451 kind == '$' ? SVt_PV :
1452 kind == '@' ? SVt_PVAV :
1453 kind == '%' ? SVt_PVHV :
1461 Perl_str_to_version(pTHX_ SV *sv)
1466 const char *start = SvPV_const(sv,len);
1467 const char * const end = start + len;
1468 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1470 PERL_ARGS_ASSERT_STR_TO_VERSION;
1472 while (start < end) {
1476 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1481 retval += ((NV)n)/nshift;
1490 * Forces the next token to be a version number.
1491 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1492 * and if "guessing" is TRUE, then no new token is created (and the caller
1493 * must use an alternative parsing method).
1497 S_force_version(pTHX_ char *s, int guessing)
1503 I32 startoff = s - SvPVX(PL_linestr);
1506 PERL_ARGS_ASSERT_FORCE_VERSION;
1514 while (isDIGIT(*d) || *d == '_' || *d == '.')
1518 start_force(PL_curforce);
1519 curmad('X', newSVpvn(s,d-s));
1522 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1524 s = scan_num(s, &pl_yylval);
1525 version = pl_yylval.opval;
1526 ver = cSVOPx(version)->op_sv;
1527 if (SvPOK(ver) && !SvNIOK(ver)) {
1528 SvUPGRADE(ver, SVt_PVNV);
1529 SvNV_set(ver, str_to_version(ver));
1530 SvNOK_on(ver); /* hint that it is a version */
1533 else if (guessing) {
1536 sv_free(PL_nextwhite); /* let next token collect whitespace */
1538 s = SvPVX(PL_linestr) + startoff;
1546 if (PL_madskills && !version) {
1547 sv_free(PL_nextwhite); /* let next token collect whitespace */
1549 s = SvPVX(PL_linestr) + startoff;
1552 /* NOTE: The parser sees the package name and the VERSION swapped */
1553 start_force(PL_curforce);
1554 NEXTVAL_NEXTTOKE.opval = version;
1562 * Tokenize a quoted string passed in as an SV. It finds the next
1563 * chunk, up to end of string or a backslash. It may make a new
1564 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1569 S_tokeq(pTHX_ SV *sv)
1573 register char *send;
1578 PERL_ARGS_ASSERT_TOKEQ;
1583 s = SvPV_force(sv, len);
1584 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1587 while (s < send && *s != '\\')
1592 if ( PL_hints & HINT_NEW_STRING ) {
1593 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1597 if (s + 1 < send && (s[1] == '\\'))
1598 s++; /* all that, just for this */
1603 SvCUR_set(sv, d - SvPVX_const(sv));
1605 if ( PL_hints & HINT_NEW_STRING )
1606 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1611 * Now come three functions related to double-quote context,
1612 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1613 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1614 * interact with PL_lex_state, and create fake ( ... ) argument lists
1615 * to handle functions and concatenation.
1616 * They assume that whoever calls them will be setting up a fake
1617 * join call, because each subthing puts a ',' after it. This lets
1620 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1622 * (I'm not sure whether the spurious commas at the end of lcfirst's
1623 * arguments and join's arguments are created or not).
1628 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1630 * Pattern matching will set PL_lex_op to the pattern-matching op to
1631 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1633 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1635 * Everything else becomes a FUNC.
1637 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1638 * had an OP_CONST or OP_READLINE). This just sets us up for a
1639 * call to S_sublex_push().
1643 S_sublex_start(pTHX)
1646 register const I32 op_type = pl_yylval.ival;
1648 if (op_type == OP_NULL) {
1649 pl_yylval.opval = PL_lex_op;
1653 if (op_type == OP_CONST || op_type == OP_READLINE) {
1654 SV *sv = tokeq(PL_lex_stuff);
1656 if (SvTYPE(sv) == SVt_PVIV) {
1657 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1659 const char * const p = SvPV_const(sv, len);
1660 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1664 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1665 PL_lex_stuff = NULL;
1666 /* Allow <FH> // "foo" */
1667 if (op_type == OP_READLINE)
1668 PL_expect = XTERMORDORDOR;
1671 else if (op_type == OP_BACKTICK && PL_lex_op) {
1672 /* readpipe() vas overriden */
1673 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1674 pl_yylval.opval = PL_lex_op;
1676 PL_lex_stuff = NULL;
1680 PL_sublex_info.super_state = PL_lex_state;
1681 PL_sublex_info.sub_inwhat = (U16)op_type;
1682 PL_sublex_info.sub_op = PL_lex_op;
1683 PL_lex_state = LEX_INTERPPUSH;
1687 pl_yylval.opval = PL_lex_op;
1697 * Create a new scope to save the lexing state. The scope will be
1698 * ended in S_sublex_done. Returns a '(', starting the function arguments
1699 * to the uc, lc, etc. found before.
1700 * Sets PL_lex_state to LEX_INTERPCONCAT.
1709 PL_lex_state = PL_sublex_info.super_state;
1710 SAVEBOOL(PL_lex_dojoin);
1711 SAVEI32(PL_lex_brackets);
1712 SAVEI32(PL_lex_casemods);
1713 SAVEI32(PL_lex_starts);
1714 SAVEI8(PL_lex_state);
1715 SAVEVPTR(PL_lex_inpat);
1716 SAVEI16(PL_lex_inwhat);
1717 SAVECOPLINE(PL_curcop);
1718 SAVEPPTR(PL_bufptr);
1719 SAVEPPTR(PL_bufend);
1720 SAVEPPTR(PL_oldbufptr);
1721 SAVEPPTR(PL_oldoldbufptr);
1722 SAVEPPTR(PL_last_lop);
1723 SAVEPPTR(PL_last_uni);
1724 SAVEPPTR(PL_linestart);
1725 SAVESPTR(PL_linestr);
1726 SAVEGENERICPV(PL_lex_brackstack);
1727 SAVEGENERICPV(PL_lex_casestack);
1729 PL_linestr = PL_lex_stuff;
1730 PL_lex_stuff = NULL;
1732 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1733 = SvPVX(PL_linestr);
1734 PL_bufend += SvCUR(PL_linestr);
1735 PL_last_lop = PL_last_uni = NULL;
1736 SAVEFREESV(PL_linestr);
1738 PL_lex_dojoin = FALSE;
1739 PL_lex_brackets = 0;
1740 Newx(PL_lex_brackstack, 120, char);
1741 Newx(PL_lex_casestack, 12, char);
1742 PL_lex_casemods = 0;
1743 *PL_lex_casestack = '\0';
1745 PL_lex_state = LEX_INTERPCONCAT;
1746 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1748 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1749 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1750 PL_lex_inpat = PL_sublex_info.sub_op;
1752 PL_lex_inpat = NULL;
1759 * Restores lexer state after a S_sublex_push.
1766 if (!PL_lex_starts++) {
1767 SV * const sv = newSVpvs("");
1768 if (SvUTF8(PL_linestr))
1770 PL_expect = XOPERATOR;
1771 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1775 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1776 PL_lex_state = LEX_INTERPCASEMOD;
1780 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1781 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1782 PL_linestr = PL_lex_repl;
1784 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1785 PL_bufend += SvCUR(PL_linestr);
1786 PL_last_lop = PL_last_uni = NULL;
1787 SAVEFREESV(PL_linestr);
1788 PL_lex_dojoin = FALSE;
1789 PL_lex_brackets = 0;
1790 PL_lex_casemods = 0;
1791 *PL_lex_casestack = '\0';
1793 if (SvEVALED(PL_lex_repl)) {
1794 PL_lex_state = LEX_INTERPNORMAL;
1796 /* we don't clear PL_lex_repl here, so that we can check later
1797 whether this is an evalled subst; that means we rely on the
1798 logic to ensure sublex_done() is called again only via the
1799 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1802 PL_lex_state = LEX_INTERPCONCAT;
1812 PL_endwhite = newSVpvs("");
1813 sv_catsv(PL_endwhite, PL_thiswhite);
1817 sv_setpvs(PL_thistoken,"");
1819 PL_realtokenstart = -1;
1823 PL_bufend = SvPVX(PL_linestr);
1824 PL_bufend += SvCUR(PL_linestr);
1825 PL_expect = XOPERATOR;
1826 PL_sublex_info.sub_inwhat = 0;
1834 Extracts a pattern, double-quoted string, or transliteration. This
1837 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1838 processing a pattern (PL_lex_inpat is true), a transliteration
1839 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1841 Returns a pointer to the character scanned up to. If this is
1842 advanced from the start pointer supplied (i.e. if anything was
1843 successfully parsed), will leave an OP for the substring scanned
1844 in pl_yylval. Caller must intuit reason for not parsing further
1845 by looking at the next characters herself.
1849 double-quoted style: \r and \n
1850 regexp special ones: \D \s
1853 case and quoting: \U \Q \E
1854 stops on @ and $, but not for $ as tail anchor
1856 In transliterations:
1857 characters are VERY literal, except for - not at the start or end
1858 of the string, which indicates a range. If the range is in bytes,
1859 scan_const expands the range to the full set of intermediate
1860 characters. If the range is in utf8, the hyphen is replaced with
1861 a certain range mark which will be handled by pmtrans() in op.c.
1863 In double-quoted strings:
1865 double-quoted style: \r and \n
1867 deprecated backrefs: \1 (in substitution replacements)
1868 case and quoting: \U \Q \E
1871 scan_const does *not* construct ops to handle interpolated strings.
1872 It stops processing as soon as it finds an embedded $ or @ variable
1873 and leaves it to the caller to work out what's going on.
1875 embedded arrays (whether in pattern or not) could be:
1876 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1878 $ in double-quoted strings must be the symbol of an embedded scalar.
1880 $ in pattern could be $foo or could be tail anchor. Assumption:
1881 it's a tail anchor if $ is the last thing in the string, or if it's
1882 followed by one of "()| \r\n\t"
1884 \1 (backreferences) are turned into $1
1886 The structure of the code is
1887 while (there's a character to process) {
1888 handle transliteration ranges
1889 skip regexp comments /(?#comment)/ and codes /(?{code})/
1890 skip #-initiated comments in //x patterns
1891 check for embedded arrays
1892 check for embedded scalars
1894 leave intact backslashes from leaveit (below)
1895 deprecate \1 in substitution replacements
1896 handle string-changing backslashes \l \U \Q \E, etc.
1897 switch (what was escaped) {
1898 handle \- in a transliteration (becomes a literal -)
1899 handle \132 (octal characters)
1900 handle \x15 and \x{1234} (hex characters)
1901 handle \N{name} (named characters)
1902 handle \cV (control characters)
1903 handle printf-style backslashes (\f, \r, \n, etc)
1906 } (end if backslash)
1907 handle regular character
1908 } (end while character to read)
1913 S_scan_const(pTHX_ char *start)
1916 register char *send = PL_bufend; /* end of the constant */
1917 SV *sv = newSV(send - start); /* sv for the constant. See
1918 note below on sizing. */
1919 register char *s = start; /* start of the constant */
1920 register char *d = SvPVX(sv); /* destination for copies */
1921 bool dorange = FALSE; /* are we in a translit range? */
1922 bool didrange = FALSE; /* did we just finish a range? */
1923 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1924 I32 this_utf8 = UTF; /* Is the source string assumed
1925 to be UTF8? But, this can
1926 show as true when the source
1927 isn't utf8, as for example
1928 when it is entirely composed
1931 /* Note on sizing: The scanned constant is placed into sv, which is
1932 * initialized by newSV() assuming one byte of output for every byte of
1933 * input. This routine expects newSV() to allocate an extra byte for a
1934 * trailing NUL, which this routine will append if it gets to the end of
1935 * the input. There may be more bytes of input than output (eg., \N{LATIN
1936 * CAPITAL LETTER A}), or more output than input if the constant ends up
1937 * recoded to utf8, but each time a construct is found that might increase
1938 * the needed size, SvGROW() is called. Its size parameter each time is
1939 * based on the best guess estimate at the time, namely the length used so
1940 * far, plus the length the current construct will occupy, plus room for
1941 * the trailing NUL, plus one byte for every input byte still unscanned */
1945 UV literal_endpoint = 0;
1946 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1949 PERL_ARGS_ASSERT_SCAN_CONST;
1951 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1952 /* If we are doing a trans and we know we want UTF8 set expectation */
1953 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1954 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1958 while (s < send || dorange) {
1959 /* get transliterations out of the way (they're most literal) */
1960 if (PL_lex_inwhat == OP_TRANS) {
1961 /* expand a range A-Z to the full set of characters. AIE! */
1963 I32 i; /* current expanded character */
1964 I32 min; /* first character in range */
1965 I32 max; /* last character in range */
1976 char * const c = (char*)utf8_hop((U8*)d, -1);
1980 *c = (char)UTF_TO_NATIVE(0xff);
1981 /* mark the range as done, and continue */
1987 i = d - SvPVX_const(sv); /* remember current offset */
1990 SvLEN(sv) + (has_utf8 ?
1991 (512 - UTF_CONTINUATION_MARK +
1994 /* How many two-byte within 0..255: 128 in UTF-8,
1995 * 96 in UTF-8-mod. */
1997 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1999 d = SvPVX(sv) + i; /* refresh d after realloc */
2003 for (j = 0; j <= 1; j++) {
2004 char * const c = (char*)utf8_hop((U8*)d, -1);
2005 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2011 max = (U8)0xff; /* only to \xff */
2012 uvmax = uv; /* \x{100} to uvmax */
2014 d = c; /* eat endpoint chars */
2019 d -= 2; /* eat the first char and the - */
2020 min = (U8)*d; /* first char in range */
2021 max = (U8)d[1]; /* last char in range */
2028 "Invalid range \"%c-%c\" in transliteration operator",
2029 (char)min, (char)max);
2033 if (literal_endpoint == 2 &&
2034 ((isLOWER(min) && isLOWER(max)) ||
2035 (isUPPER(min) && isUPPER(max)))) {
2037 for (i = min; i <= max; i++)
2039 *d++ = NATIVE_TO_NEED(has_utf8,i);
2041 for (i = min; i <= max; i++)
2043 *d++ = NATIVE_TO_NEED(has_utf8,i);
2048 for (i = min; i <= max; i++)
2051 const U8 ch = (U8)NATIVE_TO_UTF(i);
2052 if (UNI_IS_INVARIANT(ch))
2055 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2056 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2065 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2067 *d++ = (char)UTF_TO_NATIVE(0xff);
2069 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2073 /* mark the range as done, and continue */
2077 literal_endpoint = 0;
2082 /* range begins (ignore - as first or last char) */
2083 else if (*s == '-' && s+1 < send && s != start) {
2085 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2092 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2102 literal_endpoint = 0;
2103 native_range = TRUE;
2108 /* if we get here, we're not doing a transliteration */
2110 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2111 except for the last char, which will be done separately. */
2112 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2114 while (s+1 < send && *s != ')')
2115 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2117 else if (s[2] == '{' /* This should match regcomp.c */
2118 || (s[2] == '?' && s[3] == '{'))
2121 char *regparse = s + (s[2] == '{' ? 3 : 4);
2124 while (count && (c = *regparse)) {
2125 if (c == '\\' && regparse[1])
2133 if (*regparse != ')')
2134 regparse--; /* Leave one char for continuation. */
2135 while (s < regparse)
2136 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2140 /* likewise skip #-initiated comments in //x patterns */
2141 else if (*s == '#' && PL_lex_inpat &&
2142 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2143 while (s+1 < send && *s != '\n')
2144 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2147 /* check for embedded arrays
2148 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2150 else if (*s == '@' && s[1]) {
2151 if (isALNUM_lazy_if(s+1,UTF))
2153 if (strchr(":'{$", s[1]))
2155 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2156 break; /* in regexp, neither @+ nor @- are interpolated */
2159 /* check for embedded scalars. only stop if we're sure it's a
2162 else if (*s == '$') {
2163 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2165 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2167 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2168 "Possible unintended interpolation of $\\ in regex");
2170 break; /* in regexp, $ might be tail anchor */
2174 /* End of else if chain - OP_TRANS rejoin rest */
2177 if (*s == '\\' && s+1 < send) {
2180 /* deprecate \1 in strings and substitution replacements */
2181 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2182 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2184 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2189 /* string-change backslash escapes */
2190 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2194 /* skip any other backslash escapes in a pattern */
2195 else if (PL_lex_inpat) {
2196 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2197 goto default_action;
2200 /* if we get here, it's either a quoted -, or a digit */
2203 /* quoted - in transliterations */
2205 if (PL_lex_inwhat == OP_TRANS) {
2212 if ((isALPHA(*s) || isDIGIT(*s)))
2213 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2214 "Unrecognized escape \\%c passed through",
2216 /* default action is to copy the quoted character */
2217 goto default_action;
2220 /* eg. \132 indicates the octal constant 0x132 */
2221 case '0': case '1': case '2': case '3':
2222 case '4': case '5': case '6': case '7':
2226 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2229 goto NUM_ESCAPE_INSERT;
2231 /* eg. \x24 indicates the hex constant 0x24 */
2235 char* const e = strchr(s, '}');
2236 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2237 PERL_SCAN_DISALLOW_PREFIX;
2242 yyerror("Missing right brace on \\x{}");
2246 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2252 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2253 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2259 /* Insert oct, hex, or \N{U+...} escaped character. There will
2260 * always be enough room in sv since such escapes will be
2261 * longer than any UTF-8 sequence they can end up as, except if
2262 * they force us to recode the rest of the string into utf8 */
2264 /* Here uv is the ordinal of the next character being added in
2265 * unicode (converted from native). (It has to be done before
2266 * here because \N is interpreted as unicode, and oct and hex
2268 if (!UNI_IS_INVARIANT(uv)) {
2269 if (!has_utf8 && uv > 255) {
2270 /* Might need to recode whatever we have accumulated so
2271 * far if it contains any chars variant in utf8 or
2274 SvCUR_set(sv, d - SvPVX_const(sv));
2277 /* See Note on sizing above. */
2278 sv_utf8_upgrade_flags_grow(sv,
2279 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2280 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2281 d = SvPVX(sv) + SvCUR(sv);
2286 d = (char*)uvuni_to_utf8((U8*)d, uv);
2287 if (PL_lex_inwhat == OP_TRANS &&
2288 PL_sublex_info.sub_op) {
2289 PL_sublex_info.sub_op->op_private |=
2290 (PL_lex_repl ? OPpTRANS_FROM_UTF
2294 if (uv > 255 && !dorange)
2295 native_range = FALSE;
2307 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2312 char* e = strchr(s, '}');
2318 yyerror("Missing right brace on \\N{}");
2322 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2323 /* \N{U+...} The ... is a unicode value even on EBCDIC
2325 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2326 PERL_SCAN_DISALLOW_PREFIX;
2329 uv = grok_hex(s, &len, &flags, NULL);
2330 if ( e > s && len != (STRLEN)(e - s) ) {
2334 goto NUM_ESCAPE_INSERT;
2336 res = newSVpvn(s + 1, e - s - 1);
2337 res = new_constant( NULL, 0, "charnames",
2338 res, NULL, s - 2, e - s + 3 );
2340 sv_utf8_upgrade(res);
2341 str = SvPV_const(res,len);
2342 #ifdef EBCDIC_NEVER_MIND
2343 /* charnames uses pack U and that has been
2344 * recently changed to do the below uni->native
2345 * mapping, so this would be redundant (and wrong,
2346 * the code point would be doubly converted).
2347 * But leave this in just in case the pack U change
2348 * gets revoked, but the semantics is still
2349 * desireable for charnames. --jhi */
2351 UV uv = utf8_to_uvchr((const U8*)str, 0);
2354 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2356 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2357 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2358 str = SvPV_const(res, len);
2362 /* If destination is not in utf8 but this new character is,
2363 * recode the dest to utf8 */
2364 if (!has_utf8 && SvUTF8(res)) {
2365 SvCUR_set(sv, d - SvPVX_const(sv));
2368 /* See Note on sizing above. */
2369 sv_utf8_upgrade_flags_grow(sv,
2370 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2371 len + (STRLEN)(send - s) + 1);
2372 d = SvPVX(sv) + SvCUR(sv);
2374 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2376 /* See Note on sizing above. (NOTE: SvCUR() is not set
2377 * correctly here). */
2378 const STRLEN off = d - SvPVX_const(sv);
2379 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2383 native_range = FALSE; /* \N{} is guessed to be Unicode */
2385 Copy(str, d, len, char);
2392 yyerror("Missing braces on \\N{}");
2395 /* \c is a control character */
2404 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2407 yyerror("Missing control char name in \\c");
2411 /* printf-style backslashes, formfeeds, newlines, etc */
2413 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2416 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2419 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2422 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2425 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2428 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2431 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2437 } /* end if (backslash) */
2444 /* If we started with encoded form, or already know we want it,
2445 then encode the next character */
2446 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2450 /* One might think that it is wasted effort in the case of the
2451 * source being utf8 (this_utf8 == TRUE) to take the next character
2452 * in the source, convert it to an unsigned value, and then convert
2453 * it back again. But the source has not been validated here. The
2454 * routine that does the conversion checks for errors like
2457 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2458 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2460 SvCUR_set(sv, d - SvPVX_const(sv));
2463 /* See Note on sizing above. */
2464 sv_utf8_upgrade_flags_grow(sv,
2465 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2466 need + (STRLEN)(send - s) + 1);
2467 d = SvPVX(sv) + SvCUR(sv);
2469 } else if (need > len) {
2470 /* encoded value larger than old, may need extra space (NOTE:
2471 * SvCUR() is not set correctly here). See Note on sizing
2473 const STRLEN off = d - SvPVX_const(sv);
2474 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2478 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2480 if (uv > 255 && !dorange)
2481 native_range = FALSE;
2485 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2487 } /* while loop to process each character */
2489 /* terminate the string and set up the sv */
2491 SvCUR_set(sv, d - SvPVX_const(sv));
2492 if (SvCUR(sv) >= SvLEN(sv))
2493 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2496 if (PL_encoding && !has_utf8) {
2497 sv_recode_to_utf8(sv, PL_encoding);
2503 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2504 PL_sublex_info.sub_op->op_private |=
2505 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2509 /* shrink the sv if we allocated more than we used */
2510 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2511 SvPV_shrink_to_cur(sv);
2514 /* return the substring (via pl_yylval) only if we parsed anything */
2515 if (s > PL_bufptr) {
2516 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2517 const char *const key = PL_lex_inpat ? "qr" : "q";
2518 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2522 if (PL_lex_inwhat == OP_TRANS) {
2525 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2533 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2536 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2543 * Returns TRUE if there's more to the expression (e.g., a subscript),
2546 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2548 * ->[ and ->{ return TRUE
2549 * { and [ outside a pattern are always subscripts, so return TRUE
2550 * if we're outside a pattern and it's not { or [, then return FALSE
2551 * if we're in a pattern and the first char is a {
2552 * {4,5} (any digits around the comma) returns FALSE
2553 * if we're in a pattern and the first char is a [
2555 * [SOMETHING] has a funky algorithm to decide whether it's a
2556 * character class or not. It has to deal with things like
2557 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2558 * anything else returns TRUE
2561 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2564 S_intuit_more(pTHX_ register char *s)
2568 PERL_ARGS_ASSERT_INTUIT_MORE;
2570 if (PL_lex_brackets)
2572 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2574 if (*s != '{' && *s != '[')
2579 /* In a pattern, so maybe we have {n,m}. */
2596 /* On the other hand, maybe we have a character class */
2599 if (*s == ']' || *s == '^')
2602 /* this is terrifying, and it works */
2603 int weight = 2; /* let's weigh the evidence */
2605 unsigned char un_char = 255, last_un_char;
2606 const char * const send = strchr(s,']');
2607 char tmpbuf[sizeof PL_tokenbuf * 4];
2609 if (!send) /* has to be an expression */
2612 Zero(seen,256,char);
2615 else if (isDIGIT(*s)) {
2617 if (isDIGIT(s[1]) && s[2] == ']')
2623 for (; s < send; s++) {
2624 last_un_char = un_char;
2625 un_char = (unsigned char)*s;
2630 weight -= seen[un_char] * 10;
2631 if (isALNUM_lazy_if(s+1,UTF)) {
2633 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2634 len = (int)strlen(tmpbuf);
2635 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2640 else if (*s == '$' && s[1] &&
2641 strchr("[#!%*<>()-=",s[1])) {
2642 if (/*{*/ strchr("])} =",s[2]))
2651 if (strchr("wds]",s[1]))
2653 else if (seen[(U8)'\''] || seen[(U8)'"'])
2655 else if (strchr("rnftbxcav",s[1]))
2657 else if (isDIGIT(s[1])) {
2659 while (s[1] && isDIGIT(s[1]))
2669 if (strchr("aA01! ",last_un_char))
2671 if (strchr("zZ79~",s[1]))
2673 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2674 weight -= 5; /* cope with negative subscript */
2677 if (!isALNUM(last_un_char)
2678 && !(last_un_char == '$' || last_un_char == '@'
2679 || last_un_char == '&')
2680 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2685 if (keyword(tmpbuf, d - tmpbuf, 0))
2688 if (un_char == last_un_char + 1)
2690 weight -= seen[un_char];
2695 if (weight >= 0) /* probably a character class */
2705 * Does all the checking to disambiguate
2707 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2708 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2710 * First argument is the stuff after the first token, e.g. "bar".
2712 * Not a method if bar is a filehandle.
2713 * Not a method if foo is a subroutine prototyped to take a filehandle.
2714 * Not a method if it's really "Foo $bar"
2715 * Method if it's "foo $bar"
2716 * Not a method if it's really "print foo $bar"
2717 * Method if it's really "foo package::" (interpreted as package->foo)
2718 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2719 * Not a method if bar is a filehandle or package, but is quoted with
2724 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2727 char *s = start + (*start == '$');
2728 char tmpbuf[sizeof PL_tokenbuf];
2735 PERL_ARGS_ASSERT_INTUIT_METHOD;
2738 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2742 const char *proto = SvPVX_const(cv);
2753 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2754 /* start is the beginning of the possible filehandle/object,
2755 * and s is the end of it
2756 * tmpbuf is a copy of it
2759 if (*start == '$') {
2760 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2761 isUPPER(*PL_tokenbuf))
2764 len = start - SvPVX(PL_linestr);
2768 start = SvPVX(PL_linestr) + len;
2772 return *s == '(' ? FUNCMETH : METHOD;
2774 if (!keyword(tmpbuf, len, 0)) {
2775 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2779 soff = s - SvPVX(PL_linestr);
2783 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2784 if (indirgv && GvCVu(indirgv))
2786 /* filehandle or package name makes it a method */
2787 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2789 soff = s - SvPVX(PL_linestr);
2792 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2793 return 0; /* no assumptions -- "=>" quotes bearword */
2795 start_force(PL_curforce);
2796 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2797 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
2798 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2800 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2805 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2807 return *s == '(' ? FUNCMETH : METHOD;
2813 /* Encoded script support. filter_add() effectively inserts a
2814 * 'pre-processing' function into the current source input stream.
2815 * Note that the filter function only applies to the current source file
2816 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2818 * The datasv parameter (which may be NULL) can be used to pass
2819 * private data to this instance of the filter. The filter function
2820 * can recover the SV using the FILTER_DATA macro and use it to
2821 * store private buffers and state information.
2823 * The supplied datasv parameter is upgraded to a PVIO type
2824 * and the IoDIRP/IoANY field is used to store the function pointer,
2825 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2826 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2827 * private use must be set using malloc'd pointers.
2831 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2840 if (!PL_rsfp_filters)
2841 PL_rsfp_filters = newAV();
2844 SvUPGRADE(datasv, SVt_PVIO);
2845 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2846 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2847 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2848 FPTR2DPTR(void *, IoANY(datasv)),
2849 SvPV_nolen(datasv)));
2850 av_unshift(PL_rsfp_filters, 1);
2851 av_store(PL_rsfp_filters, 0, datasv) ;
2856 /* Delete most recently added instance of this filter function. */
2858 Perl_filter_del(pTHX_ filter_t funcp)
2863 PERL_ARGS_ASSERT_FILTER_DEL;
2866 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2867 FPTR2DPTR(void*, funcp)));
2869 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2871 /* if filter is on top of stack (usual case) just pop it off */
2872 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2873 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2874 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2875 IoANY(datasv) = (void *)NULL;
2876 sv_free(av_pop(PL_rsfp_filters));
2880 /* we need to search for the correct entry and clear it */
2881 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2885 /* Invoke the idxth filter function for the current rsfp. */
2886 /* maxlen 0 = read one text line */
2888 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2893 /* This API is bad. It should have been using unsigned int for maxlen.
2894 Not sure if we want to change the API, but if not we should sanity
2895 check the value here. */
2896 const unsigned int correct_length
2905 PERL_ARGS_ASSERT_FILTER_READ;
2907 if (!PL_parser || !PL_rsfp_filters)
2909 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2910 /* Provide a default input filter to make life easy. */
2911 /* Note that we append to the line. This is handy. */
2912 DEBUG_P(PerlIO_printf(Perl_debug_log,
2913 "filter_read %d: from rsfp\n", idx));
2914 if (correct_length) {
2917 const int old_len = SvCUR(buf_sv);
2919 /* ensure buf_sv is large enough */
2920 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
2921 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2922 correct_length)) <= 0) {
2923 if (PerlIO_error(PL_rsfp))
2924 return -1; /* error */
2926 return 0 ; /* end of file */
2928 SvCUR_set(buf_sv, old_len + len) ;
2929 SvPVX(buf_sv)[old_len + len] = '\0';
2932 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2933 if (PerlIO_error(PL_rsfp))
2934 return -1; /* error */
2936 return 0 ; /* end of file */
2939 return SvCUR(buf_sv);
2941 /* Skip this filter slot if filter has been deleted */
2942 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2943 DEBUG_P(PerlIO_printf(Perl_debug_log,
2944 "filter_read %d: skipped (filter deleted)\n",
2946 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2948 /* Get function pointer hidden within datasv */
2949 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2950 DEBUG_P(PerlIO_printf(Perl_debug_log,
2951 "filter_read %d: via function %p (%s)\n",
2952 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2953 /* Call function. The function is expected to */
2954 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2955 /* Return: <0:error, =0:eof, >0:not eof */
2956 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2960 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
2964 PERL_ARGS_ASSERT_FILTER_GETS;
2966 #ifdef PERL_CR_FILTER
2967 if (!PL_rsfp_filters) {
2968 filter_add(S_cr_textfilter,NULL);
2971 if (PL_rsfp_filters) {
2973 SvCUR_set(sv, 0); /* start with empty line */
2974 if (FILTER_READ(0, sv, 0) > 0)
2975 return ( SvPVX(sv) ) ;
2980 return (sv_gets(sv, PL_rsfp, append));
2984 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
2989 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
2991 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2995 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2996 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2998 return GvHV(gv); /* Foo:: */
3001 /* use constant CLASS => 'MyClass' */
3002 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3003 if (gv && GvCV(gv)) {
3004 SV * const sv = cv_const_sv(GvCV(gv));
3006 pkgname = SvPV_const(sv, len);
3009 return gv_stashpvn(pkgname, len, 0);
3013 * S_readpipe_override
3014 * Check whether readpipe() is overriden, and generates the appropriate
3015 * optree, provided sublex_start() is called afterwards.
3018 S_readpipe_override(pTHX)
3021 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3022 pl_yylval.ival = OP_BACKTICK;
3024 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3026 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3027 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3028 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3030 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3031 append_elem(OP_LIST,
3032 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3033 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3040 * The intent of this yylex wrapper is to minimize the changes to the
3041 * tokener when we aren't interested in collecting madprops. It remains
3042 * to be seen how successful this strategy will be...
3049 char *s = PL_bufptr;
3051 /* make sure PL_thiswhite is initialized */
3055 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3056 if (PL_pending_ident)
3057 return S_pending_ident(aTHX);
3059 /* previous token ate up our whitespace? */
3060 if (!PL_lasttoke && PL_nextwhite) {
3061 PL_thiswhite = PL_nextwhite;
3065 /* isolate the token, and figure out where it is without whitespace */
3066 PL_realtokenstart = -1;
3070 assert(PL_curforce < 0);
3072 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3073 if (!PL_thistoken) {
3074 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3075 PL_thistoken = newSVpvs("");
3077 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3078 PL_thistoken = newSVpvn(tstart, s - tstart);
3081 if (PL_thismad) /* install head */
3082 CURMAD('X', PL_thistoken);
3085 /* last whitespace of a sublex? */
3086 if (optype == ')' && PL_endwhite) {
3087 CURMAD('X', PL_endwhite);
3092 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3093 if (!PL_thiswhite && !PL_endwhite && !optype) {
3094 sv_free(PL_thistoken);
3099 /* put off final whitespace till peg */
3100 if (optype == ';' && !PL_rsfp) {
3101 PL_nextwhite = PL_thiswhite;
3104 else if (PL_thisopen) {
3105 CURMAD('q', PL_thisopen);
3107 sv_free(PL_thistoken);
3111 /* Store actual token text as madprop X */
3112 CURMAD('X', PL_thistoken);
3116 /* add preceding whitespace as madprop _ */
3117 CURMAD('_', PL_thiswhite);
3121 /* add quoted material as madprop = */
3122 CURMAD('=', PL_thisstuff);
3126 /* add terminating quote as madprop Q */
3127 CURMAD('Q', PL_thisclose);
3131 /* special processing based on optype */
3135 /* opval doesn't need a TOKEN since it can already store mp */
3145 if (pl_yylval.opval)
3146 append_madprops(PL_thismad, pl_yylval.opval, 0);
3154 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3163 /* remember any fake bracket that lexer is about to discard */
3164 if (PL_lex_brackets == 1 &&
3165 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3168 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3171 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3172 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3175 break; /* don't bother looking for trailing comment */
3184 /* attach a trailing comment to its statement instead of next token */
3188 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3190 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3192 if (*s == '\n' || *s == '#') {
3193 while (s < PL_bufend && *s != '\n')
3197 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3198 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3215 /* Create new token struct. Note: opvals return early above. */
3216 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3223 S_tokenize_use(pTHX_ int is_use, char *s) {
3226 PERL_ARGS_ASSERT_TOKENIZE_USE;
3228 if (PL_expect != XSTATE)
3229 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3230 is_use ? "use" : "no"));
3232 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3233 s = force_version(s, TRUE);
3234 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3235 start_force(PL_curforce);
3236 NEXTVAL_NEXTTOKE.opval = NULL;
3239 else if (*s == 'v') {
3240 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3241 s = force_version(s, FALSE);
3245 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3246 s = force_version(s, FALSE);
3248 pl_yylval.ival = is_use;
3252 static const char* const exp_name[] =
3253 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3254 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3261 Works out what to call the token just pulled out of the input
3262 stream. The yacc parser takes care of taking the ops we return and
3263 stitching them into a tree.
3269 if read an identifier
3270 if we're in a my declaration
3271 croak if they tried to say my($foo::bar)
3272 build the ops for a my() declaration
3273 if it's an access to a my() variable
3274 are we in a sort block?
3275 croak if my($a); $a <=> $b
3276 build ops for access to a my() variable
3277 if in a dq string, and they've said @foo and we can't find @foo
3279 build ops for a bareword
3280 if we already built the token before, use it.
3285 #pragma segment Perl_yylex
3291 register char *s = PL_bufptr;
3296 /* orig_keyword, gvp, and gv are initialized here because
3297 * jump to the label just_a_word_zero can bypass their
3298 * initialization later. */
3299 I32 orig_keyword = 0;
3304 SV* tmp = newSVpvs("");
3305 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3306 (IV)CopLINE(PL_curcop),
3307 lex_state_names[PL_lex_state],
3308 exp_name[PL_expect],
3309 pv_display(tmp, s, strlen(s), 0, 60));
3312 /* check if there's an identifier for us to look at */
3313 if (PL_pending_ident)
3314 return REPORT(S_pending_ident(aTHX));
3316 /* no identifier pending identification */
3318 switch (PL_lex_state) {
3320 case LEX_NORMAL: /* Some compilers will produce faster */
3321 case LEX_INTERPNORMAL: /* code if we comment these out. */
3325 /* when we've already built the next token, just pull it out of the queue */
3329 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3331 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3332 PL_nexttoke[PL_lasttoke].next_mad = 0;
3333 if (PL_thismad && PL_thismad->mad_key == '_') {
3334 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3335 PL_thismad->mad_val = 0;
3336 mad_free(PL_thismad);
3341 PL_lex_state = PL_lex_defer;
3342 PL_expect = PL_lex_expect;
3343 PL_lex_defer = LEX_NORMAL;
3344 if (!PL_nexttoke[PL_lasttoke].next_type)
3349 pl_yylval = PL_nextval[PL_nexttoke];
3351 PL_lex_state = PL_lex_defer;
3352 PL_expect = PL_lex_expect;
3353 PL_lex_defer = LEX_NORMAL;
3357 /* FIXME - can these be merged? */
3358 return(PL_nexttoke[PL_lasttoke].next_type);
3360 return REPORT(PL_nexttype[PL_nexttoke]);
3363 /* interpolated case modifiers like \L \U, including \Q and \E.
3364 when we get here, PL_bufptr is at the \
3366 case LEX_INTERPCASEMOD:
3368 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3369 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3371 /* handle \E or end of string */
3372 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3374 if (PL_lex_casemods) {
3375 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3376 PL_lex_casestack[PL_lex_casemods] = '\0';
3378 if (PL_bufptr != PL_bufend
3379 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3381 PL_lex_state = LEX_INTERPCONCAT;
3384 PL_thistoken = newSVpvs("\\E");
3390 while (PL_bufptr != PL_bufend &&
3391 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3393 PL_thiswhite = newSVpvs("");
3394 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3398 if (PL_bufptr != PL_bufend)
3401 PL_lex_state = LEX_INTERPCONCAT;
3405 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3406 "### Saw case modifier\n"); });
3408 if (s[1] == '\\' && s[2] == 'E') {
3411 PL_thiswhite = newSVpvs("");
3412 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3415 PL_lex_state = LEX_INTERPCONCAT;
3420 if (!PL_madskills) /* when just compiling don't need correct */
3421 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3422 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3423 if ((*s == 'L' || *s == 'U') &&
3424 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3425 PL_lex_casestack[--PL_lex_casemods] = '\0';
3428 if (PL_lex_casemods > 10)
3429 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3430 PL_lex_casestack[PL_lex_casemods++] = *s;
3431 PL_lex_casestack[PL_lex_casemods] = '\0';
3432 PL_lex_state = LEX_INTERPCONCAT;
3433 start_force(PL_curforce);
3434 NEXTVAL_NEXTTOKE.ival = 0;
3436 start_force(PL_curforce);
3438 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3440 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3442 NEXTVAL_NEXTTOKE.ival = OP_LC;
3444 NEXTVAL_NEXTTOKE.ival = OP_UC;
3446 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3448 Perl_croak(aTHX_ "panic: yylex");
3450 SV* const tmpsv = newSVpvs("\\ ");
3451 /* replace the space with the character we want to escape
3453 SvPVX(tmpsv)[1] = *s;
3459 if (PL_lex_starts) {
3465 sv_free(PL_thistoken);
3466 PL_thistoken = newSVpvs("");
3469 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3470 if (PL_lex_casemods == 1 && PL_lex_inpat)
3479 case LEX_INTERPPUSH:
3480 return REPORT(sublex_push());
3482 case LEX_INTERPSTART:
3483 if (PL_bufptr == PL_bufend)
3484 return REPORT(sublex_done());
3485 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3486 "### Interpolated variable\n"); });
3488 PL_lex_dojoin = (*PL_bufptr == '@');
3489 PL_lex_state = LEX_INTERPNORMAL;
3490 if (PL_lex_dojoin) {
3491 start_force(PL_curforce);
3492 NEXTVAL_NEXTTOKE.ival = 0;
3494 start_force(PL_curforce);
3495 force_ident("\"", '$');
3496 start_force(PL_curforce);
3497 NEXTVAL_NEXTTOKE.ival = 0;
3499 start_force(PL_curforce);
3500 NEXTVAL_NEXTTOKE.ival = 0;
3502 start_force(PL_curforce);
3503 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3506 if (PL_lex_starts++) {
3511 sv_free(PL_thistoken);
3512 PL_thistoken = newSVpvs("");
3515 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3516 if (!PL_lex_casemods && PL_lex_inpat)
3523 case LEX_INTERPENDMAYBE:
3524 if (intuit_more(PL_bufptr)) {
3525 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3531 if (PL_lex_dojoin) {
3532 PL_lex_dojoin = FALSE;
3533 PL_lex_state = LEX_INTERPCONCAT;
3537 sv_free(PL_thistoken);
3538 PL_thistoken = newSVpvs("");
3543 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3544 && SvEVALED(PL_lex_repl))
3546 if (PL_bufptr != PL_bufend)
3547 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3551 case LEX_INTERPCONCAT:
3553 if (PL_lex_brackets)
3554 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3556 if (PL_bufptr == PL_bufend)
3557 return REPORT(sublex_done());
3559 if (SvIVX(PL_linestr) == '\'') {
3560 SV *sv = newSVsv(PL_linestr);
3563 else if ( PL_hints & HINT_NEW_RE )
3564 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3565 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3569 s = scan_const(PL_bufptr);
3571 PL_lex_state = LEX_INTERPCASEMOD;
3573 PL_lex_state = LEX_INTERPSTART;
3576 if (s != PL_bufptr) {
3577 start_force(PL_curforce);
3579 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3581 NEXTVAL_NEXTTOKE = pl_yylval;
3584 if (PL_lex_starts++) {
3588 sv_free(PL_thistoken);
3589 PL_thistoken = newSVpvs("");
3592 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3593 if (!PL_lex_casemods && PL_lex_inpat)
3606 PL_lex_state = LEX_NORMAL;
3607 s = scan_formline(PL_bufptr);
3608 if (!PL_lex_formbrack)
3614 PL_oldoldbufptr = PL_oldbufptr;
3620 sv_free(PL_thistoken);
3623 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3627 if (isIDFIRST_lazy_if(s,UTF))
3630 unsigned char c = *s;
3631 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3632 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
3633 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
3638 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
3642 goto fake_eof; /* emulate EOF on ^D or ^Z */
3651 if (PL_lex_brackets) {
3652 yyerror((const char *)
3654 ? "Format not terminated"
3655 : "Missing right curly or square bracket"));
3657 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3658 "### Tokener got EOF\n");
3662 if (s++ < PL_bufend)
3663 goto retry; /* ignore stray nulls */
3666 if (!PL_in_eval && !PL_preambled) {
3667 PL_preambled = TRUE;
3673 /* Generate a string of Perl code to load the debugger.
3674 * If PERL5DB is set, it will return the contents of that,
3675 * otherwise a compile-time require of perl5db.pl. */
3677 const char * const pdb = PerlEnv_getenv("PERL5DB");
3680 sv_setpv(PL_linestr, pdb);
3681 sv_catpvs(PL_linestr,";");
3683 SETERRNO(0,SS_NORMAL);
3684 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3687 sv_setpvs(PL_linestr,"");
3688 if (PL_preambleav) {
3689 SV **svp = AvARRAY(PL_preambleav);
3690 SV **const end = svp + AvFILLp(PL_preambleav);
3692 sv_catsv(PL_linestr, *svp);
3694 sv_catpvs(PL_linestr, ";");
3696 sv_free(MUTABLE_SV(PL_preambleav));
3697 PL_preambleav = NULL;
3700 sv_catpvs(PL_linestr,
3701 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3702 if (PL_minus_n || PL_minus_p) {
3703 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3705 sv_catpvs(PL_linestr,"chomp;");
3708 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3709 || *PL_splitstr == '"')
3710 && strchr(PL_splitstr + 1, *PL_splitstr))
3711 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3713 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3714 bytes can be used as quoting characters. :-) */
3715 const char *splits = PL_splitstr;
3716 sv_catpvs(PL_linestr, "our @F=split(q\0");
3719 if (*splits == '\\')
3720 sv_catpvn(PL_linestr, splits, 1);
3721 sv_catpvn(PL_linestr, splits, 1);
3722 } while (*splits++);
3723 /* This loop will embed the trailing NUL of
3724 PL_linestr as the last thing it does before
3726 sv_catpvs(PL_linestr, ");");
3730 sv_catpvs(PL_linestr,"our @F=split(' ');");
3733 sv_catpvs(PL_linestr, "\n");
3734 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3735 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3736 PL_last_lop = PL_last_uni = NULL;
3737 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3738 update_debugger_info(PL_linestr, NULL, 0);
3742 bof = PL_rsfp ? TRUE : FALSE;
3743 if ((s = filter_gets(PL_linestr, 0)) == NULL) {
3746 PL_realtokenstart = -1;
3749 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3750 PerlIO_clearerr(PL_rsfp);
3752 (void)PerlIO_close(PL_rsfp);
3754 PL_doextract = FALSE;
3756 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3762 sv_setpvs(PL_linestr, ";}continue{print;}");
3764 sv_setpvs(PL_linestr, ";}");
3765 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3766 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3767 PL_last_lop = PL_last_uni = NULL;
3768 PL_minus_n = PL_minus_p = 0;
3771 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3772 PL_last_lop = PL_last_uni = NULL;
3773 sv_setpvs(PL_linestr,"");
3774 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3776 /* If it looks like the start of a BOM or raw UTF-16,
3777 * check if it in fact is. */
3783 #ifdef PERLIO_IS_STDIO
3784 # ifdef __GNU_LIBRARY__
3785 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3786 # define FTELL_FOR_PIPE_IS_BROKEN
3790 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3791 # define FTELL_FOR_PIPE_IS_BROKEN
3796 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3798 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3799 s = swallow_bom((U8*)s);
3803 /* Incest with pod. */
3806 sv_catsv(PL_thiswhite, PL_linestr);
3808 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3809 sv_setpvs(PL_linestr, "");
3810 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3811 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3812 PL_last_lop = PL_last_uni = NULL;
3813 PL_doextract = FALSE;
3817 } while (PL_doextract);
3818 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3819 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3820 update_debugger_info(PL_linestr, NULL, 0);
3821 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3822 PL_last_lop = PL_last_uni = NULL;
3823 if (CopLINE(PL_curcop) == 1) {
3824 while (s < PL_bufend && isSPACE(*s))
3826 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3830 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3834 if (*s == '#' && *(s+1) == '!')
3836 #ifdef ALTERNATE_SHEBANG
3838 static char const as[] = ALTERNATE_SHEBANG;
3839 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3840 d = s + (sizeof(as) - 1);
3842 #endif /* ALTERNATE_SHEBANG */
3851 while (*d && !isSPACE(*d))
3855 #ifdef ARG_ZERO_IS_SCRIPT
3856 if (ipathend > ipath) {
3858 * HP-UX (at least) sets argv[0] to the script name,
3859 * which makes $^X incorrect. And Digital UNIX and Linux,
3860 * at least, set argv[0] to the basename of the Perl
3861 * interpreter. So, having found "#!", we'll set it right.
3863 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3865 assert(SvPOK(x) || SvGMAGICAL(x));
3866 if (sv_eq(x, CopFILESV(PL_curcop))) {
3867 sv_setpvn(x, ipath, ipathend - ipath);
3873 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3874 const char * const lstart = SvPV_const(x,llen);
3876 bstart += blen - llen;
3877 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3878 sv_setpvn(x, ipath, ipathend - ipath);
3883 TAINT_NOT; /* $^X is always tainted, but that's OK */
3885 #endif /* ARG_ZERO_IS_SCRIPT */
3890 d = instr(s,"perl -");
3892 d = instr(s,"perl");
3894 /* avoid getting into infinite loops when shebang
3895 * line contains "Perl" rather than "perl" */
3897 for (d = ipathend-4; d >= ipath; --d) {
3898 if ((*d == 'p' || *d == 'P')
3899 && !ibcmp(d, "perl", 4))
3909 #ifdef ALTERNATE_SHEBANG
3911 * If the ALTERNATE_SHEBANG on this system starts with a
3912 * character that can be part of a Perl expression, then if
3913 * we see it but not "perl", we're probably looking at the
3914 * start of Perl code, not a request to hand off to some
3915 * other interpreter. Similarly, if "perl" is there, but
3916 * not in the first 'word' of the line, we assume the line
3917 * contains the start of the Perl program.
3919 if (d && *s != '#') {
3920 const char *c = ipath;
3921 while (*c && !strchr("; \t\r\n\f\v#", *c))
3924 d = NULL; /* "perl" not in first word; ignore */
3926 *s = '#'; /* Don't try to parse shebang line */
3928 #endif /* ALTERNATE_SHEBANG */
3933 !instr(s,"indir") &&
3934 instr(PL_origargv[0],"perl"))
3941 while (s < PL_bufend && isSPACE(*s))
3943 if (s < PL_bufend) {
3944 Newx(newargv,PL_origargc+3,char*);
3946 while (s < PL_bufend && !isSPACE(*s))
3949 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3952 newargv = PL_origargv;
3955 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3957 Perl_croak(aTHX_ "Can't exec %s", ipath);
3960 while (*d && !isSPACE(*d))
3962 while (SPACE_OR_TAB(*d))
3966 const bool switches_done = PL_doswitches;
3967 const U32 oldpdb = PL_perldb;
3968 const bool oldn = PL_minus_n;
3969 const bool oldp = PL_minus_p;
3973 bool baduni = FALSE;
3975 const char *d2 = d1 + 1;
3976 if (parse_unicode_opts((const char **)&d2)
3980 if (baduni || *d1 == 'M' || *d1 == 'm') {
3981 const char * const m = d1;
3982 while (*d1 && !isSPACE(*d1))
3984 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3987 d1 = moreswitches(d1);
3989 if (PL_doswitches && !switches_done) {
3990 int argc = PL_origargc;
3991 char **argv = PL_origargv;
3994 } while (argc && argv[0][0] == '-' && argv[0][1]);
3995 init_argv_symbols(argc,argv);
3997 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
3998 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3999 /* if we have already added "LINE: while (<>) {",
4000 we must not do it again */
4002 sv_setpvs(PL_linestr, "");
4003 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4004 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4005 PL_last_lop = PL_last_uni = NULL;
4006 PL_preambled = FALSE;
4007 if (PERLDB_LINE || PERLDB_SAVESRC)
4008 (void)gv_fetchfile(PL_origfilename);
4015 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4017 PL_lex_state = LEX_FORMLINE;
4022 #ifdef PERL_STRICT_CR
4023 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4025 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4027 case ' ': case '\t': case '\f': case 013:
4029 PL_realtokenstart = -1;
4031 PL_thiswhite = newSVpvs("");
4032 sv_catpvn(PL_thiswhite, s, 1);
4039 PL_realtokenstart = -1;
4043 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4044 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4045 /* handle eval qq[#line 1 "foo"\n ...] */
4046 CopLINE_dec(PL_curcop);
4049 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4051 if (!PL_in_eval || PL_rsfp)
4056 while (d < PL_bufend && *d != '\n')
4060 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4061 Perl_croak(aTHX_ "panic: input overflow");
4064 PL_thiswhite = newSVpvn(s, d - s);
4069 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4071 PL_lex_state = LEX_FORMLINE;
4077 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4078 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4081 TOKEN(PEG); /* make sure any #! line is accessible */
4086 /* if (PL_madskills && PL_lex_formbrack) { */
4088 while (d < PL_bufend && *d != '\n')
4092 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4093 Perl_croak(aTHX_ "panic: input overflow");
4094 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4096 PL_thiswhite = newSVpvs("");
4097 if (CopLINE(PL_curcop) == 1) {
4098 sv_setpvs(PL_thiswhite, "");
4101 sv_catpvn(PL_thiswhite, s, d - s);
4115 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4123 while (s < PL_bufend && SPACE_OR_TAB(*s))
4126 if (strnEQ(s,"=>",2)) {
4127 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4128 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4129 OPERATOR('-'); /* unary minus */
4131 PL_last_uni = PL_oldbufptr;
4133 case 'r': ftst = OP_FTEREAD; break;
4134 case 'w': ftst = OP_FTEWRITE; break;
4135 case 'x': ftst = OP_FTEEXEC; break;
4136 case 'o': ftst = OP_FTEOWNED; break;
4137 case 'R': ftst = OP_FTRREAD; break;
4138 case 'W': ftst = OP_FTRWRITE; break;
4139 case 'X': ftst = OP_FTREXEC; break;
4140 case 'O': ftst = OP_FTROWNED; break;
4141 case 'e': ftst = OP_FTIS; break;
4142 case 'z': ftst = OP_FTZERO; break;
4143 case 's': ftst = OP_FTSIZE; break;
4144 case 'f': ftst = OP_FTFILE; break;
4145 case 'd': ftst = OP_FTDIR; break;
4146 case 'l': ftst = OP_FTLINK; break;
4147 case 'p': ftst = OP_FTPIPE; break;
4148 case 'S': ftst = OP_FTSOCK; break;
4149 case 'u': ftst = OP_FTSUID; break;
4150 case 'g': ftst = OP_FTSGID; break;
4151 case 'k': ftst = OP_FTSVTX; break;
4152 case 'b': ftst = OP_FTBLK; break;
4153 case 'c': ftst = OP_FTCHR; break;
4154 case 't': ftst = OP_FTTTY; break;
4155 case 'T': ftst = OP_FTTEXT; break;
4156 case 'B': ftst = OP_FTBINARY; break;
4157 case 'M': case 'A': case 'C':
4158 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4160 case 'M': ftst = OP_FTMTIME; break;
4161 case 'A': ftst = OP_FTATIME; break;
4162 case 'C': ftst = OP_FTCTIME; break;
4170 PL_last_lop_op = (OPCODE)ftst;
4171 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4172 "### Saw file test %c\n", (int)tmp);
4177 /* Assume it was a minus followed by a one-letter named
4178 * subroutine call (or a -bareword), then. */
4179 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4180 "### '-%c' looked like a file test but was not\n",
4187 const char tmp = *s++;
4190 if (PL_expect == XOPERATOR)
4195 else if (*s == '>') {
4198 if (isIDFIRST_lazy_if(s,UTF)) {
4199 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4207 if (PL_expect == XOPERATOR)
4210 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4212 OPERATOR('-'); /* unary minus */
4218 const char tmp = *s++;
4221 if (PL_expect == XOPERATOR)
4226 if (PL_expect == XOPERATOR)
4229 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4236 if (PL_expect != XOPERATOR) {
4237 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4238 PL_expect = XOPERATOR;
4239 force_ident(PL_tokenbuf, '*');
4252 if (PL_expect == XOPERATOR) {
4256 PL_tokenbuf[0] = '%';
4257 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4258 sizeof PL_tokenbuf - 1, FALSE);
4259 if (!PL_tokenbuf[1]) {
4262 PL_pending_ident = '%';
4271 const char tmp = *s++;
4276 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4283 const char tmp = *s++;
4289 goto just_a_word_zero_gv;
4292 switch (PL_expect) {
4298 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4300 PL_bufptr = s; /* update in case we back off */
4302 deprecate(":= for an empty attribute list");
4309 PL_expect = XTERMBLOCK;
4312 stuffstart = s - SvPVX(PL_linestr) - 1;
4316 while (isIDFIRST_lazy_if(s,UTF)) {
4319 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4320 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4321 if (tmp < 0) tmp = -tmp;
4336 sv = newSVpvn(s, len);
4338 d = scan_str(d,TRUE,TRUE);
4340 /* MUST advance bufptr here to avoid bogus
4341 "at end of line" context messages from yyerror().
4343 PL_bufptr = s + len;
4344 yyerror("Unterminated attribute parameter in attribute list");
4348 return REPORT(0); /* EOF indicator */
4352 sv_catsv(sv, PL_lex_stuff);
4353 attrs = append_elem(OP_LIST, attrs,
4354 newSVOP(OP_CONST, 0, sv));
4355 SvREFCNT_dec(PL_lex_stuff);
4356 PL_lex_stuff = NULL;
4359 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4361 if (PL_in_my == KEY_our) {
4362 deprecate(":unique");
4365 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4368 /* NOTE: any CV attrs applied here need to be part of
4369 the CVf_BUILTIN_ATTRS define in cv.h! */
4370 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4372 CvLVALUE_on(PL_compcv);
4374 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4376 deprecate(":locked");
4378 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4380 CvMETHOD_on(PL_compcv);
4382 /* After we've set the flags, it could be argued that
4383 we don't need to do the attributes.pm-based setting
4384 process, and shouldn't bother appending recognized
4385 flags. To experiment with that, uncomment the
4386 following "else". (Note that's already been
4387 uncommented. That keeps the above-applied built-in
4388 attributes from being intercepted (and possibly
4389 rejected) by a package's attribute routines, but is
4390 justified by the performance win for the common case
4391 of applying only built-in attributes.) */
4393 attrs = append_elem(OP_LIST, attrs,
4394 newSVOP(OP_CONST, 0,
4398 if (*s == ':' && s[1] != ':')
4401 break; /* require real whitespace or :'s */
4402 /* XXX losing whitespace on sequential attributes here */
4406 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4407 if (*s != ';' && *s != '}' && *s != tmp
4408 && (tmp != '=' || *s != ')')) {
4409 const char q = ((*s == '\'') ? '"' : '\'');
4410 /* If here for an expression, and parsed no attrs, back
4412 if (tmp == '=' && !attrs) {
4416 /* MUST advance bufptr here to avoid bogus "at end of line"
4417 context messages from yyerror().
4420 yyerror( (const char *)
4422 ? Perl_form(aTHX_ "Invalid separator character "
4423 "%c%c%c in attribute list", q, *s, q)
4424 : "Unterminated attribute list" ) );
4432 start_force(PL_curforce);
4433 NEXTVAL_NEXTTOKE.opval = attrs;
4434 CURMAD('_', PL_nextwhite);
4439 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4440 (s - SvPVX(PL_linestr)) - stuffstart);
4448 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4449 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4457 const char tmp = *s++;
4462 const char tmp = *s++;
4470 if (PL_lex_brackets <= 0)
4471 yyerror("Unmatched right square bracket");
4474 if (PL_lex_state == LEX_INTERPNORMAL) {
4475 if (PL_lex_brackets == 0) {
4476 if (*s == '-' && s[1] == '>')
4477 PL_lex_state = LEX_INTERPENDMAYBE;
4478 else if (*s != '[' && *s != '{')
4479 PL_lex_state = LEX_INTERPEND;
4486 if (PL_lex_brackets > 100) {
4487 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4489 switch (PL_expect) {
4491 if (PL_lex_formbrack) {
4495 if (PL_oldoldbufptr == PL_last_lop)
4496 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4498 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4499 OPERATOR(HASHBRACK);
4501 while (s < PL_bufend && SPACE_OR_TAB(*s))
4504 PL_tokenbuf[0] = '\0';
4505 if (d < PL_bufend && *d == '-') {
4506 PL_tokenbuf[0] = '-';
4508 while (d < PL_bufend && SPACE_OR_TAB(*d))
4511 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4512 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4514 while (d < PL_bufend && SPACE_OR_TAB(*d))
4517 const char minus = (PL_tokenbuf[0] == '-');
4518 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4526 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4531 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4536 if (PL_oldoldbufptr == PL_last_lop)
4537 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4539 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4542 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4544 /* This hack is to get the ${} in the message. */
4546 yyerror("syntax error");
4549 OPERATOR(HASHBRACK);
4551 /* This hack serves to disambiguate a pair of curlies
4552 * as being a block or an anon hash. Normally, expectation
4553 * determines that, but in cases where we're not in a
4554 * position to expect anything in particular (like inside
4555 * eval"") we have to resolve the ambiguity. This code
4556 * covers the case where the first term in the curlies is a
4557 * quoted string. Most other cases need to be explicitly
4558 * disambiguated by prepending a "+" before the opening
4559 * curly in order to force resolution as an anon hash.
4561 * XXX should probably propagate the outer expectation
4562 * into eval"" to rely less on this hack, but that could
4563 * potentially break current behavior of eval"".
4567 if (*s == '\'' || *s == '"' || *s == '`') {
4568 /* common case: get past first string, handling escapes */
4569 for (t++; t < PL_bufend && *t != *s;)
4570 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4574 else if (*s == 'q') {
4577 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4580 /* skip q//-like construct */
4582 char open, close, term;
4585 while (t < PL_bufend && isSPACE(*t))
4587 /* check for q => */
4588 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4589 OPERATOR(HASHBRACK);
4593 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4597 for (t++; t < PL_bufend; t++) {
4598 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4600 else if (*t == open)
4604 for (t++; t < PL_bufend; t++) {
4605 if (*t == '\\' && t+1 < PL_bufend)
4607 else if (*t == close && --brackets <= 0)
4609 else if (*t == open)
4616 /* skip plain q word */
4617 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4620 else if (isALNUM_lazy_if(t,UTF)) {
4622 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4625 while (t < PL_bufend && isSPACE(*t))
4627 /* if comma follows first term, call it an anon hash */
4628 /* XXX it could be a comma expression with loop modifiers */
4629 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4630 || (*t == '=' && t[1] == '>')))
4631 OPERATOR(HASHBRACK);
4632 if (PL_expect == XREF)
4635 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4641 pl_yylval.ival = CopLINE(PL_curcop);
4642 if (isSPACE(*s) || *s == '#')
4643 PL_copline = NOLINE; /* invalidate current command line number */
4648 if (PL_lex_brackets <= 0)
4649 yyerror("Unmatched right curly bracket");
4651 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4652 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4653 PL_lex_formbrack = 0;
4654 if (PL_lex_state == LEX_INTERPNORMAL) {
4655 if (PL_lex_brackets == 0) {
4656 if (PL_expect & XFAKEBRACK) {
4657 PL_expect &= XENUMMASK;
4658 PL_lex_state = LEX_INTERPEND;
4663 PL_thiswhite = newSVpvs("");
4664 sv_catpvs(PL_thiswhite,"}");
4667 return yylex(); /* ignore fake brackets */
4669 if (*s == '-' && s[1] == '>')
4670 PL_lex_state = LEX_INTERPENDMAYBE;
4671 else if (*s != '[' && *s != '{')
4672 PL_lex_state = LEX_INTERPEND;
4675 if (PL_expect & XFAKEBRACK) {
4676 PL_expect &= XENUMMASK;
4678 return yylex(); /* ignore fake brackets */
4680 start_force(PL_curforce);
4682 curmad('X', newSVpvn(s-1,1));
4683 CURMAD('_', PL_thiswhite);
4688 PL_thistoken = newSVpvs("");
4696 if (PL_expect == XOPERATOR) {
4697 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4698 && isIDFIRST_lazy_if(s,UTF))
4700 CopLINE_dec(PL_curcop);
4701 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
4702 CopLINE_inc(PL_curcop);
4707 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4709 PL_expect = XOPERATOR;
4710 force_ident(PL_tokenbuf, '&');
4714 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4726 const char tmp = *s++;
4733 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4734 && strchr("+-*/%.^&|<",tmp))
4735 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4736 "Reversed %c= operator",(int)tmp);
4738 if (PL_expect == XSTATE && isALPHA(tmp) &&
4739 (s == PL_linestart+1 || s[-2] == '\n') )
4741 if (PL_in_eval && !PL_rsfp) {
4746 if (strnEQ(s,"=cut",4)) {
4762 PL_thiswhite = newSVpvs("");
4763 sv_catpvn(PL_thiswhite, PL_linestart,
4764 PL_bufend - PL_linestart);
4768 PL_doextract = TRUE;
4772 if (PL_lex_brackets < PL_lex_formbrack) {
4774 #ifdef PERL_STRICT_CR
4775 while (SPACE_OR_TAB(*t))
4777 while (SPACE_OR_TAB(*t) || *t == '\r')
4780 if (*t == '\n' || *t == '#') {
4791 const char tmp = *s++;
4793 /* was this !=~ where !~ was meant?
4794 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4796 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4797 const char *t = s+1;
4799 while (t < PL_bufend && isSPACE(*t))
4802 if (*t == '/' || *t == '?' ||
4803 ((*t == 'm' || *t == 's' || *t == 'y')
4804 && !isALNUM(t[1])) ||
4805 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4806 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4807 "!=~ should be !~");
4817 if (PL_expect != XOPERATOR) {
4818 if (s[1] != '<' && !strchr(s,'>'))
4821 s = scan_heredoc(s);
4823 s = scan_inputsymbol(s);
4824 TERM(sublex_start());
4830 SHop(OP_LEFT_SHIFT);
4844 const char tmp = *s++;
4846 SHop(OP_RIGHT_SHIFT);
4847 else if (tmp == '=')
4856 if (PL_expect == XOPERATOR) {
4857 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4858 return deprecate_commaless_var_list();
4862 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4863 PL_tokenbuf[0] = '@';
4864 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4865 sizeof PL_tokenbuf - 1, FALSE);
4866 if (PL_expect == XOPERATOR)
4867 no_op("Array length", s);
4868 if (!PL_tokenbuf[1])
4870 PL_expect = XOPERATOR;
4871 PL_pending_ident = '#';
4875 PL_tokenbuf[0] = '$';
4876 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4877 sizeof PL_tokenbuf - 1, FALSE);
4878 if (PL_expect == XOPERATOR)
4880 if (!PL_tokenbuf[1]) {
4882 yyerror("Final $ should be \\$ or $name");
4886 /* This kludge not intended to be bulletproof. */
4887 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4888 pl_yylval.opval = newSVOP(OP_CONST, 0,
4889 newSViv(CopARYBASE_get(&PL_compiling)));
4890 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4896 const char tmp = *s;
4897 if (PL_lex_state == LEX_NORMAL)
4900 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4901 && intuit_more(s)) {
4903 PL_tokenbuf[0] = '@';
4904 if (ckWARN(WARN_SYNTAX)) {
4907 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4910 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4911 while (t < PL_bufend && *t != ']')
4913 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4914 "Multidimensional syntax %.*s not supported",
4915 (int)((t - PL_bufptr) + 1), PL_bufptr);
4919 else if (*s == '{') {
4921 PL_tokenbuf[0] = '%';
4922 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4923 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4925 char tmpbuf[sizeof PL_tokenbuf];
4928 } while (isSPACE(*t));
4929 if (isIDFIRST_lazy_if(t,UTF)) {
4931 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4935 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4936 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4937 "You need to quote \"%s\"",
4944 PL_expect = XOPERATOR;
4945 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4946 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4947 if (!islop || PL_last_lop_op == OP_GREPSTART)
4948 PL_expect = XOPERATOR;
4949 else if (strchr("$@\"'`q", *s))
4950 PL_expect = XTERM; /* e.g. print $fh "foo" */
4951 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4952 PL_expect = XTERM; /* e.g. print $fh &sub */
4953 else if (isIDFIRST_lazy_if(s,UTF)) {
4954 char tmpbuf[sizeof PL_tokenbuf];
4956 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4957 if ((t2 = keyword(tmpbuf, len, 0))) {
4958 /* binary operators exclude handle interpretations */
4970 PL_expect = XTERM; /* e.g. print $fh length() */
4975 PL_expect = XTERM; /* e.g. print $fh subr() */
4978 else if (isDIGIT(*s))
4979 PL_expect = XTERM; /* e.g. print $fh 3 */
4980 else if (*s == '.' && isDIGIT(s[1]))
4981 PL_expect = XTERM; /* e.g. print $fh .3 */
4982 else if ((*s == '?' || *s == '-' || *s == '+')
4983 && !isSPACE(s[1]) && s[1] != '=')
4984 PL_expect = XTERM; /* e.g. print $fh -1 */
4985 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4987 PL_expect = XTERM; /* e.g. print $fh /.../
4988 XXX except DORDOR operator
4990 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4992 PL_expect = XTERM; /* print $fh <<"EOF" */
4995 PL_pending_ident = '$';
4999 if (PL_expect == XOPERATOR)
5001 PL_tokenbuf[0] = '@';
5002 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5003 if (!PL_tokenbuf[1]) {
5006 if (PL_lex_state == LEX_NORMAL)
5008 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5010 PL_tokenbuf[0] = '%';
5012 /* Warn about @ where they meant $. */
5013 if (*s == '[' || *s == '{') {
5014 if (ckWARN(WARN_SYNTAX)) {
5015 const char *t = s + 1;
5016 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5018 if (*t == '}' || *t == ']') {
5020 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5021 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5022 "Scalar value %.*s better written as $%.*s",
5023 (int)(t-PL_bufptr), PL_bufptr,
5024 (int)(t-PL_bufptr-1), PL_bufptr+1);
5029 PL_pending_ident = '@';
5032 case '/': /* may be division, defined-or, or pattern */
5033 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5037 case '?': /* may either be conditional or pattern */
5038 if (PL_expect == XOPERATOR) {
5046 /* A // operator. */
5056 /* Disable warning on "study /blah/" */
5057 if (PL_oldoldbufptr == PL_last_uni
5058 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5059 || memNE(PL_last_uni, "study", 5)
5060 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5063 s = scan_pat(s,OP_MATCH);
5064 TERM(sublex_start());
5068 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5069 #ifdef PERL_STRICT_CR
5072 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5074 && (s == PL_linestart || s[-1] == '\n') )
5076 PL_lex_formbrack = 0;
5080 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5084 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5090 pl_yylval.ival = OPf_SPECIAL;
5096 if (PL_expect != XOPERATOR)
5101 case '0': case '1': case '2': case '3': case '4':
5102 case '5': case '6': case '7': case '8': case '9':
5103 s = scan_num(s, &pl_yylval);
5104 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5105 if (PL_expect == XOPERATOR)
5110 s = scan_str(s,!!PL_madskills,FALSE);
5111 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5112 if (PL_expect == XOPERATOR) {
5113 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5114 return deprecate_commaless_var_list();
5121 pl_yylval.ival = OP_CONST;
5122 TERM(sublex_start());
5125 s = scan_str(s,!!PL_madskills,FALSE);
5126 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5127 if (PL_expect == XOPERATOR) {
5128 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5129 return deprecate_commaless_var_list();
5136 pl_yylval.ival = OP_CONST;
5137 /* FIXME. I think that this can be const if char *d is replaced by
5138 more localised variables. */
5139 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5140 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5141 pl_yylval.ival = OP_STRINGIFY;
5145 TERM(sublex_start());
5148 s = scan_str(s,!!PL_madskills,FALSE);
5149 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5150 if (PL_expect == XOPERATOR)
5151 no_op("Backticks",s);
5154 readpipe_override();
5155 TERM(sublex_start());
5159 if (PL_lex_inwhat && isDIGIT(*s))
5160 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5162 if (PL_expect == XOPERATOR)
5163 no_op("Backslash",s);
5167 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5168 char *start = s + 2;
5169 while (isDIGIT(*start) || *start == '_')
5171 if (*start == '.' && isDIGIT(start[1])) {
5172 s = scan_num(s, &pl_yylval);
5175 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5176 else if (!isALPHA(*start) && (PL_expect == XTERM
5177 || PL_expect == XREF || PL_expect == XSTATE
5178 || PL_expect == XTERMORDORDOR)) {
5179 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5181 s = scan_num(s, &pl_yylval);
5188 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5230 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5232 /* Some keywords can be followed by any delimiter, including ':' */
5233 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5234 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5235 (PL_tokenbuf[0] == 'q' &&
5236 strchr("qwxr", PL_tokenbuf[1])))));
5238 /* x::* is just a word, unless x is "CORE" */
5239 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5243 while (d < PL_bufend && isSPACE(*d))
5244 d++; /* no comments skipped here, or s### is misparsed */
5246 /* Is this a label? */
5247 if (!tmp && PL_expect == XSTATE
5248 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5249 tmp = keyword(PL_tokenbuf, len, 0);
5251 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5253 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5258 /* Check for keywords */
5259 tmp = keyword(PL_tokenbuf, len, 0);
5261 /* Is this a word before a => operator? */
5262 if (*d == '=' && d[1] == '>') {
5265 = (OP*)newSVOP(OP_CONST, 0,
5266 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5267 pl_yylval.opval->op_private = OPpCONST_BARE;
5271 if (tmp < 0) { /* second-class keyword? */
5272 GV *ogv = NULL; /* override (winner) */
5273 GV *hgv = NULL; /* hidden (loser) */
5274 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5276 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5279 if (GvIMPORTED_CV(gv))
5281 else if (! CvMETHOD(cv))
5285 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5286 (gv = *gvp) && isGV_with_GP(gv) &&
5287 GvCVu(gv) && GvIMPORTED_CV(gv))
5294 tmp = 0; /* overridden by import or by GLOBAL */
5297 && -tmp==KEY_lock /* XXX generalizable kludge */
5300 tmp = 0; /* any sub overrides "weak" keyword */
5302 else { /* no override */
5304 if (tmp == KEY_dump) {
5305 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5306 "dump() better written as CORE::dump()");
5310 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5311 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5312 "Ambiguous call resolved as CORE::%s(), %s",
5313 GvENAME(hgv), "qualify as such or use &");
5320 default: /* not a keyword */
5321 /* Trade off - by using this evil construction we can pull the
5322 variable gv into the block labelled keylookup. If not, then
5323 we have to give it function scope so that the goto from the
5324 earlier ':' case doesn't bypass the initialisation. */
5326 just_a_word_zero_gv:
5334 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5337 SV *nextPL_nextwhite = 0;
5341 /* Get the rest if it looks like a package qualifier */
5343 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5345 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5348 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5349 *s == '\'' ? "'" : "::");
5354 if (PL_expect == XOPERATOR) {
5355 if (PL_bufptr == PL_linestart) {
5356 CopLINE_dec(PL_curcop);
5357 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5358 CopLINE_inc(PL_curcop);
5361 no_op("Bareword",s);
5364 /* Look for a subroutine with this name in current package,
5365 unless name is "Foo::", in which case Foo is a bearword
5366 (and a package name). */
5368 if (len > 2 && !PL_madskills &&
5369 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5371 if (ckWARN(WARN_BAREWORD)
5372 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5373 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5374 "Bareword \"%s\" refers to nonexistent package",
5377 PL_tokenbuf[len] = '\0';
5383 /* Mustn't actually add anything to a symbol table.
5384 But also don't want to "initialise" any placeholder
5385 constants that might already be there into full
5386 blown PVGVs with attached PVCV. */
5387 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5388 GV_NOADD_NOINIT, SVt_PVCV);
5393 /* if we saw a global override before, get the right name */
5396 sv = newSVpvs("CORE::GLOBAL::");
5397 sv_catpv(sv,PL_tokenbuf);
5400 /* If len is 0, newSVpv does strlen(), which is correct.
5401 If len is non-zero, then it will be the true length,
5402 and so the scalar will be created correctly. */
5403 sv = newSVpv(PL_tokenbuf,len);
5406 if (PL_madskills && !PL_thistoken) {
5407 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5408 PL_thistoken = newSVpvn(start,s - start);
5409 PL_realtokenstart = s - SvPVX(PL_linestr);
5413 /* Presume this is going to be a bareword of some sort. */
5416 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5417 pl_yylval.opval->op_private = OPpCONST_BARE;
5418 /* UTF-8 package name? */
5419 if (UTF && !IN_BYTES &&
5420 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5423 /* And if "Foo::", then that's what it certainly is. */
5428 /* Do the explicit type check so that we don't need to force
5429 the initialisation of the symbol table to have a real GV.
5430 Beware - gv may not really be a PVGV, cv may not really be
5431 a PVCV, (because of the space optimisations that gv_init
5432 understands) But they're true if for this symbol there is
5433 respectively a typeglob and a subroutine.
5435 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5436 /* Real typeglob, so get the real subroutine: */
5438 /* A proxy for a subroutine in this package? */
5439 : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5442 /* See if it's the indirect object for a list operator. */
5444 if (PL_oldoldbufptr &&
5445 PL_oldoldbufptr < PL_bufptr &&
5446 (PL_oldoldbufptr == PL_last_lop
5447 || PL_oldoldbufptr == PL_last_uni) &&
5448 /* NO SKIPSPACE BEFORE HERE! */
5449 (PL_expect == XREF ||
5450 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5452 bool immediate_paren = *s == '(';
5454 /* (Now we can afford to cross potential line boundary.) */
5455 s = SKIPSPACE2(s,nextPL_nextwhite);
5457 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5460 /* Two barewords in a row may indicate method call. */
5462 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5463 (tmp = intuit_method(s, gv, cv)))
5466 /* If not a declared subroutine, it's an indirect object. */
5467 /* (But it's an indir obj regardless for sort.) */
5468 /* Also, if "_" follows a filetest operator, it's a bareword */
5471 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5473 (PL_last_lop_op != OP_MAPSTART &&
5474 PL_last_lop_op != OP_GREPSTART))))
5475 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5476 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5479 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5484 PL_expect = XOPERATOR;
5487 s = SKIPSPACE2(s,nextPL_nextwhite);
5488 PL_nextwhite = nextPL_nextwhite;
5493 /* Is this a word before a => operator? */
5494 if (*s == '=' && s[1] == '>' && !pkgname) {
5496 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5497 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5498 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5502 /* If followed by a paren, it's certainly a subroutine. */
5507 while (SPACE_OR_TAB(*d))
5509 if (*d == ')' && (sv = gv_const_sv(gv))) {
5516 PL_nextwhite = PL_thiswhite;
5519 start_force(PL_curforce);
5521 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5522 PL_expect = XOPERATOR;
5525 PL_nextwhite = nextPL_nextwhite;
5526 curmad('X', PL_thistoken);
5527 PL_thistoken = newSVpvs("");
5535 /* If followed by var or block, call it a method (unless sub) */
5537 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5538 PL_last_lop = PL_oldbufptr;
5539 PL_last_lop_op = OP_METHOD;
5543 /* If followed by a bareword, see if it looks like indir obj. */
5546 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5547 && (tmp = intuit_method(s, gv, cv)))
5550 /* Not a method, so call it a subroutine (if defined) */
5553 if (lastchar == '-')
5554 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5555 "Ambiguous use of -%s resolved as -&%s()",
5556 PL_tokenbuf, PL_tokenbuf);
5557 /* Check for a constant sub */
5558 if ((sv = gv_const_sv(gv))) {
5560 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5561 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5562 pl_yylval.opval->op_private = 0;
5566 /* Resolve to GV now. */
5567 if (SvTYPE(gv) != SVt_PVGV) {
5568 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5569 assert (SvTYPE(gv) == SVt_PVGV);
5570 /* cv must have been some sort of placeholder, so
5571 now needs replacing with a real code reference. */
5575 op_free(pl_yylval.opval);
5576 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5577 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5578 PL_last_lop = PL_oldbufptr;
5579 PL_last_lop_op = OP_ENTERSUB;
5580 /* Is there a prototype? */
5588 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5591 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5593 while (*proto == ';')
5595 if (*proto == '&' && *s == '{') {
5597 sv_setpvs(PL_subname, "__ANON__");
5599 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5606 PL_nextwhite = PL_thiswhite;
5609 start_force(PL_curforce);
5610 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5613 PL_nextwhite = nextPL_nextwhite;
5614 curmad('X', PL_thistoken);
5615 PL_thistoken = newSVpvs("");
5622 /* Guess harder when madskills require "best effort". */
5623 if (PL_madskills && (!gv || !GvCVu(gv))) {
5624 int probable_sub = 0;
5625 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5627 else if (isALPHA(*s)) {
5631 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5632 if (!keyword(tmpbuf, tmplen, 0))
5635 while (d < PL_bufend && isSPACE(*d))
5637 if (*d == '=' && d[1] == '>')
5642 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5643 op_free(pl_yylval.opval);
5644 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5645 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5646 PL_last_lop = PL_oldbufptr;
5647 PL_last_lop_op = OP_ENTERSUB;
5648 PL_nextwhite = PL_thiswhite;
5650 start_force(PL_curforce);
5651 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5653 PL_nextwhite = nextPL_nextwhite;
5654 curmad('X', PL_thistoken);
5655 PL_thistoken = newSVpvs("");
5660 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5667 /* Call it a bare word */
5669 if (PL_hints & HINT_STRICT_SUBS)
5670 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5673 /* after "print" and similar functions (corresponding to
5674 * "F? L" in opcode.pl), whatever wasn't already parsed as
5675 * a filehandle should be subject to "strict subs".
5676 * Likewise for the optional indirect-object argument to system
5677 * or exec, which can't be a bareword */
5678 if ((PL_last_lop_op == OP_PRINT
5679 || PL_last_lop_op == OP_PRTF
5680 || PL_last_lop_op == OP_SAY
5681 || PL_last_lop_op == OP_SYSTEM
5682 || PL_last_lop_op == OP_EXEC)
5683 && (PL_hints & HINT_STRICT_SUBS))
5684 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5685 if (lastchar != '-') {
5686 if (ckWARN(WARN_RESERVED)) {
5690 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5691 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5698 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
5699 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5700 "Operator or semicolon missing before %c%s",
5701 lastchar, PL_tokenbuf);
5702 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
5703 "Ambiguous use of %c resolved as operator %c",
5704 lastchar, lastchar);
5710 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5711 newSVpv(CopFILE(PL_curcop),0));
5715 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5716 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5719 case KEY___PACKAGE__:
5720 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5722 ? newSVhek(HvNAME_HEK(PL_curstash))
5729 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5730 const char *pname = "main";
5731 if (PL_tokenbuf[2] == 'D')
5732 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5733 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5737 GvIOp(gv) = newIO();
5738 IoIFP(GvIOp(gv)) = PL_rsfp;
5739 #if defined(HAS_FCNTL) && defined(F_SETFD)
5741 const int fd = PerlIO_fileno(PL_rsfp);
5742 fcntl(fd,F_SETFD,fd >= 3);
5745 /* Mark this internal pseudo-handle as clean */
5746 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5747 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5748 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5750 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5751 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5752 /* if the script was opened in binmode, we need to revert
5753 * it to text mode for compatibility; but only iff it has CRs
5754 * XXX this is a questionable hack at best. */
5755 if (PL_bufend-PL_bufptr > 2
5756 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5759 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5760 loc = PerlIO_tell(PL_rsfp);
5761 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5764 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5766 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5767 #endif /* NETWARE */
5768 #ifdef PERLIO_IS_STDIO /* really? */
5769 # if defined(__BORLANDC__)
5770 /* XXX see note in do_binmode() */
5771 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5775 PerlIO_seek(PL_rsfp, loc, 0);
5779 #ifdef PERLIO_LAYERS
5782 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5783 else if (PL_encoding) {
5790 XPUSHs(PL_encoding);
5792 call_method("name", G_SCALAR);
5796 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5797 Perl_form(aTHX_ ":encoding(%"SVf")",
5806 if (PL_realtokenstart >= 0) {
5807 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5809 PL_endwhite = newSVpvs("");
5810 sv_catsv(PL_endwhite, PL_thiswhite);
5812 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5813 PL_realtokenstart = -1;
5815 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
5831 if (PL_expect == XSTATE) {
5838 if (*s == ':' && s[1] == ':') {
5841 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5842 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5843 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5846 else if (tmp == KEY_require || tmp == KEY_do)
5847 /* that's a way to remember we saw "CORE::" */
5860 LOP(OP_ACCEPT,XTERM);
5866 LOP(OP_ATAN2,XTERM);
5872 LOP(OP_BINMODE,XTERM);
5875 LOP(OP_BLESS,XTERM);
5884 /* When 'use switch' is in effect, continue has a dual
5885 life as a control operator. */
5887 if (!FEATURE_IS_ENABLED("switch"))
5890 /* We have to disambiguate the two senses of
5891 "continue". If the next token is a '{' then
5892 treat it as the start of a continue block;
5893 otherwise treat it as a control operator.
5905 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5922 if (!PL_cryptseen) {
5923 PL_cryptseen = TRUE;
5927 LOP(OP_CRYPT,XTERM);
5930 LOP(OP_CHMOD,XTERM);
5933 LOP(OP_CHOWN,XTERM);
5936 LOP(OP_CONNECT,XTERM);
5955 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5956 if (orig_keyword == KEY_do) {
5965 PL_hints |= HINT_BLOCK_SCOPE;
5975 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5976 LOP(OP_DBMOPEN,XTERM);
5982 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5989 pl_yylval.ival = CopLINE(PL_curcop);
6005 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6006 UNIBRACK(OP_ENTEREVAL);
6020 case KEY_endhostent:
6026 case KEY_endservent:
6029 case KEY_endprotoent:
6040 pl_yylval.ival = CopLINE(PL_curcop);
6042 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6045 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6048 if ((PL_bufend - p) >= 3 &&
6049 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6051 else if ((PL_bufend - p) >= 4 &&
6052 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6055 if (isIDFIRST_lazy_if(p,UTF)) {
6056 p = scan_ident(p, PL_bufend,
6057 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6061 Perl_croak(aTHX_ "Missing $ on loop variable");
6063 s = SvPVX(PL_linestr) + soff;
6069 LOP(OP_FORMLINE,XTERM);
6075 LOP(OP_FCNTL,XTERM);
6081 LOP(OP_FLOCK,XTERM);
6090 LOP(OP_GREPSTART, XREF);
6093 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6108 case KEY_getpriority:
6109 LOP(OP_GETPRIORITY,XTERM);
6111 case KEY_getprotobyname:
6114 case KEY_getprotobynumber:
6115 LOP(OP_GPBYNUMBER,XTERM);
6117 case KEY_getprotoent:
6129 case KEY_getpeername:
6130 UNI(OP_GETPEERNAME);
6132 case KEY_gethostbyname:
6135 case KEY_gethostbyaddr:
6136 LOP(OP_GHBYADDR,XTERM);
6138 case KEY_gethostent:
6141 case KEY_getnetbyname:
6144 case KEY_getnetbyaddr:
6145 LOP(OP_GNBYADDR,XTERM);
6150 case KEY_getservbyname:
6151 LOP(OP_GSBYNAME,XTERM);
6153 case KEY_getservbyport:
6154 LOP(OP_GSBYPORT,XTERM);
6156 case KEY_getservent:
6159 case KEY_getsockname:
6160 UNI(OP_GETSOCKNAME);
6162 case KEY_getsockopt:
6163 LOP(OP_GSOCKOPT,XTERM);
6178 pl_yylval.ival = CopLINE(PL_curcop);
6188 pl_yylval.ival = CopLINE(PL_curcop);
6192 LOP(OP_INDEX,XTERM);
6198 LOP(OP_IOCTL,XTERM);
6210 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6242 LOP(OP_LISTEN,XTERM);
6251 s = scan_pat(s,OP_MATCH);
6252 TERM(sublex_start());
6255 LOP(OP_MAPSTART, XREF);
6258 LOP(OP_MKDIR,XTERM);
6261 LOP(OP_MSGCTL,XTERM);
6264 LOP(OP_MSGGET,XTERM);
6267 LOP(OP_MSGRCV,XTERM);
6270 LOP(OP_MSGSND,XTERM);
6275 PL_in_my = (U16)tmp;
6277 if (isIDFIRST_lazy_if(s,UTF)) {
6281 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6282 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6284 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6285 if (!PL_in_my_stash) {
6288 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6292 if (PL_madskills) { /* just add type to declarator token */
6293 sv_catsv(PL_thistoken, PL_nextwhite);
6295 sv_catpvn(PL_thistoken, start, s - start);
6303 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6310 s = tokenize_use(0, s);
6314 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6321 if (isIDFIRST_lazy_if(s,UTF)) {
6323 for (d = s; isALNUM_lazy_if(d,UTF);)
6325 for (t=d; isSPACE(*t);)
6327 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6329 && !(t[0] == '=' && t[1] == '>')
6331 int parms_len = (int)(d-s);
6332 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6333 "Precedence problem: open %.*s should be open(%.*s)",
6334 parms_len, s, parms_len, s);
6340 pl_yylval.ival = OP_OR;
6350 LOP(OP_OPEN_DIR,XTERM);
6353 checkcomma(s,PL_tokenbuf,"filehandle");
6357 checkcomma(s,PL_tokenbuf,"filehandle");
6376 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6377 s = force_version(s, FALSE);
6381 LOP(OP_PIPE_OP,XTERM);
6384 s = scan_str(s,!!PL_madskills,FALSE);
6387 pl_yylval.ival = OP_CONST;
6388 TERM(sublex_start());
6394 s = scan_str(s,!!PL_madskills,FALSE);
6397 PL_expect = XOPERATOR;
6399 if (SvCUR(PL_lex_stuff)) {
6402 d = SvPV_force(PL_lex_stuff, len);
6404 for (; isSPACE(*d) && len; --len, ++d)
6409 if (!warned && ckWARN(WARN_QW)) {
6410 for (; !isSPACE(*d) && len; --len, ++d) {
6412 Perl_warner(aTHX_ packWARN(WARN_QW),
6413 "Possible attempt to separate words with commas");
6416 else if (*d == '#') {
6417 Perl_warner(aTHX_ packWARN(WARN_QW),
6418 "Possible attempt to put comments in qw() list");
6424 for (; !isSPACE(*d) && len; --len, ++d)
6427 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6428 words = append_elem(OP_LIST, words,
6429 newSVOP(OP_CONST, 0, tokeq(sv)));
6433 start_force(PL_curforce);
6434 NEXTVAL_NEXTTOKE.opval = words;
6439 SvREFCNT_dec(PL_lex_stuff);
6440 PL_lex_stuff = NULL;
6446 s = scan_str(s,!!PL_madskills,FALSE);
6449 pl_yylval.ival = OP_STRINGIFY;
6450 if (SvIVX(PL_lex_stuff) == '\'')
6451 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6452 TERM(sublex_start());
6455 s = scan_pat(s,OP_QR);
6456 TERM(sublex_start());
6459 s = scan_str(s,!!PL_madskills,FALSE);
6462 readpipe_override();
6463 TERM(sublex_start());
6471 s = force_version(s, FALSE);
6473 else if (*s != 'v' || !isDIGIT(s[1])
6474 || (s = force_version(s, TRUE), *s == 'v'))
6476 *PL_tokenbuf = '\0';
6477 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6478 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6479 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6481 yyerror("<> should be quotes");
6483 if (orig_keyword == KEY_require) {
6491 PL_last_uni = PL_oldbufptr;
6492 PL_last_lop_op = OP_REQUIRE;
6494 return REPORT( (int)REQUIRE );
6500 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6504 LOP(OP_RENAME,XTERM);
6513 LOP(OP_RINDEX,XTERM);
6522 UNIDOR(OP_READLINE);
6525 UNIDOR(OP_BACKTICK);
6534 LOP(OP_REVERSE,XTERM);
6537 UNIDOR(OP_READLINK);
6544 if (pl_yylval.opval)
6545 TERM(sublex_start());
6547 TOKEN(1); /* force error */
6550 checkcomma(s,PL_tokenbuf,"filehandle");
6560 LOP(OP_SELECT,XTERM);
6566 LOP(OP_SEMCTL,XTERM);
6569 LOP(OP_SEMGET,XTERM);
6572 LOP(OP_SEMOP,XTERM);
6578 LOP(OP_SETPGRP,XTERM);
6580 case KEY_setpriority:
6581 LOP(OP_SETPRIORITY,XTERM);
6583 case KEY_sethostent:
6589 case KEY_setservent:
6592 case KEY_setprotoent:
6602 LOP(OP_SEEKDIR,XTERM);
6604 case KEY_setsockopt:
6605 LOP(OP_SSOCKOPT,XTERM);
6611 LOP(OP_SHMCTL,XTERM);
6614 LOP(OP_SHMGET,XTERM);
6617 LOP(OP_SHMREAD,XTERM);
6620 LOP(OP_SHMWRITE,XTERM);
6623 LOP(OP_SHUTDOWN,XTERM);
6632 LOP(OP_SOCKET,XTERM);
6634 case KEY_socketpair:
6635 LOP(OP_SOCKPAIR,XTERM);
6638 checkcomma(s,PL_tokenbuf,"subroutine name");
6640 if (*s == ';' || *s == ')') /* probably a close */
6641 Perl_croak(aTHX_ "sort is now a reserved word");
6643 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6647 LOP(OP_SPLIT,XTERM);
6650 LOP(OP_SPRINTF,XTERM);
6653 LOP(OP_SPLICE,XTERM);
6668 LOP(OP_SUBSTR,XTERM);
6674 char tmpbuf[sizeof PL_tokenbuf];
6675 SSize_t tboffset = 0;
6676 expectation attrful;
6677 bool have_name, have_proto;
6678 const int key = tmp;
6683 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6684 SV *subtoken = newSVpvn(tstart, s - tstart);
6688 s = SKIPSPACE2(s,tmpwhite);
6693 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6694 (*s == ':' && s[1] == ':'))
6697 SV *nametoke = NULL;
6701 attrful = XATTRBLOCK;
6702 /* remember buffer pos'n for later force_word */
6703 tboffset = s - PL_oldbufptr;
6704 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6707 nametoke = newSVpvn(s, d - s);
6709 if (memchr(tmpbuf, ':', len))
6710 sv_setpvn(PL_subname, tmpbuf, len);
6712 sv_setsv(PL_subname,PL_curstname);
6713 sv_catpvs(PL_subname,"::");
6714 sv_catpvn(PL_subname,tmpbuf,len);
6721 CURMAD('X', nametoke);
6722 CURMAD('_', tmpwhite);
6723 (void) force_word(PL_oldbufptr + tboffset, WORD,
6726 s = SKIPSPACE2(d,tmpwhite);
6733 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6734 PL_expect = XTERMBLOCK;
6735 attrful = XATTRTERM;
6736 sv_setpvs(PL_subname,"?");
6740 if (key == KEY_format) {
6742 PL_lex_formbrack = PL_lex_brackets + 1;
6744 PL_thistoken = subtoken;
6748 (void) force_word(PL_oldbufptr + tboffset, WORD,
6754 /* Look for a prototype */
6757 bool bad_proto = FALSE;
6758 bool in_brackets = FALSE;
6759 char greedy_proto = ' ';
6760 bool proto_after_greedy_proto = FALSE;
6761 bool must_be_last = FALSE;
6762 bool underscore = FALSE;
6763 bool seen_underscore = FALSE;
6764 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6766 s = scan_str(s,!!PL_madskills,FALSE);
6768 Perl_croak(aTHX_ "Prototype not terminated");
6769 /* strip spaces and check for bad characters */
6770 d = SvPVX(PL_lex_stuff);
6772 for (p = d; *p; ++p) {
6778 proto_after_greedy_proto = TRUE;
6779 if (!strchr("$@%*;[]&\\_", *p)) {
6791 else if ( *p == ']' ) {
6792 in_brackets = FALSE;
6794 else if ( (*p == '@' || *p == '%') &&
6795 ( tmp < 2 || d[tmp-2] != '\\' ) &&
6797 must_be_last = TRUE;
6800 else if ( *p == '_' ) {
6801 underscore = seen_underscore = TRUE;
6808 if (proto_after_greedy_proto)
6809 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6810 "Prototype after '%c' for %"SVf" : %s",
6811 greedy_proto, SVfARG(PL_subname), d);
6813 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6814 "Illegal character %sin prototype for %"SVf" : %s",
6815 seen_underscore ? "after '_' " : "",
6816 SVfARG(PL_subname), d);
6817 SvCUR_set(PL_lex_stuff, tmp);
6822 CURMAD('q', PL_thisopen);
6823 CURMAD('_', tmpwhite);
6824 CURMAD('=', PL_thisstuff);
6825 CURMAD('Q', PL_thisclose);
6826 NEXTVAL_NEXTTOKE.opval =
6827 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6828 PL_lex_stuff = NULL;
6831 s = SKIPSPACE2(s,tmpwhite);
6839 if (*s == ':' && s[1] != ':')
6840 PL_expect = attrful;
6841 else if (*s != '{' && key == KEY_sub) {
6843 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6845 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6852 curmad('^', newSVpvs(""));
6853 CURMAD('_', tmpwhite);
6857 PL_thistoken = subtoken;
6860 NEXTVAL_NEXTTOKE.opval =
6861 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6862 PL_lex_stuff = NULL;
6868 sv_setpvs(PL_subname, "__ANON__");
6870 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6874 (void) force_word(PL_oldbufptr + tboffset, WORD,
6883 LOP(OP_SYSTEM,XREF);
6886 LOP(OP_SYMLINK,XTERM);
6889 LOP(OP_SYSCALL,XTERM);
6892 LOP(OP_SYSOPEN,XTERM);
6895 LOP(OP_SYSSEEK,XTERM);
6898 LOP(OP_SYSREAD,XTERM);
6901 LOP(OP_SYSWRITE,XTERM);
6905 TERM(sublex_start());
6926 LOP(OP_TRUNCATE,XTERM);
6938 pl_yylval.ival = CopLINE(PL_curcop);
6942 pl_yylval.ival = CopLINE(PL_curcop);
6946 LOP(OP_UNLINK,XTERM);
6952 LOP(OP_UNPACK,XTERM);
6955 LOP(OP_UTIME,XTERM);
6961 LOP(OP_UNSHIFT,XTERM);
6964 s = tokenize_use(1, s);
6974 pl_yylval.ival = CopLINE(PL_curcop);
6978 pl_yylval.ival = CopLINE(PL_curcop);
6982 PL_hints |= HINT_BLOCK_SCOPE;
6989 LOP(OP_WAITPID,XTERM);
6998 ctl_l[0] = toCTRL('L');
7000 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7003 /* Make sure $^L is defined */
7004 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7009 if (PL_expect == XOPERATOR)
7015 pl_yylval.ival = OP_XOR;
7020 TERM(sublex_start());
7025 #pragma segment Main
7029 S_pending_ident(pTHX)
7034 /* pit holds the identifier we read and pending_ident is reset */
7035 char pit = PL_pending_ident;
7036 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7037 /* All routes through this function want to know if there is a colon. */
7038 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7039 PL_pending_ident = 0;
7041 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7042 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7043 "### Pending identifier '%s'\n", PL_tokenbuf); });
7045 /* if we're in a my(), we can't allow dynamics here.
7046 $foo'bar has already been turned into $foo::bar, so
7047 just check for colons.
7049 if it's a legal name, the OP is a PADANY.
7052 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7054 yyerror(Perl_form(aTHX_ "No package name allowed for "
7055 "variable %s in \"our\"",
7057 tmp = allocmy(PL_tokenbuf);
7061 yyerror(Perl_form(aTHX_ PL_no_myglob,
7062 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7064 pl_yylval.opval = newOP(OP_PADANY, 0);
7065 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
7071 build the ops for accesses to a my() variable.
7073 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7074 then used in a comparison. This catches most, but not
7075 all cases. For instance, it catches
7076 sort { my($a); $a <=> $b }
7078 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7079 (although why you'd do that is anyone's guess).
7084 tmp = pad_findmy(PL_tokenbuf);
7085 if (tmp != NOT_IN_PAD) {
7086 /* might be an "our" variable" */
7087 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7088 /* build ops for a bareword */
7089 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7090 HEK * const stashname = HvNAME_HEK(stash);
7091 SV * const sym = newSVhek(stashname);
7092 sv_catpvs(sym, "::");
7093 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7094 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7095 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7098 ? (GV_ADDMULTI | GV_ADDINEVAL)
7101 ((PL_tokenbuf[0] == '$') ? SVt_PV
7102 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7107 /* if it's a sort block and they're naming $a or $b */
7108 if (PL_last_lop_op == OP_SORT &&
7109 PL_tokenbuf[0] == '$' &&
7110 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7113 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7114 d < PL_bufend && *d != '\n';
7117 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7118 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7124 pl_yylval.opval = newOP(OP_PADANY, 0);
7125 pl_yylval.opval->op_targ = tmp;
7131 Whine if they've said @foo in a doublequoted string,
7132 and @foo isn't a variable we can find in the symbol
7135 if (ckWARN(WARN_AMBIGUOUS) &&
7136 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7137 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7139 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7140 /* DO NOT warn for @- and @+ */
7141 && !( PL_tokenbuf[2] == '\0' &&
7142 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7145 /* Downgraded from fatal to warning 20000522 mjd */
7146 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7147 "Possible unintended interpolation of %s in string",
7152 /* build ops for a bareword */
7153 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7155 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7157 PL_tokenbuf + 1, tokenbuf_len - 1,
7158 /* If the identifier refers to a stash, don't autovivify it.
7159 * Change 24660 had the side effect of causing symbol table
7160 * hashes to always be defined, even if they were freshly
7161 * created and the only reference in the entire program was
7162 * the single statement with the defined %foo::bar:: test.
7163 * It appears that all code in the wild doing this actually
7164 * wants to know whether sub-packages have been loaded, so
7165 * by avoiding auto-vivifying symbol tables, we ensure that
7166 * defined %foo::bar:: continues to be false, and the existing
7167 * tests still give the expected answers, even though what
7168 * they're actually testing has now changed subtly.
7170 (*PL_tokenbuf == '%'
7171 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7174 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7175 ((PL_tokenbuf[0] == '$') ? SVt_PV
7176 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7182 * The following code was generated by perl_keyword.pl.
7186 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7190 PERL_ARGS_ASSERT_KEYWORD;
7194 case 1: /* 5 tokens of length 1 */
7226 case 2: /* 18 tokens of length 2 */
7372 case 3: /* 29 tokens of length 3 */
7376 if (name[1] == 'N' &&
7439 if (name[1] == 'i' &&
7471 if (name[1] == 'o' &&
7480 if (name[1] == 'e' &&
7489 if (name[1] == 'n' &&
7498 if (name[1] == 'o' &&
7507 if (name[1] == 'a' &&
7516 if (name[1] == 'o' &&
7578 if (name[1] == 'e' &&
7592 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7618 if (name[1] == 'i' &&
7627 if (name[1] == 's' &&
7636 if (name[1] == 'e' &&
7645 if (name[1] == 'o' &&
7657 case 4: /* 41 tokens of length 4 */
7661 if (name[1] == 'O' &&
7671 if (name[1] == 'N' &&
7681 if (name[1] == 'i' &&
7691 if (name[1] == 'h' &&
7701 if (name[1] == 'u' &&
7714 if (name[2] == 'c' &&
7723 if (name[2] == 's' &&
7732 if (name[2] == 'a' &&
7768 if (name[1] == 'o' &&
7781 if (name[2] == 't' &&
7790 if (name[2] == 'o' &&
7799 if (name[2] == 't' &&
7808 if (name[2] == 'e' &&
7821 if (name[1] == 'o' &&
7834 if (name[2] == 'y' &&
7843 if (name[2] == 'l' &&
7859 if (name[2] == 's' &&
7868 if (name[2] == 'n' &&
7877 if (name[2] == 'c' &&
7890 if (name[1] == 'e' &&
7900 if (name[1] == 'p' &&
7913 if (name[2] == 'c' &&
7922 if (name[2] == 'p' &&
7931 if (name[2] == 's' &&
7947 if (name[2] == 'n' &&
8017 if (name[2] == 'r' &&
8026 if (name[2] == 'r' &&
8035 if (name[2] == 'a' &&
8051 if (name[2] == 'l' &&
8113 if (name[2] == 'e' &&
8116 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8129 case 5: /* 39 tokens of length 5 */
8133 if (name[1] == 'E' &&
8144 if (name[1] == 'H' &&
8158 if (name[2] == 'a' &&
8168 if (name[2] == 'a' &&
8185 if (name[2] == 'e' &&
8195 if (name[2] == 'e' &&
8199 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8215 if (name[3] == 'i' &&
8224 if (name[3] == 'o' &&
8260 if (name[2] == 'o' &&
8270 if (name[2] == 'y' &&
8284 if (name[1] == 'l' &&
8298 if (name[2] == 'n' &&
8308 if (name[2] == 'o' &&
8322 if (name[1] == 'i' &&
8327 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8336 if (name[2] == 'd' &&
8346 if (name[2] == 'c' &&
8363 if (name[2] == 'c' &&
8373 if (name[2] == 't' &&
8387 if (name[1] == 'k' &&
8398 if (name[1] == 'r' &&
8412 if (name[2] == 's' &&
8422 if (name[2] == 'd' &&
8439 if (name[2] == 'm' &&
8449 if (name[2] == 'i' &&
8459 if (name[2] == 'e' &&
8469 if (name[2] == 'l' &&
8479 if (name[2] == 'a' &&
8492 if (name[3] == 't' &&
8495 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8501 if (name[3] == 'd' &&
8518 if (name[1] == 'i' &&
8532 if (name[2] == 'a' &&
8545 if (name[3] == 'e' &&
8580 if (name[2] == 'i' &&
8597 if (name[2] == 'i' &&
8607 if (name[2] == 'i' &&
8624 case 6: /* 33 tokens of length 6 */
8628 if (name[1] == 'c' &&
8643 if (name[2] == 'l' &&
8654 if (name[2] == 'r' &&
8669 if (name[1] == 'e' &&
8684 if (name[2] == 's' &&
8689 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8695 if (name[2] == 'i' &&
8713 if (name[2] == 'l' &&
8724 if (name[2] == 'r' &&
8739 if (name[1] == 'm' &&
8754 if (name[2] == 'n' &&
8765 if (name[2] == 's' &&
8780 if (name[1] == 's' &&
8786 if (name[4] == 't' &&
8795 if (name[4] == 'e' &&
8804 if (name[4] == 'c' &&
8813 if (name[4] == 'n' &&
8829 if (name[1] == 'r' &&
8847 if (name[3] == 'a' &&
8857 if (name[3] == 'u' &&
8871 if (name[2] == 'n' &&
8889 if (name[2] == 'a' &&
8903 if (name[3] == 'e' &&
8916 if (name[4] == 't' &&
8925 if (name[4] == 'e' &&
8947 if (name[4] == 't' &&
8956 if (name[4] == 'e' &&
8972 if (name[2] == 'c' &&
8983 if (name[2] == 'l' &&
8994 if (name[2] == 'b' &&
9005 if (name[2] == 's' &&
9028 if (name[4] == 's' &&
9037 if (name[4] == 'n' &&
9050 if (name[3] == 'a' &&
9067 if (name[1] == 'a' &&
9082 case 7: /* 29 tokens of length 7 */
9086 if (name[1] == 'E' &&
9099 if (name[1] == '_' &&
9112 if (name[1] == 'i' &&
9119 return -KEY_binmode;
9125 if (name[1] == 'o' &&
9132 return -KEY_connect;
9141 if (name[2] == 'm' &&
9147 return -KEY_dbmopen;
9158 if (name[4] == 'u' &&
9162 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9168 if (name[4] == 'n' &&
9189 if (name[1] == 'o' &&
9202 if (name[1] == 'e' &&
9209 if (name[5] == 'r' &&
9212 return -KEY_getpgrp;
9218 if (name[5] == 'i' &&
9221 return -KEY_getppid;
9234 if (name[1] == 'c' &&
9241 return -KEY_lcfirst;
9247 if (name[1] == 'p' &&
9254 return -KEY_opendir;
9260 if (name[1] == 'a' &&
9278 if (name[3] == 'd' &&
9283 return -KEY_readdir;
9289 if (name[3] == 'u' &&
9300 if (name[3] == 'e' &&
9305 return -KEY_reverse;
9324 if (name[3] == 'k' &&
9329 return -KEY_seekdir;
9335 if (name[3] == 'p' &&
9340 return -KEY_setpgrp;
9350 if (name[2] == 'm' &&
9356 return -KEY_shmread;
9362 if (name[2] == 'r' &&
9368 return -KEY_sprintf;
9377 if (name[3] == 'l' &&
9382 return -KEY_symlink;
9391 if (name[4] == 'a' &&
9395 return -KEY_syscall;
9401 if (name[4] == 'p' &&
9405 return -KEY_sysopen;
9411 if (name[4] == 'e' &&
9415 return -KEY_sysread;
9421 if (name[4] == 'e' &&
9425 return -KEY_sysseek;
9443 if (name[1] == 'e' &&
9450 return -KEY_telldir;
9459 if (name[2] == 'f' &&
9465 return -KEY_ucfirst;
9471 if (name[2] == 's' &&
9477 return -KEY_unshift;
9487 if (name[1] == 'a' &&
9494 return -KEY_waitpid;
9503 case 8: /* 26 tokens of length 8 */
9507 if (name[1] == 'U' &&
9515 return KEY_AUTOLOAD;
9526 if (name[3] == 'A' &&
9532 return KEY___DATA__;
9538 if (name[3] == 'I' &&
9544 return -KEY___FILE__;
9550 if (name[3] == 'I' &&
9556 return -KEY___LINE__;
9572 if (name[2] == 'o' &&
9579 return -KEY_closedir;
9585 if (name[2] == 'n' &&
9592 return -KEY_continue;
9602 if (name[1] == 'b' &&
9610 return -KEY_dbmclose;
9616 if (name[1] == 'n' &&
9622 if (name[4] == 'r' &&
9627 return -KEY_endgrent;
9633 if (name[4] == 'w' &&
9638 return -KEY_endpwent;
9651 if (name[1] == 'o' &&
9659 return -KEY_formline;
9665 if (name[1] == 'e' &&
9676 if (name[6] == 'n' &&
9679 return -KEY_getgrent;
9685 if (name[6] == 'i' &&
9688 return -KEY_getgrgid;
9694 if (name[6] == 'a' &&
9697 return -KEY_getgrnam;
9710 if (name[4] == 'o' &&
9715 return -KEY_getlogin;
9726 if (name[6] == 'n' &&
9729 return -KEY_getpwent;
9735 if (name[6] == 'a' &&
9738 return -KEY_getpwnam;
9744 if (name[6] == 'i' &&
9747 return -KEY_getpwuid;
9767 if (name[1] == 'e' &&
9774 if (name[5] == 'i' &&
9781 return -KEY_readline;
9786 return -KEY_readlink;
9797 if (name[5] == 'i' &&
9801 return -KEY_readpipe;
9822 if (name[4] == 'r' &&
9827 return -KEY_setgrent;
9833 if (name[4] == 'w' &&
9838 return -KEY_setpwent;
9854 if (name[3] == 'w' &&
9860 return -KEY_shmwrite;
9866 if (name[3] == 't' &&
9872 return -KEY_shutdown;
9882 if (name[2] == 's' &&
9889 return -KEY_syswrite;
9899 if (name[1] == 'r' &&
9907 return -KEY_truncate;
9916 case 9: /* 9 tokens of length 9 */
9920 if (name[1] == 'N' &&
9929 return KEY_UNITCHECK;
9935 if (name[1] == 'n' &&
9944 return -KEY_endnetent;
9950 if (name[1] == 'e' &&
9959 return -KEY_getnetent;
9965 if (name[1] == 'o' &&
9974 return -KEY_localtime;
9980 if (name[1] == 'r' &&
9989 return KEY_prototype;
9995 if (name[1] == 'u' &&
10004 return -KEY_quotemeta;
10010 if (name[1] == 'e' &&
10019 return -KEY_rewinddir;
10025 if (name[1] == 'e' &&
10034 return -KEY_setnetent;
10040 if (name[1] == 'a' &&
10049 return -KEY_wantarray;
10058 case 10: /* 9 tokens of length 10 */
10062 if (name[1] == 'n' &&
10068 if (name[4] == 'o' &&
10075 return -KEY_endhostent;
10081 if (name[4] == 'e' &&
10088 return -KEY_endservent;
10101 if (name[1] == 'e' &&
10107 if (name[4] == 'o' &&
10114 return -KEY_gethostent;
10123 if (name[5] == 'r' &&
10129 return -KEY_getservent;
10135 if (name[5] == 'c' &&
10141 return -KEY_getsockopt;
10161 if (name[2] == 't')
10166 if (name[4] == 'o' &&
10173 return -KEY_sethostent;
10182 if (name[5] == 'r' &&
10188 return -KEY_setservent;
10194 if (name[5] == 'c' &&
10200 return -KEY_setsockopt;
10217 if (name[2] == 'c' &&
10226 return -KEY_socketpair;
10239 case 11: /* 8 tokens of length 11 */
10243 if (name[1] == '_' &&
10253 { /* __PACKAGE__ */
10254 return -KEY___PACKAGE__;
10260 if (name[1] == 'n' &&
10270 { /* endprotoent */
10271 return -KEY_endprotoent;
10277 if (name[1] == 'e' &&
10286 if (name[5] == 'e' &&
10292 { /* getpeername */
10293 return -KEY_getpeername;
10302 if (name[6] == 'o' &&
10307 { /* getpriority */
10308 return -KEY_getpriority;
10314 if (name[6] == 't' &&
10319 { /* getprotoent */
10320 return -KEY_getprotoent;
10334 if (name[4] == 'o' &&
10341 { /* getsockname */
10342 return -KEY_getsockname;
10355 if (name[1] == 'e' &&
10363 if (name[6] == 'o' &&
10368 { /* setpriority */
10369 return -KEY_setpriority;
10375 if (name[6] == 't' &&
10380 { /* setprotoent */
10381 return -KEY_setprotoent;
10397 case 12: /* 2 tokens of length 12 */
10398 if (name[0] == 'g' &&
10410 if (name[9] == 'd' &&
10413 { /* getnetbyaddr */
10414 return -KEY_getnetbyaddr;
10420 if (name[9] == 'a' &&
10423 { /* getnetbyname */
10424 return -KEY_getnetbyname;
10436 case 13: /* 4 tokens of length 13 */
10437 if (name[0] == 'g' &&
10444 if (name[4] == 'o' &&
10453 if (name[10] == 'd' &&
10456 { /* gethostbyaddr */
10457 return -KEY_gethostbyaddr;
10463 if (name[10] == 'a' &&
10466 { /* gethostbyname */
10467 return -KEY_gethostbyname;
10480 if (name[4] == 'e' &&
10489 if (name[10] == 'a' &&
10492 { /* getservbyname */
10493 return -KEY_getservbyname;
10499 if (name[10] == 'o' &&
10502 { /* getservbyport */
10503 return -KEY_getservbyport;
10522 case 14: /* 1 tokens of length 14 */
10523 if (name[0] == 'g' &&
10537 { /* getprotobyname */
10538 return -KEY_getprotobyname;
10543 case 16: /* 1 tokens of length 16 */
10544 if (name[0] == 'g' &&
10560 { /* getprotobynumber */
10561 return -KEY_getprotobynumber;
10575 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10579 PERL_ARGS_ASSERT_CHECKCOMMA;
10581 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10582 if (ckWARN(WARN_SYNTAX)) {
10585 for (w = s+2; *w && level; w++) {
10588 else if (*w == ')')
10591 while (isSPACE(*w))
10593 /* the list of chars below is for end of statements or
10594 * block / parens, boolean operators (&&, ||, //) and branch
10595 * constructs (or, and, if, until, unless, while, err, for).
10596 * Not a very solid hack... */
10597 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10598 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10599 "%s (...) interpreted as function",name);
10602 while (s < PL_bufend && isSPACE(*s))
10606 while (s < PL_bufend && isSPACE(*s))
10608 if (isIDFIRST_lazy_if(s,UTF)) {
10609 const char * const w = s++;
10610 while (isALNUM_lazy_if(s,UTF))
10612 while (s < PL_bufend && isSPACE(*s))
10616 if (keyword(w, s - w, 0))
10619 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10620 if (gv && GvCVu(gv))
10622 Perl_croak(aTHX_ "No comma allowed after %s", what);
10627 /* Either returns sv, or mortalizes sv and returns a new SV*.
10628 Best used as sv=new_constant(..., sv, ...).
10629 If s, pv are NULL, calls subroutine with one argument,
10630 and type is used with error messages only. */
10633 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10634 SV *sv, SV *pv, const char *type, STRLEN typelen)
10637 HV * const table = GvHV(PL_hintgv); /* ^H */
10641 const char *why1 = "", *why2 = "", *why3 = "";
10643 PERL_ARGS_ASSERT_NEW_CONSTANT;
10645 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10648 why2 = (const char *)
10649 (strEQ(key,"charnames")
10650 ? "(possibly a missing \"use charnames ...\")"
10652 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10653 (type ? type: "undef"), why2);
10655 /* This is convoluted and evil ("goto considered harmful")
10656 * but I do not understand the intricacies of all the different
10657 * failure modes of %^H in here. The goal here is to make
10658 * the most probable error message user-friendly. --jhi */
10663 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10664 (type ? type: "undef"), why1, why2, why3);
10666 yyerror(SvPVX_const(msg));
10670 cvp = hv_fetch(table, key, keylen, FALSE);
10671 if (!cvp || !SvOK(*cvp)) {
10674 why3 = "} is not defined";
10677 sv_2mortal(sv); /* Parent created it permanently */
10680 pv = newSVpvn_flags(s, len, SVs_TEMP);
10682 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10684 typesv = &PL_sv_undef;
10686 PUSHSTACKi(PERLSI_OVERLOAD);
10698 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10702 /* Check the eval first */
10703 if (!PL_in_eval && SvTRUE(ERRSV)) {
10704 sv_catpvs(ERRSV, "Propagated");
10705 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10707 res = SvREFCNT_inc_simple(sv);
10711 SvREFCNT_inc_simple_void(res);
10720 why1 = "Call to &{$^H{";
10722 why3 = "}} did not return a defined value";
10730 /* Returns a NUL terminated string, with the length of the string written to
10734 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10737 register char *d = dest;
10738 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10740 PERL_ARGS_ASSERT_SCAN_WORD;
10744 Perl_croak(aTHX_ ident_too_long);
10745 if (isALNUM(*s)) /* UTF handled below */
10747 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10752 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10756 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10757 char *t = s + UTF8SKIP(s);
10759 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10763 Perl_croak(aTHX_ ident_too_long);
10764 Copy(s, d, len, char);
10777 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10780 char *bracket = NULL;
10782 register char *d = dest;
10783 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10785 PERL_ARGS_ASSERT_SCAN_IDENT;
10790 while (isDIGIT(*s)) {
10792 Perl_croak(aTHX_ ident_too_long);
10799 Perl_croak(aTHX_ ident_too_long);
10800 if (isALNUM(*s)) /* UTF handled below */
10802 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10807 else if (*s == ':' && s[1] == ':') {
10811 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10812 char *t = s + UTF8SKIP(s);
10813 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10815 if (d + (t - s) > e)
10816 Perl_croak(aTHX_ ident_too_long);
10817 Copy(s, d, t - s, char);
10828 if (PL_lex_state != LEX_NORMAL)
10829 PL_lex_state = LEX_INTERPENDMAYBE;
10832 if (*s == '$' && s[1] &&
10833 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10846 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10851 if (isSPACE(s[-1])) {
10853 const char ch = *s++;
10854 if (!SPACE_OR_TAB(ch)) {
10860 if (isIDFIRST_lazy_if(d,UTF)) {
10864 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10865 end += UTF8SKIP(end);
10866 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10867 end += UTF8SKIP(end);
10869 Copy(s, d, end - s, char);
10874 while ((isALNUM(*s) || *s == ':') && d < e)
10877 Perl_croak(aTHX_ ident_too_long);
10880 while (s < send && SPACE_OR_TAB(*s))
10882 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10883 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10884 const char * const brack =
10886 ((*s == '[') ? "[...]" : "{...}");
10887 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10888 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10889 funny, dest, brack, funny, dest, brack);
10892 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10896 /* Handle extended ${^Foo} variables
10897 * 1999-02-27 mjd-perl-patch@plover.com */
10898 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10902 while (isALNUM(*s) && d < e) {
10906 Perl_croak(aTHX_ ident_too_long);
10911 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10912 PL_lex_state = LEX_INTERPEND;
10915 if (PL_lex_state == LEX_NORMAL) {
10916 if (ckWARN(WARN_AMBIGUOUS) &&
10917 (keyword(dest, d - dest, 0)
10918 || get_cvn_flags(dest, d - dest, 0)))
10922 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10923 "Ambiguous use of %c{%s} resolved to %c%s",
10924 funny, dest, funny, dest);
10929 s = bracket; /* let the parser handle it */
10933 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10934 PL_lex_state = LEX_INTERPEND;
10939 S_pmflag(U32 pmfl, const char ch) {
10941 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
10942 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
10943 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
10944 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
10945 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
10951 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10953 PERL_ARGS_ASSERT_PMFLAG;
10955 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
10956 "Perl_pmflag() is deprecated, and will be removed from the XS API");
10959 *pmfl = S_pmflag(*pmfl, (char)ch);
10964 S_scan_pat(pTHX_ char *start, I32 type)
10968 char *s = scan_str(start,!!PL_madskills,FALSE);
10969 const char * const valid_flags =
10970 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10975 PERL_ARGS_ASSERT_SCAN_PAT;
10978 const char * const delimiter = skipspace(start);
10982 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10983 : "Search pattern not terminated" ));
10986 pm = (PMOP*)newPMOP(type, 0);
10987 if (PL_multi_open == '?') {
10988 /* This is the only point in the code that sets PMf_ONCE: */
10989 pm->op_pmflags |= PMf_ONCE;
10991 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10992 allows us to restrict the list needed by reset to just the ??
10994 assert(type != OP_TRANS);
10996 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10999 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11002 elements = mg->mg_len / sizeof(PMOP**);
11003 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11004 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11005 mg->mg_len = elements * sizeof(PMOP**);
11006 PmopSTASH_set(pm,PL_curstash);
11012 while (*s && strchr(valid_flags, *s))
11013 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11015 if (PL_madskills && modstart != s) {
11016 SV* tmptoken = newSVpvn(modstart, s - modstart);
11017 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11020 /* issue a warning if /c is specified,but /g is not */
11021 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11023 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11024 "Use of /c modifier is meaningless without /g" );
11027 PL_lex_op = (OP*)pm;
11028 pl_yylval.ival = OP_MATCH;
11033 S_scan_subst(pTHX_ char *start)
11044 PERL_ARGS_ASSERT_SCAN_SUBST;
11046 pl_yylval.ival = OP_NULL;
11048 s = scan_str(start,!!PL_madskills,FALSE);
11051 Perl_croak(aTHX_ "Substitution pattern not terminated");
11053 if (s[-1] == PL_multi_open)
11056 if (PL_madskills) {
11057 CURMAD('q', PL_thisopen);
11058 CURMAD('_', PL_thiswhite);
11059 CURMAD('E', PL_thisstuff);
11060 CURMAD('Q', PL_thisclose);
11061 PL_realtokenstart = s - SvPVX(PL_linestr);
11065 first_start = PL_multi_start;
11066 s = scan_str(s,!!PL_madskills,FALSE);
11068 if (PL_lex_stuff) {
11069 SvREFCNT_dec(PL_lex_stuff);
11070 PL_lex_stuff = NULL;
11072 Perl_croak(aTHX_ "Substitution replacement not terminated");
11074 PL_multi_start = first_start; /* so whole substitution is taken together */
11076 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11079 if (PL_madskills) {
11080 CURMAD('z', PL_thisopen);
11081 CURMAD('R', PL_thisstuff);
11082 CURMAD('Z', PL_thisclose);
11088 if (*s == EXEC_PAT_MOD) {
11092 else if (strchr(S_PAT_MODS, *s))
11093 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11099 if (PL_madskills) {
11101 curmad('m', newSVpvn(modstart, s - modstart));
11102 append_madprops(PL_thismad, (OP*)pm, 0);
11106 if ((pm->op_pmflags & PMf_CONTINUE)) {
11107 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11111 SV * const repl = newSVpvs("");
11113 PL_sublex_info.super_bufptr = s;
11114 PL_sublex_info.super_bufend = PL_bufend;
11116 pm->op_pmflags |= PMf_EVAL;
11119 sv_catpvs(repl, "eval ");
11121 sv_catpvs(repl, "do ");
11123 sv_catpvs(repl, "{");
11124 sv_catsv(repl, PL_lex_repl);
11125 if (strchr(SvPVX(PL_lex_repl), '#'))
11126 sv_catpvs(repl, "\n");
11127 sv_catpvs(repl, "}");
11129 SvREFCNT_dec(PL_lex_repl);
11130 PL_lex_repl = repl;
11133 PL_lex_op = (OP*)pm;
11134 pl_yylval.ival = OP_SUBST;
11139 S_scan_trans(pTHX_ char *start)
11152 PERL_ARGS_ASSERT_SCAN_TRANS;
11154 pl_yylval.ival = OP_NULL;
11156 s = scan_str(start,!!PL_madskills,FALSE);
11158 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11160 if (s[-1] == PL_multi_open)
11163 if (PL_madskills) {
11164 CURMAD('q', PL_thisopen);
11165 CURMAD('_', PL_thiswhite);
11166 CURMAD('E', PL_thisstuff);
11167 CURMAD('Q', PL_thisclose);
11168 PL_realtokenstart = s - SvPVX(PL_linestr);
11172 s = scan_str(s,!!PL_madskills,FALSE);
11174 if (PL_lex_stuff) {
11175 SvREFCNT_dec(PL_lex_stuff);
11176 PL_lex_stuff = NULL;
11178 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11180 if (PL_madskills) {
11181 CURMAD('z', PL_thisopen);
11182 CURMAD('R', PL_thisstuff);
11183 CURMAD('Z', PL_thisclose);
11186 complement = del = squash = 0;
11193 complement = OPpTRANS_COMPLEMENT;
11196 del = OPpTRANS_DELETE;
11199 squash = OPpTRANS_SQUASH;
11208 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11209 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11210 o->op_private &= ~OPpTRANS_ALL;
11211 o->op_private |= del|squash|complement|
11212 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11213 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11216 pl_yylval.ival = OP_TRANS;
11219 if (PL_madskills) {
11221 curmad('m', newSVpvn(modstart, s - modstart));
11222 append_madprops(PL_thismad, o, 0);
11231 S_scan_heredoc(pTHX_ register char *s)
11235 I32 op_type = OP_SCALAR;
11239 const char *found_newline;
11243 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11245 I32 stuffstart = s - SvPVX(PL_linestr);
11248 PL_realtokenstart = -1;
11251 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11255 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11259 while (SPACE_OR_TAB(*peek))
11261 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11264 s = delimcpy(d, e, s, PL_bufend, term, &len);
11274 if (!isALNUM_lazy_if(s,UTF))
11275 deprecate("bare << to mean <<\"\"");
11276 for (; isALNUM_lazy_if(s,UTF); s++) {
11281 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11282 Perl_croak(aTHX_ "Delimiter for here document is too long");
11285 len = d - PL_tokenbuf;
11288 if (PL_madskills) {
11289 tstart = PL_tokenbuf + !outer;
11290 PL_thisclose = newSVpvn(tstart, len - !outer);
11291 tstart = SvPVX(PL_linestr) + stuffstart;
11292 PL_thisopen = newSVpvn(tstart, s - tstart);
11293 stuffstart = s - SvPVX(PL_linestr);
11296 #ifndef PERL_STRICT_CR
11297 d = strchr(s, '\r');
11299 char * const olds = s;
11301 while (s < PL_bufend) {
11307 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11316 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11323 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11324 herewas = newSVpvn(s,PL_bufend-s);
11328 herewas = newSVpvn(s-1,found_newline-s+1);
11331 herewas = newSVpvn(s,found_newline-s);
11335 if (PL_madskills) {
11336 tstart = SvPVX(PL_linestr) + stuffstart;
11338 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11340 PL_thisstuff = newSVpvn(tstart, s - tstart);
11343 s += SvCUR(herewas);
11346 stuffstart = s - SvPVX(PL_linestr);
11352 tmpstr = newSV_type(SVt_PVIV);
11353 SvGROW(tmpstr, 80);
11354 if (term == '\'') {
11355 op_type = OP_CONST;
11356 SvIV_set(tmpstr, -1);
11358 else if (term == '`') {
11359 op_type = OP_BACKTICK;
11360 SvIV_set(tmpstr, '\\');
11364 PL_multi_start = CopLINE(PL_curcop);
11365 PL_multi_open = PL_multi_close = '<';
11366 term = *PL_tokenbuf;
11367 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11368 char * const bufptr = PL_sublex_info.super_bufptr;
11369 char * const bufend = PL_sublex_info.super_bufend;
11370 char * const olds = s - SvCUR(herewas);
11371 s = strchr(bufptr, '\n');
11375 while (s < bufend &&
11376 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11378 CopLINE_inc(PL_curcop);
11381 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11382 missingterm(PL_tokenbuf);
11384 sv_setpvn(herewas,bufptr,d-bufptr+1);
11385 sv_setpvn(tmpstr,d+1,s-d);
11387 sv_catpvn(herewas,s,bufend-s);
11388 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11395 while (s < PL_bufend &&
11396 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11398 CopLINE_inc(PL_curcop);
11400 if (s >= PL_bufend) {
11401 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11402 missingterm(PL_tokenbuf);
11404 sv_setpvn(tmpstr,d+1,s-d);
11406 if (PL_madskills) {
11408 sv_catpvn(PL_thisstuff, d + 1, s - d);
11410 PL_thisstuff = newSVpvn(d + 1, s - d);
11411 stuffstart = s - SvPVX(PL_linestr);
11415 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11417 sv_catpvn(herewas,s,PL_bufend-s);
11418 sv_setsv(PL_linestr,herewas);
11419 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11420 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11421 PL_last_lop = PL_last_uni = NULL;
11424 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
11425 while (s >= PL_bufend) { /* multiple line string? */
11427 if (PL_madskills) {
11428 tstart = SvPVX(PL_linestr) + stuffstart;
11430 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11432 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11436 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
11437 = filter_gets(PL_linestr, 0))) {
11438 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11439 missingterm(PL_tokenbuf);
11442 stuffstart = s - SvPVX(PL_linestr);
11444 CopLINE_inc(PL_curcop);
11445 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11446 PL_last_lop = PL_last_uni = NULL;
11447 #ifndef PERL_STRICT_CR
11448 if (PL_bufend - PL_linestart >= 2) {
11449 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11450 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11452 PL_bufend[-2] = '\n';
11454 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11456 else if (PL_bufend[-1] == '\r')
11457 PL_bufend[-1] = '\n';
11459 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11460 PL_bufend[-1] = '\n';
11462 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11463 update_debugger_info(PL_linestr, NULL, 0);
11464 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11465 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11466 *(SvPVX(PL_linestr) + off ) = ' ';
11467 sv_catsv(PL_linestr,herewas);
11468 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11469 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11473 sv_catsv(tmpstr,PL_linestr);
11478 PL_multi_end = CopLINE(PL_curcop);
11479 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11480 SvPV_shrink_to_cur(tmpstr);
11482 SvREFCNT_dec(herewas);
11484 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11486 else if (PL_encoding)
11487 sv_recode_to_utf8(tmpstr, PL_encoding);
11489 PL_lex_stuff = tmpstr;
11490 pl_yylval.ival = op_type;
11494 /* scan_inputsymbol
11495 takes: current position in input buffer
11496 returns: new position in input buffer
11497 side-effects: pl_yylval and lex_op are set.
11502 <FH> read from filehandle
11503 <pkg::FH> read from package qualified filehandle
11504 <pkg'FH> read from package qualified filehandle
11505 <$fh> read from filehandle in $fh
11506 <*.h> filename glob
11511 S_scan_inputsymbol(pTHX_ char *start)
11514 register char *s = start; /* current position in buffer */
11517 char *d = PL_tokenbuf; /* start of temp holding space */
11518 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11520 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11522 end = strchr(s, '\n');
11525 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11527 /* die if we didn't have space for the contents of the <>,
11528 or if it didn't end, or if we see a newline
11531 if (len >= (I32)sizeof PL_tokenbuf)
11532 Perl_croak(aTHX_ "Excessively long <> operator");
11534 Perl_croak(aTHX_ "Unterminated <> operator");
11539 Remember, only scalar variables are interpreted as filehandles by
11540 this code. Anything more complex (e.g., <$fh{$num}>) will be
11541 treated as a glob() call.
11542 This code makes use of the fact that except for the $ at the front,
11543 a scalar variable and a filehandle look the same.
11545 if (*d == '$' && d[1]) d++;
11547 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11548 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11551 /* If we've tried to read what we allow filehandles to look like, and
11552 there's still text left, then it must be a glob() and not a getline.
11553 Use scan_str to pull out the stuff between the <> and treat it
11554 as nothing more than a string.
11557 if (d - PL_tokenbuf != len) {
11558 pl_yylval.ival = OP_GLOB;
11559 s = scan_str(start,!!PL_madskills,FALSE);
11561 Perl_croak(aTHX_ "Glob not terminated");
11565 bool readline_overriden = FALSE;
11568 /* we're in a filehandle read situation */
11571 /* turn <> into <ARGV> */
11573 Copy("ARGV",d,5,char);
11575 /* Check whether readline() is overriden */
11576 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11578 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11580 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11581 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11582 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11583 readline_overriden = TRUE;
11585 /* if <$fh>, create the ops to turn the variable into a
11589 /* try to find it in the pad for this block, otherwise find
11590 add symbol table ops
11592 const PADOFFSET tmp = pad_findmy(d);
11593 if (tmp != NOT_IN_PAD) {
11594 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11595 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11596 HEK * const stashname = HvNAME_HEK(stash);
11597 SV * const sym = sv_2mortal(newSVhek(stashname));
11598 sv_catpvs(sym, "::");
11599 sv_catpv(sym, d+1);
11604 OP * const o = newOP(OP_PADSV, 0);
11606 PL_lex_op = readline_overriden
11607 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11608 append_elem(OP_LIST, o,
11609 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11610 : (OP*)newUNOP(OP_READLINE, 0, o);
11619 ? (GV_ADDMULTI | GV_ADDINEVAL)
11622 PL_lex_op = readline_overriden
11623 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11624 append_elem(OP_LIST,
11625 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11626 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11627 : (OP*)newUNOP(OP_READLINE, 0,
11628 newUNOP(OP_RV2SV, 0,
11629 newGVOP(OP_GV, 0, gv)));
11631 if (!readline_overriden)
11632 PL_lex_op->op_flags |= OPf_SPECIAL;
11633 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11634 pl_yylval.ival = OP_NULL;
11637 /* If it's none of the above, it must be a literal filehandle
11638 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11640 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11641 PL_lex_op = readline_overriden
11642 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11643 append_elem(OP_LIST,
11644 newGVOP(OP_GV, 0, gv),
11645 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11646 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11647 pl_yylval.ival = OP_NULL;
11656 takes: start position in buffer
11657 keep_quoted preserve \ on the embedded delimiter(s)
11658 keep_delims preserve the delimiters around the string
11659 returns: position to continue reading from buffer
11660 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11661 updates the read buffer.
11663 This subroutine pulls a string out of the input. It is called for:
11664 q single quotes q(literal text)
11665 ' single quotes 'literal text'
11666 qq double quotes qq(interpolate $here please)
11667 " double quotes "interpolate $here please"
11668 qx backticks qx(/bin/ls -l)
11669 ` backticks `/bin/ls -l`
11670 qw quote words @EXPORT_OK = qw( func() $spam )
11671 m// regexp match m/this/
11672 s/// regexp substitute s/this/that/
11673 tr/// string transliterate tr/this/that/
11674 y/// string transliterate y/this/that/
11675 ($*@) sub prototypes sub foo ($)
11676 (stuff) sub attr parameters sub foo : attr(stuff)
11677 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11679 In most of these cases (all but <>, patterns and transliterate)
11680 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11681 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11682 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11685 It skips whitespace before the string starts, and treats the first
11686 character as the delimiter. If the delimiter is one of ([{< then
11687 the corresponding "close" character )]}> is used as the closing
11688 delimiter. It allows quoting of delimiters, and if the string has
11689 balanced delimiters ([{<>}]) it allows nesting.
11691 On success, the SV with the resulting string is put into lex_stuff or,
11692 if that is already non-NULL, into lex_repl. The second case occurs only
11693 when parsing the RHS of the special constructs s/// and tr/// (y///).
11694 For convenience, the terminating delimiter character is stuffed into
11699 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11702 SV *sv; /* scalar value: string */
11703 const char *tmps; /* temp string, used for delimiter matching */
11704 register char *s = start; /* current position in the buffer */
11705 register char term; /* terminating character */
11706 register char *to; /* current position in the sv's data */
11707 I32 brackets = 1; /* bracket nesting level */
11708 bool has_utf8 = FALSE; /* is there any utf8 content? */
11709 I32 termcode; /* terminating char. code */
11710 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11711 STRLEN termlen; /* length of terminating string */
11712 int last_off = 0; /* last position for nesting bracket */
11718 PERL_ARGS_ASSERT_SCAN_STR;
11720 /* skip space before the delimiter */
11726 if (PL_realtokenstart >= 0) {
11727 stuffstart = PL_realtokenstart;
11728 PL_realtokenstart = -1;
11731 stuffstart = start - SvPVX(PL_linestr);
11733 /* mark where we are, in case we need to report errors */
11736 /* after skipping whitespace, the next character is the terminator */
11739 termcode = termstr[0] = term;
11743 termcode = utf8_to_uvchr((U8*)s, &termlen);
11744 Copy(s, termstr, termlen, U8);
11745 if (!UTF8_IS_INVARIANT(term))
11749 /* mark where we are */
11750 PL_multi_start = CopLINE(PL_curcop);
11751 PL_multi_open = term;
11753 /* find corresponding closing delimiter */
11754 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11755 termcode = termstr[0] = term = tmps[5];
11757 PL_multi_close = term;
11759 /* create a new SV to hold the contents. 79 is the SV's initial length.
11760 What a random number. */
11761 sv = newSV_type(SVt_PVIV);
11763 SvIV_set(sv, termcode);
11764 (void)SvPOK_only(sv); /* validate pointer */
11766 /* move past delimiter and try to read a complete string */
11768 sv_catpvn(sv, s, termlen);
11771 tstart = SvPVX(PL_linestr) + stuffstart;
11772 if (!PL_thisopen && !keep_delims) {
11773 PL_thisopen = newSVpvn(tstart, s - tstart);
11774 stuffstart = s - SvPVX(PL_linestr);
11778 if (PL_encoding && !UTF) {
11782 int offset = s - SvPVX_const(PL_linestr);
11783 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11784 &offset, (char*)termstr, termlen);
11785 const char * const ns = SvPVX_const(PL_linestr) + offset;
11786 char * const svlast = SvEND(sv) - 1;
11788 for (; s < ns; s++) {
11789 if (*s == '\n' && !PL_rsfp)
11790 CopLINE_inc(PL_curcop);
11793 goto read_more_line;
11795 /* handle quoted delimiters */
11796 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11798 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11800 if ((svlast-1 - t) % 2) {
11801 if (!keep_quoted) {
11802 *(svlast-1) = term;
11804 SvCUR_set(sv, SvCUR(sv) - 1);
11809 if (PL_multi_open == PL_multi_close) {
11815 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11816 /* At here, all closes are "was quoted" one,
11817 so we don't check PL_multi_close. */
11819 if (!keep_quoted && *(t+1) == PL_multi_open)
11824 else if (*t == PL_multi_open)
11832 SvCUR_set(sv, w - SvPVX_const(sv));
11834 last_off = w - SvPVX(sv);
11835 if (--brackets <= 0)
11840 if (!keep_delims) {
11841 SvCUR_set(sv, SvCUR(sv) - 1);
11847 /* extend sv if need be */
11848 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11849 /* set 'to' to the next character in the sv's string */
11850 to = SvPVX(sv)+SvCUR(sv);
11852 /* if open delimiter is the close delimiter read unbridle */
11853 if (PL_multi_open == PL_multi_close) {
11854 for (; s < PL_bufend; s++,to++) {
11855 /* embedded newlines increment the current line number */
11856 if (*s == '\n' && !PL_rsfp)
11857 CopLINE_inc(PL_curcop);
11858 /* handle quoted delimiters */
11859 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11860 if (!keep_quoted && s[1] == term)
11862 /* any other quotes are simply copied straight through */
11866 /* terminate when run out of buffer (the for() condition), or
11867 have found the terminator */
11868 else if (*s == term) {
11871 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11874 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11880 /* if the terminator isn't the same as the start character (e.g.,
11881 matched brackets), we have to allow more in the quoting, and
11882 be prepared for nested brackets.
11885 /* read until we run out of string, or we find the terminator */
11886 for (; s < PL_bufend; s++,to++) {
11887 /* embedded newlines increment the line count */
11888 if (*s == '\n' && !PL_rsfp)
11889 CopLINE_inc(PL_curcop);
11890 /* backslashes can escape the open or closing characters */
11891 if (*s == '\\' && s+1 < PL_bufend) {
11892 if (!keep_quoted &&
11893 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11898 /* allow nested opens and closes */
11899 else if (*s == PL_multi_close && --brackets <= 0)
11901 else if (*s == PL_multi_open)
11903 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11908 /* terminate the copied string and update the sv's end-of-string */
11910 SvCUR_set(sv, to - SvPVX_const(sv));
11913 * this next chunk reads more into the buffer if we're not done yet
11917 break; /* handle case where we are done yet :-) */
11919 #ifndef PERL_STRICT_CR
11920 if (to - SvPVX_const(sv) >= 2) {
11921 if ((to[-2] == '\r' && to[-1] == '\n') ||
11922 (to[-2] == '\n' && to[-1] == '\r'))
11926 SvCUR_set(sv, to - SvPVX_const(sv));
11928 else if (to[-1] == '\r')
11931 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11936 /* if we're out of file, or a read fails, bail and reset the current
11937 line marker so we can report where the unterminated string began
11940 if (PL_madskills) {
11941 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11943 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11945 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11949 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart
11950 = filter_gets(PL_linestr, 0))) {
11952 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11958 /* we read a line, so increment our line counter */
11959 CopLINE_inc(PL_curcop);
11961 /* update debugger info */
11962 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11963 update_debugger_info(PL_linestr, NULL, 0);
11965 /* having changed the buffer, we must update PL_bufend */
11966 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11967 PL_last_lop = PL_last_uni = NULL;
11970 /* at this point, we have successfully read the delimited string */
11972 if (!PL_encoding || UTF) {
11974 if (PL_madskills) {
11975 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11976 const int len = s - tstart;
11978 sv_catpvn(PL_thisstuff, tstart, len);
11980 PL_thisstuff = newSVpvn(tstart, len);
11981 if (!PL_thisclose && !keep_delims)
11982 PL_thisclose = newSVpvn(s,termlen);
11987 sv_catpvn(sv, s, termlen);
11992 if (PL_madskills) {
11993 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11994 const int len = s - tstart - termlen;
11996 sv_catpvn(PL_thisstuff, tstart, len);
11998 PL_thisstuff = newSVpvn(tstart, len);
11999 if (!PL_thisclose && !keep_delims)
12000 PL_thisclose = newSVpvn(s - termlen,termlen);
12004 if (has_utf8 || PL_encoding)
12007 PL_multi_end = CopLINE(PL_curcop);
12009 /* if we allocated too much space, give some back */
12010 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12011 SvLEN_set(sv, SvCUR(sv) + 1);
12012 SvPV_renew(sv, SvLEN(sv));
12015 /* decide whether this is the first or second quoted string we've read
12028 takes: pointer to position in buffer
12029 returns: pointer to new position in buffer
12030 side-effects: builds ops for the constant in pl_yylval.op
12032 Read a number in any of the formats that Perl accepts:
12034 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12035 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12038 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12040 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12043 If it reads a number without a decimal point or an exponent, it will
12044 try converting the number to an integer and see if it can do so
12045 without loss of precision.
12049 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12052 register const char *s = start; /* current position in buffer */
12053 register char *d; /* destination in temp buffer */
12054 register char *e; /* end of temp buffer */
12055 NV nv; /* number read, as a double */
12056 SV *sv = NULL; /* place to put the converted number */
12057 bool floatit; /* boolean: int or float? */
12058 const char *lastub = NULL; /* position of last underbar */
12059 static char const number_too_long[] = "Number too long";
12061 PERL_ARGS_ASSERT_SCAN_NUM;
12063 /* We use the first character to decide what type of number this is */
12067 Perl_croak(aTHX_ "panic: scan_num");
12069 /* if it starts with a 0, it could be an octal number, a decimal in
12070 0.13 disguise, or a hexadecimal number, or a binary number. */
12074 u holds the "number so far"
12075 shift the power of 2 of the base
12076 (hex == 4, octal == 3, binary == 1)
12077 overflowed was the number more than we can hold?
12079 Shift is used when we add a digit. It also serves as an "are
12080 we in octal/hex/binary?" indicator to disallow hex characters
12081 when in octal mode.
12086 bool overflowed = FALSE;
12087 bool just_zero = TRUE; /* just plain 0 or binary number? */
12088 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12089 static const char* const bases[5] =
12090 { "", "binary", "", "octal", "hexadecimal" };
12091 static const char* const Bases[5] =
12092 { "", "Binary", "", "Octal", "Hexadecimal" };
12093 static const char* const maxima[5] =
12095 "0b11111111111111111111111111111111",
12099 const char *base, *Base, *max;
12101 /* check for hex */
12106 } else if (s[1] == 'b') {
12111 /* check for a decimal in disguise */
12112 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12114 /* so it must be octal */
12121 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12122 "Misplaced _ in number");
12126 base = bases[shift];
12127 Base = Bases[shift];
12128 max = maxima[shift];
12130 /* read the rest of the number */
12132 /* x is used in the overflow test,
12133 b is the digit we're adding on. */
12138 /* if we don't mention it, we're done */
12142 /* _ are ignored -- but warned about if consecutive */
12144 if (lastub && s == lastub + 1)
12145 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12146 "Misplaced _ in number");
12150 /* 8 and 9 are not octal */
12151 case '8': case '9':
12153 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12157 case '2': case '3': case '4':
12158 case '5': case '6': case '7':
12160 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12163 case '0': case '1':
12164 b = *s++ & 15; /* ASCII digit -> value of digit */
12168 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12169 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12170 /* make sure they said 0x */
12173 b = (*s++ & 7) + 9;
12175 /* Prepare to put the digit we have onto the end
12176 of the number so far. We check for overflows.
12182 x = u << shift; /* make room for the digit */
12184 if ((x >> shift) != u
12185 && !(PL_hints & HINT_NEW_BINARY)) {
12188 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12189 "Integer overflow in %s number",
12192 u = x | b; /* add the digit to the end */
12195 n *= nvshift[shift];
12196 /* If an NV has not enough bits in its
12197 * mantissa to represent an UV this summing of
12198 * small low-order numbers is a waste of time
12199 * (because the NV cannot preserve the
12200 * low-order bits anyway): we could just
12201 * remember when did we overflow and in the
12202 * end just multiply n by the right
12210 /* if we get here, we had success: make a scalar value from
12215 /* final misplaced underbar check */
12216 if (s[-1] == '_') {
12217 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12222 if (n > 4294967295.0)
12223 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12224 "%s number > %s non-portable",
12230 if (u > 0xffffffff)
12231 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12232 "%s number > %s non-portable",
12237 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12238 sv = new_constant(start, s - start, "integer",
12239 sv, NULL, NULL, 0);
12240 else if (PL_hints & HINT_NEW_BINARY)
12241 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12246 handle decimal numbers.
12247 we're also sent here when we read a 0 as the first digit
12249 case '1': case '2': case '3': case '4': case '5':
12250 case '6': case '7': case '8': case '9': case '.':
12253 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12256 /* read next group of digits and _ and copy into d */
12257 while (isDIGIT(*s) || *s == '_') {
12258 /* skip underscores, checking for misplaced ones
12262 if (lastub && s == lastub + 1)
12263 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12264 "Misplaced _ in number");
12268 /* check for end of fixed-length buffer */
12270 Perl_croak(aTHX_ number_too_long);
12271 /* if we're ok, copy the character */
12276 /* final misplaced underbar check */
12277 if (lastub && s == lastub + 1) {
12278 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12281 /* read a decimal portion if there is one. avoid
12282 3..5 being interpreted as the number 3. followed
12285 if (*s == '.' && s[1] != '.') {
12290 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12291 "Misplaced _ in number");
12295 /* copy, ignoring underbars, until we run out of digits.
12297 for (; isDIGIT(*s) || *s == '_'; s++) {
12298 /* fixed length buffer check */
12300 Perl_croak(aTHX_ number_too_long);
12302 if (lastub && s == lastub + 1)
12303 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12304 "Misplaced _ in number");
12310 /* fractional part ending in underbar? */
12311 if (s[-1] == '_') {
12312 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12313 "Misplaced _ in number");
12315 if (*s == '.' && isDIGIT(s[1])) {
12316 /* oops, it's really a v-string, but without the "v" */
12322 /* read exponent part, if present */
12323 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12327 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12328 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12330 /* stray preinitial _ */
12332 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12333 "Misplaced _ in number");
12337 /* allow positive or negative exponent */
12338 if (*s == '+' || *s == '-')
12341 /* stray initial _ */
12343 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12344 "Misplaced _ in number");
12348 /* read digits of exponent */
12349 while (isDIGIT(*s) || *s == '_') {
12352 Perl_croak(aTHX_ number_too_long);
12356 if (((lastub && s == lastub + 1) ||
12357 (!isDIGIT(s[1]) && s[1] != '_')))
12358 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12359 "Misplaced _ in number");
12366 /* make an sv from the string */
12370 We try to do an integer conversion first if no characters
12371 indicating "float" have been found.
12376 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12378 if (flags == IS_NUMBER_IN_UV) {
12380 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12383 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12384 if (uv <= (UV) IV_MIN)
12385 sv_setiv(sv, -(IV)uv);
12392 /* terminate the string */
12394 nv = Atof(PL_tokenbuf);
12399 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12400 const char *const key = floatit ? "float" : "integer";
12401 const STRLEN keylen = floatit ? 5 : 7;
12402 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12403 key, keylen, sv, NULL, NULL, 0);
12407 /* if it starts with a v, it could be a v-string */
12410 sv = newSV(5); /* preallocate storage space */
12411 s = scan_vstring(s, PL_bufend, sv);
12415 /* make the op for the constant and return */
12418 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12420 lvalp->opval = NULL;
12426 S_scan_formline(pTHX_ register char *s)
12429 register char *eol;
12431 SV * const stuff = newSVpvs("");
12432 bool needargs = FALSE;
12433 bool eofmt = FALSE;
12435 char *tokenstart = s;
12436 SV* savewhite = NULL;
12438 if (PL_madskills) {
12439 savewhite = PL_thiswhite;
12444 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12446 while (!needargs) {
12449 #ifdef PERL_STRICT_CR
12450 while (SPACE_OR_TAB(*t))
12453 while (SPACE_OR_TAB(*t) || *t == '\r')
12456 if (*t == '\n' || t == PL_bufend) {
12461 if (PL_in_eval && !PL_rsfp) {
12462 eol = (char *) memchr(s,'\n',PL_bufend-s);
12467 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12469 for (t = s; t < eol; t++) {
12470 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12472 goto enough; /* ~~ must be first line in formline */
12474 if (*t == '@' || *t == '^')
12478 sv_catpvn(stuff, s, eol-s);
12479 #ifndef PERL_STRICT_CR
12480 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12481 char *end = SvPVX(stuff) + SvCUR(stuff);
12484 SvCUR_set(stuff, SvCUR(stuff) - 1);
12494 if (PL_madskills) {
12496 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12498 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12501 s = filter_gets(PL_linestr, 0);
12503 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12505 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12507 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12508 PL_last_lop = PL_last_uni = NULL;
12517 if (SvCUR(stuff)) {
12520 PL_lex_state = LEX_NORMAL;
12521 start_force(PL_curforce);
12522 NEXTVAL_NEXTTOKE.ival = 0;
12526 PL_lex_state = LEX_FORMLINE;
12528 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12530 else if (PL_encoding)
12531 sv_recode_to_utf8(stuff, PL_encoding);
12533 start_force(PL_curforce);
12534 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12536 start_force(PL_curforce);
12537 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12541 SvREFCNT_dec(stuff);
12543 PL_lex_formbrack = 0;
12547 if (PL_madskills) {
12549 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12551 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12552 PL_thiswhite = savewhite;
12559 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12562 const I32 oldsavestack_ix = PL_savestack_ix;
12563 CV* const outsidecv = PL_compcv;
12566 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12568 SAVEI32(PL_subline);
12569 save_item(PL_subname);
12570 SAVESPTR(PL_compcv);
12572 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12573 CvFLAGS(PL_compcv) |= flags;
12575 PL_subline = CopLINE(PL_curcop);
12576 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12577 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12578 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12580 return oldsavestack_ix;
12584 #pragma segment Perl_yylex
12587 S_yywarn(pTHX_ const char *const s)
12591 PERL_ARGS_ASSERT_YYWARN;
12593 PL_in_eval |= EVAL_WARNONLY;
12595 PL_in_eval &= ~EVAL_WARNONLY;
12600 Perl_yyerror(pTHX_ const char *const s)
12603 const char *where = NULL;
12604 const char *context = NULL;
12607 int yychar = PL_parser->yychar;
12609 PERL_ARGS_ASSERT_YYERROR;
12611 if (!yychar || (yychar == ';' && !PL_rsfp))
12613 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12614 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12615 PL_oldbufptr != PL_bufptr) {
12618 The code below is removed for NetWare because it abends/crashes on NetWare
12619 when the script has error such as not having the closing quotes like:
12620 if ($var eq "value)
12621 Checking of white spaces is anyway done in NetWare code.
12624 while (isSPACE(*PL_oldoldbufptr))
12627 context = PL_oldoldbufptr;
12628 contlen = PL_bufptr - PL_oldoldbufptr;
12630 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12631 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12634 The code below is removed for NetWare because it abends/crashes on NetWare
12635 when the script has error such as not having the closing quotes like:
12636 if ($var eq "value)
12637 Checking of white spaces is anyway done in NetWare code.
12640 while (isSPACE(*PL_oldbufptr))
12643 context = PL_oldbufptr;
12644 contlen = PL_bufptr - PL_oldbufptr;
12646 else if (yychar > 255)
12647 where = "next token ???";
12648 else if (yychar == -2) { /* YYEMPTY */
12649 if (PL_lex_state == LEX_NORMAL ||
12650 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12651 where = "at end of line";
12652 else if (PL_lex_inpat)
12653 where = "within pattern";
12655 where = "within string";
12658 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12660 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12661 else if (isPRINT_LC(yychar)) {
12662 const char string = yychar;
12663 sv_catpvn(where_sv, &string, 1);
12666 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12667 where = SvPVX_const(where_sv);
12669 msg = sv_2mortal(newSVpv(s, 0));
12670 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12671 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12673 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12675 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12676 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12677 Perl_sv_catpvf(aTHX_ msg,
12678 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12679 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12682 if (PL_in_eval & EVAL_WARNONLY) {
12683 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12687 if (PL_error_count >= 10) {
12688 if (PL_in_eval && SvCUR(ERRSV))
12689 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12690 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12692 Perl_croak(aTHX_ "%s has too many errors.\n",
12693 OutCopFILE(PL_curcop));
12696 PL_in_my_stash = NULL;
12700 #pragma segment Main
12704 S_swallow_bom(pTHX_ U8 *s)
12707 const STRLEN slen = SvCUR(PL_linestr);
12709 PERL_ARGS_ASSERT_SWALLOW_BOM;
12713 if (s[1] == 0xFE) {
12714 /* UTF-16 little-endian? (or UTF32-LE?) */
12715 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12716 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12717 #ifndef PERL_NO_UTF16_FILTER
12718 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12720 if (PL_bufend > (char*)s) {
12721 s = add_utf16_textfilter(s, TRUE);
12724 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12729 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12730 #ifndef PERL_NO_UTF16_FILTER
12731 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12733 if (PL_bufend > (char *)s) {
12734 s = add_utf16_textfilter(s, FALSE);
12737 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12742 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12743 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12744 s += 3; /* UTF-8 */
12750 if (s[2] == 0xFE && s[3] == 0xFF) {
12751 /* UTF-32 big-endian */
12752 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12755 else if (s[2] == 0 && s[3] != 0) {
12758 * are a good indicator of UTF-16BE. */
12759 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12760 s = add_utf16_textfilter(s, FALSE);
12765 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12766 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12767 s += 4; /* UTF-8 */
12773 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12776 * are a good indicator of UTF-16LE. */
12777 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12778 s = add_utf16_textfilter(s, TRUE);
12785 #ifndef PERL_NO_UTF16_FILTER
12787 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12790 SV *const filter = FILTER_DATA(idx);
12791 /* We re-use this each time round, throwing the contents away before we
12793 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12794 SV *const utf8_buffer = filter;
12795 IV status = IoPAGE(filter);
12796 const bool reverse = IoLINES(filter);
12799 /* As we're automatically added, at the lowest level, and hence only called
12800 from this file, we can be sure that we're not called in block mode. Hence
12801 don't bother writing code to deal with block mode. */
12803 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12806 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
12808 DEBUG_P(PerlIO_printf(Perl_debug_log,
12809 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
12810 FPTR2DPTR(void *, S_utf16_textfilter),
12811 reverse ? 'l' : 'b', idx, maxlen, status,
12812 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12819 /* First, look in our buffer of existing UTF-8 data: */
12820 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12824 } else if (status == 0) {
12826 IoPAGE(filter) = 0;
12827 nl = SvEND(utf8_buffer);
12830 STRLEN got = nl - SvPVX(utf8_buffer);
12831 /* Did we have anything to append? */
12833 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12834 /* Everything else in this code works just fine if SVp_POK isn't
12835 set. This, however, needs it, and we need it to work, else
12836 we loop infinitely because the buffer is never consumed. */
12837 sv_chop(utf8_buffer, nl);
12841 /* OK, not a complete line there, so need to read some more UTF-16.
12842 Read an extra octect if the buffer currently has an odd number. */
12846 if (SvCUR(utf16_buffer) >= 2) {
12847 /* Location of the high octet of the last complete code point.
12848 Gosh, UTF-16 is a pain. All the benefits of variable length,
12849 *coupled* with all the benefits of partial reads and
12851 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12852 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12854 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12858 /* We have the first half of a surrogate. Read more. */
12859 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12862 status = FILTER_READ(idx + 1, utf16_buffer,
12863 160 + (SvCUR(utf16_buffer) & 1));
12864 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
12865 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12868 IoPAGE(filter) = status;
12873 chars = SvCUR(utf16_buffer) >> 1;
12874 have = SvCUR(utf8_buffer);
12875 SvGROW(utf8_buffer, have + chars * 3 + 1);
12878 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12879 (U8*)SvPVX_const(utf8_buffer) + have,
12880 chars * 2, &newlen);
12882 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12883 (U8*)SvPVX_const(utf8_buffer) + have,
12884 chars * 2, &newlen);
12886 SvCUR_set(utf8_buffer, have + newlen);
12889 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12890 it's private to us, and utf16_to_utf8{,reversed} take a
12891 (pointer,length) pair, rather than a NUL-terminated string. */
12892 if(SvCUR(utf16_buffer) & 1) {
12893 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12894 SvCUR_set(utf16_buffer, 1);
12896 SvCUR_set(utf16_buffer, 0);
12899 DEBUG_P(PerlIO_printf(Perl_debug_log,
12900 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
12902 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12903 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12908 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12910 SV *filter = filter_add(S_utf16_textfilter, NULL);
12912 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12913 sv_setpvs(filter, "");
12914 IoLINES(filter) = reversed;
12915 IoPAGE(filter) = 1; /* Not EOF */
12917 /* Sadly, we have to return a valid pointer, come what may, so we have to
12918 ignore any error return from this. */
12919 SvCUR_set(PL_linestr, 0);
12920 if (FILTER_READ(0, PL_linestr, 0)) {
12921 SvUTF8_on(PL_linestr);
12923 SvUTF8_on(PL_linestr);
12925 PL_bufend = SvEND(PL_linestr);
12926 return (U8*)SvPVX(PL_linestr);
12931 Returns a pointer to the next character after the parsed
12932 vstring, as well as updating the passed in sv.
12934 Function must be called like
12937 s = scan_vstring(s,e,sv);
12939 where s and e are the start and end of the string.
12940 The sv should already be large enough to store the vstring
12941 passed in, for performance reasons.
12946 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12949 const char *pos = s;
12950 const char *start = s;
12952 PERL_ARGS_ASSERT_SCAN_VSTRING;
12954 if (*pos == 'v') pos++; /* get past 'v' */
12955 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12957 if ( *pos != '.') {
12958 /* this may not be a v-string if followed by => */
12959 const char *next = pos;
12960 while (next < e && isSPACE(*next))
12962 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12963 /* return string not v-string */
12964 sv_setpvn(sv,(char *)s,pos-s);
12965 return (char *)pos;
12969 if (!isALPHA(*pos)) {
12970 U8 tmpbuf[UTF8_MAXBYTES+1];
12973 s++; /* get past 'v' */
12978 /* this is atoi() that tolerates underscores */
12981 const char *end = pos;
12983 while (--end >= s) {
12985 const UV orev = rev;
12986 rev += (*end - '0') * mult;
12989 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12990 "Integer overflow in decimal number");
12994 if (rev > 0x7FFFFFFF)
12995 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12997 /* Append native character for the rev point */
12998 tmpend = uvchr_to_utf8(tmpbuf, rev);
12999 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13000 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13002 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13008 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13012 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13020 * c-indentation-style: bsd
13021 * c-basic-offset: 4
13022 * indent-tabs-mode: t
13025 * ex: set ts=8 sts=4 sw=4 noet: