3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 #define PERL_IN_TOKE_C
28 #define new_constant(a,b,c,d,e,f,g) \
29 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
31 #define pl_yylval (PL_parser->yylval)
33 /* YYINITDEPTH -- initial size of the parser's stacks. */
34 #define YYINITDEPTH 200
36 /* XXX temporary backwards compatibility */
37 #define PL_lex_brackets (PL_parser->lex_brackets)
38 #define PL_lex_brackstack (PL_parser->lex_brackstack)
39 #define PL_lex_casemods (PL_parser->lex_casemods)
40 #define PL_lex_casestack (PL_parser->lex_casestack)
41 #define PL_lex_defer (PL_parser->lex_defer)
42 #define PL_lex_dojoin (PL_parser->lex_dojoin)
43 #define PL_lex_expect (PL_parser->lex_expect)
44 #define PL_lex_formbrack (PL_parser->lex_formbrack)
45 #define PL_lex_inpat (PL_parser->lex_inpat)
46 #define PL_lex_inwhat (PL_parser->lex_inwhat)
47 #define PL_lex_op (PL_parser->lex_op)
48 #define PL_lex_repl (PL_parser->lex_repl)
49 #define PL_lex_starts (PL_parser->lex_starts)
50 #define PL_lex_stuff (PL_parser->lex_stuff)
51 #define PL_multi_start (PL_parser->multi_start)
52 #define PL_multi_open (PL_parser->multi_open)
53 #define PL_multi_close (PL_parser->multi_close)
54 #define PL_pending_ident (PL_parser->pending_ident)
55 #define PL_preambled (PL_parser->preambled)
56 #define PL_sublex_info (PL_parser->sublex_info)
57 #define PL_linestr (PL_parser->linestr)
58 #define PL_expect (PL_parser->expect)
59 #define PL_copline (PL_parser->copline)
60 #define PL_bufptr (PL_parser->bufptr)
61 #define PL_oldbufptr (PL_parser->oldbufptr)
62 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
63 #define PL_linestart (PL_parser->linestart)
64 #define PL_bufend (PL_parser->bufend)
65 #define PL_last_uni (PL_parser->last_uni)
66 #define PL_last_lop (PL_parser->last_lop)
67 #define PL_last_lop_op (PL_parser->last_lop_op)
68 #define PL_lex_state (PL_parser->lex_state)
69 #define PL_rsfp (PL_parser->rsfp)
70 #define PL_rsfp_filters (PL_parser->rsfp_filters)
71 #define PL_in_my (PL_parser->in_my)
72 #define PL_in_my_stash (PL_parser->in_my_stash)
73 #define PL_tokenbuf (PL_parser->tokenbuf)
74 #define PL_multi_end (PL_parser->multi_end)
75 #define PL_error_count (PL_parser->error_count)
78 # define PL_endwhite (PL_parser->endwhite)
79 # define PL_faketokens (PL_parser->faketokens)
80 # define PL_lasttoke (PL_parser->lasttoke)
81 # define PL_nextwhite (PL_parser->nextwhite)
82 # define PL_realtokenstart (PL_parser->realtokenstart)
83 # define PL_skipwhite (PL_parser->skipwhite)
84 # define PL_thisclose (PL_parser->thisclose)
85 # define PL_thismad (PL_parser->thismad)
86 # define PL_thisopen (PL_parser->thisopen)
87 # define PL_thisstuff (PL_parser->thisstuff)
88 # define PL_thistoken (PL_parser->thistoken)
89 # define PL_thiswhite (PL_parser->thiswhite)
90 # define PL_thiswhite (PL_parser->thiswhite)
91 # define PL_nexttoke (PL_parser->nexttoke)
92 # define PL_curforce (PL_parser->curforce)
94 # define PL_nexttoke (PL_parser->nexttoke)
95 # define PL_nexttype (PL_parser->nexttype)
96 # define PL_nextval (PL_parser->nextval)
100 S_pending_ident(pTHX);
102 static const char ident_too_long[] = "Identifier too long";
103 static const char commaless_variable_list[] = "comma-less variable list";
105 #ifndef PERL_NO_UTF16_FILTER
106 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
107 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
111 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
112 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
114 # define CURMAD(slot,sv)
115 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
118 #define XFAKEBRACK 128
119 #define XENUMMASK 127
121 #ifdef USE_UTF8_SCRIPTS
122 # define UTF (!IN_BYTES)
124 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
127 /* The maximum number of characters preceding the unrecognized one to display */
128 #define UNRECOGNIZED_PRECEDE_COUNT 10
130 /* In variables named $^X, these are the legal values for X.
131 * 1999-02-27 mjd-perl-patch@plover.com */
132 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
134 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
136 /* LEX_* are values for PL_lex_state, the state of the lexer.
137 * They are arranged oddly so that the guard on the switch statement
138 * can get by with a single comparison (if the compiler is smart enough).
141 /* #define LEX_NOTPARSING 11 is done in perl.h. */
143 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
144 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
145 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
146 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
147 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
149 /* at end of code, eg "$x" followed by: */
150 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
151 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
153 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
154 string or after \E, $foo, etc */
155 #define LEX_INTERPCONST 2 /* NOT USED */
156 #define LEX_FORMLINE 1 /* expecting a format line */
157 #define LEX_KNOWNEXT 0 /* next token known; just return it */
161 static const char* const lex_state_names[] = {
180 #include "keywords.h"
182 /* CLINE is a macro that ensures PL_copline has a sane value */
187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
190 # define SKIPSPACE0(s) skipspace0(s)
191 # define SKIPSPACE1(s) skipspace1(s)
192 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
193 # define PEEKSPACE(s) skipspace2(s,0)
195 # define SKIPSPACE0(s) skipspace(s)
196 # define SKIPSPACE1(s) skipspace(s)
197 # define SKIPSPACE2(s,tsv) skipspace(s)
198 # define PEEKSPACE(s) skipspace(s)
202 * Convenience functions to return different tokens and prime the
203 * lexer for the next token. They all take an argument.
205 * TOKEN : generic token (used for '(', DOLSHARP, etc)
206 * OPERATOR : generic operator
207 * AOPERATOR : assignment operator
208 * PREBLOCK : beginning the block after an if, while, foreach, ...
209 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
210 * PREREF : *EXPR where EXPR is not a simple identifier
211 * TERM : expression term
212 * LOOPX : loop exiting command (goto, last, dump, etc)
213 * FTST : file test operator
214 * FUN0 : zero-argument function
215 * FUN1 : not used, except for not, which isn't a UNIOP
216 * BOop : bitwise or or xor
218 * SHop : shift operator
219 * PWop : power operator
220 * PMop : pattern-matching operator
221 * Aop : addition-level operator
222 * Mop : multiplication-level operator
223 * Eop : equality-testing operator
224 * Rop : relational operator <= != gt
226 * Also see LOP and lop() below.
229 #ifdef DEBUGGING /* Serve -DT. */
230 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
232 # define REPORT(retval) (retval)
235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
242 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
243 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
244 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
245 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
246 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
247 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
248 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
249 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
250 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
251 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
252 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
253 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
254 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
256 /* This bit of chicanery makes a unary function followed by
257 * a parenthesis into a function with one argument, highest precedence.
258 * The UNIDOR macro is for unary functions that can be followed by the //
259 * operator (such as C<shift // 0>).
261 #define UNI2(f,x) { \
262 pl_yylval.ival = f; \
265 PL_last_uni = PL_oldbufptr; \
266 PL_last_lop_op = f; \
268 return REPORT( (int)FUNC1 ); \
270 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
272 #define UNI(f) UNI2(f,XTERM)
273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
275 #define UNIBRACK(f) { \
276 pl_yylval.ival = f; \
278 PL_last_uni = PL_oldbufptr; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
285 /* grandfather return to old style */
286 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
290 /* how to interpret the pl_yylval associated with the token */
294 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
300 static struct debug_tokens {
302 enum token_type type;
304 } const debug_tokens[] =
306 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
307 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
308 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
309 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
310 { ARROW, TOKENTYPE_NONE, "ARROW" },
311 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
312 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
313 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
314 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
315 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
316 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
317 { DO, TOKENTYPE_NONE, "DO" },
318 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
319 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
320 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
321 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
322 { ELSE, TOKENTYPE_NONE, "ELSE" },
323 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
324 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
325 { FOR, TOKENTYPE_IVAL, "FOR" },
326 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
330 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
331 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
332 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
333 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
334 { IF, TOKENTYPE_IVAL, "IF" },
335 { LABEL, TOKENTYPE_PVAL, "LABEL" },
336 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
337 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
338 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
339 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
340 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
341 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
342 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
343 { MY, TOKENTYPE_IVAL, "MY" },
344 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
351 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
352 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
353 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
354 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
355 { PREINC, TOKENTYPE_NONE, "PREINC" },
356 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
357 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
358 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
359 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
360 { SUB, TOKENTYPE_NONE, "SUB" },
361 { THING, TOKENTYPE_OPVAL, "THING" },
362 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
363 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
364 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
365 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
366 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
367 { USE, TOKENTYPE_IVAL, "USE" },
368 { WHEN, TOKENTYPE_IVAL, "WHEN" },
369 { WHILE, TOKENTYPE_IVAL, "WHILE" },
370 { WORD, TOKENTYPE_OPVAL, "WORD" },
371 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
372 { 0, TOKENTYPE_NONE, NULL }
375 /* dump the returned token in rv, plus any optional arg in pl_yylval */
378 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
382 PERL_ARGS_ASSERT_TOKEREPORT;
385 const char *name = NULL;
386 enum token_type type = TOKENTYPE_NONE;
387 const struct debug_tokens *p;
388 SV* const report = newSVpvs("<== ");
390 for (p = debug_tokens; p->token; p++) {
391 if (p->token == (int)rv) {
398 Perl_sv_catpv(aTHX_ report, name);
399 else if ((char)rv > ' ' && (char)rv < '~')
400 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
402 sv_catpvs(report, "EOF");
404 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
407 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
410 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
412 case TOKENTYPE_OPNUM:
413 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
414 PL_op_name[lvalp->ival]);
417 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
419 case TOKENTYPE_OPVAL:
421 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
422 PL_op_name[lvalp->opval->op_type]);
423 if (lvalp->opval->op_type == OP_CONST) {
424 Perl_sv_catpvf(aTHX_ report, " %s",
425 SvPEEK(cSVOPx_sv(lvalp->opval)));
430 sv_catpvs(report, "(opval=null)");
433 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
439 /* print the buffer with suitable escapes */
442 S_printbuf(pTHX_ const char *const fmt, const char *const s)
444 SV* const tmp = newSVpvs("");
446 PERL_ARGS_ASSERT_PRINTBUF;
448 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
457 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
458 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
462 S_ao(pTHX_ int toketype)
465 if (*PL_bufptr == '=') {
467 if (toketype == ANDAND)
468 pl_yylval.ival = OP_ANDASSIGN;
469 else if (toketype == OROR)
470 pl_yylval.ival = OP_ORASSIGN;
471 else if (toketype == DORDOR)
472 pl_yylval.ival = OP_DORASSIGN;
480 * When Perl expects an operator and finds something else, no_op
481 * prints the warning. It always prints "<something> found where
482 * operator expected. It prints "Missing semicolon on previous line?"
483 * if the surprise occurs at the start of the line. "do you need to
484 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
485 * where the compiler doesn't know if foo is a method call or a function.
486 * It prints "Missing operator before end of line" if there's nothing
487 * after the missing operator, or "... before <...>" if there is something
488 * after the missing operator.
492 S_no_op(pTHX_ const char *const what, char *s)
495 char * const oldbp = PL_bufptr;
496 const bool is_first = (PL_oldbufptr == PL_linestart);
498 PERL_ARGS_ASSERT_NO_OP;
504 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
505 if (ckWARN_d(WARN_SYNTAX)) {
507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
508 "\t(Missing semicolon on previous line?)\n");
509 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
511 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
513 if (t < PL_bufptr && isSPACE(*t))
514 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
515 "\t(Do you need to predeclare %.*s?)\n",
516 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
521 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
529 * Complain about missing quote/regexp/heredoc terminator.
530 * If it's called with NULL then it cauterizes the line buffer.
531 * If we're in a delimited string and the delimiter is a control
532 * character, it's reformatted into a two-char sequence like ^C.
537 S_missingterm(pTHX_ char *s)
543 char * const nl = strrchr(s,'\n');
547 else if (isCNTRL(PL_multi_close)) {
549 tmpbuf[1] = (char)toCTRL(PL_multi_close);
554 *tmpbuf = (char)PL_multi_close;
558 q = strchr(s,'"') ? '\'' : '"';
559 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
562 #define FEATURE_IS_ENABLED(name) \
563 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
564 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
565 /* The longest string we pass in. */
566 #define MAX_FEATURE_LEN (sizeof("switch")-1)
569 * S_feature_is_enabled
570 * Check whether the named feature is enabled.
573 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
576 HV * const hinthv = GvHV(PL_hintgv);
577 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
579 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
581 assert(namelen <= MAX_FEATURE_LEN);
582 memcpy(&he_name[8], name, namelen);
584 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
592 Perl_deprecate(pTHX_ const char *const s)
594 PERL_ARGS_ASSERT_DEPRECATE;
596 Perl_ck_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
600 S_deprecate_old(pTHX_ const char *const s)
602 /* This function should NOT be called for any new deprecated warnings */
603 /* Use Perl_deprecate instead */
605 /* It is here to maintain backward compatibility with the pre-5.8 */
606 /* warnings category hierarchy. The "deprecated" category used to */
607 /* live under the "syntax" category. It is now a top-level category */
608 /* in its own right. */
610 PERL_ARGS_ASSERT_DEPRECATE_OLD;
612 Perl_ck_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
613 "Use of %s is deprecated", s);
617 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
618 * utf16-to-utf8-reversed.
621 #ifdef PERL_CR_FILTER
625 register const char *s = SvPVX_const(sv);
626 register const char * const e = s + SvCUR(sv);
628 PERL_ARGS_ASSERT_STRIP_RETURN;
630 /* outer loop optimized to do nothing if there are no CR-LFs */
632 if (*s++ == '\r' && *s == '\n') {
633 /* hit a CR-LF, need to copy the rest */
634 register char *d = s - 1;
637 if (*s == '\r' && s[1] == '\n')
648 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
650 const I32 count = FILTER_READ(idx+1, sv, maxlen);
651 if (count > 0 && !maxlen)
662 * Create a parser object and initialise its parser and lexer fields
664 * rsfp is the opened file handle to read from (if any),
666 * line holds any initial content already read from the file (or in
667 * the case of no file, such as an eval, the whole contents);
669 * new_filter indicates that this is a new file and it shouldn't inherit
670 * the filters from the current parser (ie require).
674 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
677 const char *s = NULL;
679 yy_parser *parser, *oparser;
681 /* create and initialise a parser */
683 Newxz(parser, 1, yy_parser);
684 parser->old_parser = oparser = PL_parser;
687 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
688 parser->ps = parser->stack;
689 parser->stack_size = YYINITDEPTH;
691 parser->stack->state = 0;
692 parser->yyerrstatus = 0;
693 parser->yychar = YYEMPTY; /* Cause a token to be read. */
695 /* on scope exit, free this parser and restore any outer one */
697 parser->saved_curcop = PL_curcop;
699 /* initialise lexer state */
702 parser->curforce = -1;
704 parser->nexttoke = 0;
706 parser->error_count = oparser ? oparser->error_count : 0;
707 parser->copline = NOLINE;
708 parser->lex_state = LEX_NORMAL;
709 parser->expect = XSTATE;
711 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
712 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
714 Newx(parser->lex_brackstack, 120, char);
715 Newx(parser->lex_casestack, 12, char);
716 *parser->lex_casestack = '\0';
719 s = SvPV_const(line, len);
725 parser->linestr = newSVpvs("\n;");
726 } else if (SvREADONLY(line) || s[len-1] != ';') {
727 parser->linestr = newSVsv(line);
729 sv_catpvs(parser->linestr, "\n;");
732 SvREFCNT_inc_simple_void_NN(line);
733 parser->linestr = line;
735 parser->oldoldbufptr =
738 parser->linestart = SvPVX(parser->linestr);
739 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
740 parser->last_lop = parser->last_uni = NULL;
744 /* delete a parser object */
747 Perl_parser_free(pTHX_ const yy_parser *parser)
749 PERL_ARGS_ASSERT_PARSER_FREE;
751 PL_curcop = parser->saved_curcop;
752 SvREFCNT_dec(parser->linestr);
754 if (parser->rsfp == PerlIO_stdin())
755 PerlIO_clearerr(parser->rsfp);
756 else if (parser->rsfp && (!parser->old_parser ||
757 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
758 PerlIO_close(parser->rsfp);
759 SvREFCNT_dec(parser->rsfp_filters);
761 Safefree(parser->stack);
762 Safefree(parser->lex_brackstack);
763 Safefree(parser->lex_casestack);
764 PL_parser = parser->old_parser;
771 * Finalizer for lexing operations. Must be called when the parser is
772 * done with the lexer.
779 PL_doextract = FALSE;
784 * This subroutine has nothing to do with tilting, whether at windmills
785 * or pinball tables. Its name is short for "increment line". It
786 * increments the current line number in CopLINE(PL_curcop) and checks
787 * to see whether the line starts with a comment of the form
788 * # line 500 "foo.pm"
789 * If so, it sets the current line number and file to the values in the comment.
793 S_incline(pTHX_ const char *s)
800 PERL_ARGS_ASSERT_INCLINE;
802 CopLINE_inc(PL_curcop);
805 while (SPACE_OR_TAB(*s))
807 if (strnEQ(s, "line", 4))
811 if (SPACE_OR_TAB(*s))
815 while (SPACE_OR_TAB(*s))
823 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
825 while (SPACE_OR_TAB(*s))
827 if (*s == '"' && (t = strchr(s+1, '"'))) {
837 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
839 if (*e != '\n' && *e != '\0')
840 return; /* false alarm */
843 const STRLEN len = t - s;
845 SV *const temp_sv = CopFILESV(PL_curcop);
851 tmplen = SvCUR(temp_sv);
857 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
858 /* must copy *{"::_<(eval N)[oldfilename:L]"}
859 * to *{"::_<newfilename"} */
860 /* However, the long form of evals is only turned on by the
861 debugger - usually they're "(eval %lu)" */
865 STRLEN tmplen2 = len;
866 if (tmplen + 2 <= sizeof smallbuf)
869 Newx(tmpbuf, tmplen + 2, char);
872 memcpy(tmpbuf + 2, cf, tmplen);
874 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
879 if (tmplen2 + 2 <= sizeof smallbuf)
882 Newx(tmpbuf2, tmplen2 + 2, char);
884 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
885 /* Either they malloc'd it, or we malloc'd it,
886 so no prefix is present in ours. */
891 memcpy(tmpbuf2 + 2, s, tmplen2);
894 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
896 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
897 /* adjust ${"::_<newfilename"} to store the new file name */
898 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
899 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
900 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
903 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
905 if (tmpbuf != smallbuf) Safefree(tmpbuf);
908 CopFILE_free(PL_curcop);
909 CopFILE_setn(PL_curcop, s, len);
911 CopLINE_set(PL_curcop, atoi(n)-1);
915 /* skip space before PL_thistoken */
918 S_skipspace0(pTHX_ register char *s)
920 PERL_ARGS_ASSERT_SKIPSPACE0;
927 PL_thiswhite = newSVpvs("");
928 sv_catsv(PL_thiswhite, PL_skipwhite);
929 sv_free(PL_skipwhite);
932 PL_realtokenstart = s - SvPVX(PL_linestr);
936 /* skip space after PL_thistoken */
939 S_skipspace1(pTHX_ register char *s)
941 const char *start = s;
942 I32 startoff = start - SvPVX(PL_linestr);
944 PERL_ARGS_ASSERT_SKIPSPACE1;
949 start = SvPVX(PL_linestr) + startoff;
950 if (!PL_thistoken && PL_realtokenstart >= 0) {
951 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
952 PL_thistoken = newSVpvn(tstart, start - tstart);
954 PL_realtokenstart = -1;
957 PL_nextwhite = newSVpvs("");
958 sv_catsv(PL_nextwhite, PL_skipwhite);
959 sv_free(PL_skipwhite);
966 S_skipspace2(pTHX_ register char *s, SV **svp)
969 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
970 const I32 startoff = s - SvPVX(PL_linestr);
972 PERL_ARGS_ASSERT_SKIPSPACE2;
975 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
976 if (!PL_madskills || !svp)
978 start = SvPVX(PL_linestr) + startoff;
979 if (!PL_thistoken && PL_realtokenstart >= 0) {
980 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
981 PL_thistoken = newSVpvn(tstart, start - tstart);
982 PL_realtokenstart = -1;
987 sv_setsv(*svp, PL_skipwhite);
988 sv_free(PL_skipwhite);
997 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
999 AV *av = CopFILEAVx(PL_curcop);
1001 SV * const sv = newSV_type(SVt_PVMG);
1003 sv_setsv(sv, orig_sv);
1005 sv_setpvn(sv, buf, len);
1008 av_store(av, (I32)CopLINE(PL_curcop), sv);
1014 * Called to gobble the appropriate amount and type of whitespace.
1015 * Skips comments as well.
1019 S_skipspace(pTHX_ register char *s)
1024 int startoff = s - SvPVX(PL_linestr);
1026 PERL_ARGS_ASSERT_SKIPSPACE;
1029 sv_free(PL_skipwhite);
1033 PERL_ARGS_ASSERT_SKIPSPACE;
1035 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1036 while (s < PL_bufend && SPACE_OR_TAB(*s))
1046 SSize_t oldprevlen, oldoldprevlen;
1047 SSize_t oldloplen = 0, oldunilen = 0;
1048 while (s < PL_bufend && isSPACE(*s)) {
1049 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1054 if (s < PL_bufend && *s == '#') {
1055 while (s < PL_bufend && *s != '\n')
1057 if (s < PL_bufend) {
1059 if (PL_in_eval && !PL_rsfp) {
1066 /* only continue to recharge the buffer if we're at the end
1067 * of the buffer, we're not reading from a source filter, and
1068 * we're in normal lexing mode
1070 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1071 PL_lex_state == LEX_FORMLINE)
1078 /* try to recharge the buffer */
1080 curoff = s - SvPVX(PL_linestr);
1083 if ((s = filter_gets(PL_linestr, PL_rsfp,
1084 (prevlen = SvCUR(PL_linestr)))) == NULL)
1087 if (PL_madskills && curoff != startoff) {
1089 PL_skipwhite = newSVpvs("");
1090 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1094 /* mustn't throw out old stuff yet if madpropping */
1095 SvCUR(PL_linestr) = curoff;
1096 s = SvPVX(PL_linestr) + curoff;
1098 if (curoff && s[-1] == '\n')
1102 /* end of file. Add on the -p or -n magic */
1103 /* XXX these shouldn't really be added here, can't set PL_faketokens */
1106 sv_catpvs(PL_linestr,
1107 ";}continue{print or die qq(-p destination: $!\\n);}");
1109 sv_setpvs(PL_linestr,
1110 ";}continue{print or die qq(-p destination: $!\\n);}");
1112 PL_minus_n = PL_minus_p = 0;
1114 else if (PL_minus_n) {
1116 sv_catpvs(PL_linestr, ";}");
1118 sv_setpvs(PL_linestr, ";}");
1124 sv_catpvs(PL_linestr,";");
1126 sv_setpvs(PL_linestr,";");
1129 /* reset variables for next time we lex */
1130 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1136 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1137 PL_last_lop = PL_last_uni = NULL;
1139 /* Close the filehandle. Could be from
1140 * STDIN, or a regular file. If we were reading code from
1141 * STDIN (because the commandline held no -e or filename)
1142 * then we don't close it, we reset it so the code can
1143 * read from STDIN too.
1146 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1147 PerlIO_clearerr(PL_rsfp);
1149 (void)PerlIO_close(PL_rsfp);
1154 /* not at end of file, so we only read another line */
1155 /* make corresponding updates to old pointers, for yyerror() */
1156 oldprevlen = PL_oldbufptr - PL_bufend;
1157 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1159 oldunilen = PL_last_uni - PL_bufend;
1161 oldloplen = PL_last_lop - PL_bufend;
1162 PL_linestart = PL_bufptr = s + prevlen;
1163 PL_bufend = s + SvCUR(PL_linestr);
1165 PL_oldbufptr = s + oldprevlen;
1166 PL_oldoldbufptr = s + oldoldprevlen;
1168 PL_last_uni = s + oldunilen;
1170 PL_last_lop = s + oldloplen;
1173 /* debugger active and we're not compiling the debugger code,
1174 * so store the line into the debugger's array of lines
1176 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
1177 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1184 PL_skipwhite = newSVpvs("");
1185 curoff = s - SvPVX(PL_linestr);
1186 if (curoff - startoff)
1187 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1196 * Check the unary operators to ensure there's no ambiguity in how they're
1197 * used. An ambiguous piece of code would be:
1199 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1200 * the +5 is its argument.
1210 if (PL_oldoldbufptr != PL_last_uni)
1212 while (isSPACE(*PL_last_uni))
1215 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1217 if ((t = strchr(s, '(')) && t < PL_bufptr)
1220 if (ckWARN_d(WARN_AMBIGUOUS)){
1221 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1222 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1223 (int)(s - PL_last_uni), PL_last_uni);
1228 * LOP : macro to build a list operator. Its behaviour has been replaced
1229 * with a subroutine, S_lop() for which LOP is just another name.
1232 #define LOP(f,x) return lop(f,x,s)
1236 * Build a list operator (or something that might be one). The rules:
1237 * - if we have a next token, then it's a list operator [why?]
1238 * - if the next thing is an opening paren, then it's a function
1239 * - else it's a list operator
1243 S_lop(pTHX_ I32 f, int x, char *s)
1247 PERL_ARGS_ASSERT_LOP;
1253 PL_last_lop = PL_oldbufptr;
1254 PL_last_lop_op = (OPCODE)f;
1257 return REPORT(LSTOP);
1260 return REPORT(LSTOP);
1263 return REPORT(FUNC);
1266 return REPORT(FUNC);
1268 return REPORT(LSTOP);
1274 * Sets up for an eventual force_next(). start_force(0) basically does
1275 * an unshift, while start_force(-1) does a push. yylex removes items
1280 S_start_force(pTHX_ int where)
1284 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1285 where = PL_lasttoke;
1286 assert(PL_curforce < 0 || PL_curforce == where);
1287 if (PL_curforce != where) {
1288 for (i = PL_lasttoke; i > where; --i) {
1289 PL_nexttoke[i] = PL_nexttoke[i-1];
1293 if (PL_curforce < 0) /* in case of duplicate start_force() */
1294 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1295 PL_curforce = where;
1298 curmad('^', newSVpvs(""));
1299 CURMAD('_', PL_nextwhite);
1304 S_curmad(pTHX_ char slot, SV *sv)
1310 if (PL_curforce < 0)
1311 where = &PL_thismad;
1313 where = &PL_nexttoke[PL_curforce].next_mad;
1319 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1321 else if (PL_encoding) {
1322 sv_recode_to_utf8(sv, PL_encoding);
1327 /* keep a slot open for the head of the list? */
1328 if (slot != '_' && *where && (*where)->mad_key == '^') {
1329 (*where)->mad_key = slot;
1330 sv_free(MUTABLE_SV(((*where)->mad_val)));
1331 (*where)->mad_val = (void*)sv;
1334 addmad(newMADsv(slot, sv), where, 0);
1337 # define start_force(where) NOOP
1338 # define curmad(slot, sv) NOOP
1343 * When the lexer realizes it knows the next token (for instance,
1344 * it is reordering tokens for the parser) then it can call S_force_next
1345 * to know what token to return the next time the lexer is called. Caller
1346 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1347 * and possibly PL_expect to ensure the lexer handles the token correctly.
1351 S_force_next(pTHX_ I32 type)
1356 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1357 tokereport(type, &NEXTVAL_NEXTTOKE);
1361 if (PL_curforce < 0)
1362 start_force(PL_lasttoke);
1363 PL_nexttoke[PL_curforce].next_type = type;
1364 if (PL_lex_state != LEX_KNOWNEXT)
1365 PL_lex_defer = PL_lex_state;
1366 PL_lex_state = LEX_KNOWNEXT;
1367 PL_lex_expect = PL_expect;
1370 PL_nexttype[PL_nexttoke] = type;
1372 if (PL_lex_state != LEX_KNOWNEXT) {
1373 PL_lex_defer = PL_lex_state;
1374 PL_lex_expect = PL_expect;
1375 PL_lex_state = LEX_KNOWNEXT;
1381 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1384 SV * const sv = newSVpvn_utf8(start, len,
1387 && !is_ascii_string((const U8*)start, len)
1388 && is_utf8_string((const U8*)start, len));
1394 * When the lexer knows the next thing is a word (for instance, it has
1395 * just seen -> and it knows that the next char is a word char, then
1396 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1400 * char *start : buffer position (must be within PL_linestr)
1401 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1402 * int check_keyword : if true, Perl checks to make sure the word isn't
1403 * a keyword (do this if the word is a label, e.g. goto FOO)
1404 * int allow_pack : if true, : characters will also be allowed (require,
1405 * use, etc. do this)
1406 * int allow_initial_tick : used by the "sub" lexer only.
1410 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1416 PERL_ARGS_ASSERT_FORCE_WORD;
1418 start = SKIPSPACE1(start);
1420 if (isIDFIRST_lazy_if(s,UTF) ||
1421 (allow_pack && *s == ':') ||
1422 (allow_initial_tick && *s == '\'') )
1424 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1425 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1427 start_force(PL_curforce);
1429 curmad('X', newSVpvn(start,s-start));
1430 if (token == METHOD) {
1435 PL_expect = XOPERATOR;
1439 curmad('g', newSVpvs( "forced" ));
1440 NEXTVAL_NEXTTOKE.opval
1441 = (OP*)newSVOP(OP_CONST,0,
1442 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1443 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1451 * Called when the lexer wants $foo *foo &foo etc, but the program
1452 * text only contains the "foo" portion. The first argument is a pointer
1453 * to the "foo", and the second argument is the type symbol to prefix.
1454 * Forces the next token to be a "WORD".
1455 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1459 S_force_ident(pTHX_ register const char *s, int kind)
1463 PERL_ARGS_ASSERT_FORCE_IDENT;
1466 const STRLEN len = strlen(s);
1467 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1468 start_force(PL_curforce);
1469 NEXTVAL_NEXTTOKE.opval = o;
1472 o->op_private = OPpCONST_ENTERED;
1473 /* XXX see note in pp_entereval() for why we forgo typo
1474 warnings if the symbol must be introduced in an eval.
1476 gv_fetchpvn_flags(s, len,
1477 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1479 kind == '$' ? SVt_PV :
1480 kind == '@' ? SVt_PVAV :
1481 kind == '%' ? SVt_PVHV :
1489 Perl_str_to_version(pTHX_ SV *sv)
1494 const char *start = SvPV_const(sv,len);
1495 const char * const end = start + len;
1496 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1498 PERL_ARGS_ASSERT_STR_TO_VERSION;
1500 while (start < end) {
1504 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1509 retval += ((NV)n)/nshift;
1518 * Forces the next token to be a version number.
1519 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1520 * and if "guessing" is TRUE, then no new token is created (and the caller
1521 * must use an alternative parsing method).
1525 S_force_version(pTHX_ char *s, int guessing)
1531 I32 startoff = s - SvPVX(PL_linestr);
1534 PERL_ARGS_ASSERT_FORCE_VERSION;
1542 while (isDIGIT(*d) || *d == '_' || *d == '.')
1546 start_force(PL_curforce);
1547 curmad('X', newSVpvn(s,d-s));
1550 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1552 s = scan_num(s, &pl_yylval);
1553 version = pl_yylval.opval;
1554 ver = cSVOPx(version)->op_sv;
1555 if (SvPOK(ver) && !SvNIOK(ver)) {
1556 SvUPGRADE(ver, SVt_PVNV);
1557 SvNV_set(ver, str_to_version(ver));
1558 SvNOK_on(ver); /* hint that it is a version */
1561 else if (guessing) {
1564 sv_free(PL_nextwhite); /* let next token collect whitespace */
1566 s = SvPVX(PL_linestr) + startoff;
1574 if (PL_madskills && !version) {
1575 sv_free(PL_nextwhite); /* let next token collect whitespace */
1577 s = SvPVX(PL_linestr) + startoff;
1580 /* NOTE: The parser sees the package name and the VERSION swapped */
1581 start_force(PL_curforce);
1582 NEXTVAL_NEXTTOKE.opval = version;
1590 * Tokenize a quoted string passed in as an SV. It finds the next
1591 * chunk, up to end of string or a backslash. It may make a new
1592 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1597 S_tokeq(pTHX_ SV *sv)
1601 register char *send;
1606 PERL_ARGS_ASSERT_TOKEQ;
1611 s = SvPV_force(sv, len);
1612 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1615 while (s < send && *s != '\\')
1620 if ( PL_hints & HINT_NEW_STRING ) {
1621 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1625 if (s + 1 < send && (s[1] == '\\'))
1626 s++; /* all that, just for this */
1631 SvCUR_set(sv, d - SvPVX_const(sv));
1633 if ( PL_hints & HINT_NEW_STRING )
1634 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1639 * Now come three functions related to double-quote context,
1640 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1641 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1642 * interact with PL_lex_state, and create fake ( ... ) argument lists
1643 * to handle functions and concatenation.
1644 * They assume that whoever calls them will be setting up a fake
1645 * join call, because each subthing puts a ',' after it. This lets
1648 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1650 * (I'm not sure whether the spurious commas at the end of lcfirst's
1651 * arguments and join's arguments are created or not).
1656 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1658 * Pattern matching will set PL_lex_op to the pattern-matching op to
1659 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1661 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1663 * Everything else becomes a FUNC.
1665 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1666 * had an OP_CONST or OP_READLINE). This just sets us up for a
1667 * call to S_sublex_push().
1671 S_sublex_start(pTHX)
1674 register const I32 op_type = pl_yylval.ival;
1676 if (op_type == OP_NULL) {
1677 pl_yylval.opval = PL_lex_op;
1681 if (op_type == OP_CONST || op_type == OP_READLINE) {
1682 SV *sv = tokeq(PL_lex_stuff);
1684 if (SvTYPE(sv) == SVt_PVIV) {
1685 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1687 const char * const p = SvPV_const(sv, len);
1688 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1692 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1693 PL_lex_stuff = NULL;
1694 /* Allow <FH> // "foo" */
1695 if (op_type == OP_READLINE)
1696 PL_expect = XTERMORDORDOR;
1699 else if (op_type == OP_BACKTICK && PL_lex_op) {
1700 /* readpipe() vas overriden */
1701 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1702 pl_yylval.opval = PL_lex_op;
1704 PL_lex_stuff = NULL;
1708 PL_sublex_info.super_state = PL_lex_state;
1709 PL_sublex_info.sub_inwhat = (U16)op_type;
1710 PL_sublex_info.sub_op = PL_lex_op;
1711 PL_lex_state = LEX_INTERPPUSH;
1715 pl_yylval.opval = PL_lex_op;
1725 * Create a new scope to save the lexing state. The scope will be
1726 * ended in S_sublex_done. Returns a '(', starting the function arguments
1727 * to the uc, lc, etc. found before.
1728 * Sets PL_lex_state to LEX_INTERPCONCAT.
1737 PL_lex_state = PL_sublex_info.super_state;
1738 SAVEBOOL(PL_lex_dojoin);
1739 SAVEI32(PL_lex_brackets);
1740 SAVEI32(PL_lex_casemods);
1741 SAVEI32(PL_lex_starts);
1742 SAVEI8(PL_lex_state);
1743 SAVEVPTR(PL_lex_inpat);
1744 SAVEI16(PL_lex_inwhat);
1745 SAVECOPLINE(PL_curcop);
1746 SAVEPPTR(PL_bufptr);
1747 SAVEPPTR(PL_bufend);
1748 SAVEPPTR(PL_oldbufptr);
1749 SAVEPPTR(PL_oldoldbufptr);
1750 SAVEPPTR(PL_last_lop);
1751 SAVEPPTR(PL_last_uni);
1752 SAVEPPTR(PL_linestart);
1753 SAVESPTR(PL_linestr);
1754 SAVEGENERICPV(PL_lex_brackstack);
1755 SAVEGENERICPV(PL_lex_casestack);
1757 PL_linestr = PL_lex_stuff;
1758 PL_lex_stuff = NULL;
1760 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1761 = SvPVX(PL_linestr);
1762 PL_bufend += SvCUR(PL_linestr);
1763 PL_last_lop = PL_last_uni = NULL;
1764 SAVEFREESV(PL_linestr);
1766 PL_lex_dojoin = FALSE;
1767 PL_lex_brackets = 0;
1768 Newx(PL_lex_brackstack, 120, char);
1769 Newx(PL_lex_casestack, 12, char);
1770 PL_lex_casemods = 0;
1771 *PL_lex_casestack = '\0';
1773 PL_lex_state = LEX_INTERPCONCAT;
1774 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1776 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1777 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1778 PL_lex_inpat = PL_sublex_info.sub_op;
1780 PL_lex_inpat = NULL;
1787 * Restores lexer state after a S_sublex_push.
1794 if (!PL_lex_starts++) {
1795 SV * const sv = newSVpvs("");
1796 if (SvUTF8(PL_linestr))
1798 PL_expect = XOPERATOR;
1799 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1803 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1804 PL_lex_state = LEX_INTERPCASEMOD;
1808 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1809 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1810 PL_linestr = PL_lex_repl;
1812 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1813 PL_bufend += SvCUR(PL_linestr);
1814 PL_last_lop = PL_last_uni = NULL;
1815 SAVEFREESV(PL_linestr);
1816 PL_lex_dojoin = FALSE;
1817 PL_lex_brackets = 0;
1818 PL_lex_casemods = 0;
1819 *PL_lex_casestack = '\0';
1821 if (SvEVALED(PL_lex_repl)) {
1822 PL_lex_state = LEX_INTERPNORMAL;
1824 /* we don't clear PL_lex_repl here, so that we can check later
1825 whether this is an evalled subst; that means we rely on the
1826 logic to ensure sublex_done() is called again only via the
1827 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1830 PL_lex_state = LEX_INTERPCONCAT;
1840 PL_endwhite = newSVpvs("");
1841 sv_catsv(PL_endwhite, PL_thiswhite);
1845 sv_setpvs(PL_thistoken,"");
1847 PL_realtokenstart = -1;
1851 PL_bufend = SvPVX(PL_linestr);
1852 PL_bufend += SvCUR(PL_linestr);
1853 PL_expect = XOPERATOR;
1854 PL_sublex_info.sub_inwhat = 0;
1862 Extracts a pattern, double-quoted string, or transliteration. This
1865 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1866 processing a pattern (PL_lex_inpat is true), a transliteration
1867 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1869 Returns a pointer to the character scanned up to. If this is
1870 advanced from the start pointer supplied (i.e. if anything was
1871 successfully parsed), will leave an OP for the substring scanned
1872 in pl_yylval. Caller must intuit reason for not parsing further
1873 by looking at the next characters herself.
1877 double-quoted style: \r and \n
1878 regexp special ones: \D \s
1881 case and quoting: \U \Q \E
1882 stops on @ and $, but not for $ as tail anchor
1884 In transliterations:
1885 characters are VERY literal, except for - not at the start or end
1886 of the string, which indicates a range. If the range is in bytes,
1887 scan_const expands the range to the full set of intermediate
1888 characters. If the range is in utf8, the hyphen is replaced with
1889 a certain range mark which will be handled by pmtrans() in op.c.
1891 In double-quoted strings:
1893 double-quoted style: \r and \n
1895 deprecated backrefs: \1 (in substitution replacements)
1896 case and quoting: \U \Q \E
1899 scan_const does *not* construct ops to handle interpolated strings.
1900 It stops processing as soon as it finds an embedded $ or @ variable
1901 and leaves it to the caller to work out what's going on.
1903 embedded arrays (whether in pattern or not) could be:
1904 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1906 $ in double-quoted strings must be the symbol of an embedded scalar.
1908 $ in pattern could be $foo or could be tail anchor. Assumption:
1909 it's a tail anchor if $ is the last thing in the string, or if it's
1910 followed by one of "()| \r\n\t"
1912 \1 (backreferences) are turned into $1
1914 The structure of the code is
1915 while (there's a character to process) {
1916 handle transliteration ranges
1917 skip regexp comments /(?#comment)/ and codes /(?{code})/
1918 skip #-initiated comments in //x patterns
1919 check for embedded arrays
1920 check for embedded scalars
1922 leave intact backslashes from leaveit (below)
1923 deprecate \1 in substitution replacements
1924 handle string-changing backslashes \l \U \Q \E, etc.
1925 switch (what was escaped) {
1926 handle \- in a transliteration (becomes a literal -)
1927 handle \132 (octal characters)
1928 handle \x15 and \x{1234} (hex characters)
1929 handle \N{name} (named characters)
1930 handle \cV (control characters)
1931 handle printf-style backslashes (\f, \r, \n, etc)
1934 } (end if backslash)
1935 handle regular character
1936 } (end while character to read)
1941 S_scan_const(pTHX_ char *start)
1944 register char *send = PL_bufend; /* end of the constant */
1945 SV *sv = newSV(send - start); /* sv for the constant. See
1946 note below on sizing. */
1947 register char *s = start; /* start of the constant */
1948 register char *d = SvPVX(sv); /* destination for copies */
1949 bool dorange = FALSE; /* are we in a translit range? */
1950 bool didrange = FALSE; /* did we just finish a range? */
1951 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1952 I32 this_utf8 = UTF; /* Is the source string assumed
1953 to be UTF8? But, this can
1954 show as true when the source
1955 isn't utf8, as for example
1956 when it is entirely composed
1959 /* Note on sizing: The scanned constant is placed into sv, which is
1960 * initialized by newSV() assuming one byte of output for every byte of
1961 * input. This routine expects newSV() to allocate an extra byte for a
1962 * trailing NUL, which this routine will append if it gets to the end of
1963 * the input. There may be more bytes of input than output (eg., \N{LATIN
1964 * CAPITAL LETTER A}), or more output than input if the constant ends up
1965 * recoded to utf8, but each time a construct is found that might increase
1966 * the needed size, SvGROW() is called. Its size parameter each time is
1967 * based on the best guess estimate at the time, namely the length used so
1968 * far, plus the length the current construct will occupy, plus room for
1969 * the trailing NUL, plus one byte for every input byte still unscanned */
1973 UV literal_endpoint = 0;
1974 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1977 PERL_ARGS_ASSERT_SCAN_CONST;
1979 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1980 /* If we are doing a trans and we know we want UTF8 set expectation */
1981 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1982 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1986 while (s < send || dorange) {
1987 /* get transliterations out of the way (they're most literal) */
1988 if (PL_lex_inwhat == OP_TRANS) {
1989 /* expand a range A-Z to the full set of characters. AIE! */
1991 I32 i; /* current expanded character */
1992 I32 min; /* first character in range */
1993 I32 max; /* last character in range */
2004 char * const c = (char*)utf8_hop((U8*)d, -1);
2008 *c = (char)UTF_TO_NATIVE(0xff);
2009 /* mark the range as done, and continue */
2015 i = d - SvPVX_const(sv); /* remember current offset */
2018 SvLEN(sv) + (has_utf8 ?
2019 (512 - UTF_CONTINUATION_MARK +
2022 /* How many two-byte within 0..255: 128 in UTF-8,
2023 * 96 in UTF-8-mod. */
2025 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2027 d = SvPVX(sv) + i; /* refresh d after realloc */
2031 for (j = 0; j <= 1; j++) {
2032 char * const c = (char*)utf8_hop((U8*)d, -1);
2033 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2039 max = (U8)0xff; /* only to \xff */
2040 uvmax = uv; /* \x{100} to uvmax */
2042 d = c; /* eat endpoint chars */
2047 d -= 2; /* eat the first char and the - */
2048 min = (U8)*d; /* first char in range */
2049 max = (U8)d[1]; /* last char in range */
2056 "Invalid range \"%c-%c\" in transliteration operator",
2057 (char)min, (char)max);
2061 if (literal_endpoint == 2 &&
2062 ((isLOWER(min) && isLOWER(max)) ||
2063 (isUPPER(min) && isUPPER(max)))) {
2065 for (i = min; i <= max; i++)
2067 *d++ = NATIVE_TO_NEED(has_utf8,i);
2069 for (i = min; i <= max; i++)
2071 *d++ = NATIVE_TO_NEED(has_utf8,i);
2076 for (i = min; i <= max; i++)
2079 const U8 ch = (U8)NATIVE_TO_UTF(i);
2080 if (UNI_IS_INVARIANT(ch))
2083 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2084 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2093 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2095 *d++ = (char)UTF_TO_NATIVE(0xff);
2097 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2101 /* mark the range as done, and continue */
2105 literal_endpoint = 0;
2110 /* range begins (ignore - as first or last char) */
2111 else if (*s == '-' && s+1 < send && s != start) {
2113 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2120 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2130 literal_endpoint = 0;
2131 native_range = TRUE;
2136 /* if we get here, we're not doing a transliteration */
2138 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2139 except for the last char, which will be done separately. */
2140 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2142 while (s+1 < send && *s != ')')
2143 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2145 else if (s[2] == '{' /* This should match regcomp.c */
2146 || (s[2] == '?' && s[3] == '{'))
2149 char *regparse = s + (s[2] == '{' ? 3 : 4);
2152 while (count && (c = *regparse)) {
2153 if (c == '\\' && regparse[1])
2161 if (*regparse != ')')
2162 regparse--; /* Leave one char for continuation. */
2163 while (s < regparse)
2164 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2168 /* likewise skip #-initiated comments in //x patterns */
2169 else if (*s == '#' && PL_lex_inpat &&
2170 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2171 while (s+1 < send && *s != '\n')
2172 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2175 /* check for embedded arrays
2176 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2178 else if (*s == '@' && s[1]) {
2179 if (isALNUM_lazy_if(s+1,UTF))
2181 if (strchr(":'{$", s[1]))
2183 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2184 break; /* in regexp, neither @+ nor @- are interpolated */
2187 /* check for embedded scalars. only stop if we're sure it's a
2190 else if (*s == '$') {
2191 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2193 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2195 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2196 "Possible unintended interpolation of $\\ in regex");
2198 break; /* in regexp, $ might be tail anchor */
2202 /* End of else if chain - OP_TRANS rejoin rest */
2205 if (*s == '\\' && s+1 < send) {
2208 /* deprecate \1 in strings and substitution replacements */
2209 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2210 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2212 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2217 /* string-change backslash escapes */
2218 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2222 /* skip any other backslash escapes in a pattern */
2223 else if (PL_lex_inpat) {
2224 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2225 goto default_action;
2228 /* if we get here, it's either a quoted -, or a digit */
2231 /* quoted - in transliterations */
2233 if (PL_lex_inwhat == OP_TRANS) {
2240 if ((isALPHA(*s) || isDIGIT(*s)))
2241 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2242 "Unrecognized escape \\%c passed through",
2244 /* default action is to copy the quoted character */
2245 goto default_action;
2248 /* eg. \132 indicates the octal constant 0x132 */
2249 case '0': case '1': case '2': case '3':
2250 case '4': case '5': case '6': case '7':
2254 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2257 goto NUM_ESCAPE_INSERT;
2259 /* eg. \x24 indicates the hex constant 0x24 */
2263 char* const e = strchr(s, '}');
2264 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2265 PERL_SCAN_DISALLOW_PREFIX;
2270 yyerror("Missing right brace on \\x{}");
2274 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2280 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2281 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2287 /* Insert oct, hex, or \N{U+...} escaped character. There will
2288 * always be enough room in sv since such escapes will be
2289 * longer than any UTF-8 sequence they can end up as, except if
2290 * they force us to recode the rest of the string into utf8 */
2292 /* Here uv is the ordinal of the next character being added in
2293 * unicode (converted from native). (It has to be done before
2294 * here because \N is interpreted as unicode, and oct and hex
2296 if (!UNI_IS_INVARIANT(uv)) {
2297 if (!has_utf8 && uv > 255) {
2298 /* Might need to recode whatever we have accumulated so
2299 * far if it contains any chars variant in utf8 or
2302 SvCUR_set(sv, d - SvPVX_const(sv));
2305 /* See Note on sizing above. */
2306 sv_utf8_upgrade_flags_grow(sv,
2307 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2308 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2309 d = SvPVX(sv) + SvCUR(sv);
2314 d = (char*)uvuni_to_utf8((U8*)d, uv);
2315 if (PL_lex_inwhat == OP_TRANS &&
2316 PL_sublex_info.sub_op) {
2317 PL_sublex_info.sub_op->op_private |=
2318 (PL_lex_repl ? OPpTRANS_FROM_UTF
2322 if (uv > 255 && !dorange)
2323 native_range = FALSE;
2335 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2340 char* e = strchr(s, '}');
2346 yyerror("Missing right brace on \\N{}");
2350 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2351 /* \N{U+...} The ... is a unicode value even on EBCDIC
2353 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2354 PERL_SCAN_DISALLOW_PREFIX;
2357 uv = grok_hex(s, &len, &flags, NULL);
2358 if ( e > s && len != (STRLEN)(e - s) ) {
2362 goto NUM_ESCAPE_INSERT;
2364 res = newSVpvn(s + 1, e - s - 1);
2365 res = new_constant( NULL, 0, "charnames",
2366 res, NULL, s - 2, e - s + 3 );
2368 sv_utf8_upgrade(res);
2369 str = SvPV_const(res,len);
2370 #ifdef EBCDIC_NEVER_MIND
2371 /* charnames uses pack U and that has been
2372 * recently changed to do the below uni->native
2373 * mapping, so this would be redundant (and wrong,
2374 * the code point would be doubly converted).
2375 * But leave this in just in case the pack U change
2376 * gets revoked, but the semantics is still
2377 * desireable for charnames. --jhi */
2379 UV uv = utf8_to_uvchr((const U8*)str, 0);
2382 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2384 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2385 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2386 str = SvPV_const(res, len);
2390 /* If destination is not in utf8 but this new character is,
2391 * recode the dest to utf8 */
2392 if (!has_utf8 && SvUTF8(res)) {
2393 SvCUR_set(sv, d - SvPVX_const(sv));
2396 /* See Note on sizing above. */
2397 sv_utf8_upgrade_flags_grow(sv,
2398 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2399 len + (STRLEN)(send - s) + 1);
2400 d = SvPVX(sv) + SvCUR(sv);
2402 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2404 /* See Note on sizing above. (NOTE: SvCUR() is not set
2405 * correctly here). */
2406 const STRLEN off = d - SvPVX_const(sv);
2407 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2411 native_range = FALSE; /* \N{} is guessed to be Unicode */
2413 Copy(str, d, len, char);
2420 yyerror("Missing braces on \\N{}");
2423 /* \c is a control character */
2432 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2435 yyerror("Missing control char name in \\c");
2439 /* printf-style backslashes, formfeeds, newlines, etc */
2441 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2444 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2447 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2450 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2453 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2456 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2459 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2465 } /* end if (backslash) */
2472 /* If we started with encoded form, or already know we want it,
2473 then encode the next character */
2474 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2478 /* One might think that it is wasted effort in the case of the
2479 * source being utf8 (this_utf8 == TRUE) to take the next character
2480 * in the source, convert it to an unsigned value, and then convert
2481 * it back again. But the source has not been validated here. The
2482 * routine that does the conversion checks for errors like
2485 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2486 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2488 SvCUR_set(sv, d - SvPVX_const(sv));
2491 /* See Note on sizing above. */
2492 sv_utf8_upgrade_flags_grow(sv,
2493 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2494 need + (STRLEN)(send - s) + 1);
2495 d = SvPVX(sv) + SvCUR(sv);
2497 } else if (need > len) {
2498 /* encoded value larger than old, may need extra space (NOTE:
2499 * SvCUR() is not set correctly here). See Note on sizing
2501 const STRLEN off = d - SvPVX_const(sv);
2502 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2506 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2508 if (uv > 255 && !dorange)
2509 native_range = FALSE;
2513 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2515 } /* while loop to process each character */
2517 /* terminate the string and set up the sv */
2519 SvCUR_set(sv, d - SvPVX_const(sv));
2520 if (SvCUR(sv) >= SvLEN(sv))
2521 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2524 if (PL_encoding && !has_utf8) {
2525 sv_recode_to_utf8(sv, PL_encoding);
2531 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2532 PL_sublex_info.sub_op->op_private |=
2533 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2537 /* shrink the sv if we allocated more than we used */
2538 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2539 SvPV_shrink_to_cur(sv);
2542 /* return the substring (via pl_yylval) only if we parsed anything */
2543 if (s > PL_bufptr) {
2544 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2545 const char *const key = PL_lex_inpat ? "qr" : "q";
2546 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2550 if (PL_lex_inwhat == OP_TRANS) {
2553 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2561 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2564 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2571 * Returns TRUE if there's more to the expression (e.g., a subscript),
2574 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2576 * ->[ and ->{ return TRUE
2577 * { and [ outside a pattern are always subscripts, so return TRUE
2578 * if we're outside a pattern and it's not { or [, then return FALSE
2579 * if we're in a pattern and the first char is a {
2580 * {4,5} (any digits around the comma) returns FALSE
2581 * if we're in a pattern and the first char is a [
2583 * [SOMETHING] has a funky algorithm to decide whether it's a
2584 * character class or not. It has to deal with things like
2585 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2586 * anything else returns TRUE
2589 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2592 S_intuit_more(pTHX_ register char *s)
2596 PERL_ARGS_ASSERT_INTUIT_MORE;
2598 if (PL_lex_brackets)
2600 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2602 if (*s != '{' && *s != '[')
2607 /* In a pattern, so maybe we have {n,m}. */
2624 /* On the other hand, maybe we have a character class */
2627 if (*s == ']' || *s == '^')
2630 /* this is terrifying, and it works */
2631 int weight = 2; /* let's weigh the evidence */
2633 unsigned char un_char = 255, last_un_char;
2634 const char * const send = strchr(s,']');
2635 char tmpbuf[sizeof PL_tokenbuf * 4];
2637 if (!send) /* has to be an expression */
2640 Zero(seen,256,char);
2643 else if (isDIGIT(*s)) {
2645 if (isDIGIT(s[1]) && s[2] == ']')
2651 for (; s < send; s++) {
2652 last_un_char = un_char;
2653 un_char = (unsigned char)*s;
2658 weight -= seen[un_char] * 10;
2659 if (isALNUM_lazy_if(s+1,UTF)) {
2661 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2662 len = (int)strlen(tmpbuf);
2663 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2668 else if (*s == '$' && s[1] &&
2669 strchr("[#!%*<>()-=",s[1])) {
2670 if (/*{*/ strchr("])} =",s[2]))
2679 if (strchr("wds]",s[1]))
2681 else if (seen[(U8)'\''] || seen[(U8)'"'])
2683 else if (strchr("rnftbxcav",s[1]))
2685 else if (isDIGIT(s[1])) {
2687 while (s[1] && isDIGIT(s[1]))
2697 if (strchr("aA01! ",last_un_char))
2699 if (strchr("zZ79~",s[1]))
2701 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2702 weight -= 5; /* cope with negative subscript */
2705 if (!isALNUM(last_un_char)
2706 && !(last_un_char == '$' || last_un_char == '@'
2707 || last_un_char == '&')
2708 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2713 if (keyword(tmpbuf, d - tmpbuf, 0))
2716 if (un_char == last_un_char + 1)
2718 weight -= seen[un_char];
2723 if (weight >= 0) /* probably a character class */
2733 * Does all the checking to disambiguate
2735 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2736 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2738 * First argument is the stuff after the first token, e.g. "bar".
2740 * Not a method if bar is a filehandle.
2741 * Not a method if foo is a subroutine prototyped to take a filehandle.
2742 * Not a method if it's really "Foo $bar"
2743 * Method if it's "foo $bar"
2744 * Not a method if it's really "print foo $bar"
2745 * Method if it's really "foo package::" (interpreted as package->foo)
2746 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2747 * Not a method if bar is a filehandle or package, but is quoted with
2752 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2755 char *s = start + (*start == '$');
2756 char tmpbuf[sizeof PL_tokenbuf];
2763 PERL_ARGS_ASSERT_INTUIT_METHOD;
2766 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2770 const char *proto = SvPVX_const(cv);
2781 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2782 /* start is the beginning of the possible filehandle/object,
2783 * and s is the end of it
2784 * tmpbuf is a copy of it
2787 if (*start == '$') {
2788 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2789 isUPPER(*PL_tokenbuf))
2792 len = start - SvPVX(PL_linestr);
2796 start = SvPVX(PL_linestr) + len;
2800 return *s == '(' ? FUNCMETH : METHOD;
2802 if (!keyword(tmpbuf, len, 0)) {
2803 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2807 soff = s - SvPVX(PL_linestr);
2811 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2812 if (indirgv && GvCVu(indirgv))
2814 /* filehandle or package name makes it a method */
2815 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2817 soff = s - SvPVX(PL_linestr);
2820 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2821 return 0; /* no assumptions -- "=>" quotes bearword */
2823 start_force(PL_curforce);
2824 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2825 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
2826 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2828 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2833 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2835 return *s == '(' ? FUNCMETH : METHOD;
2841 /* Encoded script support. filter_add() effectively inserts a
2842 * 'pre-processing' function into the current source input stream.
2843 * Note that the filter function only applies to the current source file
2844 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2846 * The datasv parameter (which may be NULL) can be used to pass
2847 * private data to this instance of the filter. The filter function
2848 * can recover the SV using the FILTER_DATA macro and use it to
2849 * store private buffers and state information.
2851 * The supplied datasv parameter is upgraded to a PVIO type
2852 * and the IoDIRP/IoANY field is used to store the function pointer,
2853 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2854 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2855 * private use must be set using malloc'd pointers.
2859 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2868 if (!PL_rsfp_filters)
2869 PL_rsfp_filters = newAV();
2872 SvUPGRADE(datasv, SVt_PVIO);
2873 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2874 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2875 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2876 FPTR2DPTR(void *, IoANY(datasv)),
2877 SvPV_nolen(datasv)));
2878 av_unshift(PL_rsfp_filters, 1);
2879 av_store(PL_rsfp_filters, 0, datasv) ;
2884 /* Delete most recently added instance of this filter function. */
2886 Perl_filter_del(pTHX_ filter_t funcp)
2891 PERL_ARGS_ASSERT_FILTER_DEL;
2894 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2895 FPTR2DPTR(void*, funcp)));
2897 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2899 /* if filter is on top of stack (usual case) just pop it off */
2900 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2901 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2902 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2903 IoANY(datasv) = (void *)NULL;
2904 sv_free(av_pop(PL_rsfp_filters));
2908 /* we need to search for the correct entry and clear it */
2909 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2913 /* Invoke the idxth filter function for the current rsfp. */
2914 /* maxlen 0 = read one text line */
2916 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2921 /* This API is bad. It should have been using unsigned int for maxlen.
2922 Not sure if we want to change the API, but if not we should sanity
2923 check the value here. */
2924 const unsigned int correct_length
2933 PERL_ARGS_ASSERT_FILTER_READ;
2935 if (!PL_parser || !PL_rsfp_filters)
2937 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2938 /* Provide a default input filter to make life easy. */
2939 /* Note that we append to the line. This is handy. */
2940 DEBUG_P(PerlIO_printf(Perl_debug_log,
2941 "filter_read %d: from rsfp\n", idx));
2942 if (correct_length) {
2945 const int old_len = SvCUR(buf_sv);
2947 /* ensure buf_sv is large enough */
2948 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2949 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2950 correct_length)) <= 0) {
2951 if (PerlIO_error(PL_rsfp))
2952 return -1; /* error */
2954 return 0 ; /* end of file */
2956 SvCUR_set(buf_sv, old_len + len) ;
2959 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2960 if (PerlIO_error(PL_rsfp))
2961 return -1; /* error */
2963 return 0 ; /* end of file */
2966 return SvCUR(buf_sv);
2968 /* Skip this filter slot if filter has been deleted */
2969 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2970 DEBUG_P(PerlIO_printf(Perl_debug_log,
2971 "filter_read %d: skipped (filter deleted)\n",
2973 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2975 /* Get function pointer hidden within datasv */
2976 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2977 DEBUG_P(PerlIO_printf(Perl_debug_log,
2978 "filter_read %d: via function %p (%s)\n",
2979 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2980 /* Call function. The function is expected to */
2981 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2982 /* Return: <0:error, =0:eof, >0:not eof */
2983 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2987 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2991 PERL_ARGS_ASSERT_FILTER_GETS;
2993 #ifdef PERL_CR_FILTER
2994 if (!PL_rsfp_filters) {
2995 filter_add(S_cr_textfilter,NULL);
2998 if (PL_rsfp_filters) {
3000 SvCUR_set(sv, 0); /* start with empty line */
3001 if (FILTER_READ(0, sv, 0) > 0)
3002 return ( SvPVX(sv) ) ;
3007 return (sv_gets(sv, fp, append));
3011 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3016 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3018 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3022 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3023 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3025 return GvHV(gv); /* Foo:: */
3028 /* use constant CLASS => 'MyClass' */
3029 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3030 if (gv && GvCV(gv)) {
3031 SV * const sv = cv_const_sv(GvCV(gv));
3033 pkgname = SvPV_const(sv, len);
3036 return gv_stashpvn(pkgname, len, 0);
3040 * S_readpipe_override
3041 * Check whether readpipe() is overriden, and generates the appropriate
3042 * optree, provided sublex_start() is called afterwards.
3045 S_readpipe_override(pTHX)
3048 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3049 pl_yylval.ival = OP_BACKTICK;
3051 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3053 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3054 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3055 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3057 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3058 append_elem(OP_LIST,
3059 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3060 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3067 * The intent of this yylex wrapper is to minimize the changes to the
3068 * tokener when we aren't interested in collecting madprops. It remains
3069 * to be seen how successful this strategy will be...
3076 char *s = PL_bufptr;
3078 /* make sure PL_thiswhite is initialized */
3082 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3083 if (PL_pending_ident)
3084 return S_pending_ident(aTHX);
3086 /* previous token ate up our whitespace? */
3087 if (!PL_lasttoke && PL_nextwhite) {
3088 PL_thiswhite = PL_nextwhite;
3092 /* isolate the token, and figure out where it is without whitespace */
3093 PL_realtokenstart = -1;
3097 assert(PL_curforce < 0);
3099 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3100 if (!PL_thistoken) {
3101 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3102 PL_thistoken = newSVpvs("");
3104 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3105 PL_thistoken = newSVpvn(tstart, s - tstart);
3108 if (PL_thismad) /* install head */
3109 CURMAD('X', PL_thistoken);
3112 /* last whitespace of a sublex? */
3113 if (optype == ')' && PL_endwhite) {
3114 CURMAD('X', PL_endwhite);
3119 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3120 if (!PL_thiswhite && !PL_endwhite && !optype) {
3121 sv_free(PL_thistoken);
3126 /* put off final whitespace till peg */
3127 if (optype == ';' && !PL_rsfp) {
3128 PL_nextwhite = PL_thiswhite;
3131 else if (PL_thisopen) {
3132 CURMAD('q', PL_thisopen);
3134 sv_free(PL_thistoken);
3138 /* Store actual token text as madprop X */
3139 CURMAD('X', PL_thistoken);
3143 /* add preceding whitespace as madprop _ */
3144 CURMAD('_', PL_thiswhite);
3148 /* add quoted material as madprop = */
3149 CURMAD('=', PL_thisstuff);
3153 /* add terminating quote as madprop Q */
3154 CURMAD('Q', PL_thisclose);
3158 /* special processing based on optype */
3162 /* opval doesn't need a TOKEN since it can already store mp */
3172 if (pl_yylval.opval)
3173 append_madprops(PL_thismad, pl_yylval.opval, 0);
3181 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3190 /* remember any fake bracket that lexer is about to discard */
3191 if (PL_lex_brackets == 1 &&
3192 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3195 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3198 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3199 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3202 break; /* don't bother looking for trailing comment */
3211 /* attach a trailing comment to its statement instead of next token */
3215 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3217 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3219 if (*s == '\n' || *s == '#') {
3220 while (s < PL_bufend && *s != '\n')
3224 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3225 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3242 /* Create new token struct. Note: opvals return early above. */
3243 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3250 S_tokenize_use(pTHX_ int is_use, char *s) {
3253 PERL_ARGS_ASSERT_TOKENIZE_USE;
3255 if (PL_expect != XSTATE)
3256 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3257 is_use ? "use" : "no"));
3259 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3260 s = force_version(s, TRUE);
3261 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3262 start_force(PL_curforce);
3263 NEXTVAL_NEXTTOKE.opval = NULL;
3266 else if (*s == 'v') {
3267 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3268 s = force_version(s, FALSE);
3272 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3273 s = force_version(s, FALSE);
3275 pl_yylval.ival = is_use;
3279 static const char* const exp_name[] =
3280 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3281 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3288 Works out what to call the token just pulled out of the input
3289 stream. The yacc parser takes care of taking the ops we return and
3290 stitching them into a tree.
3296 if read an identifier
3297 if we're in a my declaration
3298 croak if they tried to say my($foo::bar)
3299 build the ops for a my() declaration
3300 if it's an access to a my() variable
3301 are we in a sort block?
3302 croak if my($a); $a <=> $b
3303 build ops for access to a my() variable
3304 if in a dq string, and they've said @foo and we can't find @foo
3306 build ops for a bareword
3307 if we already built the token before, use it.
3312 #pragma segment Perl_yylex
3318 register char *s = PL_bufptr;
3323 /* orig_keyword, gvp, and gv are initialized here because
3324 * jump to the label just_a_word_zero can bypass their
3325 * initialization later. */
3326 I32 orig_keyword = 0;
3331 SV* tmp = newSVpvs("");
3332 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3333 (IV)CopLINE(PL_curcop),
3334 lex_state_names[PL_lex_state],
3335 exp_name[PL_expect],
3336 pv_display(tmp, s, strlen(s), 0, 60));
3339 /* check if there's an identifier for us to look at */
3340 if (PL_pending_ident)
3341 return REPORT(S_pending_ident(aTHX));
3343 /* no identifier pending identification */
3345 switch (PL_lex_state) {
3347 case LEX_NORMAL: /* Some compilers will produce faster */
3348 case LEX_INTERPNORMAL: /* code if we comment these out. */
3352 /* when we've already built the next token, just pull it out of the queue */
3356 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3358 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3359 PL_nexttoke[PL_lasttoke].next_mad = 0;
3360 if (PL_thismad && PL_thismad->mad_key == '_') {
3361 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3362 PL_thismad->mad_val = 0;
3363 mad_free(PL_thismad);
3368 PL_lex_state = PL_lex_defer;
3369 PL_expect = PL_lex_expect;
3370 PL_lex_defer = LEX_NORMAL;
3371 if (!PL_nexttoke[PL_lasttoke].next_type)
3376 pl_yylval = PL_nextval[PL_nexttoke];
3378 PL_lex_state = PL_lex_defer;
3379 PL_expect = PL_lex_expect;
3380 PL_lex_defer = LEX_NORMAL;
3384 /* FIXME - can these be merged? */
3385 return(PL_nexttoke[PL_lasttoke].next_type);
3387 return REPORT(PL_nexttype[PL_nexttoke]);
3390 /* interpolated case modifiers like \L \U, including \Q and \E.
3391 when we get here, PL_bufptr is at the \
3393 case LEX_INTERPCASEMOD:
3395 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3396 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3398 /* handle \E or end of string */
3399 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3401 if (PL_lex_casemods) {
3402 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3403 PL_lex_casestack[PL_lex_casemods] = '\0';
3405 if (PL_bufptr != PL_bufend
3406 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3408 PL_lex_state = LEX_INTERPCONCAT;
3411 PL_thistoken = newSVpvs("\\E");
3417 while (PL_bufptr != PL_bufend &&
3418 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3420 PL_thiswhite = newSVpvs("");
3421 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3425 if (PL_bufptr != PL_bufend)
3428 PL_lex_state = LEX_INTERPCONCAT;
3432 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3433 "### Saw case modifier\n"); });
3435 if (s[1] == '\\' && s[2] == 'E') {
3438 PL_thiswhite = newSVpvs("");
3439 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3442 PL_lex_state = LEX_INTERPCONCAT;
3447 if (!PL_madskills) /* when just compiling don't need correct */
3448 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3449 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3450 if ((*s == 'L' || *s == 'U') &&
3451 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3452 PL_lex_casestack[--PL_lex_casemods] = '\0';
3455 if (PL_lex_casemods > 10)
3456 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3457 PL_lex_casestack[PL_lex_casemods++] = *s;
3458 PL_lex_casestack[PL_lex_casemods] = '\0';
3459 PL_lex_state = LEX_INTERPCONCAT;
3460 start_force(PL_curforce);
3461 NEXTVAL_NEXTTOKE.ival = 0;
3463 start_force(PL_curforce);
3465 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3467 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3469 NEXTVAL_NEXTTOKE.ival = OP_LC;
3471 NEXTVAL_NEXTTOKE.ival = OP_UC;
3473 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3475 Perl_croak(aTHX_ "panic: yylex");
3477 SV* const tmpsv = newSVpvs("\\ ");
3478 /* replace the space with the character we want to escape
3480 SvPVX(tmpsv)[1] = *s;
3486 if (PL_lex_starts) {
3492 sv_free(PL_thistoken);
3493 PL_thistoken = newSVpvs("");
3496 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3497 if (PL_lex_casemods == 1 && PL_lex_inpat)
3506 case LEX_INTERPPUSH:
3507 return REPORT(sublex_push());
3509 case LEX_INTERPSTART:
3510 if (PL_bufptr == PL_bufend)
3511 return REPORT(sublex_done());
3512 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3513 "### Interpolated variable\n"); });
3515 PL_lex_dojoin = (*PL_bufptr == '@');
3516 PL_lex_state = LEX_INTERPNORMAL;
3517 if (PL_lex_dojoin) {
3518 start_force(PL_curforce);
3519 NEXTVAL_NEXTTOKE.ival = 0;
3521 start_force(PL_curforce);
3522 force_ident("\"", '$');
3523 start_force(PL_curforce);
3524 NEXTVAL_NEXTTOKE.ival = 0;
3526 start_force(PL_curforce);
3527 NEXTVAL_NEXTTOKE.ival = 0;
3529 start_force(PL_curforce);
3530 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3533 if (PL_lex_starts++) {
3538 sv_free(PL_thistoken);
3539 PL_thistoken = newSVpvs("");
3542 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3543 if (!PL_lex_casemods && PL_lex_inpat)
3550 case LEX_INTERPENDMAYBE:
3551 if (intuit_more(PL_bufptr)) {
3552 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3558 if (PL_lex_dojoin) {
3559 PL_lex_dojoin = FALSE;
3560 PL_lex_state = LEX_INTERPCONCAT;
3564 sv_free(PL_thistoken);
3565 PL_thistoken = newSVpvs("");
3570 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3571 && SvEVALED(PL_lex_repl))
3573 if (PL_bufptr != PL_bufend)
3574 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3578 case LEX_INTERPCONCAT:
3580 if (PL_lex_brackets)
3581 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3583 if (PL_bufptr == PL_bufend)
3584 return REPORT(sublex_done());
3586 if (SvIVX(PL_linestr) == '\'') {
3587 SV *sv = newSVsv(PL_linestr);
3590 else if ( PL_hints & HINT_NEW_RE )
3591 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3592 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3596 s = scan_const(PL_bufptr);
3598 PL_lex_state = LEX_INTERPCASEMOD;
3600 PL_lex_state = LEX_INTERPSTART;
3603 if (s != PL_bufptr) {
3604 start_force(PL_curforce);
3606 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3608 NEXTVAL_NEXTTOKE = pl_yylval;
3611 if (PL_lex_starts++) {
3615 sv_free(PL_thistoken);
3616 PL_thistoken = newSVpvs("");
3619 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3620 if (!PL_lex_casemods && PL_lex_inpat)
3633 PL_lex_state = LEX_NORMAL;
3634 s = scan_formline(PL_bufptr);
3635 if (!PL_lex_formbrack)
3641 PL_oldoldbufptr = PL_oldbufptr;
3647 sv_free(PL_thistoken);
3650 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3654 if (isIDFIRST_lazy_if(s,UTF))
3657 unsigned char c = *s;
3658 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3659 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
3660 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
3665 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
3669 goto fake_eof; /* emulate EOF on ^D or ^Z */
3678 if (PL_lex_brackets) {
3679 yyerror((const char *)
3681 ? "Format not terminated"
3682 : "Missing right curly or square bracket"));
3684 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3685 "### Tokener got EOF\n");
3689 if (s++ < PL_bufend)
3690 goto retry; /* ignore stray nulls */
3693 if (!PL_in_eval && !PL_preambled) {
3694 PL_preambled = TRUE;
3700 /* Generate a string of Perl code to load the debugger.
3701 * If PERL5DB is set, it will return the contents of that,
3702 * otherwise a compile-time require of perl5db.pl. */
3704 const char * const pdb = PerlEnv_getenv("PERL5DB");
3707 sv_setpv(PL_linestr, pdb);
3708 sv_catpvs(PL_linestr,";");
3710 SETERRNO(0,SS_NORMAL);
3711 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3714 sv_setpvs(PL_linestr,"");
3715 if (PL_preambleav) {
3716 SV **svp = AvARRAY(PL_preambleav);
3717 SV **const end = svp + AvFILLp(PL_preambleav);
3719 sv_catsv(PL_linestr, *svp);
3721 sv_catpvs(PL_linestr, ";");
3723 sv_free(MUTABLE_SV(PL_preambleav));
3724 PL_preambleav = NULL;
3727 sv_catpvs(PL_linestr,
3728 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3729 if (PL_minus_n || PL_minus_p) {
3730 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3732 sv_catpvs(PL_linestr,"chomp;");
3735 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3736 || *PL_splitstr == '"')
3737 && strchr(PL_splitstr + 1, *PL_splitstr))
3738 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3740 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3741 bytes can be used as quoting characters. :-) */
3742 const char *splits = PL_splitstr;
3743 sv_catpvs(PL_linestr, "our @F=split(q\0");
3746 if (*splits == '\\')
3747 sv_catpvn(PL_linestr, splits, 1);
3748 sv_catpvn(PL_linestr, splits, 1);
3749 } while (*splits++);
3750 /* This loop will embed the trailing NUL of
3751 PL_linestr as the last thing it does before
3753 sv_catpvs(PL_linestr, ");");
3757 sv_catpvs(PL_linestr,"our @F=split(' ');");
3760 sv_catpvs(PL_linestr, "\n");
3761 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3762 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3763 PL_last_lop = PL_last_uni = NULL;
3764 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3765 update_debugger_info(PL_linestr, NULL, 0);
3769 bof = PL_rsfp ? TRUE : FALSE;
3770 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3773 PL_realtokenstart = -1;
3776 if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3777 PerlIO_clearerr(PL_rsfp);
3779 (void)PerlIO_close(PL_rsfp);
3781 PL_doextract = FALSE;
3783 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3789 sv_setpvs(PL_linestr, ";}continue{print;}");
3791 sv_setpvs(PL_linestr, ";}");
3792 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3793 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3794 PL_last_lop = PL_last_uni = NULL;
3795 PL_minus_n = PL_minus_p = 0;
3798 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3799 PL_last_lop = PL_last_uni = NULL;
3800 sv_setpvs(PL_linestr,"");
3801 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3803 /* If it looks like the start of a BOM or raw UTF-16,
3804 * check if it in fact is. */
3810 #ifdef PERLIO_IS_STDIO
3811 # ifdef __GNU_LIBRARY__
3812 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3813 # define FTELL_FOR_PIPE_IS_BROKEN
3817 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3818 # define FTELL_FOR_PIPE_IS_BROKEN
3823 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3826 s = swallow_bom((U8*)s);
3830 /* Incest with pod. */
3833 sv_catsv(PL_thiswhite, PL_linestr);
3835 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3836 sv_setpvs(PL_linestr, "");
3837 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3838 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3839 PL_last_lop = PL_last_uni = NULL;
3840 PL_doextract = FALSE;
3844 } while (PL_doextract);
3845 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3846 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3847 update_debugger_info(PL_linestr, NULL, 0);
3848 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3849 PL_last_lop = PL_last_uni = NULL;
3850 if (CopLINE(PL_curcop) == 1) {
3851 while (s < PL_bufend && isSPACE(*s))
3853 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3857 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3861 if (*s == '#' && *(s+1) == '!')
3863 #ifdef ALTERNATE_SHEBANG
3865 static char const as[] = ALTERNATE_SHEBANG;
3866 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3867 d = s + (sizeof(as) - 1);
3869 #endif /* ALTERNATE_SHEBANG */
3878 while (*d && !isSPACE(*d))
3882 #ifdef ARG_ZERO_IS_SCRIPT
3883 if (ipathend > ipath) {
3885 * HP-UX (at least) sets argv[0] to the script name,
3886 * which makes $^X incorrect. And Digital UNIX and Linux,
3887 * at least, set argv[0] to the basename of the Perl
3888 * interpreter. So, having found "#!", we'll set it right.
3890 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3892 assert(SvPOK(x) || SvGMAGICAL(x));
3893 if (sv_eq(x, CopFILESV(PL_curcop))) {
3894 sv_setpvn(x, ipath, ipathend - ipath);
3900 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3901 const char * const lstart = SvPV_const(x,llen);
3903 bstart += blen - llen;
3904 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3905 sv_setpvn(x, ipath, ipathend - ipath);
3910 TAINT_NOT; /* $^X is always tainted, but that's OK */
3912 #endif /* ARG_ZERO_IS_SCRIPT */
3917 d = instr(s,"perl -");
3919 d = instr(s,"perl");
3921 /* avoid getting into infinite loops when shebang
3922 * line contains "Perl" rather than "perl" */
3924 for (d = ipathend-4; d >= ipath; --d) {
3925 if ((*d == 'p' || *d == 'P')
3926 && !ibcmp(d, "perl", 4))
3936 #ifdef ALTERNATE_SHEBANG
3938 * If the ALTERNATE_SHEBANG on this system starts with a
3939 * character that can be part of a Perl expression, then if
3940 * we see it but not "perl", we're probably looking at the
3941 * start of Perl code, not a request to hand off to some
3942 * other interpreter. Similarly, if "perl" is there, but
3943 * not in the first 'word' of the line, we assume the line
3944 * contains the start of the Perl program.
3946 if (d && *s != '#') {
3947 const char *c = ipath;
3948 while (*c && !strchr("; \t\r\n\f\v#", *c))
3951 d = NULL; /* "perl" not in first word; ignore */
3953 *s = '#'; /* Don't try to parse shebang line */
3955 #endif /* ALTERNATE_SHEBANG */
3960 !instr(s,"indir") &&
3961 instr(PL_origargv[0],"perl"))
3968 while (s < PL_bufend && isSPACE(*s))
3970 if (s < PL_bufend) {
3971 Newx(newargv,PL_origargc+3,char*);
3973 while (s < PL_bufend && !isSPACE(*s))
3976 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3979 newargv = PL_origargv;
3982 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3984 Perl_croak(aTHX_ "Can't exec %s", ipath);
3987 while (*d && !isSPACE(*d))
3989 while (SPACE_OR_TAB(*d))
3993 const bool switches_done = PL_doswitches;
3994 const U32 oldpdb = PL_perldb;
3995 const bool oldn = PL_minus_n;
3996 const bool oldp = PL_minus_p;
4000 bool baduni = FALSE;
4002 const char *d2 = d1 + 1;
4003 if (parse_unicode_opts((const char **)&d2)
4007 if (baduni || *d1 == 'M' || *d1 == 'm') {
4008 const char * const m = d1;
4009 while (*d1 && !isSPACE(*d1))
4011 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4014 d1 = moreswitches(d1);
4016 if (PL_doswitches && !switches_done) {
4017 int argc = PL_origargc;
4018 char **argv = PL_origargv;
4021 } while (argc && argv[0][0] == '-' && argv[0][1]);
4022 init_argv_symbols(argc,argv);
4024 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4025 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4026 /* if we have already added "LINE: while (<>) {",
4027 we must not do it again */
4029 sv_setpvs(PL_linestr, "");
4030 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4031 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4032 PL_last_lop = PL_last_uni = NULL;
4033 PL_preambled = FALSE;
4034 if (PERLDB_LINE || PERLDB_SAVESRC)
4035 (void)gv_fetchfile(PL_origfilename);
4042 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4044 PL_lex_state = LEX_FORMLINE;
4049 #ifdef PERL_STRICT_CR
4050 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4052 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4054 case ' ': case '\t': case '\f': case 013:
4056 PL_realtokenstart = -1;
4058 PL_thiswhite = newSVpvs("");
4059 sv_catpvn(PL_thiswhite, s, 1);
4066 PL_realtokenstart = -1;
4070 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4071 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4072 /* handle eval qq[#line 1 "foo"\n ...] */
4073 CopLINE_dec(PL_curcop);
4076 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4078 if (!PL_in_eval || PL_rsfp)
4083 while (d < PL_bufend && *d != '\n')
4087 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4088 Perl_croak(aTHX_ "panic: input overflow");
4091 PL_thiswhite = newSVpvn(s, d - s);
4096 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4098 PL_lex_state = LEX_FORMLINE;
4104 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4105 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4108 TOKEN(PEG); /* make sure any #! line is accessible */
4113 /* if (PL_madskills && PL_lex_formbrack) { */
4115 while (d < PL_bufend && *d != '\n')
4119 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4120 Perl_croak(aTHX_ "panic: input overflow");
4121 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4123 PL_thiswhite = newSVpvs("");
4124 if (CopLINE(PL_curcop) == 1) {
4125 sv_setpvs(PL_thiswhite, "");
4128 sv_catpvn(PL_thiswhite, s, d - s);
4142 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4150 while (s < PL_bufend && SPACE_OR_TAB(*s))
4153 if (strnEQ(s,"=>",2)) {
4154 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4155 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4156 OPERATOR('-'); /* unary minus */
4158 PL_last_uni = PL_oldbufptr;
4160 case 'r': ftst = OP_FTEREAD; break;
4161 case 'w': ftst = OP_FTEWRITE; break;
4162 case 'x': ftst = OP_FTEEXEC; break;
4163 case 'o': ftst = OP_FTEOWNED; break;
4164 case 'R': ftst = OP_FTRREAD; break;
4165 case 'W': ftst = OP_FTRWRITE; break;
4166 case 'X': ftst = OP_FTREXEC; break;
4167 case 'O': ftst = OP_FTROWNED; break;
4168 case 'e': ftst = OP_FTIS; break;
4169 case 'z': ftst = OP_FTZERO; break;
4170 case 's': ftst = OP_FTSIZE; break;
4171 case 'f': ftst = OP_FTFILE; break;
4172 case 'd': ftst = OP_FTDIR; break;
4173 case 'l': ftst = OP_FTLINK; break;
4174 case 'p': ftst = OP_FTPIPE; break;
4175 case 'S': ftst = OP_FTSOCK; break;
4176 case 'u': ftst = OP_FTSUID; break;
4177 case 'g': ftst = OP_FTSGID; break;
4178 case 'k': ftst = OP_FTSVTX; break;
4179 case 'b': ftst = OP_FTBLK; break;
4180 case 'c': ftst = OP_FTCHR; break;
4181 case 't': ftst = OP_FTTTY; break;
4182 case 'T': ftst = OP_FTTEXT; break;
4183 case 'B': ftst = OP_FTBINARY; break;
4184 case 'M': case 'A': case 'C':
4185 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4187 case 'M': ftst = OP_FTMTIME; break;
4188 case 'A': ftst = OP_FTATIME; break;
4189 case 'C': ftst = OP_FTCTIME; break;
4197 PL_last_lop_op = (OPCODE)ftst;
4198 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4199 "### Saw file test %c\n", (int)tmp);
4204 /* Assume it was a minus followed by a one-letter named
4205 * subroutine call (or a -bareword), then. */
4206 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4207 "### '-%c' looked like a file test but was not\n",
4214 const char tmp = *s++;
4217 if (PL_expect == XOPERATOR)
4222 else if (*s == '>') {
4225 if (isIDFIRST_lazy_if(s,UTF)) {
4226 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4234 if (PL_expect == XOPERATOR)
4237 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4239 OPERATOR('-'); /* unary minus */
4245 const char tmp = *s++;
4248 if (PL_expect == XOPERATOR)
4253 if (PL_expect == XOPERATOR)
4256 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4263 if (PL_expect != XOPERATOR) {
4264 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4265 PL_expect = XOPERATOR;
4266 force_ident(PL_tokenbuf, '*');
4279 if (PL_expect == XOPERATOR) {
4283 PL_tokenbuf[0] = '%';
4284 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4285 sizeof PL_tokenbuf - 1, FALSE);
4286 if (!PL_tokenbuf[1]) {
4289 PL_pending_ident = '%';
4298 const char tmp = *s++;
4303 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4310 const char tmp = *s++;
4316 goto just_a_word_zero_gv;
4319 switch (PL_expect) {
4325 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4327 PL_bufptr = s; /* update in case we back off */
4333 PL_expect = XTERMBLOCK;
4336 stuffstart = s - SvPVX(PL_linestr) - 1;
4340 while (isIDFIRST_lazy_if(s,UTF)) {
4343 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4344 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4345 if (tmp < 0) tmp = -tmp;
4360 sv = newSVpvn(s, len);
4362 d = scan_str(d,TRUE,TRUE);
4364 /* MUST advance bufptr here to avoid bogus
4365 "at end of line" context messages from yyerror().
4367 PL_bufptr = s + len;
4368 yyerror("Unterminated attribute parameter in attribute list");
4372 return REPORT(0); /* EOF indicator */
4376 sv_catsv(sv, PL_lex_stuff);
4377 attrs = append_elem(OP_LIST, attrs,
4378 newSVOP(OP_CONST, 0, sv));
4379 SvREFCNT_dec(PL_lex_stuff);
4380 PL_lex_stuff = NULL;
4383 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4385 if (PL_in_my == KEY_our) {
4386 deprecate(":unique");
4389 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4392 /* NOTE: any CV attrs applied here need to be part of
4393 the CVf_BUILTIN_ATTRS define in cv.h! */
4394 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4396 CvLVALUE_on(PL_compcv);
4398 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4400 deprecate(":locked");
4402 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4404 CvMETHOD_on(PL_compcv);
4406 /* After we've set the flags, it could be argued that
4407 we don't need to do the attributes.pm-based setting
4408 process, and shouldn't bother appending recognized
4409 flags. To experiment with that, uncomment the
4410 following "else". (Note that's already been
4411 uncommented. That keeps the above-applied built-in
4412 attributes from being intercepted (and possibly
4413 rejected) by a package's attribute routines, but is
4414 justified by the performance win for the common case
4415 of applying only built-in attributes.) */
4417 attrs = append_elem(OP_LIST, attrs,
4418 newSVOP(OP_CONST, 0,
4422 if (*s == ':' && s[1] != ':')
4425 break; /* require real whitespace or :'s */
4426 /* XXX losing whitespace on sequential attributes here */
4430 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4431 if (*s != ';' && *s != '}' && *s != tmp
4432 && (tmp != '=' || *s != ')')) {
4433 const char q = ((*s == '\'') ? '"' : '\'');
4434 /* If here for an expression, and parsed no attrs, back
4436 if (tmp == '=' && !attrs) {
4440 /* MUST advance bufptr here to avoid bogus "at end of line"
4441 context messages from yyerror().
4444 yyerror( (const char *)
4446 ? Perl_form(aTHX_ "Invalid separator character "
4447 "%c%c%c in attribute list", q, *s, q)
4448 : "Unterminated attribute list" ) );
4456 start_force(PL_curforce);
4457 NEXTVAL_NEXTTOKE.opval = attrs;
4458 CURMAD('_', PL_nextwhite);
4463 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4464 (s - SvPVX(PL_linestr)) - stuffstart);
4472 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4473 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4481 const char tmp = *s++;
4486 const char tmp = *s++;
4494 if (PL_lex_brackets <= 0)
4495 yyerror("Unmatched right square bracket");
4498 if (PL_lex_state == LEX_INTERPNORMAL) {
4499 if (PL_lex_brackets == 0) {
4500 if (*s == '-' && s[1] == '>')
4501 PL_lex_state = LEX_INTERPENDMAYBE;
4502 else if (*s != '[' && *s != '{')
4503 PL_lex_state = LEX_INTERPEND;
4510 if (PL_lex_brackets > 100) {
4511 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4513 switch (PL_expect) {
4515 if (PL_lex_formbrack) {
4519 if (PL_oldoldbufptr == PL_last_lop)
4520 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4522 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4523 OPERATOR(HASHBRACK);
4525 while (s < PL_bufend && SPACE_OR_TAB(*s))
4528 PL_tokenbuf[0] = '\0';
4529 if (d < PL_bufend && *d == '-') {
4530 PL_tokenbuf[0] = '-';
4532 while (d < PL_bufend && SPACE_OR_TAB(*d))
4535 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4536 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4538 while (d < PL_bufend && SPACE_OR_TAB(*d))
4541 const char minus = (PL_tokenbuf[0] == '-');
4542 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4550 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4555 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4560 if (PL_oldoldbufptr == PL_last_lop)
4561 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4563 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4566 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4568 /* This hack is to get the ${} in the message. */
4570 yyerror("syntax error");
4573 OPERATOR(HASHBRACK);
4575 /* This hack serves to disambiguate a pair of curlies
4576 * as being a block or an anon hash. Normally, expectation
4577 * determines that, but in cases where we're not in a
4578 * position to expect anything in particular (like inside
4579 * eval"") we have to resolve the ambiguity. This code
4580 * covers the case where the first term in the curlies is a
4581 * quoted string. Most other cases need to be explicitly
4582 * disambiguated by prepending a "+" before the opening
4583 * curly in order to force resolution as an anon hash.
4585 * XXX should probably propagate the outer expectation
4586 * into eval"" to rely less on this hack, but that could
4587 * potentially break current behavior of eval"".
4591 if (*s == '\'' || *s == '"' || *s == '`') {
4592 /* common case: get past first string, handling escapes */
4593 for (t++; t < PL_bufend && *t != *s;)
4594 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4598 else if (*s == 'q') {
4601 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4604 /* skip q//-like construct */
4606 char open, close, term;
4609 while (t < PL_bufend && isSPACE(*t))
4611 /* check for q => */
4612 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4613 OPERATOR(HASHBRACK);
4617 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4621 for (t++; t < PL_bufend; t++) {
4622 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4624 else if (*t == open)
4628 for (t++; t < PL_bufend; t++) {
4629 if (*t == '\\' && t+1 < PL_bufend)
4631 else if (*t == close && --brackets <= 0)
4633 else if (*t == open)
4640 /* skip plain q word */
4641 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4644 else if (isALNUM_lazy_if(t,UTF)) {
4646 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4649 while (t < PL_bufend && isSPACE(*t))
4651 /* if comma follows first term, call it an anon hash */
4652 /* XXX it could be a comma expression with loop modifiers */
4653 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4654 || (*t == '=' && t[1] == '>')))
4655 OPERATOR(HASHBRACK);
4656 if (PL_expect == XREF)
4659 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4665 pl_yylval.ival = CopLINE(PL_curcop);
4666 if (isSPACE(*s) || *s == '#')
4667 PL_copline = NOLINE; /* invalidate current command line number */
4672 if (PL_lex_brackets <= 0)
4673 yyerror("Unmatched right curly bracket");
4675 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4676 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4677 PL_lex_formbrack = 0;
4678 if (PL_lex_state == LEX_INTERPNORMAL) {
4679 if (PL_lex_brackets == 0) {
4680 if (PL_expect & XFAKEBRACK) {
4681 PL_expect &= XENUMMASK;
4682 PL_lex_state = LEX_INTERPEND;
4687 PL_thiswhite = newSVpvs("");
4688 sv_catpvs(PL_thiswhite,"}");
4691 return yylex(); /* ignore fake brackets */
4693 if (*s == '-' && s[1] == '>')
4694 PL_lex_state = LEX_INTERPENDMAYBE;
4695 else if (*s != '[' && *s != '{')
4696 PL_lex_state = LEX_INTERPEND;
4699 if (PL_expect & XFAKEBRACK) {
4700 PL_expect &= XENUMMASK;
4702 return yylex(); /* ignore fake brackets */
4704 start_force(PL_curforce);
4706 curmad('X', newSVpvn(s-1,1));
4707 CURMAD('_', PL_thiswhite);
4712 PL_thistoken = newSVpvs("");
4720 if (PL_expect == XOPERATOR) {
4721 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4722 && isIDFIRST_lazy_if(s,UTF))
4724 CopLINE_dec(PL_curcop);
4725 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
4726 CopLINE_inc(PL_curcop);
4731 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4733 PL_expect = XOPERATOR;
4734 force_ident(PL_tokenbuf, '&');
4738 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4750 const char tmp = *s++;
4757 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4758 && strchr("+-*/%.^&|<",tmp))
4759 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4760 "Reversed %c= operator",(int)tmp);
4762 if (PL_expect == XSTATE && isALPHA(tmp) &&
4763 (s == PL_linestart+1 || s[-2] == '\n') )
4765 if (PL_in_eval && !PL_rsfp) {
4770 if (strnEQ(s,"=cut",4)) {
4786 PL_thiswhite = newSVpvs("");
4787 sv_catpvn(PL_thiswhite, PL_linestart,
4788 PL_bufend - PL_linestart);
4792 PL_doextract = TRUE;
4796 if (PL_lex_brackets < PL_lex_formbrack) {
4798 #ifdef PERL_STRICT_CR
4799 while (SPACE_OR_TAB(*t))
4801 while (SPACE_OR_TAB(*t) || *t == '\r')
4804 if (*t == '\n' || *t == '#') {
4815 const char tmp = *s++;
4817 /* was this !=~ where !~ was meant?
4818 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4820 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4821 const char *t = s+1;
4823 while (t < PL_bufend && isSPACE(*t))
4826 if (*t == '/' || *t == '?' ||
4827 ((*t == 'm' || *t == 's' || *t == 'y')
4828 && !isALNUM(t[1])) ||
4829 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4830 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4831 "!=~ should be !~");
4841 if (PL_expect != XOPERATOR) {
4842 if (s[1] != '<' && !strchr(s,'>'))
4845 s = scan_heredoc(s);
4847 s = scan_inputsymbol(s);
4848 TERM(sublex_start());
4854 SHop(OP_LEFT_SHIFT);
4868 const char tmp = *s++;
4870 SHop(OP_RIGHT_SHIFT);
4871 else if (tmp == '=')
4880 if (PL_expect == XOPERATOR) {
4881 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4883 deprecate_old(commaless_variable_list);
4884 return REPORT(','); /* grandfather non-comma-format format */
4888 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4889 PL_tokenbuf[0] = '@';
4890 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4891 sizeof PL_tokenbuf - 1, FALSE);
4892 if (PL_expect == XOPERATOR)
4893 no_op("Array length", s);
4894 if (!PL_tokenbuf[1])
4896 PL_expect = XOPERATOR;
4897 PL_pending_ident = '#';
4901 PL_tokenbuf[0] = '$';
4902 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4903 sizeof PL_tokenbuf - 1, FALSE);
4904 if (PL_expect == XOPERATOR)
4906 if (!PL_tokenbuf[1]) {
4908 yyerror("Final $ should be \\$ or $name");
4912 /* This kludge not intended to be bulletproof. */
4913 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4914 pl_yylval.opval = newSVOP(OP_CONST, 0,
4915 newSViv(CopARYBASE_get(&PL_compiling)));
4916 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4922 const char tmp = *s;
4923 if (PL_lex_state == LEX_NORMAL)
4926 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4927 && intuit_more(s)) {
4929 PL_tokenbuf[0] = '@';
4930 if (ckWARN(WARN_SYNTAX)) {
4933 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4936 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4937 while (t < PL_bufend && *t != ']')
4939 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4940 "Multidimensional syntax %.*s not supported",
4941 (int)((t - PL_bufptr) + 1), PL_bufptr);
4945 else if (*s == '{') {
4947 PL_tokenbuf[0] = '%';
4948 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4949 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4951 char tmpbuf[sizeof PL_tokenbuf];
4954 } while (isSPACE(*t));
4955 if (isIDFIRST_lazy_if(t,UTF)) {
4957 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4961 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4962 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4963 "You need to quote \"%s\"",
4970 PL_expect = XOPERATOR;
4971 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4972 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4973 if (!islop || PL_last_lop_op == OP_GREPSTART)
4974 PL_expect = XOPERATOR;
4975 else if (strchr("$@\"'`q", *s))
4976 PL_expect = XTERM; /* e.g. print $fh "foo" */
4977 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4978 PL_expect = XTERM; /* e.g. print $fh &sub */
4979 else if (isIDFIRST_lazy_if(s,UTF)) {
4980 char tmpbuf[sizeof PL_tokenbuf];
4982 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4983 if ((t2 = keyword(tmpbuf, len, 0))) {
4984 /* binary operators exclude handle interpretations */
4996 PL_expect = XTERM; /* e.g. print $fh length() */
5001 PL_expect = XTERM; /* e.g. print $fh subr() */
5004 else if (isDIGIT(*s))
5005 PL_expect = XTERM; /* e.g. print $fh 3 */
5006 else if (*s == '.' && isDIGIT(s[1]))
5007 PL_expect = XTERM; /* e.g. print $fh .3 */
5008 else if ((*s == '?' || *s == '-' || *s == '+')
5009 && !isSPACE(s[1]) && s[1] != '=')
5010 PL_expect = XTERM; /* e.g. print $fh -1 */
5011 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5013 PL_expect = XTERM; /* e.g. print $fh /.../
5014 XXX except DORDOR operator
5016 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5018 PL_expect = XTERM; /* print $fh <<"EOF" */
5021 PL_pending_ident = '$';
5025 if (PL_expect == XOPERATOR)
5027 PL_tokenbuf[0] = '@';
5028 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5029 if (!PL_tokenbuf[1]) {
5032 if (PL_lex_state == LEX_NORMAL)
5034 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5036 PL_tokenbuf[0] = '%';
5038 /* Warn about @ where they meant $. */
5039 if (*s == '[' || *s == '{') {
5040 if (ckWARN(WARN_SYNTAX)) {
5041 const char *t = s + 1;
5042 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5044 if (*t == '}' || *t == ']') {
5046 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5047 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5048 "Scalar value %.*s better written as $%.*s",
5049 (int)(t-PL_bufptr), PL_bufptr,
5050 (int)(t-PL_bufptr-1), PL_bufptr+1);
5055 PL_pending_ident = '@';
5058 case '/': /* may be division, defined-or, or pattern */
5059 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5063 case '?': /* may either be conditional or pattern */
5064 if (PL_expect == XOPERATOR) {
5072 /* A // operator. */
5082 /* Disable warning on "study /blah/" */
5083 if (PL_oldoldbufptr == PL_last_uni
5084 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5085 || memNE(PL_last_uni, "study", 5)
5086 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5089 s = scan_pat(s,OP_MATCH);
5090 TERM(sublex_start());
5094 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5095 #ifdef PERL_STRICT_CR
5098 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5100 && (s == PL_linestart || s[-1] == '\n') )
5102 PL_lex_formbrack = 0;
5106 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5110 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5116 pl_yylval.ival = OPf_SPECIAL;
5122 if (PL_expect != XOPERATOR)
5127 case '0': case '1': case '2': case '3': case '4':
5128 case '5': case '6': case '7': case '8': case '9':
5129 s = scan_num(s, &pl_yylval);
5130 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5131 if (PL_expect == XOPERATOR)
5136 s = scan_str(s,!!PL_madskills,FALSE);
5137 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5138 if (PL_expect == XOPERATOR) {
5139 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5141 deprecate_old(commaless_variable_list);
5142 return REPORT(','); /* grandfather non-comma-format format */
5149 pl_yylval.ival = OP_CONST;
5150 TERM(sublex_start());
5153 s = scan_str(s,!!PL_madskills,FALSE);
5154 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5155 if (PL_expect == XOPERATOR) {
5156 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5158 deprecate_old(commaless_variable_list);
5159 return REPORT(','); /* grandfather non-comma-format format */
5166 pl_yylval.ival = OP_CONST;
5167 /* FIXME. I think that this can be const if char *d is replaced by
5168 more localised variables. */
5169 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5170 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5171 pl_yylval.ival = OP_STRINGIFY;
5175 TERM(sublex_start());
5178 s = scan_str(s,!!PL_madskills,FALSE);
5179 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5180 if (PL_expect == XOPERATOR)
5181 no_op("Backticks",s);
5184 readpipe_override();
5185 TERM(sublex_start());
5189 if (PL_lex_inwhat && isDIGIT(*s))
5190 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5192 if (PL_expect == XOPERATOR)
5193 no_op("Backslash",s);
5197 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5198 char *start = s + 2;
5199 while (isDIGIT(*start) || *start == '_')
5201 if (*start == '.' && isDIGIT(start[1])) {
5202 s = scan_num(s, &pl_yylval);
5205 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5206 else if (!isALPHA(*start) && (PL_expect == XTERM
5207 || PL_expect == XREF || PL_expect == XSTATE
5208 || PL_expect == XTERMORDORDOR)) {
5209 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5211 s = scan_num(s, &pl_yylval);
5218 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5260 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5262 /* Some keywords can be followed by any delimiter, including ':' */
5263 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5264 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5265 (PL_tokenbuf[0] == 'q' &&
5266 strchr("qwxr", PL_tokenbuf[1])))));
5268 /* x::* is just a word, unless x is "CORE" */
5269 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5273 while (d < PL_bufend && isSPACE(*d))
5274 d++; /* no comments skipped here, or s### is misparsed */
5276 /* Is this a label? */
5277 if (!tmp && PL_expect == XSTATE
5278 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5279 tmp = keyword(PL_tokenbuf, len, 0);
5281 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5283 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5288 /* Check for keywords */
5289 tmp = keyword(PL_tokenbuf, len, 0);
5291 /* Is this a word before a => operator? */
5292 if (*d == '=' && d[1] == '>') {
5295 = (OP*)newSVOP(OP_CONST, 0,
5296 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5297 pl_yylval.opval->op_private = OPpCONST_BARE;
5301 if (tmp < 0) { /* second-class keyword? */
5302 GV *ogv = NULL; /* override (winner) */
5303 GV *hgv = NULL; /* hidden (loser) */
5304 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5306 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5309 if (GvIMPORTED_CV(gv))
5311 else if (! CvMETHOD(cv))
5315 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5316 (gv = *gvp) && isGV_with_GP(gv) &&
5317 GvCVu(gv) && GvIMPORTED_CV(gv))
5324 tmp = 0; /* overridden by import or by GLOBAL */
5327 && -tmp==KEY_lock /* XXX generalizable kludge */
5330 tmp = 0; /* any sub overrides "weak" keyword */
5332 else { /* no override */
5334 if (tmp == KEY_dump) {
5335 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5336 "dump() better written as CORE::dump()");
5340 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5341 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5342 "Ambiguous call resolved as CORE::%s(), %s",
5343 GvENAME(hgv), "qualify as such or use &");
5350 default: /* not a keyword */
5351 /* Trade off - by using this evil construction we can pull the
5352 variable gv into the block labelled keylookup. If not, then
5353 we have to give it function scope so that the goto from the
5354 earlier ':' case doesn't bypass the initialisation. */
5356 just_a_word_zero_gv:
5364 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5367 SV *nextPL_nextwhite = 0;
5371 /* Get the rest if it looks like a package qualifier */
5373 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5375 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5378 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5379 *s == '\'' ? "'" : "::");
5384 if (PL_expect == XOPERATOR) {
5385 if (PL_bufptr == PL_linestart) {
5386 CopLINE_dec(PL_curcop);
5387 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5388 CopLINE_inc(PL_curcop);
5391 no_op("Bareword",s);
5394 /* Look for a subroutine with this name in current package,
5395 unless name is "Foo::", in which case Foo is a bearword
5396 (and a package name). */
5398 if (len > 2 && !PL_madskills &&
5399 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5401 if (ckWARN(WARN_BAREWORD)
5402 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5403 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5404 "Bareword \"%s\" refers to nonexistent package",
5407 PL_tokenbuf[len] = '\0';
5413 /* Mustn't actually add anything to a symbol table.
5414 But also don't want to "initialise" any placeholder
5415 constants that might already be there into full
5416 blown PVGVs with attached PVCV. */
5417 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5418 GV_NOADD_NOINIT, SVt_PVCV);
5423 /* if we saw a global override before, get the right name */
5426 sv = newSVpvs("CORE::GLOBAL::");
5427 sv_catpv(sv,PL_tokenbuf);
5430 /* If len is 0, newSVpv does strlen(), which is correct.
5431 If len is non-zero, then it will be the true length,
5432 and so the scalar will be created correctly. */
5433 sv = newSVpv(PL_tokenbuf,len);
5436 if (PL_madskills && !PL_thistoken) {
5437 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5438 PL_thistoken = newSVpvn(start,s - start);
5439 PL_realtokenstart = s - SvPVX(PL_linestr);
5443 /* Presume this is going to be a bareword of some sort. */
5446 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5447 pl_yylval.opval->op_private = OPpCONST_BARE;
5448 /* UTF-8 package name? */
5449 if (UTF && !IN_BYTES &&
5450 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5453 /* And if "Foo::", then that's what it certainly is. */
5458 /* Do the explicit type check so that we don't need to force
5459 the initialisation of the symbol table to have a real GV.
5460 Beware - gv may not really be a PVGV, cv may not really be
5461 a PVCV, (because of the space optimisations that gv_init
5462 understands) But they're true if for this symbol there is
5463 respectively a typeglob and a subroutine.
5465 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5466 /* Real typeglob, so get the real subroutine: */
5468 /* A proxy for a subroutine in this package? */
5469 : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5472 /* See if it's the indirect object for a list operator. */
5474 if (PL_oldoldbufptr &&
5475 PL_oldoldbufptr < PL_bufptr &&
5476 (PL_oldoldbufptr == PL_last_lop
5477 || PL_oldoldbufptr == PL_last_uni) &&
5478 /* NO SKIPSPACE BEFORE HERE! */
5479 (PL_expect == XREF ||
5480 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5482 bool immediate_paren = *s == '(';
5484 /* (Now we can afford to cross potential line boundary.) */
5485 s = SKIPSPACE2(s,nextPL_nextwhite);
5487 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5490 /* Two barewords in a row may indicate method call. */
5492 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5493 (tmp = intuit_method(s, gv, cv)))
5496 /* If not a declared subroutine, it's an indirect object. */
5497 /* (But it's an indir obj regardless for sort.) */
5498 /* Also, if "_" follows a filetest operator, it's a bareword */
5501 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5503 (PL_last_lop_op != OP_MAPSTART &&
5504 PL_last_lop_op != OP_GREPSTART))))
5505 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5506 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5509 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5514 PL_expect = XOPERATOR;
5517 s = SKIPSPACE2(s,nextPL_nextwhite);
5518 PL_nextwhite = nextPL_nextwhite;
5523 /* Is this a word before a => operator? */
5524 if (*s == '=' && s[1] == '>' && !pkgname) {
5526 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5527 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5528 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5532 /* If followed by a paren, it's certainly a subroutine. */
5537 while (SPACE_OR_TAB(*d))
5539 if (*d == ')' && (sv = gv_const_sv(gv))) {
5546 PL_nextwhite = PL_thiswhite;
5549 start_force(PL_curforce);
5551 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5552 PL_expect = XOPERATOR;
5555 PL_nextwhite = nextPL_nextwhite;
5556 curmad('X', PL_thistoken);
5557 PL_thistoken = newSVpvs("");
5565 /* If followed by var or block, call it a method (unless sub) */
5567 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5568 PL_last_lop = PL_oldbufptr;
5569 PL_last_lop_op = OP_METHOD;
5573 /* If followed by a bareword, see if it looks like indir obj. */
5576 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5577 && (tmp = intuit_method(s, gv, cv)))
5580 /* Not a method, so call it a subroutine (if defined) */
5583 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5584 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5585 "Ambiguous use of -%s resolved as -&%s()",
5586 PL_tokenbuf, PL_tokenbuf);
5587 /* Check for a constant sub */
5588 if ((sv = gv_const_sv(gv))) {
5590 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5591 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5592 pl_yylval.opval->op_private = 0;
5596 /* Resolve to GV now. */
5597 if (SvTYPE(gv) != SVt_PVGV) {
5598 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5599 assert (SvTYPE(gv) == SVt_PVGV);
5600 /* cv must have been some sort of placeholder, so
5601 now needs replacing with a real code reference. */
5605 op_free(pl_yylval.opval);
5606 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5607 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5608 PL_last_lop = PL_oldbufptr;
5609 PL_last_lop_op = OP_ENTERSUB;
5610 /* Is there a prototype? */
5618 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5621 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5623 while (*proto == ';')
5625 if (*proto == '&' && *s == '{') {
5627 sv_setpvs(PL_subname, "__ANON__");
5629 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5636 PL_nextwhite = PL_thiswhite;
5639 start_force(PL_curforce);
5640 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5643 PL_nextwhite = nextPL_nextwhite;
5644 curmad('X', PL_thistoken);
5645 PL_thistoken = newSVpvs("");
5652 /* Guess harder when madskills require "best effort". */
5653 if (PL_madskills && (!gv || !GvCVu(gv))) {
5654 int probable_sub = 0;
5655 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5657 else if (isALPHA(*s)) {
5661 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5662 if (!keyword(tmpbuf, tmplen, 0))
5665 while (d < PL_bufend && isSPACE(*d))
5667 if (*d == '=' && d[1] == '>')
5672 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5673 op_free(pl_yylval.opval);
5674 pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5675 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5676 PL_last_lop = PL_oldbufptr;
5677 PL_last_lop_op = OP_ENTERSUB;
5678 PL_nextwhite = PL_thiswhite;
5680 start_force(PL_curforce);
5681 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5683 PL_nextwhite = nextPL_nextwhite;
5684 curmad('X', PL_thistoken);
5685 PL_thistoken = newSVpvs("");
5690 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5697 /* Call it a bare word */
5699 if (PL_hints & HINT_STRICT_SUBS)
5700 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5703 /* after "print" and similar functions (corresponding to
5704 * "F? L" in opcode.pl), whatever wasn't already parsed as
5705 * a filehandle should be subject to "strict subs".
5706 * Likewise for the optional indirect-object argument to system
5707 * or exec, which can't be a bareword */
5708 if ((PL_last_lop_op == OP_PRINT
5709 || PL_last_lop_op == OP_PRTF
5710 || PL_last_lop_op == OP_SAY
5711 || PL_last_lop_op == OP_SYSTEM
5712 || PL_last_lop_op == OP_EXEC)
5713 && (PL_hints & HINT_STRICT_SUBS))
5714 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5715 if (lastchar != '-') {
5716 if (ckWARN(WARN_RESERVED)) {
5720 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5721 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5728 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5729 && ckWARN_d(WARN_AMBIGUOUS)) {
5730 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5731 "Operator or semicolon missing before %c%s",
5732 lastchar, PL_tokenbuf);
5733 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5734 "Ambiguous use of %c resolved as operator %c",
5735 lastchar, lastchar);
5741 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5742 newSVpv(CopFILE(PL_curcop),0));
5746 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5747 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5750 case KEY___PACKAGE__:
5751 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5753 ? newSVhek(HvNAME_HEK(PL_curstash))
5760 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5761 const char *pname = "main";
5762 if (PL_tokenbuf[2] == 'D')
5763 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5764 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5768 GvIOp(gv) = newIO();
5769 IoIFP(GvIOp(gv)) = PL_rsfp;
5770 #if defined(HAS_FCNTL) && defined(F_SETFD)
5772 const int fd = PerlIO_fileno(PL_rsfp);
5773 fcntl(fd,F_SETFD,fd >= 3);
5776 /* Mark this internal pseudo-handle as clean */
5777 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5778 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5779 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5781 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5782 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5783 /* if the script was opened in binmode, we need to revert
5784 * it to text mode for compatibility; but only iff it has CRs
5785 * XXX this is a questionable hack at best. */
5786 if (PL_bufend-PL_bufptr > 2
5787 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5790 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5791 loc = PerlIO_tell(PL_rsfp);
5792 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5795 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5797 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5798 #endif /* NETWARE */
5799 #ifdef PERLIO_IS_STDIO /* really? */
5800 # if defined(__BORLANDC__)
5801 /* XXX see note in do_binmode() */
5802 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5806 PerlIO_seek(PL_rsfp, loc, 0);
5810 #ifdef PERLIO_LAYERS
5813 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5814 else if (PL_encoding) {
5821 XPUSHs(PL_encoding);
5823 call_method("name", G_SCALAR);
5827 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5828 Perl_form(aTHX_ ":encoding(%"SVf")",
5837 if (PL_realtokenstart >= 0) {
5838 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5840 PL_endwhite = newSVpvs("");
5841 sv_catsv(PL_endwhite, PL_thiswhite);
5843 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5844 PL_realtokenstart = -1;
5846 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5847 SvCUR(PL_endwhite))) != NULL) ;
5862 if (PL_expect == XSTATE) {
5869 if (*s == ':' && s[1] == ':') {
5872 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5873 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5874 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5877 else if (tmp == KEY_require || tmp == KEY_do)
5878 /* that's a way to remember we saw "CORE::" */
5891 LOP(OP_ACCEPT,XTERM);
5897 LOP(OP_ATAN2,XTERM);
5903 LOP(OP_BINMODE,XTERM);
5906 LOP(OP_BLESS,XTERM);
5915 /* When 'use switch' is in effect, continue has a dual
5916 life as a control operator. */
5918 if (!FEATURE_IS_ENABLED("switch"))
5921 /* We have to disambiguate the two senses of
5922 "continue". If the next token is a '{' then
5923 treat it as the start of a continue block;
5924 otherwise treat it as a control operator.
5936 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5953 if (!PL_cryptseen) {
5954 PL_cryptseen = TRUE;
5958 LOP(OP_CRYPT,XTERM);
5961 LOP(OP_CHMOD,XTERM);
5964 LOP(OP_CHOWN,XTERM);
5967 LOP(OP_CONNECT,XTERM);
5986 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5987 if (orig_keyword == KEY_do) {
5996 PL_hints |= HINT_BLOCK_SCOPE;
6006 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6007 LOP(OP_DBMOPEN,XTERM);
6013 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6020 pl_yylval.ival = CopLINE(PL_curcop);
6036 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6037 UNIBRACK(OP_ENTEREVAL);
6051 case KEY_endhostent:
6057 case KEY_endservent:
6060 case KEY_endprotoent:
6071 pl_yylval.ival = CopLINE(PL_curcop);
6073 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6076 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6079 if ((PL_bufend - p) >= 3 &&
6080 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6082 else if ((PL_bufend - p) >= 4 &&
6083 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6086 if (isIDFIRST_lazy_if(p,UTF)) {
6087 p = scan_ident(p, PL_bufend,
6088 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6092 Perl_croak(aTHX_ "Missing $ on loop variable");
6094 s = SvPVX(PL_linestr) + soff;
6100 LOP(OP_FORMLINE,XTERM);
6106 LOP(OP_FCNTL,XTERM);
6112 LOP(OP_FLOCK,XTERM);
6121 LOP(OP_GREPSTART, XREF);
6124 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6139 case KEY_getpriority:
6140 LOP(OP_GETPRIORITY,XTERM);
6142 case KEY_getprotobyname:
6145 case KEY_getprotobynumber:
6146 LOP(OP_GPBYNUMBER,XTERM);
6148 case KEY_getprotoent:
6160 case KEY_getpeername:
6161 UNI(OP_GETPEERNAME);
6163 case KEY_gethostbyname:
6166 case KEY_gethostbyaddr:
6167 LOP(OP_GHBYADDR,XTERM);
6169 case KEY_gethostent:
6172 case KEY_getnetbyname:
6175 case KEY_getnetbyaddr:
6176 LOP(OP_GNBYADDR,XTERM);
6181 case KEY_getservbyname:
6182 LOP(OP_GSBYNAME,XTERM);
6184 case KEY_getservbyport:
6185 LOP(OP_GSBYPORT,XTERM);
6187 case KEY_getservent:
6190 case KEY_getsockname:
6191 UNI(OP_GETSOCKNAME);
6193 case KEY_getsockopt:
6194 LOP(OP_GSOCKOPT,XTERM);
6209 pl_yylval.ival = CopLINE(PL_curcop);
6219 pl_yylval.ival = CopLINE(PL_curcop);
6223 LOP(OP_INDEX,XTERM);
6229 LOP(OP_IOCTL,XTERM);
6241 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6273 LOP(OP_LISTEN,XTERM);
6282 s = scan_pat(s,OP_MATCH);
6283 TERM(sublex_start());
6286 LOP(OP_MAPSTART, XREF);
6289 LOP(OP_MKDIR,XTERM);
6292 LOP(OP_MSGCTL,XTERM);
6295 LOP(OP_MSGGET,XTERM);
6298 LOP(OP_MSGRCV,XTERM);
6301 LOP(OP_MSGSND,XTERM);
6306 PL_in_my = (U16)tmp;
6308 if (isIDFIRST_lazy_if(s,UTF)) {
6312 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6313 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6315 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6316 if (!PL_in_my_stash) {
6319 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6323 if (PL_madskills) { /* just add type to declarator token */
6324 sv_catsv(PL_thistoken, PL_nextwhite);
6326 sv_catpvn(PL_thistoken, start, s - start);
6334 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6341 s = tokenize_use(0, s);
6345 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6352 if (isIDFIRST_lazy_if(s,UTF)) {
6354 for (d = s; isALNUM_lazy_if(d,UTF);)
6356 for (t=d; isSPACE(*t);)
6358 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6360 && !(t[0] == '=' && t[1] == '>')
6362 int parms_len = (int)(d-s);
6363 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6364 "Precedence problem: open %.*s should be open(%.*s)",
6365 parms_len, s, parms_len, s);
6371 pl_yylval.ival = OP_OR;
6381 LOP(OP_OPEN_DIR,XTERM);
6384 checkcomma(s,PL_tokenbuf,"filehandle");
6388 checkcomma(s,PL_tokenbuf,"filehandle");
6407 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6408 s = force_version(s, FALSE);
6412 LOP(OP_PIPE_OP,XTERM);
6415 s = scan_str(s,!!PL_madskills,FALSE);
6418 pl_yylval.ival = OP_CONST;
6419 TERM(sublex_start());
6425 s = scan_str(s,!!PL_madskills,FALSE);
6428 PL_expect = XOPERATOR;
6430 if (SvCUR(PL_lex_stuff)) {
6433 d = SvPV_force(PL_lex_stuff, len);
6435 for (; isSPACE(*d) && len; --len, ++d)
6440 if (!warned && ckWARN(WARN_QW)) {
6441 for (; !isSPACE(*d) && len; --len, ++d) {
6443 Perl_warner(aTHX_ packWARN(WARN_QW),
6444 "Possible attempt to separate words with commas");
6447 else if (*d == '#') {
6448 Perl_warner(aTHX_ packWARN(WARN_QW),
6449 "Possible attempt to put comments in qw() list");
6455 for (; !isSPACE(*d) && len; --len, ++d)
6458 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6459 words = append_elem(OP_LIST, words,
6460 newSVOP(OP_CONST, 0, tokeq(sv)));
6464 start_force(PL_curforce);
6465 NEXTVAL_NEXTTOKE.opval = words;
6470 SvREFCNT_dec(PL_lex_stuff);
6471 PL_lex_stuff = NULL;
6477 s = scan_str(s,!!PL_madskills,FALSE);
6480 pl_yylval.ival = OP_STRINGIFY;
6481 if (SvIVX(PL_lex_stuff) == '\'')
6482 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6483 TERM(sublex_start());
6486 s = scan_pat(s,OP_QR);
6487 TERM(sublex_start());
6490 s = scan_str(s,!!PL_madskills,FALSE);
6493 readpipe_override();
6494 TERM(sublex_start());
6502 s = force_version(s, FALSE);
6504 else if (*s != 'v' || !isDIGIT(s[1])
6505 || (s = force_version(s, TRUE), *s == 'v'))
6507 *PL_tokenbuf = '\0';
6508 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6509 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6510 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6512 yyerror("<> should be quotes");
6514 if (orig_keyword == KEY_require) {
6522 PL_last_uni = PL_oldbufptr;
6523 PL_last_lop_op = OP_REQUIRE;
6525 return REPORT( (int)REQUIRE );
6531 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6535 LOP(OP_RENAME,XTERM);
6544 LOP(OP_RINDEX,XTERM);
6553 UNIDOR(OP_READLINE);
6556 UNIDOR(OP_BACKTICK);
6565 LOP(OP_REVERSE,XTERM);
6568 UNIDOR(OP_READLINK);
6575 if (pl_yylval.opval)
6576 TERM(sublex_start());
6578 TOKEN(1); /* force error */
6581 checkcomma(s,PL_tokenbuf,"filehandle");
6591 LOP(OP_SELECT,XTERM);
6597 LOP(OP_SEMCTL,XTERM);
6600 LOP(OP_SEMGET,XTERM);
6603 LOP(OP_SEMOP,XTERM);
6609 LOP(OP_SETPGRP,XTERM);
6611 case KEY_setpriority:
6612 LOP(OP_SETPRIORITY,XTERM);
6614 case KEY_sethostent:
6620 case KEY_setservent:
6623 case KEY_setprotoent:
6633 LOP(OP_SEEKDIR,XTERM);
6635 case KEY_setsockopt:
6636 LOP(OP_SSOCKOPT,XTERM);
6642 LOP(OP_SHMCTL,XTERM);
6645 LOP(OP_SHMGET,XTERM);
6648 LOP(OP_SHMREAD,XTERM);
6651 LOP(OP_SHMWRITE,XTERM);
6654 LOP(OP_SHUTDOWN,XTERM);
6663 LOP(OP_SOCKET,XTERM);
6665 case KEY_socketpair:
6666 LOP(OP_SOCKPAIR,XTERM);
6669 checkcomma(s,PL_tokenbuf,"subroutine name");
6671 if (*s == ';' || *s == ')') /* probably a close */
6672 Perl_croak(aTHX_ "sort is now a reserved word");
6674 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6678 LOP(OP_SPLIT,XTERM);
6681 LOP(OP_SPRINTF,XTERM);
6684 LOP(OP_SPLICE,XTERM);
6699 LOP(OP_SUBSTR,XTERM);
6705 char tmpbuf[sizeof PL_tokenbuf];
6706 SSize_t tboffset = 0;
6707 expectation attrful;
6708 bool have_name, have_proto;
6709 const int key = tmp;
6714 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6715 SV *subtoken = newSVpvn(tstart, s - tstart);
6719 s = SKIPSPACE2(s,tmpwhite);
6724 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6725 (*s == ':' && s[1] == ':'))
6728 SV *nametoke = NULL;
6732 attrful = XATTRBLOCK;
6733 /* remember buffer pos'n for later force_word */
6734 tboffset = s - PL_oldbufptr;
6735 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6738 nametoke = newSVpvn(s, d - s);
6740 if (memchr(tmpbuf, ':', len))
6741 sv_setpvn(PL_subname, tmpbuf, len);
6743 sv_setsv(PL_subname,PL_curstname);
6744 sv_catpvs(PL_subname,"::");
6745 sv_catpvn(PL_subname,tmpbuf,len);
6752 CURMAD('X', nametoke);
6753 CURMAD('_', tmpwhite);
6754 (void) force_word(PL_oldbufptr + tboffset, WORD,
6757 s = SKIPSPACE2(d,tmpwhite);
6764 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6765 PL_expect = XTERMBLOCK;
6766 attrful = XATTRTERM;
6767 sv_setpvs(PL_subname,"?");
6771 if (key == KEY_format) {
6773 PL_lex_formbrack = PL_lex_brackets + 1;
6775 PL_thistoken = subtoken;
6779 (void) force_word(PL_oldbufptr + tboffset, WORD,
6785 /* Look for a prototype */
6788 bool bad_proto = FALSE;
6789 bool in_brackets = FALSE;
6790 char greedy_proto = ' ';
6791 bool proto_after_greedy_proto = FALSE;
6792 bool must_be_last = FALSE;
6793 bool underscore = FALSE;
6794 bool seen_underscore = FALSE;
6795 const bool warnsyntax = ckWARN(WARN_SYNTAX);
6797 s = scan_str(s,!!PL_madskills,FALSE);
6799 Perl_croak(aTHX_ "Prototype not terminated");
6800 /* strip spaces and check for bad characters */
6801 d = SvPVX(PL_lex_stuff);
6803 for (p = d; *p; ++p) {
6809 proto_after_greedy_proto = TRUE;
6810 if (!strchr("$@%*;[]&\\_", *p)) {
6822 else if ( *p == ']' ) {
6823 in_brackets = FALSE;
6825 else if ( (*p == '@' || *p == '%') &&
6826 ( tmp < 2 || d[tmp-2] != '\\' ) &&
6828 must_be_last = TRUE;
6831 else if ( *p == '_' ) {
6832 underscore = seen_underscore = TRUE;
6839 if (proto_after_greedy_proto)
6840 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6841 "Prototype after '%c' for %"SVf" : %s",
6842 greedy_proto, SVfARG(PL_subname), d);
6844 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6845 "Illegal character %sin prototype for %"SVf" : %s",
6846 seen_underscore ? "after '_' " : "",
6847 SVfARG(PL_subname), d);
6848 SvCUR_set(PL_lex_stuff, tmp);
6853 CURMAD('q', PL_thisopen);
6854 CURMAD('_', tmpwhite);
6855 CURMAD('=', PL_thisstuff);
6856 CURMAD('Q', PL_thisclose);
6857 NEXTVAL_NEXTTOKE.opval =
6858 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6859 PL_lex_stuff = NULL;
6862 s = SKIPSPACE2(s,tmpwhite);
6870 if (*s == ':' && s[1] != ':')
6871 PL_expect = attrful;
6872 else if (*s != '{' && key == KEY_sub) {
6874 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6876 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6883 curmad('^', newSVpvs(""));
6884 CURMAD('_', tmpwhite);
6888 PL_thistoken = subtoken;
6891 NEXTVAL_NEXTTOKE.opval =
6892 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6893 PL_lex_stuff = NULL;
6899 sv_setpvs(PL_subname, "__ANON__");
6901 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6905 (void) force_word(PL_oldbufptr + tboffset, WORD,
6914 LOP(OP_SYSTEM,XREF);
6917 LOP(OP_SYMLINK,XTERM);
6920 LOP(OP_SYSCALL,XTERM);
6923 LOP(OP_SYSOPEN,XTERM);
6926 LOP(OP_SYSSEEK,XTERM);
6929 LOP(OP_SYSREAD,XTERM);
6932 LOP(OP_SYSWRITE,XTERM);
6936 TERM(sublex_start());
6957 LOP(OP_TRUNCATE,XTERM);
6969 pl_yylval.ival = CopLINE(PL_curcop);
6973 pl_yylval.ival = CopLINE(PL_curcop);
6977 LOP(OP_UNLINK,XTERM);
6983 LOP(OP_UNPACK,XTERM);
6986 LOP(OP_UTIME,XTERM);
6992 LOP(OP_UNSHIFT,XTERM);
6995 s = tokenize_use(1, s);
7005 pl_yylval.ival = CopLINE(PL_curcop);
7009 pl_yylval.ival = CopLINE(PL_curcop);
7013 PL_hints |= HINT_BLOCK_SCOPE;
7020 LOP(OP_WAITPID,XTERM);
7029 ctl_l[0] = toCTRL('L');
7031 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7034 /* Make sure $^L is defined */
7035 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7040 if (PL_expect == XOPERATOR)
7046 pl_yylval.ival = OP_XOR;
7051 TERM(sublex_start());
7056 #pragma segment Main
7060 S_pending_ident(pTHX)
7065 /* pit holds the identifier we read and pending_ident is reset */
7066 char pit = PL_pending_ident;
7067 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7068 /* All routes through this function want to know if there is a colon. */
7069 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7070 PL_pending_ident = 0;
7072 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7073 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7074 "### Pending identifier '%s'\n", PL_tokenbuf); });
7076 /* if we're in a my(), we can't allow dynamics here.
7077 $foo'bar has already been turned into $foo::bar, so
7078 just check for colons.
7080 if it's a legal name, the OP is a PADANY.
7083 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7085 yyerror(Perl_form(aTHX_ "No package name allowed for "
7086 "variable %s in \"our\"",
7088 tmp = allocmy(PL_tokenbuf);
7092 yyerror(Perl_form(aTHX_ PL_no_myglob,
7093 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7095 pl_yylval.opval = newOP(OP_PADANY, 0);
7096 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
7102 build the ops for accesses to a my() variable.
7104 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7105 then used in a comparison. This catches most, but not
7106 all cases. For instance, it catches
7107 sort { my($a); $a <=> $b }
7109 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7110 (although why you'd do that is anyone's guess).
7115 tmp = pad_findmy(PL_tokenbuf);
7116 if (tmp != NOT_IN_PAD) {
7117 /* might be an "our" variable" */
7118 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7119 /* build ops for a bareword */
7120 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7121 HEK * const stashname = HvNAME_HEK(stash);
7122 SV * const sym = newSVhek(stashname);
7123 sv_catpvs(sym, "::");
7124 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7125 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7126 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7129 ? (GV_ADDMULTI | GV_ADDINEVAL)
7132 ((PL_tokenbuf[0] == '$') ? SVt_PV
7133 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7138 /* if it's a sort block and they're naming $a or $b */
7139 if (PL_last_lop_op == OP_SORT &&
7140 PL_tokenbuf[0] == '$' &&
7141 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7144 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7145 d < PL_bufend && *d != '\n';
7148 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7149 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7155 pl_yylval.opval = newOP(OP_PADANY, 0);
7156 pl_yylval.opval->op_targ = tmp;
7162 Whine if they've said @foo in a doublequoted string,
7163 and @foo isn't a variable we can find in the symbol
7166 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7167 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7169 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7170 /* DO NOT warn for @- and @+ */
7171 && !( PL_tokenbuf[2] == '\0' &&
7172 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7175 /* Downgraded from fatal to warning 20000522 mjd */
7176 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7177 "Possible unintended interpolation of %s in string",
7182 /* build ops for a bareword */
7183 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7185 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7187 PL_tokenbuf + 1, tokenbuf_len - 1,
7188 /* If the identifier refers to a stash, don't autovivify it.
7189 * Change 24660 had the side effect of causing symbol table
7190 * hashes to always be defined, even if they were freshly
7191 * created and the only reference in the entire program was
7192 * the single statement with the defined %foo::bar:: test.
7193 * It appears that all code in the wild doing this actually
7194 * wants to know whether sub-packages have been loaded, so
7195 * by avoiding auto-vivifying symbol tables, we ensure that
7196 * defined %foo::bar:: continues to be false, and the existing
7197 * tests still give the expected answers, even though what
7198 * they're actually testing has now changed subtly.
7200 (*PL_tokenbuf == '%'
7201 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7204 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7205 ((PL_tokenbuf[0] == '$') ? SVt_PV
7206 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7212 * The following code was generated by perl_keyword.pl.
7216 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7220 PERL_ARGS_ASSERT_KEYWORD;
7224 case 1: /* 5 tokens of length 1 */
7256 case 2: /* 18 tokens of length 2 */
7402 case 3: /* 29 tokens of length 3 */
7406 if (name[1] == 'N' &&
7469 if (name[1] == 'i' &&
7501 if (name[1] == 'o' &&
7510 if (name[1] == 'e' &&
7519 if (name[1] == 'n' &&
7528 if (name[1] == 'o' &&
7537 if (name[1] == 'a' &&
7546 if (name[1] == 'o' &&
7608 if (name[1] == 'e' &&
7622 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7648 if (name[1] == 'i' &&
7657 if (name[1] == 's' &&
7666 if (name[1] == 'e' &&
7675 if (name[1] == 'o' &&
7687 case 4: /* 41 tokens of length 4 */
7691 if (name[1] == 'O' &&
7701 if (name[1] == 'N' &&
7711 if (name[1] == 'i' &&
7721 if (name[1] == 'h' &&
7731 if (name[1] == 'u' &&
7744 if (name[2] == 'c' &&
7753 if (name[2] == 's' &&
7762 if (name[2] == 'a' &&
7798 if (name[1] == 'o' &&
7811 if (name[2] == 't' &&
7820 if (name[2] == 'o' &&
7829 if (name[2] == 't' &&
7838 if (name[2] == 'e' &&
7851 if (name[1] == 'o' &&
7864 if (name[2] == 'y' &&
7873 if (name[2] == 'l' &&
7889 if (name[2] == 's' &&
7898 if (name[2] == 'n' &&
7907 if (name[2] == 'c' &&
7920 if (name[1] == 'e' &&
7930 if (name[1] == 'p' &&
7943 if (name[2] == 'c' &&
7952 if (name[2] == 'p' &&
7961 if (name[2] == 's' &&
7977 if (name[2] == 'n' &&
8047 if (name[2] == 'r' &&
8056 if (name[2] == 'r' &&
8065 if (name[2] == 'a' &&
8081 if (name[2] == 'l' &&
8143 if (name[2] == 'e' &&
8146 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8159 case 5: /* 39 tokens of length 5 */
8163 if (name[1] == 'E' &&
8174 if (name[1] == 'H' &&
8188 if (name[2] == 'a' &&
8198 if (name[2] == 'a' &&
8215 if (name[2] == 'e' &&
8225 if (name[2] == 'e' &&
8229 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8245 if (name[3] == 'i' &&
8254 if (name[3] == 'o' &&
8290 if (name[2] == 'o' &&
8300 if (name[2] == 'y' &&
8314 if (name[1] == 'l' &&
8328 if (name[2] == 'n' &&
8338 if (name[2] == 'o' &&
8352 if (name[1] == 'i' &&
8357 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8366 if (name[2] == 'd' &&
8376 if (name[2] == 'c' &&
8393 if (name[2] == 'c' &&
8403 if (name[2] == 't' &&
8417 if (name[1] == 'k' &&
8428 if (name[1] == 'r' &&
8442 if (name[2] == 's' &&
8452 if (name[2] == 'd' &&
8469 if (name[2] == 'm' &&
8479 if (name[2] == 'i' &&
8489 if (name[2] == 'e' &&
8499 if (name[2] == 'l' &&
8509 if (name[2] == 'a' &&
8522 if (name[3] == 't' &&
8525 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8531 if (name[3] == 'd' &&
8548 if (name[1] == 'i' &&
8562 if (name[2] == 'a' &&
8575 if (name[3] == 'e' &&
8610 if (name[2] == 'i' &&
8627 if (name[2] == 'i' &&
8637 if (name[2] == 'i' &&
8654 case 6: /* 33 tokens of length 6 */
8658 if (name[1] == 'c' &&
8673 if (name[2] == 'l' &&
8684 if (name[2] == 'r' &&
8699 if (name[1] == 'e' &&
8714 if (name[2] == 's' &&
8719 if(ckWARN_d(WARN_SYNTAX))
8720 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8726 if (name[2] == 'i' &&
8744 if (name[2] == 'l' &&
8755 if (name[2] == 'r' &&
8770 if (name[1] == 'm' &&
8785 if (name[2] == 'n' &&
8796 if (name[2] == 's' &&
8811 if (name[1] == 's' &&
8817 if (name[4] == 't' &&
8826 if (name[4] == 'e' &&
8835 if (name[4] == 'c' &&
8844 if (name[4] == 'n' &&
8860 if (name[1] == 'r' &&
8878 if (name[3] == 'a' &&
8888 if (name[3] == 'u' &&
8902 if (name[2] == 'n' &&
8920 if (name[2] == 'a' &&
8934 if (name[3] == 'e' &&
8947 if (name[4] == 't' &&
8956 if (name[4] == 'e' &&
8978 if (name[4] == 't' &&
8987 if (name[4] == 'e' &&
9003 if (name[2] == 'c' &&
9014 if (name[2] == 'l' &&
9025 if (name[2] == 'b' &&
9036 if (name[2] == 's' &&
9059 if (name[4] == 's' &&
9068 if (name[4] == 'n' &&
9081 if (name[3] == 'a' &&
9098 if (name[1] == 'a' &&
9113 case 7: /* 29 tokens of length 7 */
9117 if (name[1] == 'E' &&
9130 if (name[1] == '_' &&
9143 if (name[1] == 'i' &&
9150 return -KEY_binmode;
9156 if (name[1] == 'o' &&
9163 return -KEY_connect;
9172 if (name[2] == 'm' &&
9178 return -KEY_dbmopen;
9189 if (name[4] == 'u' &&
9193 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9199 if (name[4] == 'n' &&
9220 if (name[1] == 'o' &&
9233 if (name[1] == 'e' &&
9240 if (name[5] == 'r' &&
9243 return -KEY_getpgrp;
9249 if (name[5] == 'i' &&
9252 return -KEY_getppid;
9265 if (name[1] == 'c' &&
9272 return -KEY_lcfirst;
9278 if (name[1] == 'p' &&
9285 return -KEY_opendir;
9291 if (name[1] == 'a' &&
9309 if (name[3] == 'd' &&
9314 return -KEY_readdir;
9320 if (name[3] == 'u' &&
9331 if (name[3] == 'e' &&
9336 return -KEY_reverse;
9355 if (name[3] == 'k' &&
9360 return -KEY_seekdir;
9366 if (name[3] == 'p' &&
9371 return -KEY_setpgrp;
9381 if (name[2] == 'm' &&
9387 return -KEY_shmread;
9393 if (name[2] == 'r' &&
9399 return -KEY_sprintf;
9408 if (name[3] == 'l' &&
9413 return -KEY_symlink;
9422 if (name[4] == 'a' &&
9426 return -KEY_syscall;
9432 if (name[4] == 'p' &&
9436 return -KEY_sysopen;
9442 if (name[4] == 'e' &&
9446 return -KEY_sysread;
9452 if (name[4] == 'e' &&
9456 return -KEY_sysseek;
9474 if (name[1] == 'e' &&
9481 return -KEY_telldir;
9490 if (name[2] == 'f' &&
9496 return -KEY_ucfirst;
9502 if (name[2] == 's' &&
9508 return -KEY_unshift;
9518 if (name[1] == 'a' &&
9525 return -KEY_waitpid;
9534 case 8: /* 26 tokens of length 8 */
9538 if (name[1] == 'U' &&
9546 return KEY_AUTOLOAD;
9557 if (name[3] == 'A' &&
9563 return KEY___DATA__;
9569 if (name[3] == 'I' &&
9575 return -KEY___FILE__;
9581 if (name[3] == 'I' &&
9587 return -KEY___LINE__;
9603 if (name[2] == 'o' &&
9610 return -KEY_closedir;
9616 if (name[2] == 'n' &&
9623 return -KEY_continue;
9633 if (name[1] == 'b' &&
9641 return -KEY_dbmclose;
9647 if (name[1] == 'n' &&
9653 if (name[4] == 'r' &&
9658 return -KEY_endgrent;
9664 if (name[4] == 'w' &&
9669 return -KEY_endpwent;
9682 if (name[1] == 'o' &&
9690 return -KEY_formline;
9696 if (name[1] == 'e' &&
9707 if (name[6] == 'n' &&
9710 return -KEY_getgrent;
9716 if (name[6] == 'i' &&
9719 return -KEY_getgrgid;
9725 if (name[6] == 'a' &&
9728 return -KEY_getgrnam;
9741 if (name[4] == 'o' &&
9746 return -KEY_getlogin;
9757 if (name[6] == 'n' &&
9760 return -KEY_getpwent;
9766 if (name[6] == 'a' &&
9769 return -KEY_getpwnam;
9775 if (name[6] == 'i' &&
9778 return -KEY_getpwuid;
9798 if (name[1] == 'e' &&
9805 if (name[5] == 'i' &&
9812 return -KEY_readline;
9817 return -KEY_readlink;
9828 if (name[5] == 'i' &&
9832 return -KEY_readpipe;
9853 if (name[4] == 'r' &&
9858 return -KEY_setgrent;
9864 if (name[4] == 'w' &&
9869 return -KEY_setpwent;
9885 if (name[3] == 'w' &&
9891 return -KEY_shmwrite;
9897 if (name[3] == 't' &&
9903 return -KEY_shutdown;
9913 if (name[2] == 's' &&
9920 return -KEY_syswrite;
9930 if (name[1] == 'r' &&
9938 return -KEY_truncate;
9947 case 9: /* 9 tokens of length 9 */
9951 if (name[1] == 'N' &&
9960 return KEY_UNITCHECK;
9966 if (name[1] == 'n' &&
9975 return -KEY_endnetent;
9981 if (name[1] == 'e' &&
9990 return -KEY_getnetent;
9996 if (name[1] == 'o' &&
10005 return -KEY_localtime;
10011 if (name[1] == 'r' &&
10020 return KEY_prototype;
10026 if (name[1] == 'u' &&
10035 return -KEY_quotemeta;
10041 if (name[1] == 'e' &&
10050 return -KEY_rewinddir;
10056 if (name[1] == 'e' &&
10065 return -KEY_setnetent;
10071 if (name[1] == 'a' &&
10080 return -KEY_wantarray;
10089 case 10: /* 9 tokens of length 10 */
10093 if (name[1] == 'n' &&
10099 if (name[4] == 'o' &&
10106 return -KEY_endhostent;
10112 if (name[4] == 'e' &&
10119 return -KEY_endservent;
10132 if (name[1] == 'e' &&
10138 if (name[4] == 'o' &&
10145 return -KEY_gethostent;
10154 if (name[5] == 'r' &&
10160 return -KEY_getservent;
10166 if (name[5] == 'c' &&
10172 return -KEY_getsockopt;
10192 if (name[2] == 't')
10197 if (name[4] == 'o' &&
10204 return -KEY_sethostent;
10213 if (name[5] == 'r' &&
10219 return -KEY_setservent;
10225 if (name[5] == 'c' &&
10231 return -KEY_setsockopt;
10248 if (name[2] == 'c' &&
10257 return -KEY_socketpair;
10270 case 11: /* 8 tokens of length 11 */
10274 if (name[1] == '_' &&
10284 { /* __PACKAGE__ */
10285 return -KEY___PACKAGE__;
10291 if (name[1] == 'n' &&
10301 { /* endprotoent */
10302 return -KEY_endprotoent;
10308 if (name[1] == 'e' &&
10317 if (name[5] == 'e' &&
10323 { /* getpeername */
10324 return -KEY_getpeername;
10333 if (name[6] == 'o' &&
10338 { /* getpriority */
10339 return -KEY_getpriority;
10345 if (name[6] == 't' &&
10350 { /* getprotoent */
10351 return -KEY_getprotoent;
10365 if (name[4] == 'o' &&
10372 { /* getsockname */
10373 return -KEY_getsockname;
10386 if (name[1] == 'e' &&
10394 if (name[6] == 'o' &&
10399 { /* setpriority */
10400 return -KEY_setpriority;
10406 if (name[6] == 't' &&
10411 { /* setprotoent */
10412 return -KEY_setprotoent;
10428 case 12: /* 2 tokens of length 12 */
10429 if (name[0] == 'g' &&
10441 if (name[9] == 'd' &&
10444 { /* getnetbyaddr */
10445 return -KEY_getnetbyaddr;
10451 if (name[9] == 'a' &&
10454 { /* getnetbyname */
10455 return -KEY_getnetbyname;
10467 case 13: /* 4 tokens of length 13 */
10468 if (name[0] == 'g' &&
10475 if (name[4] == 'o' &&
10484 if (name[10] == 'd' &&
10487 { /* gethostbyaddr */
10488 return -KEY_gethostbyaddr;
10494 if (name[10] == 'a' &&
10497 { /* gethostbyname */
10498 return -KEY_gethostbyname;
10511 if (name[4] == 'e' &&
10520 if (name[10] == 'a' &&
10523 { /* getservbyname */
10524 return -KEY_getservbyname;
10530 if (name[10] == 'o' &&
10533 { /* getservbyport */
10534 return -KEY_getservbyport;
10553 case 14: /* 1 tokens of length 14 */
10554 if (name[0] == 'g' &&
10568 { /* getprotobyname */
10569 return -KEY_getprotobyname;
10574 case 16: /* 1 tokens of length 16 */
10575 if (name[0] == 'g' &&
10591 { /* getprotobynumber */
10592 return -KEY_getprotobynumber;
10606 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10610 PERL_ARGS_ASSERT_CHECKCOMMA;
10612 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10613 if (ckWARN(WARN_SYNTAX)) {
10616 for (w = s+2; *w && level; w++) {
10619 else if (*w == ')')
10622 while (isSPACE(*w))
10624 /* the list of chars below is for end of statements or
10625 * block / parens, boolean operators (&&, ||, //) and branch
10626 * constructs (or, and, if, until, unless, while, err, for).
10627 * Not a very solid hack... */
10628 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10629 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10630 "%s (...) interpreted as function",name);
10633 while (s < PL_bufend && isSPACE(*s))
10637 while (s < PL_bufend && isSPACE(*s))
10639 if (isIDFIRST_lazy_if(s,UTF)) {
10640 const char * const w = s++;
10641 while (isALNUM_lazy_if(s,UTF))
10643 while (s < PL_bufend && isSPACE(*s))
10647 if (keyword(w, s - w, 0))
10650 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10651 if (gv && GvCVu(gv))
10653 Perl_croak(aTHX_ "No comma allowed after %s", what);
10658 /* Either returns sv, or mortalizes sv and returns a new SV*.
10659 Best used as sv=new_constant(..., sv, ...).
10660 If s, pv are NULL, calls subroutine with one argument,
10661 and type is used with error messages only. */
10664 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10665 SV *sv, SV *pv, const char *type, STRLEN typelen)
10668 HV * const table = GvHV(PL_hintgv); /* ^H */
10672 const char *why1 = "", *why2 = "", *why3 = "";
10674 PERL_ARGS_ASSERT_NEW_CONSTANT;
10676 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10679 why2 = (const char *)
10680 (strEQ(key,"charnames")
10681 ? "(possibly a missing \"use charnames ...\")"
10683 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10684 (type ? type: "undef"), why2);
10686 /* This is convoluted and evil ("goto considered harmful")
10687 * but I do not understand the intricacies of all the different
10688 * failure modes of %^H in here. The goal here is to make
10689 * the most probable error message user-friendly. --jhi */
10694 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10695 (type ? type: "undef"), why1, why2, why3);
10697 yyerror(SvPVX_const(msg));
10701 cvp = hv_fetch(table, key, keylen, FALSE);
10702 if (!cvp || !SvOK(*cvp)) {
10705 why3 = "} is not defined";
10708 sv_2mortal(sv); /* Parent created it permanently */
10711 pv = newSVpvn_flags(s, len, SVs_TEMP);
10713 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10715 typesv = &PL_sv_undef;
10717 PUSHSTACKi(PERLSI_OVERLOAD);
10729 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10733 /* Check the eval first */
10734 if (!PL_in_eval && SvTRUE(ERRSV)) {
10735 sv_catpvs(ERRSV, "Propagated");
10736 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10738 res = SvREFCNT_inc_simple(sv);
10742 SvREFCNT_inc_simple_void(res);
10751 why1 = "Call to &{$^H{";
10753 why3 = "}} did not return a defined value";
10761 /* Returns a NUL terminated string, with the length of the string written to
10765 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10768 register char *d = dest;
10769 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10771 PERL_ARGS_ASSERT_SCAN_WORD;
10775 Perl_croak(aTHX_ ident_too_long);
10776 if (isALNUM(*s)) /* UTF handled below */
10778 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10783 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10787 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10788 char *t = s + UTF8SKIP(s);
10790 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10794 Perl_croak(aTHX_ ident_too_long);
10795 Copy(s, d, len, char);
10808 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10811 char *bracket = NULL;
10813 register char *d = dest;
10814 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10816 PERL_ARGS_ASSERT_SCAN_IDENT;
10821 while (isDIGIT(*s)) {
10823 Perl_croak(aTHX_ ident_too_long);
10830 Perl_croak(aTHX_ ident_too_long);
10831 if (isALNUM(*s)) /* UTF handled below */
10833 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10838 else if (*s == ':' && s[1] == ':') {
10842 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10843 char *t = s + UTF8SKIP(s);
10844 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10846 if (d + (t - s) > e)
10847 Perl_croak(aTHX_ ident_too_long);
10848 Copy(s, d, t - s, char);
10859 if (PL_lex_state != LEX_NORMAL)
10860 PL_lex_state = LEX_INTERPENDMAYBE;
10863 if (*s == '$' && s[1] &&
10864 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10877 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10882 if (isSPACE(s[-1])) {
10884 const char ch = *s++;
10885 if (!SPACE_OR_TAB(ch)) {
10891 if (isIDFIRST_lazy_if(d,UTF)) {
10895 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10896 end += UTF8SKIP(end);
10897 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10898 end += UTF8SKIP(end);
10900 Copy(s, d, end - s, char);
10905 while ((isALNUM(*s) || *s == ':') && d < e)
10908 Perl_croak(aTHX_ ident_too_long);
10911 while (s < send && SPACE_OR_TAB(*s))
10913 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10914 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10915 const char * const brack =
10917 ((*s == '[') ? "[...]" : "{...}");
10918 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10919 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10920 funny, dest, brack, funny, dest, brack);
10923 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10927 /* Handle extended ${^Foo} variables
10928 * 1999-02-27 mjd-perl-patch@plover.com */
10929 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10933 while (isALNUM(*s) && d < e) {
10937 Perl_croak(aTHX_ ident_too_long);
10942 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10943 PL_lex_state = LEX_INTERPEND;
10946 if (PL_lex_state == LEX_NORMAL) {
10947 if (ckWARN(WARN_AMBIGUOUS) &&
10948 (keyword(dest, d - dest, 0)
10949 || get_cvn_flags(dest, d - dest, 0)))
10953 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10954 "Ambiguous use of %c{%s} resolved to %c%s",
10955 funny, dest, funny, dest);
10960 s = bracket; /* let the parser handle it */
10964 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10965 PL_lex_state = LEX_INTERPEND;
10970 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10972 PERL_ARGS_ASSERT_PMFLAG;
10974 PERL_UNUSED_CONTEXT;
10976 const char c = (char)ch;
10978 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10979 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10980 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10981 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10982 case KEEPCOPY_PAT_MOD: *pmfl |= PMf_KEEPCOPY; break;
10988 S_scan_pat(pTHX_ char *start, I32 type)
10992 char *s = scan_str(start,!!PL_madskills,FALSE);
10993 const char * const valid_flags =
10994 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10999 PERL_ARGS_ASSERT_SCAN_PAT;
11002 const char * const delimiter = skipspace(start);
11006 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11007 : "Search pattern not terminated" ));
11010 pm = (PMOP*)newPMOP(type, 0);
11011 if (PL_multi_open == '?') {
11012 /* This is the only point in the code that sets PMf_ONCE: */
11013 pm->op_pmflags |= PMf_ONCE;
11015 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11016 allows us to restrict the list needed by reset to just the ??
11018 assert(type != OP_TRANS);
11020 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11023 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11026 elements = mg->mg_len / sizeof(PMOP**);
11027 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11028 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11029 mg->mg_len = elements * sizeof(PMOP**);
11030 PmopSTASH_set(pm,PL_curstash);
11036 while (*s && strchr(valid_flags, *s))
11037 pmflag(&pm->op_pmflags,*s++);
11039 if (PL_madskills && modstart != s) {
11040 SV* tmptoken = newSVpvn(modstart, s - modstart);
11041 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11044 /* issue a warning if /c is specified,but /g is not */
11045 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11047 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11048 "Use of /c modifier is meaningless without /g" );
11051 PL_lex_op = (OP*)pm;
11052 pl_yylval.ival = OP_MATCH;
11057 S_scan_subst(pTHX_ char *start)
11068 PERL_ARGS_ASSERT_SCAN_SUBST;
11070 pl_yylval.ival = OP_NULL;
11072 s = scan_str(start,!!PL_madskills,FALSE);
11075 Perl_croak(aTHX_ "Substitution pattern not terminated");
11077 if (s[-1] == PL_multi_open)
11080 if (PL_madskills) {
11081 CURMAD('q', PL_thisopen);
11082 CURMAD('_', PL_thiswhite);
11083 CURMAD('E', PL_thisstuff);
11084 CURMAD('Q', PL_thisclose);
11085 PL_realtokenstart = s - SvPVX(PL_linestr);
11089 first_start = PL_multi_start;
11090 s = scan_str(s,!!PL_madskills,FALSE);
11092 if (PL_lex_stuff) {
11093 SvREFCNT_dec(PL_lex_stuff);
11094 PL_lex_stuff = NULL;
11096 Perl_croak(aTHX_ "Substitution replacement not terminated");
11098 PL_multi_start = first_start; /* so whole substitution is taken together */
11100 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11103 if (PL_madskills) {
11104 CURMAD('z', PL_thisopen);
11105 CURMAD('R', PL_thisstuff);
11106 CURMAD('Z', PL_thisclose);
11112 if (*s == EXEC_PAT_MOD) {
11116 else if (strchr(S_PAT_MODS, *s))
11117 pmflag(&pm->op_pmflags,*s++);
11123 if (PL_madskills) {
11125 curmad('m', newSVpvn(modstart, s - modstart));
11126 append_madprops(PL_thismad, (OP*)pm, 0);
11130 if ((pm->op_pmflags & PMf_CONTINUE)) {
11131 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11135 SV * const repl = newSVpvs("");
11137 PL_sublex_info.super_bufptr = s;
11138 PL_sublex_info.super_bufend = PL_bufend;
11140 pm->op_pmflags |= PMf_EVAL;
11143 sv_catpvs(repl, "eval ");
11145 sv_catpvs(repl, "do ");
11147 sv_catpvs(repl, "{");
11148 sv_catsv(repl, PL_lex_repl);
11149 if (strchr(SvPVX(PL_lex_repl), '#'))
11150 sv_catpvs(repl, "\n");
11151 sv_catpvs(repl, "}");
11153 SvREFCNT_dec(PL_lex_repl);
11154 PL_lex_repl = repl;
11157 PL_lex_op = (OP*)pm;
11158 pl_yylval.ival = OP_SUBST;
11163 S_scan_trans(pTHX_ char *start)
11176 PERL_ARGS_ASSERT_SCAN_TRANS;
11178 pl_yylval.ival = OP_NULL;
11180 s = scan_str(start,!!PL_madskills,FALSE);
11182 Perl_croak(aTHX_ "Transliteration pattern not terminated");
11184 if (s[-1] == PL_multi_open)
11187 if (PL_madskills) {
11188 CURMAD('q', PL_thisopen);
11189 CURMAD('_', PL_thiswhite);
11190 CURMAD('E', PL_thisstuff);
11191 CURMAD('Q', PL_thisclose);
11192 PL_realtokenstart = s - SvPVX(PL_linestr);
11196 s = scan_str(s,!!PL_madskills,FALSE);
11198 if (PL_lex_stuff) {
11199 SvREFCNT_dec(PL_lex_stuff);
11200 PL_lex_stuff = NULL;
11202 Perl_croak(aTHX_ "Transliteration replacement not terminated");
11204 if (PL_madskills) {
11205 CURMAD('z', PL_thisopen);
11206 CURMAD('R', PL_thisstuff);
11207 CURMAD('Z', PL_thisclose);
11210 complement = del = squash = 0;
11217 complement = OPpTRANS_COMPLEMENT;
11220 del = OPpTRANS_DELETE;
11223 squash = OPpTRANS_SQUASH;
11232 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11233 o = newPVOP(OP_TRANS, 0, (char*)tbl);
11234 o->op_private &= ~OPpTRANS_ALL;
11235 o->op_private |= del|squash|complement|
11236 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11237 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
11240 pl_yylval.ival = OP_TRANS;
11243 if (PL_madskills) {
11245 curmad('m', newSVpvn(modstart, s - modstart));
11246 append_madprops(PL_thismad, o, 0);
11255 S_scan_heredoc(pTHX_ register char *s)
11259 I32 op_type = OP_SCALAR;
11263 const char *found_newline;
11267 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11269 I32 stuffstart = s - SvPVX(PL_linestr);
11272 PL_realtokenstart = -1;
11275 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11279 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11283 while (SPACE_OR_TAB(*peek))
11285 if (*peek == '`' || *peek == '\'' || *peek =='"') {
11288 s = delimcpy(d, e, s, PL_bufend, term, &len);
11298 if (!isALNUM_lazy_if(s,UTF))
11299 deprecate_old("bare << to mean <<\"\"");
11300 for (; isALNUM_lazy_if(s,UTF); s++) {
11305 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11306 Perl_croak(aTHX_ "Delimiter for here document is too long");
11309 len = d - PL_tokenbuf;
11312 if (PL_madskills) {
11313 tstart = PL_tokenbuf + !outer;
11314 PL_thisclose = newSVpvn(tstart, len - !outer);
11315 tstart = SvPVX(PL_linestr) + stuffstart;
11316 PL_thisopen = newSVpvn(tstart, s - tstart);
11317 stuffstart = s - SvPVX(PL_linestr);
11320 #ifndef PERL_STRICT_CR
11321 d = strchr(s, '\r');
11323 char * const olds = s;
11325 while (s < PL_bufend) {
11331 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11340 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11347 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11348 herewas = newSVpvn(s,PL_bufend-s);
11352 herewas = newSVpvn(s-1,found_newline-s+1);
11355 herewas = newSVpvn(s,found_newline-s);
11359 if (PL_madskills) {
11360 tstart = SvPVX(PL_linestr) + stuffstart;
11362 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11364 PL_thisstuff = newSVpvn(tstart, s - tstart);
11367 s += SvCUR(herewas);
11370 stuffstart = s - SvPVX(PL_linestr);
11376 tmpstr = newSV_type(SVt_PVIV);
11377 SvGROW(tmpstr, 80);
11378 if (term == '\'') {
11379 op_type = OP_CONST;
11380 SvIV_set(tmpstr, -1);
11382 else if (term == '`') {
11383 op_type = OP_BACKTICK;
11384 SvIV_set(tmpstr, '\\');
11388 PL_multi_start = CopLINE(PL_curcop);
11389 PL_multi_open = PL_multi_close = '<';
11390 term = *PL_tokenbuf;
11391 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11392 char * const bufptr = PL_sublex_info.super_bufptr;
11393 char * const bufend = PL_sublex_info.super_bufend;
11394 char * const olds = s - SvCUR(herewas);
11395 s = strchr(bufptr, '\n');
11399 while (s < bufend &&
11400 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11402 CopLINE_inc(PL_curcop);
11405 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11406 missingterm(PL_tokenbuf);
11408 sv_setpvn(herewas,bufptr,d-bufptr+1);
11409 sv_setpvn(tmpstr,d+1,s-d);
11411 sv_catpvn(herewas,s,bufend-s);
11412 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11419 while (s < PL_bufend &&
11420 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11422 CopLINE_inc(PL_curcop);
11424 if (s >= PL_bufend) {
11425 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11426 missingterm(PL_tokenbuf);
11428 sv_setpvn(tmpstr,d+1,s-d);
11430 if (PL_madskills) {
11432 sv_catpvn(PL_thisstuff, d + 1, s - d);
11434 PL_thisstuff = newSVpvn(d + 1, s - d);
11435 stuffstart = s - SvPVX(PL_linestr);
11439 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11441 sv_catpvn(herewas,s,PL_bufend-s);
11442 sv_setsv(PL_linestr,herewas);
11443 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11444 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11445 PL_last_lop = PL_last_uni = NULL;
11448 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
11449 while (s >= PL_bufend) { /* multiple line string? */
11451 if (PL_madskills) {
11452 tstart = SvPVX(PL_linestr) + stuffstart;
11454 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11456 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11460 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11461 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11462 missingterm(PL_tokenbuf);
11465 stuffstart = s - SvPVX(PL_linestr);
11467 CopLINE_inc(PL_curcop);
11468 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11469 PL_last_lop = PL_last_uni = NULL;
11470 #ifndef PERL_STRICT_CR
11471 if (PL_bufend - PL_linestart >= 2) {
11472 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11473 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11475 PL_bufend[-2] = '\n';
11477 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11479 else if (PL_bufend[-1] == '\r')
11480 PL_bufend[-1] = '\n';
11482 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11483 PL_bufend[-1] = '\n';
11485 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11486 update_debugger_info(PL_linestr, NULL, 0);
11487 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11488 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11489 *(SvPVX(PL_linestr) + off ) = ' ';
11490 sv_catsv(PL_linestr,herewas);
11491 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11492 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11496 sv_catsv(tmpstr,PL_linestr);
11501 PL_multi_end = CopLINE(PL_curcop);
11502 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11503 SvPV_shrink_to_cur(tmpstr);
11505 SvREFCNT_dec(herewas);
11507 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11509 else if (PL_encoding)
11510 sv_recode_to_utf8(tmpstr, PL_encoding);
11512 PL_lex_stuff = tmpstr;
11513 pl_yylval.ival = op_type;
11517 /* scan_inputsymbol
11518 takes: current position in input buffer
11519 returns: new position in input buffer
11520 side-effects: pl_yylval and lex_op are set.
11525 <FH> read from filehandle
11526 <pkg::FH> read from package qualified filehandle
11527 <pkg'FH> read from package qualified filehandle
11528 <$fh> read from filehandle in $fh
11529 <*.h> filename glob
11534 S_scan_inputsymbol(pTHX_ char *start)
11537 register char *s = start; /* current position in buffer */
11540 char *d = PL_tokenbuf; /* start of temp holding space */
11541 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11543 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11545 end = strchr(s, '\n');
11548 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11550 /* die if we didn't have space for the contents of the <>,
11551 or if it didn't end, or if we see a newline
11554 if (len >= (I32)sizeof PL_tokenbuf)
11555 Perl_croak(aTHX_ "Excessively long <> operator");
11557 Perl_croak(aTHX_ "Unterminated <> operator");
11562 Remember, only scalar variables are interpreted as filehandles by
11563 this code. Anything more complex (e.g., <$fh{$num}>) will be
11564 treated as a glob() call.
11565 This code makes use of the fact that except for the $ at the front,
11566 a scalar variable and a filehandle look the same.
11568 if (*d == '$' && d[1]) d++;
11570 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11571 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11574 /* If we've tried to read what we allow filehandles to look like, and
11575 there's still text left, then it must be a glob() and not a getline.
11576 Use scan_str to pull out the stuff between the <> and treat it
11577 as nothing more than a string.
11580 if (d - PL_tokenbuf != len) {
11581 pl_yylval.ival = OP_GLOB;
11582 s = scan_str(start,!!PL_madskills,FALSE);
11584 Perl_croak(aTHX_ "Glob not terminated");
11588 bool readline_overriden = FALSE;
11591 /* we're in a filehandle read situation */
11594 /* turn <> into <ARGV> */
11596 Copy("ARGV",d,5,char);
11598 /* Check whether readline() is overriden */
11599 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11601 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11603 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11604 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11605 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11606 readline_overriden = TRUE;
11608 /* if <$fh>, create the ops to turn the variable into a
11612 /* try to find it in the pad for this block, otherwise find
11613 add symbol table ops
11615 const PADOFFSET tmp = pad_findmy(d);
11616 if (tmp != NOT_IN_PAD) {
11617 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11618 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11619 HEK * const stashname = HvNAME_HEK(stash);
11620 SV * const sym = sv_2mortal(newSVhek(stashname));
11621 sv_catpvs(sym, "::");
11622 sv_catpv(sym, d+1);
11627 OP * const o = newOP(OP_PADSV, 0);
11629 PL_lex_op = readline_overriden
11630 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11631 append_elem(OP_LIST, o,
11632 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11633 : (OP*)newUNOP(OP_READLINE, 0, o);
11642 ? (GV_ADDMULTI | GV_ADDINEVAL)
11645 PL_lex_op = readline_overriden
11646 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11647 append_elem(OP_LIST,
11648 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11649 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11650 : (OP*)newUNOP(OP_READLINE, 0,
11651 newUNOP(OP_RV2SV, 0,
11652 newGVOP(OP_GV, 0, gv)));
11654 if (!readline_overriden)
11655 PL_lex_op->op_flags |= OPf_SPECIAL;
11656 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11657 pl_yylval.ival = OP_NULL;
11660 /* If it's none of the above, it must be a literal filehandle
11661 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11663 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11664 PL_lex_op = readline_overriden
11665 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11666 append_elem(OP_LIST,
11667 newGVOP(OP_GV, 0, gv),
11668 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11669 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11670 pl_yylval.ival = OP_NULL;
11679 takes: start position in buffer
11680 keep_quoted preserve \ on the embedded delimiter(s)
11681 keep_delims preserve the delimiters around the string
11682 returns: position to continue reading from buffer
11683 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11684 updates the read buffer.
11686 This subroutine pulls a string out of the input. It is called for:
11687 q single quotes q(literal text)
11688 ' single quotes 'literal text'
11689 qq double quotes qq(interpolate $here please)
11690 " double quotes "interpolate $here please"
11691 qx backticks qx(/bin/ls -l)
11692 ` backticks `/bin/ls -l`
11693 qw quote words @EXPORT_OK = qw( func() $spam )
11694 m// regexp match m/this/
11695 s/// regexp substitute s/this/that/
11696 tr/// string transliterate tr/this/that/
11697 y/// string transliterate y/this/that/
11698 ($*@) sub prototypes sub foo ($)
11699 (stuff) sub attr parameters sub foo : attr(stuff)
11700 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11702 In most of these cases (all but <>, patterns and transliterate)
11703 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11704 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11705 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11708 It skips whitespace before the string starts, and treats the first
11709 character as the delimiter. If the delimiter is one of ([{< then
11710 the corresponding "close" character )]}> is used as the closing
11711 delimiter. It allows quoting of delimiters, and if the string has
11712 balanced delimiters ([{<>}]) it allows nesting.
11714 On success, the SV with the resulting string is put into lex_stuff or,
11715 if that is already non-NULL, into lex_repl. The second case occurs only
11716 when parsing the RHS of the special constructs s/// and tr/// (y///).
11717 For convenience, the terminating delimiter character is stuffed into
11722 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11725 SV *sv; /* scalar value: string */
11726 const char *tmps; /* temp string, used for delimiter matching */
11727 register char *s = start; /* current position in the buffer */
11728 register char term; /* terminating character */
11729 register char *to; /* current position in the sv's data */
11730 I32 brackets = 1; /* bracket nesting level */
11731 bool has_utf8 = FALSE; /* is there any utf8 content? */
11732 I32 termcode; /* terminating char. code */
11733 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11734 STRLEN termlen; /* length of terminating string */
11735 int last_off = 0; /* last position for nesting bracket */
11741 PERL_ARGS_ASSERT_SCAN_STR;
11743 /* skip space before the delimiter */
11749 if (PL_realtokenstart >= 0) {
11750 stuffstart = PL_realtokenstart;
11751 PL_realtokenstart = -1;
11754 stuffstart = start - SvPVX(PL_linestr);
11756 /* mark where we are, in case we need to report errors */
11759 /* after skipping whitespace, the next character is the terminator */
11762 termcode = termstr[0] = term;
11766 termcode = utf8_to_uvchr((U8*)s, &termlen);
11767 Copy(s, termstr, termlen, U8);
11768 if (!UTF8_IS_INVARIANT(term))
11772 /* mark where we are */
11773 PL_multi_start = CopLINE(PL_curcop);
11774 PL_multi_open = term;
11776 /* find corresponding closing delimiter */
11777 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11778 termcode = termstr[0] = term = tmps[5];
11780 PL_multi_close = term;
11782 /* create a new SV to hold the contents. 79 is the SV's initial length.
11783 What a random number. */
11784 sv = newSV_type(SVt_PVIV);
11786 SvIV_set(sv, termcode);
11787 (void)SvPOK_only(sv); /* validate pointer */
11789 /* move past delimiter and try to read a complete string */
11791 sv_catpvn(sv, s, termlen);
11794 tstart = SvPVX(PL_linestr) + stuffstart;
11795 if (!PL_thisopen && !keep_delims) {
11796 PL_thisopen = newSVpvn(tstart, s - tstart);
11797 stuffstart = s - SvPVX(PL_linestr);
11801 if (PL_encoding && !UTF) {
11805 int offset = s - SvPVX_const(PL_linestr);
11806 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11807 &offset, (char*)termstr, termlen);
11808 const char * const ns = SvPVX_const(PL_linestr) + offset;
11809 char * const svlast = SvEND(sv) - 1;
11811 for (; s < ns; s++) {
11812 if (*s == '\n' && !PL_rsfp)
11813 CopLINE_inc(PL_curcop);
11816 goto read_more_line;
11818 /* handle quoted delimiters */
11819 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11821 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11823 if ((svlast-1 - t) % 2) {
11824 if (!keep_quoted) {
11825 *(svlast-1) = term;
11827 SvCUR_set(sv, SvCUR(sv) - 1);
11832 if (PL_multi_open == PL_multi_close) {
11838 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11839 /* At here, all closes are "was quoted" one,
11840 so we don't check PL_multi_close. */
11842 if (!keep_quoted && *(t+1) == PL_multi_open)
11847 else if (*t == PL_multi_open)
11855 SvCUR_set(sv, w - SvPVX_const(sv));
11857 last_off = w - SvPVX(sv);
11858 if (--brackets <= 0)
11863 if (!keep_delims) {
11864 SvCUR_set(sv, SvCUR(sv) - 1);
11870 /* extend sv if need be */
11871 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11872 /* set 'to' to the next character in the sv's string */
11873 to = SvPVX(sv)+SvCUR(sv);
11875 /* if open delimiter is the close delimiter read unbridle */
11876 if (PL_multi_open == PL_multi_close) {
11877 for (; s < PL_bufend; s++,to++) {
11878 /* embedded newlines increment the current line number */
11879 if (*s == '\n' && !PL_rsfp)
11880 CopLINE_inc(PL_curcop);
11881 /* handle quoted delimiters */
11882 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11883 if (!keep_quoted && s[1] == term)
11885 /* any other quotes are simply copied straight through */
11889 /* terminate when run out of buffer (the for() condition), or
11890 have found the terminator */
11891 else if (*s == term) {
11894 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11897 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11903 /* if the terminator isn't the same as the start character (e.g.,
11904 matched brackets), we have to allow more in the quoting, and
11905 be prepared for nested brackets.
11908 /* read until we run out of string, or we find the terminator */
11909 for (; s < PL_bufend; s++,to++) {
11910 /* embedded newlines increment the line count */
11911 if (*s == '\n' && !PL_rsfp)
11912 CopLINE_inc(PL_curcop);
11913 /* backslashes can escape the open or closing characters */
11914 if (*s == '\\' && s+1 < PL_bufend) {
11915 if (!keep_quoted &&
11916 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11921 /* allow nested opens and closes */
11922 else if (*s == PL_multi_close && --brackets <= 0)
11924 else if (*s == PL_multi_open)
11926 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11931 /* terminate the copied string and update the sv's end-of-string */
11933 SvCUR_set(sv, to - SvPVX_const(sv));
11936 * this next chunk reads more into the buffer if we're not done yet
11940 break; /* handle case where we are done yet :-) */
11942 #ifndef PERL_STRICT_CR
11943 if (to - SvPVX_const(sv) >= 2) {
11944 if ((to[-2] == '\r' && to[-1] == '\n') ||
11945 (to[-2] == '\n' && to[-1] == '\r'))
11949 SvCUR_set(sv, to - SvPVX_const(sv));
11951 else if (to[-1] == '\r')
11954 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11959 /* if we're out of file, or a read fails, bail and reset the current
11960 line marker so we can report where the unterminated string began
11963 if (PL_madskills) {
11964 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11966 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11968 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11972 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11974 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11980 /* we read a line, so increment our line counter */
11981 CopLINE_inc(PL_curcop);
11983 /* update debugger info */
11984 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11985 update_debugger_info(PL_linestr, NULL, 0);
11987 /* having changed the buffer, we must update PL_bufend */
11988 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11989 PL_last_lop = PL_last_uni = NULL;
11992 /* at this point, we have successfully read the delimited string */
11994 if (!PL_encoding || UTF) {
11996 if (PL_madskills) {
11997 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11998 const int len = s - tstart;
12000 sv_catpvn(PL_thisstuff, tstart, len);
12002 PL_thisstuff = newSVpvn(tstart, len);
12003 if (!PL_thisclose && !keep_delims)
12004 PL_thisclose = newSVpvn(s,termlen);
12009 sv_catpvn(sv, s, termlen);
12014 if (PL_madskills) {
12015 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12016 const int len = s - tstart - termlen;
12018 sv_catpvn(PL_thisstuff, tstart, len);
12020 PL_thisstuff = newSVpvn(tstart, len);
12021 if (!PL_thisclose && !keep_delims)
12022 PL_thisclose = newSVpvn(s - termlen,termlen);
12026 if (has_utf8 || PL_encoding)
12029 PL_multi_end = CopLINE(PL_curcop);
12031 /* if we allocated too much space, give some back */
12032 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12033 SvLEN_set(sv, SvCUR(sv) + 1);
12034 SvPV_renew(sv, SvLEN(sv));
12037 /* decide whether this is the first or second quoted string we've read
12050 takes: pointer to position in buffer
12051 returns: pointer to new position in buffer
12052 side-effects: builds ops for the constant in pl_yylval.op
12054 Read a number in any of the formats that Perl accepts:
12056 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12057 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12060 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12062 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12065 If it reads a number without a decimal point or an exponent, it will
12066 try converting the number to an integer and see if it can do so
12067 without loss of precision.
12071 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12074 register const char *s = start; /* current position in buffer */
12075 register char *d; /* destination in temp buffer */
12076 register char *e; /* end of temp buffer */
12077 NV nv; /* number read, as a double */
12078 SV *sv = NULL; /* place to put the converted number */
12079 bool floatit; /* boolean: int or float? */
12080 const char *lastub = NULL; /* position of last underbar */
12081 static char const number_too_long[] = "Number too long";
12083 PERL_ARGS_ASSERT_SCAN_NUM;
12085 /* We use the first character to decide what type of number this is */
12089 Perl_croak(aTHX_ "panic: scan_num");
12091 /* if it starts with a 0, it could be an octal number, a decimal in
12092 0.13 disguise, or a hexadecimal number, or a binary number. */
12096 u holds the "number so far"
12097 shift the power of 2 of the base
12098 (hex == 4, octal == 3, binary == 1)
12099 overflowed was the number more than we can hold?
12101 Shift is used when we add a digit. It also serves as an "are
12102 we in octal/hex/binary?" indicator to disallow hex characters
12103 when in octal mode.
12108 bool overflowed = FALSE;
12109 bool just_zero = TRUE; /* just plain 0 or binary number? */
12110 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12111 static const char* const bases[5] =
12112 { "", "binary", "", "octal", "hexadecimal" };
12113 static const char* const Bases[5] =
12114 { "", "Binary", "", "Octal", "Hexadecimal" };
12115 static const char* const maxima[5] =
12117 "0b11111111111111111111111111111111",
12121 const char *base, *Base, *max;
12123 /* check for hex */
12128 } else if (s[1] == 'b') {
12133 /* check for a decimal in disguise */
12134 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12136 /* so it must be octal */
12143 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12144 "Misplaced _ in number");
12148 base = bases[shift];
12149 Base = Bases[shift];
12150 max = maxima[shift];
12152 /* read the rest of the number */
12154 /* x is used in the overflow test,
12155 b is the digit we're adding on. */
12160 /* if we don't mention it, we're done */
12164 /* _ are ignored -- but warned about if consecutive */
12166 if (lastub && s == lastub + 1)
12167 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12168 "Misplaced _ in number");
12172 /* 8 and 9 are not octal */
12173 case '8': case '9':
12175 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12179 case '2': case '3': case '4':
12180 case '5': case '6': case '7':
12182 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12185 case '0': case '1':
12186 b = *s++ & 15; /* ASCII digit -> value of digit */
12190 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12191 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12192 /* make sure they said 0x */
12195 b = (*s++ & 7) + 9;
12197 /* Prepare to put the digit we have onto the end
12198 of the number so far. We check for overflows.
12204 x = u << shift; /* make room for the digit */
12206 if ((x >> shift) != u
12207 && !(PL_hints & HINT_NEW_BINARY)) {
12210 if (ckWARN_d(WARN_OVERFLOW))
12211 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12212 "Integer overflow in %s number",
12215 u = x | b; /* add the digit to the end */
12218 n *= nvshift[shift];
12219 /* If an NV has not enough bits in its
12220 * mantissa to represent an UV this summing of
12221 * small low-order numbers is a waste of time
12222 * (because the NV cannot preserve the
12223 * low-order bits anyway): we could just
12224 * remember when did we overflow and in the
12225 * end just multiply n by the right
12233 /* if we get here, we had success: make a scalar value from
12238 /* final misplaced underbar check */
12239 if (s[-1] == '_') {
12240 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12245 if (n > 4294967295.0)
12246 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12247 "%s number > %s non-portable",
12253 if (u > 0xffffffff)
12254 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12255 "%s number > %s non-portable",
12260 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12261 sv = new_constant(start, s - start, "integer",
12262 sv, NULL, NULL, 0);
12263 else if (PL_hints & HINT_NEW_BINARY)
12264 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12269 handle decimal numbers.
12270 we're also sent here when we read a 0 as the first digit
12272 case '1': case '2': case '3': case '4': case '5':
12273 case '6': case '7': case '8': case '9': case '.':
12276 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12279 /* read next group of digits and _ and copy into d */
12280 while (isDIGIT(*s) || *s == '_') {
12281 /* skip underscores, checking for misplaced ones
12285 if (lastub && s == lastub + 1)
12286 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12287 "Misplaced _ in number");
12291 /* check for end of fixed-length buffer */
12293 Perl_croak(aTHX_ number_too_long);
12294 /* if we're ok, copy the character */
12299 /* final misplaced underbar check */
12300 if (lastub && s == lastub + 1) {
12301 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12304 /* read a decimal portion if there is one. avoid
12305 3..5 being interpreted as the number 3. followed
12308 if (*s == '.' && s[1] != '.') {
12313 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12314 "Misplaced _ in number");
12318 /* copy, ignoring underbars, until we run out of digits.
12320 for (; isDIGIT(*s) || *s == '_'; s++) {
12321 /* fixed length buffer check */
12323 Perl_croak(aTHX_ number_too_long);
12325 if (lastub && s == lastub + 1)
12326 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12327 "Misplaced _ in number");
12333 /* fractional part ending in underbar? */
12334 if (s[-1] == '_') {
12335 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12336 "Misplaced _ in number");
12338 if (*s == '.' && isDIGIT(s[1])) {
12339 /* oops, it's really a v-string, but without the "v" */
12345 /* read exponent part, if present */
12346 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12350 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12351 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12353 /* stray preinitial _ */
12355 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12356 "Misplaced _ in number");
12360 /* allow positive or negative exponent */
12361 if (*s == '+' || *s == '-')
12364 /* stray initial _ */
12366 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12367 "Misplaced _ in number");
12371 /* read digits of exponent */
12372 while (isDIGIT(*s) || *s == '_') {
12375 Perl_croak(aTHX_ number_too_long);
12379 if (((lastub && s == lastub + 1) ||
12380 (!isDIGIT(s[1]) && s[1] != '_')))
12381 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12382 "Misplaced _ in number");
12389 /* make an sv from the string */
12393 We try to do an integer conversion first if no characters
12394 indicating "float" have been found.
12399 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12401 if (flags == IS_NUMBER_IN_UV) {
12403 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12406 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12407 if (uv <= (UV) IV_MIN)
12408 sv_setiv(sv, -(IV)uv);
12415 /* terminate the string */
12417 nv = Atof(PL_tokenbuf);
12422 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12423 const char *const key = floatit ? "float" : "integer";
12424 const STRLEN keylen = floatit ? 5 : 7;
12425 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12426 key, keylen, sv, NULL, NULL, 0);
12430 /* if it starts with a v, it could be a v-string */
12433 sv = newSV(5); /* preallocate storage space */
12434 s = scan_vstring(s, PL_bufend, sv);
12438 /* make the op for the constant and return */
12441 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12443 lvalp->opval = NULL;
12449 S_scan_formline(pTHX_ register char *s)
12452 register char *eol;
12454 SV * const stuff = newSVpvs("");
12455 bool needargs = FALSE;
12456 bool eofmt = FALSE;
12458 char *tokenstart = s;
12459 SV* savewhite = NULL;
12461 if (PL_madskills) {
12462 savewhite = PL_thiswhite;
12467 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12469 while (!needargs) {
12472 #ifdef PERL_STRICT_CR
12473 while (SPACE_OR_TAB(*t))
12476 while (SPACE_OR_TAB(*t) || *t == '\r')
12479 if (*t == '\n' || t == PL_bufend) {
12484 if (PL_in_eval && !PL_rsfp) {
12485 eol = (char *) memchr(s,'\n',PL_bufend-s);
12490 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12492 for (t = s; t < eol; t++) {
12493 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12495 goto enough; /* ~~ must be first line in formline */
12497 if (*t == '@' || *t == '^')
12501 sv_catpvn(stuff, s, eol-s);
12502 #ifndef PERL_STRICT_CR
12503 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12504 char *end = SvPVX(stuff) + SvCUR(stuff);
12507 SvCUR_set(stuff, SvCUR(stuff) - 1);
12517 if (PL_madskills) {
12519 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12521 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12524 s = filter_gets(PL_linestr, PL_rsfp, 0);
12526 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12528 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12530 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12531 PL_last_lop = PL_last_uni = NULL;
12540 if (SvCUR(stuff)) {
12543 PL_lex_state = LEX_NORMAL;
12544 start_force(PL_curforce);
12545 NEXTVAL_NEXTTOKE.ival = 0;
12549 PL_lex_state = LEX_FORMLINE;
12551 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12553 else if (PL_encoding)
12554 sv_recode_to_utf8(stuff, PL_encoding);
12556 start_force(PL_curforce);
12557 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12559 start_force(PL_curforce);
12560 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12564 SvREFCNT_dec(stuff);
12566 PL_lex_formbrack = 0;
12570 if (PL_madskills) {
12572 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12574 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12575 PL_thiswhite = savewhite;
12582 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12585 const I32 oldsavestack_ix = PL_savestack_ix;
12586 CV* const outsidecv = PL_compcv;
12589 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12591 SAVEI32(PL_subline);
12592 save_item(PL_subname);
12593 SAVESPTR(PL_compcv);
12595 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12596 CvFLAGS(PL_compcv) |= flags;
12598 PL_subline = CopLINE(PL_curcop);
12599 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12600 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12601 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12603 return oldsavestack_ix;
12607 #pragma segment Perl_yylex
12610 S_yywarn(pTHX_ const char *const s)
12614 PERL_ARGS_ASSERT_YYWARN;
12616 PL_in_eval |= EVAL_WARNONLY;
12618 PL_in_eval &= ~EVAL_WARNONLY;
12623 Perl_yyerror(pTHX_ const char *const s)
12626 const char *where = NULL;
12627 const char *context = NULL;
12630 int yychar = PL_parser->yychar;
12632 PERL_ARGS_ASSERT_YYERROR;
12634 if (!yychar || (yychar == ';' && !PL_rsfp))
12636 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12637 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12638 PL_oldbufptr != PL_bufptr) {
12641 The code below is removed for NetWare because it abends/crashes on NetWare
12642 when the script has error such as not having the closing quotes like:
12643 if ($var eq "value)
12644 Checking of white spaces is anyway done in NetWare code.
12647 while (isSPACE(*PL_oldoldbufptr))
12650 context = PL_oldoldbufptr;
12651 contlen = PL_bufptr - PL_oldoldbufptr;
12653 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12654 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12657 The code below is removed for NetWare because it abends/crashes on NetWare
12658 when the script has error such as not having the closing quotes like:
12659 if ($var eq "value)
12660 Checking of white spaces is anyway done in NetWare code.
12663 while (isSPACE(*PL_oldbufptr))
12666 context = PL_oldbufptr;
12667 contlen = PL_bufptr - PL_oldbufptr;
12669 else if (yychar > 255)
12670 where = "next token ???";
12671 else if (yychar == -2) { /* YYEMPTY */
12672 if (PL_lex_state == LEX_NORMAL ||
12673 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12674 where = "at end of line";
12675 else if (PL_lex_inpat)
12676 where = "within pattern";
12678 where = "within string";
12681 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12683 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12684 else if (isPRINT_LC(yychar)) {
12685 const char string = yychar;
12686 sv_catpvn(where_sv, &string, 1);
12689 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12690 where = SvPVX_const(where_sv);
12692 msg = sv_2mortal(newSVpv(s, 0));
12693 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12694 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12696 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12698 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12699 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12700 Perl_sv_catpvf(aTHX_ msg,
12701 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12702 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12705 if (PL_in_eval & EVAL_WARNONLY) {
12706 if (ckWARN_d(WARN_SYNTAX))
12707 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12711 if (PL_error_count >= 10) {
12712 if (PL_in_eval && SvCUR(ERRSV))
12713 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12714 SVfARG(ERRSV), OutCopFILE(PL_curcop));
12716 Perl_croak(aTHX_ "%s has too many errors.\n",
12717 OutCopFILE(PL_curcop));
12720 PL_in_my_stash = NULL;
12724 #pragma segment Main
12728 S_swallow_bom(pTHX_ U8 *s)
12731 const STRLEN slen = SvCUR(PL_linestr);
12733 PERL_ARGS_ASSERT_SWALLOW_BOM;
12737 if (s[1] == 0xFE) {
12738 /* UTF-16 little-endian? (or UTF32-LE?) */
12739 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12740 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12741 #ifndef PERL_NO_UTF16_FILTER
12742 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12745 if (PL_bufend > (char*)s) {
12749 filter_add(utf16rev_textfilter, NULL);
12750 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12751 utf16_to_utf8_reversed(s, news,
12752 PL_bufend - (char*)s - 1,
12754 sv_setpvn(PL_linestr, (const char*)news, newlen);
12756 s = (U8*)SvPVX(PL_linestr);
12757 Copy(news, s, newlen, U8);
12761 SvUTF8_on(PL_linestr);
12762 s = (U8*)SvPVX(PL_linestr);
12764 /* FIXME - is this a general bug fix? */
12767 PL_bufend = SvPVX(PL_linestr) + newlen;
12770 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12775 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12776 #ifndef PERL_NO_UTF16_FILTER
12777 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12780 if (PL_bufend > (char *)s) {
12784 filter_add(utf16_textfilter, NULL);
12785 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12786 utf16_to_utf8(s, news,
12787 PL_bufend - (char*)s,
12789 sv_setpvn(PL_linestr, (const char*)news, newlen);
12791 SvUTF8_on(PL_linestr);
12792 s = (U8*)SvPVX(PL_linestr);
12793 PL_bufend = SvPVX(PL_linestr) + newlen;
12796 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12801 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12802 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12803 s += 3; /* UTF-8 */
12809 if (s[2] == 0xFE && s[3] == 0xFF) {
12810 /* UTF-32 big-endian */
12811 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12814 else if (s[2] == 0 && s[3] != 0) {
12817 * are a good indicator of UTF-16BE. */
12818 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12824 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12825 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12826 s += 4; /* UTF-8 */
12832 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12835 * are a good indicator of UTF-16LE. */
12836 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12844 #ifndef PERL_NO_UTF16_FILTER
12846 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12849 const STRLEN old = SvCUR(sv);
12850 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12851 DEBUG_P(PerlIO_printf(Perl_debug_log,
12852 "utf16_textfilter(%p): %d %d (%d)\n",
12853 FPTR2DPTR(void *, utf16_textfilter),
12854 idx, maxlen, (int) count));
12858 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12859 Copy(SvPVX_const(sv), tmps, old, char);
12860 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12861 SvCUR(sv) - old, &newlen);
12862 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12864 DEBUG_P({sv_dump(sv);});
12869 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12872 const STRLEN old = SvCUR(sv);
12873 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12874 DEBUG_P(PerlIO_printf(Perl_debug_log,
12875 "utf16rev_textfilter(%p): %d %d (%d)\n",
12876 FPTR2DPTR(void *, utf16rev_textfilter),
12877 idx, maxlen, (int) count));
12881 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12882 Copy(SvPVX_const(sv), tmps, old, char);
12883 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12884 SvCUR(sv) - old, &newlen);
12885 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12887 DEBUG_P({ sv_dump(sv); });
12893 Returns a pointer to the next character after the parsed
12894 vstring, as well as updating the passed in sv.
12896 Function must be called like
12899 s = scan_vstring(s,e,sv);
12901 where s and e are the start and end of the string.
12902 The sv should already be large enough to store the vstring
12903 passed in, for performance reasons.
12908 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12911 const char *pos = s;
12912 const char *start = s;
12914 PERL_ARGS_ASSERT_SCAN_VSTRING;
12916 if (*pos == 'v') pos++; /* get past 'v' */
12917 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12919 if ( *pos != '.') {
12920 /* this may not be a v-string if followed by => */
12921 const char *next = pos;
12922 while (next < e && isSPACE(*next))
12924 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12925 /* return string not v-string */
12926 sv_setpvn(sv,(char *)s,pos-s);
12927 return (char *)pos;
12931 if (!isALPHA(*pos)) {
12932 U8 tmpbuf[UTF8_MAXBYTES+1];
12935 s++; /* get past 'v' */
12940 /* this is atoi() that tolerates underscores */
12943 const char *end = pos;
12945 while (--end >= s) {
12947 const UV orev = rev;
12948 rev += (*end - '0') * mult;
12950 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12951 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12952 "Integer overflow in decimal number");
12956 if (rev > 0x7FFFFFFF)
12957 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12959 /* Append native character for the rev point */
12960 tmpend = uvchr_to_utf8(tmpbuf, rev);
12961 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12962 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12964 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12970 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12974 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12982 * c-indentation-style: bsd
12983 * c-basic-offset: 4
12984 * indent-tabs-mode: t
12987 * ex: set ts=8 sts=4 sw=4 noet: