3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 /* XXX these probably need to be made into PL vars */
40 static I32 realtokenstart;
41 static I32 faketokens = 0;
42 static MADPROP *thismad;
51 static I32 curforce = -1;
53 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
55 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
57 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
60 #define XFAKEBRACK 128
63 #ifdef USE_UTF8_SCRIPTS
64 # define UTF (!IN_BYTES)
66 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
69 /* In variables named $^X, these are the legal values for X.
70 * 1999-02-27 mjd-perl-patch@plover.com */
71 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
73 /* On MacOS, respect nonbreaking spaces */
74 #ifdef MACOS_TRADITIONAL
75 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
77 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
80 /* LEX_* are values for PL_lex_state, the state of the lexer.
81 * They are arranged oddly so that the guard on the switch statement
82 * can get by with a single comparison (if the compiler is smart enough).
85 /* #define LEX_NOTPARSING 11 is done in perl.h. */
87 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
88 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
89 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
90 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
91 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
93 /* at end of code, eg "$x" followed by: */
94 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
95 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
97 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
98 string or after \E, $foo, etc */
99 #define LEX_INTERPCONST 2 /* NOT USED */
100 #define LEX_FORMLINE 1 /* expecting a format line */
101 #define LEX_KNOWNEXT 0 /* next token known; just return it */
105 static const char* const lex_state_names[] = {
124 #include "keywords.h"
126 /* CLINE is a macro that ensures PL_copline has a sane value */
131 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
133 #if 0 && defined(PERL_MAD)
134 # define SKIPSPACE0(s) skipspace0(s)
135 # define SKIPSPACE1(s) skipspace1(s)
136 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
137 # define PEEKSPACE(s) skipspace2(s,0)
139 # define SKIPSPACE0(s) skipspace(s)
140 # define SKIPSPACE1(s) skipspace(s)
141 # define SKIPSPACE2(s,tsv) skipspace(s)
142 # define PEEKSPACE(s) skipspace(s)
146 * Convenience functions to return different tokens and prime the
147 * lexer for the next token. They all take an argument.
149 * TOKEN : generic token (used for '(', DOLSHARP, etc)
150 * OPERATOR : generic operator
151 * AOPERATOR : assignment operator
152 * PREBLOCK : beginning the block after an if, while, foreach, ...
153 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
154 * PREREF : *EXPR where EXPR is not a simple identifier
155 * TERM : expression term
156 * LOOPX : loop exiting command (goto, last, dump, etc)
157 * FTST : file test operator
158 * FUN0 : zero-argument function
159 * FUN1 : not used, except for not, which isn't a UNIOP
160 * BOop : bitwise or or xor
162 * SHop : shift operator
163 * PWop : power operator
164 * PMop : pattern-matching operator
165 * Aop : addition-level operator
166 * Mop : multiplication-level operator
167 * Eop : equality-testing operator
168 * Rop : relational operator <= != gt
170 * Also see LOP and lop() below.
173 #ifdef DEBUGGING /* Serve -DT. */
174 # define REPORT(retval) tokereport((I32)retval)
176 # define REPORT(retval) (retval)
179 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
180 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
181 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
182 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
183 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
184 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
185 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
186 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
187 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
188 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
189 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
190 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
191 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
192 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
193 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
194 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
195 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
196 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
197 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
198 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
200 /* This bit of chicanery makes a unary function followed by
201 * a parenthesis into a function with one argument, highest precedence.
202 * The UNIDOR macro is for unary functions that can be followed by the //
203 * operator (such as C<shift // 0>).
205 #define UNI2(f,x) { \
209 PL_last_uni = PL_oldbufptr; \
210 PL_last_lop_op = f; \
212 return REPORT( (int)FUNC1 ); \
214 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
216 #define UNI(f) UNI2(f,XTERM)
217 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
219 #define UNIBRACK(f) { \
222 PL_last_uni = PL_oldbufptr; \
224 return REPORT( (int)FUNC1 ); \
226 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
229 /* grandfather return to old style */
230 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
234 /* how to interpret the yylval associated with the token */
238 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
244 static struct debug_tokens {
246 enum token_type type;
248 } const debug_tokens[] =
250 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
251 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
252 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
253 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
254 { ARROW, TOKENTYPE_NONE, "ARROW" },
255 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
256 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
257 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
258 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
259 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
260 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
261 { DO, TOKENTYPE_NONE, "DO" },
262 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
263 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
264 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
265 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
266 { ELSE, TOKENTYPE_NONE, "ELSE" },
267 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
268 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
269 { FOR, TOKENTYPE_IVAL, "FOR" },
270 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
271 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
272 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
273 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
274 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
275 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
276 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
277 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
278 { IF, TOKENTYPE_IVAL, "IF" },
279 { LABEL, TOKENTYPE_PVAL, "LABEL" },
280 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
281 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
282 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
283 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
284 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
285 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
286 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
287 { MY, TOKENTYPE_IVAL, "MY" },
288 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
289 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
290 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
291 { OROP, TOKENTYPE_IVAL, "OROP" },
292 { OROR, TOKENTYPE_NONE, "OROR" },
293 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
294 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
295 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
296 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
297 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
298 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
299 { PREINC, TOKENTYPE_NONE, "PREINC" },
300 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
301 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
302 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
303 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
304 { SUB, TOKENTYPE_NONE, "SUB" },
305 { THING, TOKENTYPE_OPVAL, "THING" },
306 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
307 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
308 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
309 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
310 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
311 { USE, TOKENTYPE_IVAL, "USE" },
312 { WHEN, TOKENTYPE_IVAL, "WHEN" },
313 { WHILE, TOKENTYPE_IVAL, "WHILE" },
314 { WORD, TOKENTYPE_OPVAL, "WORD" },
315 { 0, TOKENTYPE_NONE, 0 }
318 /* dump the returned token in rv, plus any optional arg in yylval */
321 S_tokereport(pTHX_ I32 rv)
325 const char *name = NULL;
326 enum token_type type = TOKENTYPE_NONE;
327 const struct debug_tokens *p;
328 SV* const report = newSVpvs("<== ");
330 for (p = debug_tokens; p->token; p++) {
331 if (p->token == (int)rv) {
338 Perl_sv_catpv(aTHX_ report, name);
339 else if ((char)rv > ' ' && (char)rv < '~')
340 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
342 sv_catpvs(report, "EOF");
344 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
347 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
350 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
352 case TOKENTYPE_OPNUM:
353 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
354 PL_op_name[yylval.ival]);
357 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
359 case TOKENTYPE_OPVAL:
361 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
362 PL_op_name[yylval.opval->op_type]);
363 if (yylval.opval->op_type == OP_CONST) {
364 Perl_sv_catpvf(aTHX_ report, " %s",
365 SvPEEK(cSVOPx_sv(yylval.opval)));
370 sv_catpvs(report, "(opval=null)");
373 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
379 /* print the buffer with suitable escapes */
382 S_printbuf(pTHX_ const char* fmt, const char* s)
384 SV* const tmp = newSVpvs("");
385 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
394 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
395 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
399 S_ao(pTHX_ int toketype)
402 if (*PL_bufptr == '=') {
404 if (toketype == ANDAND)
405 yylval.ival = OP_ANDASSIGN;
406 else if (toketype == OROR)
407 yylval.ival = OP_ORASSIGN;
408 else if (toketype == DORDOR)
409 yylval.ival = OP_DORASSIGN;
417 * When Perl expects an operator and finds something else, no_op
418 * prints the warning. It always prints "<something> found where
419 * operator expected. It prints "Missing semicolon on previous line?"
420 * if the surprise occurs at the start of the line. "do you need to
421 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
422 * where the compiler doesn't know if foo is a method call or a function.
423 * It prints "Missing operator before end of line" if there's nothing
424 * after the missing operator, or "... before <...>" if there is something
425 * after the missing operator.
429 S_no_op(pTHX_ const char *what, char *s)
432 char * const oldbp = PL_bufptr;
433 const bool is_first = (PL_oldbufptr == PL_linestart);
439 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
440 if (ckWARN_d(WARN_SYNTAX)) {
442 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
443 "\t(Missing semicolon on previous line?)\n");
444 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
446 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
447 if (t < PL_bufptr && isSPACE(*t))
448 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
449 "\t(Do you need to predeclare %.*s?)\n",
450 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
454 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
455 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
463 * Complain about missing quote/regexp/heredoc terminator.
464 * If it's called with (char *)NULL then it cauterizes the line buffer.
465 * If we're in a delimited string and the delimiter is a control
466 * character, it's reformatted into a two-char sequence like ^C.
471 S_missingterm(pTHX_ char *s)
477 char * const nl = strrchr(s,'\n');
483 iscntrl(PL_multi_close)
485 PL_multi_close < 32 || PL_multi_close == 127
489 tmpbuf[1] = (char)toCTRL(PL_multi_close);
494 *tmpbuf = (char)PL_multi_close;
498 q = strchr(s,'"') ? '\'' : '"';
499 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
502 #define FEATURE_IS_ENABLED(name) \
503 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
504 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
506 * S_feature_is_enabled
507 * Check whether the named feature is enabled.
510 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
513 HV * const hinthv = GvHV(PL_hintgv);
514 char he_name[32] = "feature_";
515 (void) strncpy(&he_name[8], name, 24);
517 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
525 Perl_deprecate(pTHX_ const char *s)
527 if (ckWARN(WARN_DEPRECATED))
528 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
532 Perl_deprecate_old(pTHX_ const char *s)
534 /* This function should NOT be called for any new deprecated warnings */
535 /* Use Perl_deprecate instead */
537 /* It is here to maintain backward compatibility with the pre-5.8 */
538 /* warnings category hierarchy. The "deprecated" category used to */
539 /* live under the "syntax" category. It is now a top-level category */
540 /* in its own right. */
542 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
543 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
544 "Use of %s is deprecated", s);
548 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
549 * utf16-to-utf8-reversed.
552 #ifdef PERL_CR_FILTER
556 register const char *s = SvPVX_const(sv);
557 register const char * const e = s + SvCUR(sv);
558 /* outer loop optimized to do nothing if there are no CR-LFs */
560 if (*s++ == '\r' && *s == '\n') {
561 /* hit a CR-LF, need to copy the rest */
562 register char *d = s - 1;
565 if (*s == '\r' && s[1] == '\n')
576 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
578 const I32 count = FILTER_READ(idx+1, sv, maxlen);
579 if (count > 0 && !maxlen)
587 * Initialize variables. Uses the Perl save_stack to save its state (for
588 * recursive calls to the parser).
592 Perl_lex_start(pTHX_ SV *line)
598 SAVEI32(PL_lex_dojoin);
599 SAVEI32(PL_lex_brackets);
600 SAVEI32(PL_lex_casemods);
601 SAVEI32(PL_lex_starts);
602 SAVEI32(PL_lex_state);
603 SAVEVPTR(PL_lex_inpat);
604 SAVEI32(PL_lex_inwhat);
605 if (PL_lex_state == LEX_KNOWNEXT) {
606 I32 toke = PL_nexttoke;
607 while (--toke >= 0) {
608 SAVEI32(PL_nexttype[toke]);
609 SAVEVPTR(PL_nextval[toke]);
611 SAVEI32(PL_nexttoke);
613 SAVECOPLINE(PL_curcop);
616 SAVEPPTR(PL_oldbufptr);
617 SAVEPPTR(PL_oldoldbufptr);
618 SAVEPPTR(PL_last_lop);
619 SAVEPPTR(PL_last_uni);
620 SAVEPPTR(PL_linestart);
621 SAVESPTR(PL_linestr);
622 SAVEGENERICPV(PL_lex_brackstack);
623 SAVEGENERICPV(PL_lex_casestack);
624 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
625 SAVESPTR(PL_lex_stuff);
626 SAVEI32(PL_lex_defer);
627 SAVEI32(PL_sublex_info.sub_inwhat);
628 SAVESPTR(PL_lex_repl);
630 SAVEINT(PL_lex_expect);
632 PL_lex_state = LEX_NORMAL;
636 Newx(PL_lex_brackstack, 120, char);
637 Newx(PL_lex_casestack, 12, char);
639 *PL_lex_casestack = '\0';
647 PL_sublex_info.sub_inwhat = 0;
649 if (SvREADONLY(PL_linestr))
650 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
651 s = SvPV_const(PL_linestr, len);
652 if (!len || s[len-1] != ';') {
653 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
654 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
655 sv_catpvs(PL_linestr, "\n;");
657 SvTEMP_off(PL_linestr);
658 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
659 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
660 PL_last_lop = PL_last_uni = NULL;
666 * Finalizer for lexing operations. Must be called when the parser is
667 * done with the lexer.
674 PL_doextract = FALSE;
679 * This subroutine has nothing to do with tilting, whether at windmills
680 * or pinball tables. Its name is short for "increment line". It
681 * increments the current line number in CopLINE(PL_curcop) and checks
682 * to see whether the line starts with a comment of the form
683 * # line 500 "foo.pm"
684 * If so, it sets the current line number and file to the values in the comment.
688 S_incline(pTHX_ char *s)
696 CopLINE_inc(PL_curcop);
699 while (SPACE_OR_TAB(*s)) s++;
700 if (strnEQ(s, "line", 4))
704 if (SPACE_OR_TAB(*s))
708 while (SPACE_OR_TAB(*s)) s++;
714 while (SPACE_OR_TAB(*s))
716 if (*s == '"' && (t = strchr(s+1, '"'))) {
721 for (t = s; !isSPACE(*t); t++) ;
724 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
726 if (*e != '\n' && *e != '\0')
727 return; /* false alarm */
733 const char * const cf = CopFILE(PL_curcop);
734 STRLEN tmplen = cf ? strlen(cf) : 0;
735 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
736 /* must copy *{"::_<(eval N)[oldfilename:L]"}
737 * to *{"::_<newfilename"} */
738 char smallbuf[256], smallbuf2[256];
739 char *tmpbuf, *tmpbuf2;
741 STRLEN tmplen2 = strlen(s);
742 if (tmplen + 3 < sizeof smallbuf)
745 Newx(tmpbuf, tmplen + 3, char);
746 if (tmplen2 + 3 < sizeof smallbuf2)
749 Newx(tmpbuf2, tmplen2 + 3, char);
750 tmpbuf[0] = tmpbuf2[0] = '_';
751 tmpbuf[1] = tmpbuf2[1] = '<';
752 memcpy(tmpbuf + 2, cf, ++tmplen);
753 memcpy(tmpbuf2 + 2, s, ++tmplen2);
755 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
757 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
759 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
760 /* adjust ${"::_<newfilename"} to store the new file name */
761 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
762 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
763 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
765 if (tmpbuf != smallbuf) Safefree(tmpbuf);
766 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
769 CopFILE_free(PL_curcop);
770 CopFILE_set(PL_curcop, s);
773 CopLINE_set(PL_curcop, atoi(n)-1);
777 /* skip space before thistoken */
780 S_skipspace0(pTHX_ register char *s)
787 thiswhite = newSVpvn("",0);
788 sv_catsv(thiswhite, skipwhite);
792 realtokenstart = s - SvPVX(PL_linestr);
796 /* skip space after thistoken */
799 S_skipspace1(pTHX_ register char *s)
802 I32 startoff = start - SvPVX(PL_linestr);
807 start = SvPVX(PL_linestr) + startoff;
808 if (!thistoken && realtokenstart >= 0) {
809 char *tstart = SvPVX(PL_linestr) + realtokenstart;
810 thistoken = newSVpvn(tstart, start - tstart);
815 nextwhite = newSVpvn("",0);
816 sv_catsv(nextwhite, skipwhite);
824 S_skipspace2(pTHX_ register char *s, SV **svp)
827 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
828 I32 startoff = start - SvPVX(PL_linestr);
830 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
831 if (!PL_madskills || !svp)
833 start = SvPVX(PL_linestr) + startoff;
834 if (!thistoken && realtokenstart >= 0) {
835 char *tstart = SvPVX(PL_linestr) + realtokenstart;
836 thistoken = newSVpvn(tstart, start - tstart);
841 *svp = newSVpvn("",0);
842 sv_setsv(*svp, skipwhite);
853 * Called to gobble the appropriate amount and type of whitespace.
854 * Skips comments as well.
858 S_skipspace(pTHX_ register char *s)
861 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
862 while (s < PL_bufend && SPACE_OR_TAB(*s))
868 SSize_t oldprevlen, oldoldprevlen;
869 SSize_t oldloplen = 0, oldunilen = 0;
870 while (s < PL_bufend && isSPACE(*s)) {
871 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
876 if (s < PL_bufend && *s == '#') {
877 while (s < PL_bufend && *s != '\n')
881 if (PL_in_eval && !PL_rsfp) {
888 /* only continue to recharge the buffer if we're at the end
889 * of the buffer, we're not reading from a source filter, and
890 * we're in normal lexing mode
892 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
893 PL_lex_state == LEX_FORMLINE)
896 /* try to recharge the buffer */
897 if ((s = filter_gets(PL_linestr, PL_rsfp,
898 (prevlen = SvCUR(PL_linestr)))) == NULL)
900 /* end of file. Add on the -p or -n magic */
903 ";}continue{print or die qq(-p destination: $!\\n);}");
904 PL_minus_n = PL_minus_p = 0;
906 else if (PL_minus_n) {
907 sv_setpvn(PL_linestr, ";}", 2);
911 sv_setpvn(PL_linestr,";", 1);
913 /* reset variables for next time we lex */
914 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
916 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
917 PL_last_lop = PL_last_uni = NULL;
919 /* Close the filehandle. Could be from -P preprocessor,
920 * STDIN, or a regular file. If we were reading code from
921 * STDIN (because the commandline held no -e or filename)
922 * then we don't close it, we reset it so the code can
923 * read from STDIN too.
926 if (PL_preprocess && !PL_in_eval)
927 (void)PerlProc_pclose(PL_rsfp);
928 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
929 PerlIO_clearerr(PL_rsfp);
931 (void)PerlIO_close(PL_rsfp);
936 /* not at end of file, so we only read another line */
937 /* make corresponding updates to old pointers, for yyerror() */
938 oldprevlen = PL_oldbufptr - PL_bufend;
939 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
941 oldunilen = PL_last_uni - PL_bufend;
943 oldloplen = PL_last_lop - PL_bufend;
944 PL_linestart = PL_bufptr = s + prevlen;
945 PL_bufend = s + SvCUR(PL_linestr);
947 PL_oldbufptr = s + oldprevlen;
948 PL_oldoldbufptr = s + oldoldprevlen;
950 PL_last_uni = s + oldunilen;
952 PL_last_lop = s + oldloplen;
955 /* debugger active and we're not compiling the debugger code,
956 * so store the line into the debugger's array of lines
958 if (PERLDB_LINE && PL_curstash != PL_debstash) {
959 SV * const sv = newSV(0);
961 sv_upgrade(sv, SVt_PVMG);
962 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
965 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
972 * Check the unary operators to ensure there's no ambiguity in how they're
973 * used. An ambiguous piece of code would be:
975 * This doesn't mean rand() + 5. Because rand() is a unary operator,
976 * the +5 is its argument.
986 if (PL_oldoldbufptr != PL_last_uni)
988 while (isSPACE(*PL_last_uni))
990 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
991 if ((t = strchr(s, '(')) && t < PL_bufptr)
994 /* XXX Things like this are just so nasty. We shouldn't be modifying
995 source code, even if we realquick set it back. */
996 if (ckWARN_d(WARN_AMBIGUOUS)){
999 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1000 "Warning: Use of \"%s\" without parentheses is ambiguous",
1007 * LOP : macro to build a list operator. Its behaviour has been replaced
1008 * with a subroutine, S_lop() for which LOP is just another name.
1011 #define LOP(f,x) return lop(f,x,s)
1015 * Build a list operator (or something that might be one). The rules:
1016 * - if we have a next token, then it's a list operator [why?]
1017 * - if the next thing is an opening paren, then it's a function
1018 * - else it's a list operator
1022 S_lop(pTHX_ I32 f, int x, char *s)
1029 PL_last_lop = PL_oldbufptr;
1030 PL_last_lop_op = (OPCODE)f;
1032 return REPORT(LSTOP);
1034 return REPORT(FUNC);
1037 return REPORT(FUNC);
1039 return REPORT(LSTOP);
1044 * When the lexer realizes it knows the next token (for instance,
1045 * it is reordering tokens for the parser) then it can call S_force_next
1046 * to know what token to return the next time the lexer is called. Caller
1047 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
1048 * handles the token correctly.
1052 S_force_next(pTHX_ I32 type)
1055 PL_nexttype[PL_nexttoke] = type;
1057 if (PL_lex_state != LEX_KNOWNEXT) {
1058 PL_lex_defer = PL_lex_state;
1059 PL_lex_expect = PL_expect;
1060 PL_lex_state = LEX_KNOWNEXT;
1065 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1068 SV * const sv = newSVpvn(start,len);
1069 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1076 * When the lexer knows the next thing is a word (for instance, it has
1077 * just seen -> and it knows that the next char is a word char, then
1078 * it calls S_force_word to stick the next word into the PL_next lookahead.
1081 * char *start : buffer position (must be within PL_linestr)
1082 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1083 * int check_keyword : if true, Perl checks to make sure the word isn't
1084 * a keyword (do this if the word is a label, e.g. goto FOO)
1085 * int allow_pack : if true, : characters will also be allowed (require,
1086 * use, etc. do this)
1087 * int allow_initial_tick : used by the "sub" lexer only.
1091 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1097 start = SKIPSPACE1(start);
1099 if (isIDFIRST_lazy_if(s,UTF) ||
1100 (allow_pack && *s == ':') ||
1101 (allow_initial_tick && *s == '\'') )
1103 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1104 if (check_keyword && keyword(PL_tokenbuf, len))
1106 if (token == METHOD) {
1111 PL_expect = XOPERATOR;
1114 NEXTVAL_NEXTTOKE.opval
1115 = (OP*)newSVOP(OP_CONST,0,
1116 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1117 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1125 * Called when the lexer wants $foo *foo &foo etc, but the program
1126 * text only contains the "foo" portion. The first argument is a pointer
1127 * to the "foo", and the second argument is the type symbol to prefix.
1128 * Forces the next token to be a "WORD".
1129 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1133 S_force_ident(pTHX_ register const char *s, int kind)
1137 const STRLEN len = strlen(s);
1138 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1139 NEXTVAL_NEXTTOKE.opval = o;
1142 o->op_private = OPpCONST_ENTERED;
1143 /* XXX see note in pp_entereval() for why we forgo typo
1144 warnings if the symbol must be introduced in an eval.
1146 gv_fetchpvn_flags(s, len,
1147 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1149 kind == '$' ? SVt_PV :
1150 kind == '@' ? SVt_PVAV :
1151 kind == '%' ? SVt_PVHV :
1159 Perl_str_to_version(pTHX_ SV *sv)
1164 const char *start = SvPV_const(sv,len);
1165 const char * const end = start + len;
1166 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1167 while (start < end) {
1171 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1176 retval += ((NV)n)/nshift;
1185 * Forces the next token to be a version number.
1186 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1187 * and if "guessing" is TRUE, then no new token is created (and the caller
1188 * must use an alternative parsing method).
1192 S_force_version(pTHX_ char *s, int guessing)
1204 while (isDIGIT(*d) || *d == '_' || *d == '.')
1206 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1208 s = scan_num(s, &yylval);
1209 version = yylval.opval;
1210 ver = cSVOPx(version)->op_sv;
1211 if (SvPOK(ver) && !SvNIOK(ver)) {
1212 SvUPGRADE(ver, SVt_PVNV);
1213 SvNV_set(ver, str_to_version(ver));
1214 SvNOK_on(ver); /* hint that it is a version */
1221 /* NOTE: The parser sees the package name and the VERSION swapped */
1222 NEXTVAL_NEXTTOKE.opval = version;
1230 * Tokenize a quoted string passed in as an SV. It finds the next
1231 * chunk, up to end of string or a backslash. It may make a new
1232 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1237 S_tokeq(pTHX_ SV *sv)
1241 register char *send;
1249 s = SvPV_force(sv, len);
1250 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1253 while (s < send && *s != '\\')
1258 if ( PL_hints & HINT_NEW_STRING ) {
1259 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1265 if (s + 1 < send && (s[1] == '\\'))
1266 s++; /* all that, just for this */
1271 SvCUR_set(sv, d - SvPVX_const(sv));
1273 if ( PL_hints & HINT_NEW_STRING )
1274 return new_constant(NULL, 0, "q", sv, pv, "q");
1279 * Now come three functions related to double-quote context,
1280 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1281 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1282 * interact with PL_lex_state, and create fake ( ... ) argument lists
1283 * to handle functions and concatenation.
1284 * They assume that whoever calls them will be setting up a fake
1285 * join call, because each subthing puts a ',' after it. This lets
1288 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1290 * (I'm not sure whether the spurious commas at the end of lcfirst's
1291 * arguments and join's arguments are created or not).
1296 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1298 * Pattern matching will set PL_lex_op to the pattern-matching op to
1299 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1301 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1303 * Everything else becomes a FUNC.
1305 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1306 * had an OP_CONST or OP_READLINE). This just sets us up for a
1307 * call to S_sublex_push().
1311 S_sublex_start(pTHX)
1314 register const I32 op_type = yylval.ival;
1316 if (op_type == OP_NULL) {
1317 yylval.opval = PL_lex_op;
1321 if (op_type == OP_CONST || op_type == OP_READLINE) {
1322 SV *sv = tokeq(PL_lex_stuff);
1324 if (SvTYPE(sv) == SVt_PVIV) {
1325 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1327 const char * const p = SvPV_const(sv, len);
1328 SV * const nsv = newSVpvn(p, len);
1334 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1335 PL_lex_stuff = NULL;
1336 /* Allow <FH> // "foo" */
1337 if (op_type == OP_READLINE)
1338 PL_expect = XTERMORDORDOR;
1342 PL_sublex_info.super_state = PL_lex_state;
1343 PL_sublex_info.sub_inwhat = op_type;
1344 PL_sublex_info.sub_op = PL_lex_op;
1345 PL_lex_state = LEX_INTERPPUSH;
1349 yylval.opval = PL_lex_op;
1359 * Create a new scope to save the lexing state. The scope will be
1360 * ended in S_sublex_done. Returns a '(', starting the function arguments
1361 * to the uc, lc, etc. found before.
1362 * Sets PL_lex_state to LEX_INTERPCONCAT.
1371 PL_lex_state = PL_sublex_info.super_state;
1372 SAVEI32(PL_lex_dojoin);
1373 SAVEI32(PL_lex_brackets);
1374 SAVEI32(PL_lex_casemods);
1375 SAVEI32(PL_lex_starts);
1376 SAVEI32(PL_lex_state);
1377 SAVEVPTR(PL_lex_inpat);
1378 SAVEI32(PL_lex_inwhat);
1379 SAVECOPLINE(PL_curcop);
1380 SAVEPPTR(PL_bufptr);
1381 SAVEPPTR(PL_bufend);
1382 SAVEPPTR(PL_oldbufptr);
1383 SAVEPPTR(PL_oldoldbufptr);
1384 SAVEPPTR(PL_last_lop);
1385 SAVEPPTR(PL_last_uni);
1386 SAVEPPTR(PL_linestart);
1387 SAVESPTR(PL_linestr);
1388 SAVEGENERICPV(PL_lex_brackstack);
1389 SAVEGENERICPV(PL_lex_casestack);
1391 PL_linestr = PL_lex_stuff;
1392 PL_lex_stuff = NULL;
1394 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1395 = SvPVX(PL_linestr);
1396 PL_bufend += SvCUR(PL_linestr);
1397 PL_last_lop = PL_last_uni = NULL;
1398 SAVEFREESV(PL_linestr);
1400 PL_lex_dojoin = FALSE;
1401 PL_lex_brackets = 0;
1402 Newx(PL_lex_brackstack, 120, char);
1403 Newx(PL_lex_casestack, 12, char);
1404 PL_lex_casemods = 0;
1405 *PL_lex_casestack = '\0';
1407 PL_lex_state = LEX_INTERPCONCAT;
1408 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1410 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1411 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1412 PL_lex_inpat = PL_sublex_info.sub_op;
1414 PL_lex_inpat = NULL;
1421 * Restores lexer state after a S_sublex_push.
1428 if (!PL_lex_starts++) {
1429 SV * const sv = newSVpvs("");
1430 if (SvUTF8(PL_linestr))
1432 PL_expect = XOPERATOR;
1433 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1437 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1438 PL_lex_state = LEX_INTERPCASEMOD;
1442 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1443 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1444 PL_linestr = PL_lex_repl;
1446 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1447 PL_bufend += SvCUR(PL_linestr);
1448 PL_last_lop = PL_last_uni = NULL;
1449 SAVEFREESV(PL_linestr);
1450 PL_lex_dojoin = FALSE;
1451 PL_lex_brackets = 0;
1452 PL_lex_casemods = 0;
1453 *PL_lex_casestack = '\0';
1455 if (SvEVALED(PL_lex_repl)) {
1456 PL_lex_state = LEX_INTERPNORMAL;
1458 /* we don't clear PL_lex_repl here, so that we can check later
1459 whether this is an evalled subst; that means we rely on the
1460 logic to ensure sublex_done() is called again only via the
1461 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1464 PL_lex_state = LEX_INTERPCONCAT;
1471 PL_bufend = SvPVX(PL_linestr);
1472 PL_bufend += SvCUR(PL_linestr);
1473 PL_expect = XOPERATOR;
1474 PL_sublex_info.sub_inwhat = 0;
1482 Extracts a pattern, double-quoted string, or transliteration. This
1485 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1486 processing a pattern (PL_lex_inpat is true), a transliteration
1487 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1489 Returns a pointer to the character scanned up to. Iff this is
1490 advanced from the start pointer supplied (ie if anything was
1491 successfully parsed), will leave an OP for the substring scanned
1492 in yylval. Caller must intuit reason for not parsing further
1493 by looking at the next characters herself.
1497 double-quoted style: \r and \n
1498 regexp special ones: \D \s
1500 backrefs: \1 (deprecated in substitution replacements)
1501 case and quoting: \U \Q \E
1502 stops on @ and $, but not for $ as tail anchor
1504 In transliterations:
1505 characters are VERY literal, except for - not at the start or end
1506 of the string, which indicates a range. scan_const expands the
1507 range to the full set of intermediate characters.
1509 In double-quoted strings:
1511 double-quoted style: \r and \n
1513 backrefs: \1 (deprecated)
1514 case and quoting: \U \Q \E
1517 scan_const does *not* construct ops to handle interpolated strings.
1518 It stops processing as soon as it finds an embedded $ or @ variable
1519 and leaves it to the caller to work out what's going on.
1521 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1523 $ in pattern could be $foo or could be tail anchor. Assumption:
1524 it's a tail anchor if $ is the last thing in the string, or if it's
1525 followed by one of ")| \n\t"
1527 \1 (backreferences) are turned into $1
1529 The structure of the code is
1530 while (there's a character to process) {
1531 handle transliteration ranges
1532 skip regexp comments
1533 skip # initiated comments in //x patterns
1534 check for embedded @foo
1535 check for embedded scalars
1537 leave intact backslashes from leave (below)
1538 deprecate \1 in strings and sub replacements
1539 handle string-changing backslashes \l \U \Q \E, etc.
1540 switch (what was escaped) {
1541 handle - in a transliteration (becomes a literal -)
1542 handle \132 octal characters
1543 handle 0x15 hex characters
1544 handle \cV (control V)
1545 handle printf backslashes (\f, \r, \n, etc)
1547 } (end if backslash)
1548 } (end while character to read)
1553 S_scan_const(pTHX_ char *start)
1556 register char *send = PL_bufend; /* end of the constant */
1557 SV *sv = newSV(send - start); /* sv for the constant */
1558 register char *s = start; /* start of the constant */
1559 register char *d = SvPVX(sv); /* destination for copies */
1560 bool dorange = FALSE; /* are we in a translit range? */
1561 bool didrange = FALSE; /* did we just finish a range? */
1562 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1563 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1566 UV literal_endpoint = 0;
1569 const char *leaveit = /* set of acceptably-backslashed characters */
1571 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1574 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1575 /* If we are doing a trans and we know we want UTF8 set expectation */
1576 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1577 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1581 while (s < send || dorange) {
1582 /* get transliterations out of the way (they're most literal) */
1583 if (PL_lex_inwhat == OP_TRANS) {
1584 /* expand a range A-Z to the full set of characters. AIE! */
1586 I32 i; /* current expanded character */
1587 I32 min; /* first character in range */
1588 I32 max; /* last character in range */
1591 char * const c = (char*)utf8_hop((U8*)d, -1);
1595 *c = (char)UTF_TO_NATIVE(0xff);
1596 /* mark the range as done, and continue */
1602 i = d - SvPVX_const(sv); /* remember current offset */
1603 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1604 d = SvPVX(sv) + i; /* refresh d after realloc */
1605 d -= 2; /* eat the first char and the - */
1607 min = (U8)*d; /* first char in range */
1608 max = (U8)d[1]; /* last char in range */
1612 "Invalid range \"%c-%c\" in transliteration operator",
1613 (char)min, (char)max);
1617 if (literal_endpoint == 2 &&
1618 ((isLOWER(min) && isLOWER(max)) ||
1619 (isUPPER(min) && isUPPER(max)))) {
1621 for (i = min; i <= max; i++)
1623 *d++ = NATIVE_TO_NEED(has_utf8,i);
1625 for (i = min; i <= max; i++)
1627 *d++ = NATIVE_TO_NEED(has_utf8,i);
1632 for (i = min; i <= max; i++)
1635 /* mark the range as done, and continue */
1639 literal_endpoint = 0;
1644 /* range begins (ignore - as first or last char) */
1645 else if (*s == '-' && s+1 < send && s != start) {
1647 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1650 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1660 literal_endpoint = 0;
1665 /* if we get here, we're not doing a transliteration */
1667 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1668 except for the last char, which will be done separately. */
1669 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1671 while (s+1 < send && *s != ')')
1672 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1674 else if (s[2] == '{' /* This should match regcomp.c */
1675 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1678 char *regparse = s + (s[2] == '{' ? 3 : 4);
1681 while (count && (c = *regparse)) {
1682 if (c == '\\' && regparse[1])
1690 if (*regparse != ')')
1691 regparse--; /* Leave one char for continuation. */
1692 while (s < regparse)
1693 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1697 /* likewise skip #-initiated comments in //x patterns */
1698 else if (*s == '#' && PL_lex_inpat &&
1699 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1700 while (s+1 < send && *s != '\n')
1701 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1704 /* check for embedded arrays
1705 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1707 else if (*s == '@' && s[1]
1708 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1711 /* check for embedded scalars. only stop if we're sure it's a
1714 else if (*s == '$') {
1715 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1717 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1718 break; /* in regexp, $ might be tail anchor */
1721 /* End of else if chain - OP_TRANS rejoin rest */
1724 if (*s == '\\' && s+1 < send) {
1727 /* some backslashes we leave behind */
1728 if (*leaveit && *s && strchr(leaveit, *s)) {
1729 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1730 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1734 /* deprecate \1 in strings and substitution replacements */
1735 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1736 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1738 if (ckWARN(WARN_SYNTAX))
1739 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1744 /* string-change backslash escapes */
1745 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1750 /* if we get here, it's either a quoted -, or a digit */
1753 /* quoted - in transliterations */
1755 if (PL_lex_inwhat == OP_TRANS) {
1765 Perl_warner(aTHX_ packWARN(WARN_MISC),
1766 "Unrecognized escape \\%c passed through",
1768 /* default action is to copy the quoted character */
1769 goto default_action;
1772 /* \132 indicates an octal constant */
1773 case '0': case '1': case '2': case '3':
1774 case '4': case '5': case '6': case '7':
1778 uv = grok_oct(s, &len, &flags, NULL);
1781 goto NUM_ESCAPE_INSERT;
1783 /* \x24 indicates a hex constant */
1787 char* const e = strchr(s, '}');
1788 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1789 PERL_SCAN_DISALLOW_PREFIX;
1794 yyerror("Missing right brace on \\x{}");
1798 uv = grok_hex(s, &len, &flags, NULL);
1804 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1805 uv = grok_hex(s, &len, &flags, NULL);
1811 /* Insert oct or hex escaped character.
1812 * There will always enough room in sv since such
1813 * escapes will be longer than any UTF-8 sequence
1814 * they can end up as. */
1816 /* We need to map to chars to ASCII before doing the tests
1819 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1820 if (!has_utf8 && uv > 255) {
1821 /* Might need to recode whatever we have
1822 * accumulated so far if it contains any
1825 * (Can't we keep track of that and avoid
1826 * this rescan? --jhi)
1830 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1831 if (!NATIVE_IS_INVARIANT(*c)) {
1836 const STRLEN offset = d - SvPVX_const(sv);
1838 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1842 while (src >= (const U8 *)SvPVX_const(sv)) {
1843 if (!NATIVE_IS_INVARIANT(*src)) {
1844 const U8 ch = NATIVE_TO_ASCII(*src);
1845 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1846 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1856 if (has_utf8 || uv > 255) {
1857 d = (char*)uvchr_to_utf8((U8*)d, uv);
1859 if (PL_lex_inwhat == OP_TRANS &&
1860 PL_sublex_info.sub_op) {
1861 PL_sublex_info.sub_op->op_private |=
1862 (PL_lex_repl ? OPpTRANS_FROM_UTF
1875 /* \N{LATIN SMALL LETTER A} is a named character */
1879 char* e = strchr(s, '}');
1885 yyerror("Missing right brace on \\N{}");
1889 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1891 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1892 PERL_SCAN_DISALLOW_PREFIX;
1895 uv = grok_hex(s, &len, &flags, NULL);
1897 goto NUM_ESCAPE_INSERT;
1899 res = newSVpvn(s + 1, e - s - 1);
1900 res = new_constant( NULL, 0, "charnames",
1901 res, NULL, "\\N{...}" );
1903 sv_utf8_upgrade(res);
1904 str = SvPV_const(res,len);
1905 #ifdef EBCDIC_NEVER_MIND
1906 /* charnames uses pack U and that has been
1907 * recently changed to do the below uni->native
1908 * mapping, so this would be redundant (and wrong,
1909 * the code point would be doubly converted).
1910 * But leave this in just in case the pack U change
1911 * gets revoked, but the semantics is still
1912 * desireable for charnames. --jhi */
1914 UV uv = utf8_to_uvchr((const U8*)str, 0);
1917 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1919 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1920 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1921 str = SvPV_const(res, len);
1925 if (!has_utf8 && SvUTF8(res)) {
1926 const char * const ostart = SvPVX_const(sv);
1927 SvCUR_set(sv, d - ostart);
1930 sv_utf8_upgrade(sv);
1931 /* this just broke our allocation above... */
1932 SvGROW(sv, (STRLEN)(send - start));
1933 d = SvPVX(sv) + SvCUR(sv);
1936 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1937 const char * const odest = SvPVX_const(sv);
1939 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1940 d = SvPVX(sv) + (d - odest);
1942 Copy(str, d, len, char);
1949 yyerror("Missing braces on \\N{}");
1952 /* \c is a control character */
1961 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1964 yyerror("Missing control char name in \\c");
1968 /* printf-style backslashes, formfeeds, newlines, etc */
1970 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1973 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1976 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1979 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1982 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1985 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1988 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1994 } /* end if (backslash) */
2001 /* If we started with encoded form, or already know we want it
2002 and then encode the next character */
2003 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2005 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2006 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2009 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2010 const STRLEN off = d - SvPVX_const(sv);
2011 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2013 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2017 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2019 } /* while loop to process each character */
2021 /* terminate the string and set up the sv */
2023 SvCUR_set(sv, d - SvPVX_const(sv));
2024 if (SvCUR(sv) >= SvLEN(sv))
2025 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2028 if (PL_encoding && !has_utf8) {
2029 sv_recode_to_utf8(sv, PL_encoding);
2035 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2036 PL_sublex_info.sub_op->op_private |=
2037 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2041 /* shrink the sv if we allocated more than we used */
2042 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2043 SvPV_shrink_to_cur(sv);
2046 /* return the substring (via yylval) only if we parsed anything */
2047 if (s > PL_bufptr) {
2048 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2049 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
2051 ( PL_lex_inwhat == OP_TRANS
2053 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2056 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2063 * Returns TRUE if there's more to the expression (e.g., a subscript),
2066 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2068 * ->[ and ->{ return TRUE
2069 * { and [ outside a pattern are always subscripts, so return TRUE
2070 * if we're outside a pattern and it's not { or [, then return FALSE
2071 * if we're in a pattern and the first char is a {
2072 * {4,5} (any digits around the comma) returns FALSE
2073 * if we're in a pattern and the first char is a [
2075 * [SOMETHING] has a funky algorithm to decide whether it's a
2076 * character class or not. It has to deal with things like
2077 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2078 * anything else returns TRUE
2081 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2084 S_intuit_more(pTHX_ register char *s)
2087 if (PL_lex_brackets)
2089 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2091 if (*s != '{' && *s != '[')
2096 /* In a pattern, so maybe we have {n,m}. */
2113 /* On the other hand, maybe we have a character class */
2116 if (*s == ']' || *s == '^')
2119 /* this is terrifying, and it works */
2120 int weight = 2; /* let's weigh the evidence */
2122 unsigned char un_char = 255, last_un_char;
2123 const char * const send = strchr(s,']');
2124 char tmpbuf[sizeof PL_tokenbuf * 4];
2126 if (!send) /* has to be an expression */
2129 Zero(seen,256,char);
2132 else if (isDIGIT(*s)) {
2134 if (isDIGIT(s[1]) && s[2] == ']')
2140 for (; s < send; s++) {
2141 last_un_char = un_char;
2142 un_char = (unsigned char)*s;
2147 weight -= seen[un_char] * 10;
2148 if (isALNUM_lazy_if(s+1,UTF)) {
2150 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2151 len = (int)strlen(tmpbuf);
2152 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2157 else if (*s == '$' && s[1] &&
2158 strchr("[#!%*<>()-=",s[1])) {
2159 if (/*{*/ strchr("])} =",s[2]))
2168 if (strchr("wds]",s[1]))
2170 else if (seen['\''] || seen['"'])
2172 else if (strchr("rnftbxcav",s[1]))
2174 else if (isDIGIT(s[1])) {
2176 while (s[1] && isDIGIT(s[1]))
2186 if (strchr("aA01! ",last_un_char))
2188 if (strchr("zZ79~",s[1]))
2190 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2191 weight -= 5; /* cope with negative subscript */
2194 if (!isALNUM(last_un_char)
2195 && !(last_un_char == '$' || last_un_char == '@'
2196 || last_un_char == '&')
2197 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2202 if (keyword(tmpbuf, d - tmpbuf))
2205 if (un_char == last_un_char + 1)
2207 weight -= seen[un_char];
2212 if (weight >= 0) /* probably a character class */
2222 * Does all the checking to disambiguate
2224 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2225 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2227 * First argument is the stuff after the first token, e.g. "bar".
2229 * Not a method if bar is a filehandle.
2230 * Not a method if foo is a subroutine prototyped to take a filehandle.
2231 * Not a method if it's really "Foo $bar"
2232 * Method if it's "foo $bar"
2233 * Not a method if it's really "print foo $bar"
2234 * Method if it's really "foo package::" (interpreted as package->foo)
2235 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2236 * Not a method if bar is a filehandle or package, but is quoted with
2241 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2244 char *s = start + (*start == '$');
2245 char tmpbuf[sizeof PL_tokenbuf];
2250 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2254 const char *proto = SvPVX_const(cv);
2265 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2266 /* start is the beginning of the possible filehandle/object,
2267 * and s is the end of it
2268 * tmpbuf is a copy of it
2271 if (*start == '$') {
2272 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2277 return *s == '(' ? FUNCMETH : METHOD;
2279 if (!keyword(tmpbuf, len)) {
2280 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2285 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2286 if (indirgv && GvCVu(indirgv))
2288 /* filehandle or package name makes it a method */
2289 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2291 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2292 return 0; /* no assumptions -- "=>" quotes bearword */
2294 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2295 newSVpvn(tmpbuf,len));
2296 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2300 return *s == '(' ? FUNCMETH : METHOD;
2308 * Return a string of Perl code to load the debugger. If PERL5DB
2309 * is set, it will return the contents of that, otherwise a
2310 * compile-time require of perl5db.pl.
2318 const char * const pdb = PerlEnv_getenv("PERL5DB");
2322 SETERRNO(0,SS_NORMAL);
2323 return "BEGIN { require 'perl5db.pl' }";
2329 /* Encoded script support. filter_add() effectively inserts a
2330 * 'pre-processing' function into the current source input stream.
2331 * Note that the filter function only applies to the current source file
2332 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2334 * The datasv parameter (which may be NULL) can be used to pass
2335 * private data to this instance of the filter. The filter function
2336 * can recover the SV using the FILTER_DATA macro and use it to
2337 * store private buffers and state information.
2339 * The supplied datasv parameter is upgraded to a PVIO type
2340 * and the IoDIRP/IoANY field is used to store the function pointer,
2341 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2342 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2343 * private use must be set using malloc'd pointers.
2347 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2353 if (!PL_rsfp_filters)
2354 PL_rsfp_filters = newAV();
2357 SvUPGRADE(datasv, SVt_PVIO);
2358 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2359 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2360 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2361 IoANY(datasv), SvPV_nolen(datasv)));
2362 av_unshift(PL_rsfp_filters, 1);
2363 av_store(PL_rsfp_filters, 0, datasv) ;
2368 /* Delete most recently added instance of this filter function. */
2370 Perl_filter_del(pTHX_ filter_t funcp)
2376 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2378 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2380 /* if filter is on top of stack (usual case) just pop it off */
2381 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2382 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2383 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2384 IoANY(datasv) = (void *)NULL;
2385 sv_free(av_pop(PL_rsfp_filters));
2389 /* we need to search for the correct entry and clear it */
2390 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2394 /* Invoke the idxth filter function for the current rsfp. */
2395 /* maxlen 0 = read one text line */
2397 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2403 if (!PL_rsfp_filters)
2405 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2406 /* Provide a default input filter to make life easy. */
2407 /* Note that we append to the line. This is handy. */
2408 DEBUG_P(PerlIO_printf(Perl_debug_log,
2409 "filter_read %d: from rsfp\n", idx));
2413 const int old_len = SvCUR(buf_sv);
2415 /* ensure buf_sv is large enough */
2416 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2417 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2418 if (PerlIO_error(PL_rsfp))
2419 return -1; /* error */
2421 return 0 ; /* end of file */
2423 SvCUR_set(buf_sv, old_len + len) ;
2426 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2427 if (PerlIO_error(PL_rsfp))
2428 return -1; /* error */
2430 return 0 ; /* end of file */
2433 return SvCUR(buf_sv);
2435 /* Skip this filter slot if filter has been deleted */
2436 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2437 DEBUG_P(PerlIO_printf(Perl_debug_log,
2438 "filter_read %d: skipped (filter deleted)\n",
2440 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2442 /* Get function pointer hidden within datasv */
2443 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2444 DEBUG_P(PerlIO_printf(Perl_debug_log,
2445 "filter_read %d: via function %p (%s)\n",
2446 idx, datasv, SvPV_nolen_const(datasv)));
2447 /* Call function. The function is expected to */
2448 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2449 /* Return: <0:error, =0:eof, >0:not eof */
2450 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2454 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2457 #ifdef PERL_CR_FILTER
2458 if (!PL_rsfp_filters) {
2459 filter_add(S_cr_textfilter,NULL);
2462 if (PL_rsfp_filters) {
2464 SvCUR_set(sv, 0); /* start with empty line */
2465 if (FILTER_READ(0, sv, 0) > 0)
2466 return ( SvPVX(sv) ) ;
2471 return (sv_gets(sv, fp, append));
2475 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2480 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2484 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2485 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2487 return GvHV(gv); /* Foo:: */
2490 /* use constant CLASS => 'MyClass' */
2491 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
2493 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2494 pkgname = SvPV_nolen_const(sv);
2498 return gv_stashpv(pkgname, FALSE);
2502 S_tokenize_use(pTHX_ int is_use, char *s) {
2504 if (PL_expect != XSTATE)
2505 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2506 is_use ? "use" : "no"));
2508 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2509 s = force_version(s, TRUE);
2510 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
2511 NEXTVAL_NEXTTOKE.opval = NULL;
2514 else if (*s == 'v') {
2515 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2516 s = force_version(s, FALSE);
2520 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2521 s = force_version(s, FALSE);
2523 yylval.ival = is_use;
2527 static const char* const exp_name[] =
2528 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2529 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2536 Works out what to call the token just pulled out of the input
2537 stream. The yacc parser takes care of taking the ops we return and
2538 stitching them into a tree.
2544 if read an identifier
2545 if we're in a my declaration
2546 croak if they tried to say my($foo::bar)
2547 build the ops for a my() declaration
2548 if it's an access to a my() variable
2549 are we in a sort block?
2550 croak if my($a); $a <=> $b
2551 build ops for access to a my() variable
2552 if in a dq string, and they've said @foo and we can't find @foo
2554 build ops for a bareword
2555 if we already built the token before, use it.
2560 #pragma segment Perl_yylex
2566 register char *s = PL_bufptr;
2572 SV* tmp = newSVpvs("");
2573 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2574 (IV)CopLINE(PL_curcop),
2575 lex_state_names[PL_lex_state],
2576 exp_name[PL_expect],
2577 pv_display(tmp, s, strlen(s), 0, 60));
2580 /* check if there's an identifier for us to look at */
2581 if (PL_pending_ident)
2582 return REPORT(S_pending_ident(aTHX));
2584 /* no identifier pending identification */
2586 switch (PL_lex_state) {
2588 case LEX_NORMAL: /* Some compilers will produce faster */
2589 case LEX_INTERPNORMAL: /* code if we comment these out. */
2593 /* when we've already built the next token, just pull it out of the queue */
2596 yylval = NEXTVAL_NEXTTOKE;
2598 PL_lex_state = PL_lex_defer;
2599 PL_expect = PL_lex_expect;
2600 PL_lex_defer = LEX_NORMAL;
2602 return REPORT(PL_nexttype[PL_nexttoke]);
2604 /* interpolated case modifiers like \L \U, including \Q and \E.
2605 when we get here, PL_bufptr is at the \
2607 case LEX_INTERPCASEMOD:
2609 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2610 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2612 /* handle \E or end of string */
2613 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2615 if (PL_lex_casemods) {
2616 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2617 PL_lex_casestack[PL_lex_casemods] = '\0';
2619 if (PL_bufptr != PL_bufend
2620 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2622 PL_lex_state = LEX_INTERPCONCAT;
2626 if (PL_bufptr != PL_bufend)
2628 PL_lex_state = LEX_INTERPCONCAT;
2632 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2633 "### Saw case modifier\n"); });
2635 if (s[1] == '\\' && s[2] == 'E') {
2637 PL_lex_state = LEX_INTERPCONCAT;
2642 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2643 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2644 if ((*s == 'L' || *s == 'U') &&
2645 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2646 PL_lex_casestack[--PL_lex_casemods] = '\0';
2649 if (PL_lex_casemods > 10)
2650 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2651 PL_lex_casestack[PL_lex_casemods++] = *s;
2652 PL_lex_casestack[PL_lex_casemods] = '\0';
2653 PL_lex_state = LEX_INTERPCONCAT;
2654 NEXTVAL_NEXTTOKE.ival = 0;
2657 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
2659 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
2661 NEXTVAL_NEXTTOKE.ival = OP_LC;
2663 NEXTVAL_NEXTTOKE.ival = OP_UC;
2665 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
2667 Perl_croak(aTHX_ "panic: yylex");
2671 if (PL_lex_starts) {
2674 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2675 if (PL_lex_casemods == 1 && PL_lex_inpat)
2684 case LEX_INTERPPUSH:
2685 return REPORT(sublex_push());
2687 case LEX_INTERPSTART:
2688 if (PL_bufptr == PL_bufend)
2689 return REPORT(sublex_done());
2690 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2691 "### Interpolated variable\n"); });
2693 PL_lex_dojoin = (*PL_bufptr == '@');
2694 PL_lex_state = LEX_INTERPNORMAL;
2695 if (PL_lex_dojoin) {
2696 NEXTVAL_NEXTTOKE.ival = 0;
2698 force_ident("\"", '$');
2699 NEXTVAL_NEXTTOKE.ival = 0;
2701 NEXTVAL_NEXTTOKE.ival = 0;
2703 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
2706 if (PL_lex_starts++) {
2708 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2709 if (!PL_lex_casemods && PL_lex_inpat)
2716 case LEX_INTERPENDMAYBE:
2717 if (intuit_more(PL_bufptr)) {
2718 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2724 if (PL_lex_dojoin) {
2725 PL_lex_dojoin = FALSE;
2726 PL_lex_state = LEX_INTERPCONCAT;
2729 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2730 && SvEVALED(PL_lex_repl))
2732 if (PL_bufptr != PL_bufend)
2733 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2737 case LEX_INTERPCONCAT:
2739 if (PL_lex_brackets)
2740 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2742 if (PL_bufptr == PL_bufend)
2743 return REPORT(sublex_done());
2745 if (SvIVX(PL_linestr) == '\'') {
2746 SV *sv = newSVsv(PL_linestr);
2749 else if ( PL_hints & HINT_NEW_RE )
2750 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2751 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2755 s = scan_const(PL_bufptr);
2757 PL_lex_state = LEX_INTERPCASEMOD;
2759 PL_lex_state = LEX_INTERPSTART;
2762 if (s != PL_bufptr) {
2763 NEXTVAL_NEXTTOKE = yylval;
2766 if (PL_lex_starts++) {
2767 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2768 if (!PL_lex_casemods && PL_lex_inpat)
2781 PL_lex_state = LEX_NORMAL;
2782 s = scan_formline(PL_bufptr);
2783 if (!PL_lex_formbrack)
2789 PL_oldoldbufptr = PL_oldbufptr;
2795 if (isIDFIRST_lazy_if(s,UTF))
2797 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2800 goto fake_eof; /* emulate EOF on ^D or ^Z */
2805 if (PL_lex_brackets) {
2806 yyerror(PL_lex_formbrack
2807 ? "Format not terminated"
2808 : "Missing right curly or square bracket");
2810 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2811 "### Tokener got EOF\n");
2815 if (s++ < PL_bufend)
2816 goto retry; /* ignore stray nulls */
2819 if (!PL_in_eval && !PL_preambled) {
2820 PL_preambled = TRUE;
2821 sv_setpv(PL_linestr,incl_perldb());
2822 if (SvCUR(PL_linestr))
2823 sv_catpvs(PL_linestr,";");
2825 while(AvFILLp(PL_preambleav) >= 0) {
2826 SV *tmpsv = av_shift(PL_preambleav);
2827 sv_catsv(PL_linestr, tmpsv);
2828 sv_catpvs(PL_linestr, ";");
2831 sv_free((SV*)PL_preambleav);
2832 PL_preambleav = NULL;
2834 if (PL_minus_n || PL_minus_p) {
2835 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2837 sv_catpvs(PL_linestr,"chomp;");
2840 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2841 || *PL_splitstr == '"')
2842 && strchr(PL_splitstr + 1, *PL_splitstr))
2843 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2845 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2846 bytes can be used as quoting characters. :-) */
2847 const char *splits = PL_splitstr;
2848 sv_catpvs(PL_linestr, "our @F=split(q\0");
2851 if (*splits == '\\')
2852 sv_catpvn(PL_linestr, splits, 1);
2853 sv_catpvn(PL_linestr, splits, 1);
2854 } while (*splits++);
2855 /* This loop will embed the trailing NUL of
2856 PL_linestr as the last thing it does before
2858 sv_catpvs(PL_linestr, ");");
2862 sv_catpvs(PL_linestr,"our @F=split(' ');");
2866 sv_catpvs(PL_linestr,"use feature ':5.10';");
2867 sv_catpvs(PL_linestr, "\n");
2868 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2869 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2870 PL_last_lop = PL_last_uni = NULL;
2871 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2872 SV * const sv = newSV(0);
2874 sv_upgrade(sv, SVt_PVMG);
2875 sv_setsv(sv,PL_linestr);
2878 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2883 bof = PL_rsfp ? TRUE : FALSE;
2884 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
2887 if (PL_preprocess && !PL_in_eval)
2888 (void)PerlProc_pclose(PL_rsfp);
2889 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2890 PerlIO_clearerr(PL_rsfp);
2892 (void)PerlIO_close(PL_rsfp);
2894 PL_doextract = FALSE;
2896 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2897 sv_setpv(PL_linestr,PL_minus_p
2898 ? ";}continue{print;}" : ";}");
2899 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2900 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2901 PL_last_lop = PL_last_uni = NULL;
2902 PL_minus_n = PL_minus_p = 0;
2905 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2906 PL_last_lop = PL_last_uni = NULL;
2907 sv_setpvn(PL_linestr,"",0);
2908 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2910 /* If it looks like the start of a BOM or raw UTF-16,
2911 * check if it in fact is. */
2917 #ifdef PERLIO_IS_STDIO
2918 # ifdef __GNU_LIBRARY__
2919 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2920 # define FTELL_FOR_PIPE_IS_BROKEN
2924 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2925 # define FTELL_FOR_PIPE_IS_BROKEN
2930 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2931 /* This loses the possibility to detect the bof
2932 * situation on perl -P when the libc5 is being used.
2933 * Workaround? Maybe attach some extra state to PL_rsfp?
2936 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2938 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2941 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2942 s = swallow_bom((U8*)s);
2946 /* Incest with pod. */
2947 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2948 sv_setpvn(PL_linestr, "", 0);
2949 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2950 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2951 PL_last_lop = PL_last_uni = NULL;
2952 PL_doextract = FALSE;
2956 } while (PL_doextract);
2957 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2958 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2959 SV * const sv = newSV(0);
2961 sv_upgrade(sv, SVt_PVMG);
2962 sv_setsv(sv,PL_linestr);
2965 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2967 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2968 PL_last_lop = PL_last_uni = NULL;
2969 if (CopLINE(PL_curcop) == 1) {
2970 while (s < PL_bufend && isSPACE(*s))
2972 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2976 if (*s == '#' && *(s+1) == '!')
2978 #ifdef ALTERNATE_SHEBANG
2980 static char const as[] = ALTERNATE_SHEBANG;
2981 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2982 d = s + (sizeof(as) - 1);
2984 #endif /* ALTERNATE_SHEBANG */
2993 while (*d && !isSPACE(*d))
2997 #ifdef ARG_ZERO_IS_SCRIPT
2998 if (ipathend > ipath) {
3000 * HP-UX (at least) sets argv[0] to the script name,
3001 * which makes $^X incorrect. And Digital UNIX and Linux,
3002 * at least, set argv[0] to the basename of the Perl
3003 * interpreter. So, having found "#!", we'll set it right.
3005 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3007 assert(SvPOK(x) || SvGMAGICAL(x));
3008 if (sv_eq(x, CopFILESV(PL_curcop))) {
3009 sv_setpvn(x, ipath, ipathend - ipath);
3015 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3016 const char * const lstart = SvPV_const(x,llen);
3018 bstart += blen - llen;
3019 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3020 sv_setpvn(x, ipath, ipathend - ipath);
3025 TAINT_NOT; /* $^X is always tainted, but that's OK */
3027 #endif /* ARG_ZERO_IS_SCRIPT */
3032 d = instr(s,"perl -");
3034 d = instr(s,"perl");
3036 /* avoid getting into infinite loops when shebang
3037 * line contains "Perl" rather than "perl" */
3039 for (d = ipathend-4; d >= ipath; --d) {
3040 if ((*d == 'p' || *d == 'P')
3041 && !ibcmp(d, "perl", 4))
3051 #ifdef ALTERNATE_SHEBANG
3053 * If the ALTERNATE_SHEBANG on this system starts with a
3054 * character that can be part of a Perl expression, then if
3055 * we see it but not "perl", we're probably looking at the
3056 * start of Perl code, not a request to hand off to some
3057 * other interpreter. Similarly, if "perl" is there, but
3058 * not in the first 'word' of the line, we assume the line
3059 * contains the start of the Perl program.
3061 if (d && *s != '#') {
3062 const char *c = ipath;
3063 while (*c && !strchr("; \t\r\n\f\v#", *c))
3066 d = NULL; /* "perl" not in first word; ignore */
3068 *s = '#'; /* Don't try to parse shebang line */
3070 #endif /* ALTERNATE_SHEBANG */
3071 #ifndef MACOS_TRADITIONAL
3076 !instr(s,"indir") &&
3077 instr(PL_origargv[0],"perl"))
3084 while (s < PL_bufend && isSPACE(*s))
3086 if (s < PL_bufend) {
3087 Newxz(newargv,PL_origargc+3,char*);
3089 while (s < PL_bufend && !isSPACE(*s))
3092 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3095 newargv = PL_origargv;
3098 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3100 Perl_croak(aTHX_ "Can't exec %s", ipath);
3104 while (*d && !isSPACE(*d)) d++;
3105 while (SPACE_OR_TAB(*d)) d++;
3108 const bool switches_done = PL_doswitches;
3109 const U32 oldpdb = PL_perldb;
3110 const bool oldn = PL_minus_n;
3111 const bool oldp = PL_minus_p;
3114 if (*d == 'M' || *d == 'm' || *d == 'C') {
3115 const char * const m = d;
3116 while (*d && !isSPACE(*d)) d++;
3117 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3120 d = moreswitches(d);
3122 if (PL_doswitches && !switches_done) {
3123 int argc = PL_origargc;
3124 char **argv = PL_origargv;
3127 } while (argc && argv[0][0] == '-' && argv[0][1]);
3128 init_argv_symbols(argc,argv);
3130 if ((PERLDB_LINE && !oldpdb) ||
3131 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3132 /* if we have already added "LINE: while (<>) {",
3133 we must not do it again */
3135 sv_setpvn(PL_linestr, "", 0);
3136 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3137 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3138 PL_last_lop = PL_last_uni = NULL;
3139 PL_preambled = FALSE;
3141 (void)gv_fetchfile(PL_origfilename);
3148 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3150 PL_lex_state = LEX_FORMLINE;
3155 #ifdef PERL_STRICT_CR
3156 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3158 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3160 case ' ': case '\t': case '\f': case 013:
3161 #ifdef MACOS_TRADITIONAL
3168 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3169 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3170 /* handle eval qq[#line 1 "foo"\n ...] */
3171 CopLINE_dec(PL_curcop);
3175 while (d < PL_bufend && *d != '\n')
3179 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3180 Perl_croak(aTHX_ "panic: input overflow");
3183 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3185 PL_lex_state = LEX_FORMLINE;
3195 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3203 while (s < PL_bufend && SPACE_OR_TAB(*s))
3206 if (strnEQ(s,"=>",2)) {
3207 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3208 DEBUG_T( { S_printbuf(aTHX_
3209 "### Saw unary minus before =>, forcing word %s\n", s);
3211 OPERATOR('-'); /* unary minus */
3213 PL_last_uni = PL_oldbufptr;
3215 case 'r': ftst = OP_FTEREAD; break;
3216 case 'w': ftst = OP_FTEWRITE; break;
3217 case 'x': ftst = OP_FTEEXEC; break;
3218 case 'o': ftst = OP_FTEOWNED; break;
3219 case 'R': ftst = OP_FTRREAD; break;
3220 case 'W': ftst = OP_FTRWRITE; break;
3221 case 'X': ftst = OP_FTREXEC; break;
3222 case 'O': ftst = OP_FTROWNED; break;
3223 case 'e': ftst = OP_FTIS; break;
3224 case 'z': ftst = OP_FTZERO; break;
3225 case 's': ftst = OP_FTSIZE; break;
3226 case 'f': ftst = OP_FTFILE; break;
3227 case 'd': ftst = OP_FTDIR; break;
3228 case 'l': ftst = OP_FTLINK; break;
3229 case 'p': ftst = OP_FTPIPE; break;
3230 case 'S': ftst = OP_FTSOCK; break;
3231 case 'u': ftst = OP_FTSUID; break;
3232 case 'g': ftst = OP_FTSGID; break;
3233 case 'k': ftst = OP_FTSVTX; break;
3234 case 'b': ftst = OP_FTBLK; break;
3235 case 'c': ftst = OP_FTCHR; break;
3236 case 't': ftst = OP_FTTTY; break;
3237 case 'T': ftst = OP_FTTEXT; break;
3238 case 'B': ftst = OP_FTBINARY; break;
3239 case 'M': case 'A': case 'C':
3240 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
3242 case 'M': ftst = OP_FTMTIME; break;
3243 case 'A': ftst = OP_FTATIME; break;
3244 case 'C': ftst = OP_FTCTIME; break;
3252 PL_last_lop_op = (OPCODE)ftst;
3253 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3254 "### Saw file test %c\n", (int)tmp);
3259 /* Assume it was a minus followed by a one-letter named
3260 * subroutine call (or a -bareword), then. */
3261 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3262 "### '-%c' looked like a file test but was not\n",
3269 const char tmp = *s++;
3272 if (PL_expect == XOPERATOR)
3277 else if (*s == '>') {
3280 if (isIDFIRST_lazy_if(s,UTF)) {
3281 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3289 if (PL_expect == XOPERATOR)
3292 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3294 OPERATOR('-'); /* unary minus */
3300 const char tmp = *s++;
3303 if (PL_expect == XOPERATOR)
3308 if (PL_expect == XOPERATOR)
3311 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3318 if (PL_expect != XOPERATOR) {
3319 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3320 PL_expect = XOPERATOR;
3321 force_ident(PL_tokenbuf, '*');
3334 if (PL_expect == XOPERATOR) {
3338 PL_tokenbuf[0] = '%';
3339 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3340 if (!PL_tokenbuf[1]) {
3343 PL_pending_ident = '%';
3354 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3355 && FEATURE_IS_ENABLED("~~"))
3362 const char tmp = *s++;
3368 goto just_a_word_zero_gv;
3371 switch (PL_expect) {
3374 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3376 PL_bufptr = s; /* update in case we back off */
3382 PL_expect = XTERMBLOCK;
3386 while (isIDFIRST_lazy_if(s,UTF)) {
3388 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3389 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3390 if (tmp < 0) tmp = -tmp;
3406 d = scan_str(d,TRUE,TRUE);
3408 /* MUST advance bufptr here to avoid bogus
3409 "at end of line" context messages from yyerror().
3411 PL_bufptr = s + len;
3412 yyerror("Unterminated attribute parameter in attribute list");
3415 return REPORT(0); /* EOF indicator */
3419 SV *sv = newSVpvn(s, len);
3420 sv_catsv(sv, PL_lex_stuff);
3421 attrs = append_elem(OP_LIST, attrs,
3422 newSVOP(OP_CONST, 0, sv));
3423 SvREFCNT_dec(PL_lex_stuff);
3424 PL_lex_stuff = NULL;
3427 if (len == 6 && strnEQ(s, "unique", len)) {
3428 if (PL_in_my == KEY_our)
3430 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3432 /*EMPTY*/; /* skip to avoid loading attributes.pm */
3435 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3438 /* NOTE: any CV attrs applied here need to be part of
3439 the CVf_BUILTIN_ATTRS define in cv.h! */
3440 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3441 CvLVALUE_on(PL_compcv);
3442 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3443 CvLOCKED_on(PL_compcv);
3444 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3445 CvMETHOD_on(PL_compcv);
3446 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3447 CvASSERTION_on(PL_compcv);
3448 /* After we've set the flags, it could be argued that
3449 we don't need to do the attributes.pm-based setting
3450 process, and shouldn't bother appending recognized
3451 flags. To experiment with that, uncomment the
3452 following "else". (Note that's already been
3453 uncommented. That keeps the above-applied built-in
3454 attributes from being intercepted (and possibly
3455 rejected) by a package's attribute routines, but is
3456 justified by the performance win for the common case
3457 of applying only built-in attributes.) */
3459 attrs = append_elem(OP_LIST, attrs,
3460 newSVOP(OP_CONST, 0,
3464 if (*s == ':' && s[1] != ':')
3467 break; /* require real whitespace or :'s */
3468 /* XXX losing whitespace on sequential attributes here */
3472 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3473 if (*s != ';' && *s != '}' && *s != tmp
3474 && (tmp != '=' || *s != ')')) {
3475 const char q = ((*s == '\'') ? '"' : '\'');
3476 /* If here for an expression, and parsed no attrs, back
3478 if (tmp == '=' && !attrs) {
3482 /* MUST advance bufptr here to avoid bogus "at end of line"
3483 context messages from yyerror().
3487 ? Perl_form(aTHX_ "Invalid separator character "
3488 "%c%c%c in attribute list", q, *s, q)
3489 : "Unterminated attribute list" );
3497 NEXTVAL_NEXTTOKE.opval = attrs;
3505 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3506 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3514 const char tmp = *s++;
3519 const char tmp = *s++;
3527 if (PL_lex_brackets <= 0)
3528 yyerror("Unmatched right square bracket");
3531 if (PL_lex_state == LEX_INTERPNORMAL) {
3532 if (PL_lex_brackets == 0) {
3533 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3534 PL_lex_state = LEX_INTERPEND;
3541 if (PL_lex_brackets > 100) {
3542 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3544 switch (PL_expect) {
3546 if (PL_lex_formbrack) {
3550 if (PL_oldoldbufptr == PL_last_lop)
3551 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3553 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3554 OPERATOR(HASHBRACK);
3556 while (s < PL_bufend && SPACE_OR_TAB(*s))
3559 PL_tokenbuf[0] = '\0';
3560 if (d < PL_bufend && *d == '-') {
3561 PL_tokenbuf[0] = '-';
3563 while (d < PL_bufend && SPACE_OR_TAB(*d))
3566 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3567 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3569 while (d < PL_bufend && SPACE_OR_TAB(*d))
3572 const char minus = (PL_tokenbuf[0] == '-');
3573 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3581 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3586 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3591 if (PL_oldoldbufptr == PL_last_lop)
3592 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3594 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3597 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3599 /* This hack is to get the ${} in the message. */
3601 yyerror("syntax error");
3604 OPERATOR(HASHBRACK);
3606 /* This hack serves to disambiguate a pair of curlies
3607 * as being a block or an anon hash. Normally, expectation
3608 * determines that, but in cases where we're not in a
3609 * position to expect anything in particular (like inside
3610 * eval"") we have to resolve the ambiguity. This code
3611 * covers the case where the first term in the curlies is a
3612 * quoted string. Most other cases need to be explicitly
3613 * disambiguated by prepending a "+" before the opening
3614 * curly in order to force resolution as an anon hash.
3616 * XXX should probably propagate the outer expectation
3617 * into eval"" to rely less on this hack, but that could
3618 * potentially break current behavior of eval"".
3622 if (*s == '\'' || *s == '"' || *s == '`') {
3623 /* common case: get past first string, handling escapes */
3624 for (t++; t < PL_bufend && *t != *s;)
3625 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3629 else if (*s == 'q') {
3632 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3635 /* skip q//-like construct */
3637 char open, close, term;
3640 while (t < PL_bufend && isSPACE(*t))
3642 /* check for q => */
3643 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3644 OPERATOR(HASHBRACK);
3648 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3652 for (t++; t < PL_bufend; t++) {
3653 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3655 else if (*t == open)
3659 for (t++; t < PL_bufend; t++) {
3660 if (*t == '\\' && t+1 < PL_bufend)
3662 else if (*t == close && --brackets <= 0)
3664 else if (*t == open)
3671 /* skip plain q word */
3672 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3675 else if (isALNUM_lazy_if(t,UTF)) {
3677 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3680 while (t < PL_bufend && isSPACE(*t))
3682 /* if comma follows first term, call it an anon hash */
3683 /* XXX it could be a comma expression with loop modifiers */
3684 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3685 || (*t == '=' && t[1] == '>')))
3686 OPERATOR(HASHBRACK);
3687 if (PL_expect == XREF)
3690 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3696 yylval.ival = CopLINE(PL_curcop);
3697 if (isSPACE(*s) || *s == '#')
3698 PL_copline = NOLINE; /* invalidate current command line number */
3703 if (PL_lex_brackets <= 0)
3704 yyerror("Unmatched right curly bracket");
3706 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3707 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3708 PL_lex_formbrack = 0;
3709 if (PL_lex_state == LEX_INTERPNORMAL) {
3710 if (PL_lex_brackets == 0) {
3711 if (PL_expect & XFAKEBRACK) {
3712 PL_expect &= XENUMMASK;
3713 PL_lex_state = LEX_INTERPEND;
3715 return yylex(); /* ignore fake brackets */
3717 if (*s == '-' && s[1] == '>')
3718 PL_lex_state = LEX_INTERPENDMAYBE;
3719 else if (*s != '[' && *s != '{')
3720 PL_lex_state = LEX_INTERPEND;
3723 if (PL_expect & XFAKEBRACK) {
3724 PL_expect &= XENUMMASK;
3726 return yylex(); /* ignore fake brackets */
3735 if (PL_expect == XOPERATOR) {
3736 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3737 && isIDFIRST_lazy_if(s,UTF))
3739 CopLINE_dec(PL_curcop);
3740 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3741 CopLINE_inc(PL_curcop);
3746 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3748 PL_expect = XOPERATOR;
3749 force_ident(PL_tokenbuf, '&');
3753 yylval.ival = (OPpENTERSUB_AMPER<<8);
3765 const char tmp = *s++;
3772 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3773 && strchr("+-*/%.^&|<",tmp))
3774 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3775 "Reversed %c= operator",(int)tmp);
3777 if (PL_expect == XSTATE && isALPHA(tmp) &&
3778 (s == PL_linestart+1 || s[-2] == '\n') )
3780 if (PL_in_eval && !PL_rsfp) {
3785 if (strnEQ(s,"=cut",4)) {
3799 PL_doextract = TRUE;
3803 if (PL_lex_brackets < PL_lex_formbrack) {
3805 #ifdef PERL_STRICT_CR
3806 for (t = s; SPACE_OR_TAB(*t); t++) ;
3808 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3810 if (*t == '\n' || *t == '#') {
3821 const char tmp = *s++;
3823 /* was this !=~ where !~ was meant?
3824 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3826 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3827 const char *t = s+1;
3829 while (t < PL_bufend && isSPACE(*t))
3832 if (*t == '/' || *t == '?' ||
3833 ((*t == 'm' || *t == 's' || *t == 'y')
3834 && !isALNUM(t[1])) ||
3835 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3836 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3837 "!=~ should be !~");
3847 if (PL_expect != XOPERATOR) {
3848 if (s[1] != '<' && !strchr(s,'>'))
3851 s = scan_heredoc(s);
3853 s = scan_inputsymbol(s);
3854 TERM(sublex_start());
3860 SHop(OP_LEFT_SHIFT);
3874 const char tmp = *s++;
3876 SHop(OP_RIGHT_SHIFT);
3886 if (PL_expect == XOPERATOR) {
3887 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3889 deprecate_old(commaless_variable_list);
3890 return REPORT(','); /* grandfather non-comma-format format */
3894 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3895 PL_tokenbuf[0] = '@';
3896 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3897 sizeof PL_tokenbuf - 1, FALSE);
3898 if (PL_expect == XOPERATOR)
3899 no_op("Array length", s);
3900 if (!PL_tokenbuf[1])
3902 PL_expect = XOPERATOR;
3903 PL_pending_ident = '#';
3907 PL_tokenbuf[0] = '$';
3908 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3909 sizeof PL_tokenbuf - 1, FALSE);
3910 if (PL_expect == XOPERATOR)
3912 if (!PL_tokenbuf[1]) {
3914 yyerror("Final $ should be \\$ or $name");
3918 /* This kludge not intended to be bulletproof. */
3919 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3920 yylval.opval = newSVOP(OP_CONST, 0,
3921 newSViv(PL_compiling.cop_arybase));
3922 yylval.opval->op_private = OPpCONST_ARYBASE;
3928 const char tmp = *s;
3929 if (PL_lex_state == LEX_NORMAL)
3932 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3933 && intuit_more(s)) {
3935 PL_tokenbuf[0] = '@';
3936 if (ckWARN(WARN_SYNTAX)) {
3939 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3942 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
3943 while (t < PL_bufend && *t != ']')
3945 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3946 "Multidimensional syntax %.*s not supported",
3947 (int)((t - PL_bufptr) + 1), PL_bufptr);
3951 else if (*s == '{') {
3953 PL_tokenbuf[0] = '%';
3954 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3955 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3957 char tmpbuf[sizeof PL_tokenbuf];
3958 for (t++; isSPACE(*t); t++) ;
3959 if (isIDFIRST_lazy_if(t,UTF)) {
3961 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3963 for (; isSPACE(*t); t++) ;
3964 if (*t == ';' && get_cv(tmpbuf, FALSE))
3965 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3966 "You need to quote \"%s\"",
3973 PL_expect = XOPERATOR;
3974 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3975 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3976 if (!islop || PL_last_lop_op == OP_GREPSTART)
3977 PL_expect = XOPERATOR;
3978 else if (strchr("$@\"'`q", *s))
3979 PL_expect = XTERM; /* e.g. print $fh "foo" */
3980 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3981 PL_expect = XTERM; /* e.g. print $fh &sub */
3982 else if (isIDFIRST_lazy_if(s,UTF)) {
3983 char tmpbuf[sizeof PL_tokenbuf];
3985 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3986 if ((t2 = keyword(tmpbuf, len))) {
3987 /* binary operators exclude handle interpretations */
3999 PL_expect = XTERM; /* e.g. print $fh length() */
4004 PL_expect = XTERM; /* e.g. print $fh subr() */
4007 else if (isDIGIT(*s))
4008 PL_expect = XTERM; /* e.g. print $fh 3 */
4009 else if (*s == '.' && isDIGIT(s[1]))
4010 PL_expect = XTERM; /* e.g. print $fh .3 */
4011 else if ((*s == '?' || *s == '-' || *s == '+')
4012 && !isSPACE(s[1]) && s[1] != '=')
4013 PL_expect = XTERM; /* e.g. print $fh -1 */
4014 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4016 PL_expect = XTERM; /* e.g. print $fh /.../
4017 XXX except DORDOR operator
4019 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4021 PL_expect = XTERM; /* print $fh <<"EOF" */
4024 PL_pending_ident = '$';
4028 if (PL_expect == XOPERATOR)
4030 PL_tokenbuf[0] = '@';
4031 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4032 if (!PL_tokenbuf[1]) {
4035 if (PL_lex_state == LEX_NORMAL)
4037 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4039 PL_tokenbuf[0] = '%';
4041 /* Warn about @ where they meant $. */
4042 if (*s == '[' || *s == '{') {
4043 if (ckWARN(WARN_SYNTAX)) {
4044 const char *t = s + 1;
4045 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4047 if (*t == '}' || *t == ']') {
4049 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4050 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4051 "Scalar value %.*s better written as $%.*s",
4052 (int)(t-PL_bufptr), PL_bufptr,
4053 (int)(t-PL_bufptr-1), PL_bufptr+1);
4058 PL_pending_ident = '@';
4061 case '/': /* may be division, defined-or, or pattern */
4062 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4066 case '?': /* may either be conditional or pattern */
4067 if(PL_expect == XOPERATOR) {
4075 /* A // operator. */
4085 /* Disable warning on "study /blah/" */
4086 if (PL_oldoldbufptr == PL_last_uni
4087 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4088 || memNE(PL_last_uni, "study", 5)
4089 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4092 s = scan_pat(s,OP_MATCH);
4093 TERM(sublex_start());
4097 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4098 #ifdef PERL_STRICT_CR
4101 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4103 && (s == PL_linestart || s[-1] == '\n') )
4105 PL_lex_formbrack = 0;
4109 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4115 yylval.ival = OPf_SPECIAL;
4121 if (PL_expect != XOPERATOR)
4126 case '0': case '1': case '2': case '3': case '4':
4127 case '5': case '6': case '7': case '8': case '9':
4128 s = scan_num(s, &yylval);
4129 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4130 if (PL_expect == XOPERATOR)
4135 s = scan_str(s,FALSE,FALSE);
4136 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4137 if (PL_expect == XOPERATOR) {
4138 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4140 deprecate_old(commaless_variable_list);
4141 return REPORT(','); /* grandfather non-comma-format format */
4147 missingterm((char*)0);
4148 yylval.ival = OP_CONST;
4149 TERM(sublex_start());
4152 s = scan_str(s,FALSE,FALSE);
4153 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4154 if (PL_expect == XOPERATOR) {
4155 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4157 deprecate_old(commaless_variable_list);
4158 return REPORT(','); /* grandfather non-comma-format format */
4164 missingterm((char*)0);
4165 yylval.ival = OP_CONST;
4166 /* FIXME. I think that this can be const if char *d is replaced by
4167 more localised variables. */
4168 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4169 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4170 yylval.ival = OP_STRINGIFY;
4174 TERM(sublex_start());
4177 s = scan_str(s,FALSE,FALSE);
4178 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4179 if (PL_expect == XOPERATOR)
4180 no_op("Backticks",s);
4182 missingterm((char*)0);
4183 yylval.ival = OP_BACKTICK;
4185 TERM(sublex_start());
4189 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4190 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4192 if (PL_expect == XOPERATOR)
4193 no_op("Backslash",s);
4197 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4198 char *start = s + 2;
4199 while (isDIGIT(*start) || *start == '_')
4201 if (*start == '.' && isDIGIT(start[1])) {
4202 s = scan_num(s, &yylval);
4205 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4206 else if (!isALPHA(*start) && (PL_expect == XTERM
4207 || PL_expect == XREF || PL_expect == XSTATE
4208 || PL_expect == XTERMORDORDOR)) {
4209 const char c = *start;
4212 gv = gv_fetchpv(s, 0, SVt_PVCV);
4215 s = scan_num(s, &yylval);
4222 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4258 I32 orig_keyword = 0;
4263 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4265 /* Some keywords can be followed by any delimiter, including ':' */
4266 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4267 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4268 (PL_tokenbuf[0] == 'q' &&
4269 strchr("qwxr", PL_tokenbuf[1])))));
4271 /* x::* is just a word, unless x is "CORE" */
4272 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4276 while (d < PL_bufend && isSPACE(*d))
4277 d++; /* no comments skipped here, or s### is misparsed */
4279 /* Is this a label? */
4280 if (!tmp && PL_expect == XSTATE
4281 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4283 yylval.pval = savepv(PL_tokenbuf);
4288 /* Check for keywords */
4289 tmp = keyword(PL_tokenbuf, len);
4291 /* Is this a word before a => operator? */
4292 if (*d == '=' && d[1] == '>') {
4295 = (OP*)newSVOP(OP_CONST, 0,
4296 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4297 yylval.opval->op_private = OPpCONST_BARE;
4301 if (tmp < 0) { /* second-class keyword? */
4302 GV *ogv = NULL; /* override (winner) */
4303 GV *hgv = NULL; /* hidden (loser) */
4304 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4306 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
4309 if (GvIMPORTED_CV(gv))
4311 else if (! CvMETHOD(cv))
4315 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4316 (gv = *gvp) != (GV*)&PL_sv_undef &&
4317 GvCVu(gv) && GvIMPORTED_CV(gv))
4324 tmp = 0; /* overridden by import or by GLOBAL */
4327 && -tmp==KEY_lock /* XXX generalizable kludge */
4329 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
4331 tmp = 0; /* any sub overrides "weak" keyword */
4333 else { /* no override */
4335 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4336 Perl_warner(aTHX_ packWARN(WARN_MISC),
4337 "dump() better written as CORE::dump()");
4341 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4342 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4343 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4344 "Ambiguous call resolved as CORE::%s(), %s",
4345 GvENAME(hgv), "qualify as such or use &");
4352 default: /* not a keyword */
4353 /* Trade off - by using this evil construction we can pull the
4354 variable gv into the block labelled keylookup. If not, then
4355 we have to give it function scope so that the goto from the
4356 earlier ':' case doesn't bypass the initialisation. */
4358 just_a_word_zero_gv:
4366 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4369 /* Get the rest if it looks like a package qualifier */
4371 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4373 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4376 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4377 *s == '\'' ? "'" : "::");
4382 if (PL_expect == XOPERATOR) {
4383 if (PL_bufptr == PL_linestart) {
4384 CopLINE_dec(PL_curcop);
4385 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4386 CopLINE_inc(PL_curcop);
4389 no_op("Bareword",s);
4392 /* Look for a subroutine with this name in current package,
4393 unless name is "Foo::", in which case Foo is a bearword
4394 (and a package name). */
4397 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4399 if (ckWARN(WARN_BAREWORD)
4400 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
4401 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4402 "Bareword \"%s\" refers to nonexistent package",
4405 PL_tokenbuf[len] = '\0';
4411 /* Mustn't actually add anything to a symbol table.
4412 But also don't want to "initialise" any placeholder
4413 constants that might already be there into full
4414 blown PVGVs with attached PVCV. */
4415 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
4416 GV_NOADD_NOINIT, SVt_PVCV);
4421 /* if we saw a global override before, get the right name */
4424 sv = newSVpvs("CORE::GLOBAL::");
4425 sv_catpv(sv,PL_tokenbuf);
4428 /* If len is 0, newSVpv does strlen(), which is correct.
4429 If len is non-zero, then it will be the true length,
4430 and so the scalar will be created correctly. */
4431 sv = newSVpv(PL_tokenbuf,len);
4434 /* Presume this is going to be a bareword of some sort. */
4437 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4438 yylval.opval->op_private = OPpCONST_BARE;
4439 /* UTF-8 package name? */
4440 if (UTF && !IN_BYTES &&
4441 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4444 /* And if "Foo::", then that's what it certainly is. */
4449 /* Do the explicit type check so that we don't need to force
4450 the initialisation of the symbol table to have a real GV.
4451 Beware - gv may not really be a PVGV, cv may not really be
4452 a PVCV, (because of the space optimisations that gv_init
4453 understands) But they're true if for this symbol there is
4454 respectively a typeglob and a subroutine.
4456 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4457 /* Real typeglob, so get the real subroutine: */
4459 /* A proxy for a subroutine in this package? */
4460 : SvOK(gv) ? (CV *) gv : NULL)
4463 /* See if it's the indirect object for a list operator. */
4465 if (PL_oldoldbufptr &&
4466 PL_oldoldbufptr < PL_bufptr &&
4467 (PL_oldoldbufptr == PL_last_lop
4468 || PL_oldoldbufptr == PL_last_uni) &&
4469 /* NO SKIPSPACE BEFORE HERE! */
4470 (PL_expect == XREF ||
4471 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4473 bool immediate_paren = *s == '(';
4475 /* (Now we can afford to cross potential line boundary.) */
4476 s = SKIPSPACE2(s,nextnextwhite);
4478 /* Two barewords in a row may indicate method call. */
4480 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4481 (tmp = intuit_method(s, gv, cv)))
4484 /* If not a declared subroutine, it's an indirect object. */
4485 /* (But it's an indir obj regardless for sort.) */
4486 /* Also, if "_" follows a filetest operator, it's a bareword */
4489 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4491 (PL_last_lop_op != OP_MAPSTART &&
4492 PL_last_lop_op != OP_GREPSTART))))
4493 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4494 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4497 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4502 PL_expect = XOPERATOR;
4505 /* Is this a word before a => operator? */
4506 if (*s == '=' && s[1] == '>' && !pkgname) {
4508 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4509 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4510 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4514 /* If followed by a paren, it's certainly a subroutine. */
4518 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4519 if (*d == ')' && (sv = gv_const_sv(gv))) {
4524 NEXTVAL_NEXTTOKE.opval = yylval.opval;
4525 PL_expect = XOPERATOR;
4531 /* If followed by var or block, call it a method (unless sub) */
4533 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4534 PL_last_lop = PL_oldbufptr;
4535 PL_last_lop_op = OP_METHOD;
4539 /* If followed by a bareword, see if it looks like indir obj. */
4542 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4543 && (tmp = intuit_method(s, gv, cv)))
4546 /* Not a method, so call it a subroutine (if defined) */
4549 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4550 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4551 "Ambiguous use of -%s resolved as -&%s()",
4552 PL_tokenbuf, PL_tokenbuf);
4553 /* Check for a constant sub */
4554 if ((sv = gv_const_sv(gv))) {
4556 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4557 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
4558 yylval.opval->op_private = 0;
4562 /* Resolve to GV now. */
4563 if (SvTYPE(gv) != SVt_PVGV) {
4564 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4565 assert (SvTYPE(gv) == SVt_PVGV);
4566 /* cv must have been some sort of placeholder, so
4567 now needs replacing with a real code reference. */
4571 op_free(yylval.opval);
4572 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4573 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4574 PL_last_lop = PL_oldbufptr;
4575 PL_last_lop_op = OP_ENTERSUB;
4576 /* Is there a prototype? */
4579 const char *proto = SvPV_const((SV*)cv, protolen);
4582 if (*proto == '$' && proto[1] == '\0')
4584 while (*proto == ';')
4586 if (*proto == '&' && *s == '{') {
4587 sv_setpv(PL_subname, PL_curstash ?
4588 "__ANON__" : "__ANON__::__ANON__");
4592 NEXTVAL_NEXTTOKE.opval = yylval.opval;
4598 /* Call it a bare word */
4600 if (PL_hints & HINT_STRICT_SUBS)
4601 yylval.opval->op_private |= OPpCONST_STRICT;
4604 if (lastchar != '-') {
4605 if (ckWARN(WARN_RESERVED)) {
4606 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4607 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4608 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4615 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4616 && ckWARN_d(WARN_AMBIGUOUS)) {
4617 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4618 "Operator or semicolon missing before %c%s",
4619 lastchar, PL_tokenbuf);
4620 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4621 "Ambiguous use of %c resolved as operator %c",
4622 lastchar, lastchar);
4628 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4629 newSVpv(CopFILE(PL_curcop),0));
4633 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4634 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4637 case KEY___PACKAGE__:
4638 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4640 ? newSVhek(HvNAME_HEK(PL_curstash))
4647 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4648 const char *pname = "main";
4649 if (PL_tokenbuf[2] == 'D')
4650 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4651 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4655 GvIOp(gv) = newIO();
4656 IoIFP(GvIOp(gv)) = PL_rsfp;
4657 #if defined(HAS_FCNTL) && defined(F_SETFD)
4659 const int fd = PerlIO_fileno(PL_rsfp);
4660 fcntl(fd,F_SETFD,fd >= 3);
4663 /* Mark this internal pseudo-handle as clean */
4664 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4666 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4667 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4668 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4670 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4671 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4672 /* if the script was opened in binmode, we need to revert
4673 * it to text mode for compatibility; but only iff it has CRs
4674 * XXX this is a questionable hack at best. */
4675 if (PL_bufend-PL_bufptr > 2
4676 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4679 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4680 loc = PerlIO_tell(PL_rsfp);
4681 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4684 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4686 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4687 #endif /* NETWARE */
4688 #ifdef PERLIO_IS_STDIO /* really? */
4689 # if defined(__BORLANDC__)
4690 /* XXX see note in do_binmode() */
4691 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4695 PerlIO_seek(PL_rsfp, loc, 0);
4699 #ifdef PERLIO_LAYERS
4702 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4703 else if (PL_encoding) {
4710 XPUSHs(PL_encoding);
4712 call_method("name", G_SCALAR);
4716 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4717 Perl_form(aTHX_ ":encoding(%"SVf")",
4735 if (PL_expect == XSTATE) {
4742 if (*s == ':' && s[1] == ':') {
4745 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4746 if (!(tmp = keyword(PL_tokenbuf, len)))
4747 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4750 else if (tmp == KEY_require || tmp == KEY_do)
4751 /* that's a way to remember we saw "CORE::" */
4764 LOP(OP_ACCEPT,XTERM);
4770 LOP(OP_ATAN2,XTERM);
4776 LOP(OP_BINMODE,XTERM);
4779 LOP(OP_BLESS,XTERM);
4788 /* When 'use switch' is in effect, continue has a dual
4789 life as a control operator. */
4791 if (!FEATURE_IS_ENABLED("switch"))
4794 /* We have to disambiguate the two senses of
4795 "continue". If the next token is a '{' then
4796 treat it as the start of a continue block;
4797 otherwise treat it as a control operator.
4809 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
4826 if (!PL_cryptseen) {
4827 PL_cryptseen = TRUE;
4831 LOP(OP_CRYPT,XTERM);
4834 LOP(OP_CHMOD,XTERM);
4837 LOP(OP_CHOWN,XTERM);
4840 LOP(OP_CONNECT,XTERM);
4859 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4860 if (orig_keyword == KEY_do) {
4869 PL_hints |= HINT_BLOCK_SCOPE;
4879 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4880 LOP(OP_DBMOPEN,XTERM);
4886 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4893 yylval.ival = CopLINE(PL_curcop);
4907 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4908 UNIBRACK(OP_ENTEREVAL);
4926 case KEY_endhostent:
4932 case KEY_endservent:
4935 case KEY_endprotoent:
4946 yylval.ival = CopLINE(PL_curcop);
4948 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4950 if ((PL_bufend - p) >= 3 &&
4951 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4953 else if ((PL_bufend - p) >= 4 &&
4954 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4957 if (isIDFIRST_lazy_if(p,UTF)) {
4958 p = scan_ident(p, PL_bufend,
4959 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4963 Perl_croak(aTHX_ "Missing $ on loop variable");
4968 LOP(OP_FORMLINE,XTERM);
4974 LOP(OP_FCNTL,XTERM);
4980 LOP(OP_FLOCK,XTERM);
4989 LOP(OP_GREPSTART, XREF);
4992 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5007 case KEY_getpriority:
5008 LOP(OP_GETPRIORITY,XTERM);
5010 case KEY_getprotobyname:
5013 case KEY_getprotobynumber:
5014 LOP(OP_GPBYNUMBER,XTERM);
5016 case KEY_getprotoent:
5028 case KEY_getpeername:
5029 UNI(OP_GETPEERNAME);
5031 case KEY_gethostbyname:
5034 case KEY_gethostbyaddr:
5035 LOP(OP_GHBYADDR,XTERM);
5037 case KEY_gethostent:
5040 case KEY_getnetbyname:
5043 case KEY_getnetbyaddr:
5044 LOP(OP_GNBYADDR,XTERM);
5049 case KEY_getservbyname:
5050 LOP(OP_GSBYNAME,XTERM);
5052 case KEY_getservbyport:
5053 LOP(OP_GSBYPORT,XTERM);
5055 case KEY_getservent:
5058 case KEY_getsockname:
5059 UNI(OP_GETSOCKNAME);
5061 case KEY_getsockopt:
5062 LOP(OP_GSOCKOPT,XTERM);
5077 yylval.ival = CopLINE(PL_curcop);
5088 yylval.ival = CopLINE(PL_curcop);
5092 LOP(OP_INDEX,XTERM);
5098 LOP(OP_IOCTL,XTERM);
5110 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5142 LOP(OP_LISTEN,XTERM);
5151 s = scan_pat(s,OP_MATCH);
5152 TERM(sublex_start());
5155 LOP(OP_MAPSTART, XREF);
5158 LOP(OP_MKDIR,XTERM);
5161 LOP(OP_MSGCTL,XTERM);
5164 LOP(OP_MSGGET,XTERM);
5167 LOP(OP_MSGRCV,XTERM);
5170 LOP(OP_MSGSND,XTERM);
5176 if (isIDFIRST_lazy_if(s,UTF)) {
5177 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5178 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5180 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5181 if (!PL_in_my_stash) {
5184 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5192 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5199 s = tokenize_use(0, s);
5203 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
5210 if (isIDFIRST_lazy_if(s,UTF)) {
5212 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5213 for (t=d; *t && isSPACE(*t); t++) ;
5214 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5216 && !(t[0] == '=' && t[1] == '>')
5218 int parms_len = (int)(d-s);
5219 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5220 "Precedence problem: open %.*s should be open(%.*s)",
5221 parms_len, s, parms_len, s);
5227 yylval.ival = OP_OR;
5237 LOP(OP_OPEN_DIR,XTERM);
5240 checkcomma(s,PL_tokenbuf,"filehandle");
5244 checkcomma(s,PL_tokenbuf,"filehandle");
5263 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5267 LOP(OP_PIPE_OP,XTERM);
5270 s = scan_str(s,FALSE,FALSE);
5272 missingterm((char*)0);
5273 yylval.ival = OP_CONST;
5274 TERM(sublex_start());
5280 s = scan_str(s,FALSE,FALSE);
5282 missingterm((char*)0);
5283 PL_expect = XOPERATOR;
5285 if (SvCUR(PL_lex_stuff)) {
5288 d = SvPV_force(PL_lex_stuff, len);
5291 for (; isSPACE(*d) && len; --len, ++d) ;
5294 if (!warned && ckWARN(WARN_QW)) {
5295 for (; !isSPACE(*d) && len; --len, ++d) {
5297 Perl_warner(aTHX_ packWARN(WARN_QW),
5298 "Possible attempt to separate words with commas");
5301 else if (*d == '#') {
5302 Perl_warner(aTHX_ packWARN(WARN_QW),
5303 "Possible attempt to put comments in qw() list");
5309 for (; !isSPACE(*d) && len; --len, ++d) ;
5311 sv = newSVpvn(b, d-b);
5312 if (DO_UTF8(PL_lex_stuff))
5314 words = append_elem(OP_LIST, words,
5315 newSVOP(OP_CONST, 0, tokeq(sv)));
5319 NEXTVAL_NEXTTOKE.opval = words;
5324 SvREFCNT_dec(PL_lex_stuff);
5325 PL_lex_stuff = NULL;
5331 s = scan_str(s,FALSE,FALSE);
5333 missingterm((char*)0);
5334 yylval.ival = OP_STRINGIFY;
5335 if (SvIVX(PL_lex_stuff) == '\'')
5336 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5337 TERM(sublex_start());
5340 s = scan_pat(s,OP_QR);
5341 TERM(sublex_start());
5344 s = scan_str(s,FALSE,FALSE);
5346 missingterm((char*)0);
5347 yylval.ival = OP_BACKTICK;
5349 TERM(sublex_start());
5357 s = force_version(s, FALSE);
5359 else if (*s != 'v' || !isDIGIT(s[1])
5360 || (s = force_version(s, TRUE), *s == 'v'))
5362 *PL_tokenbuf = '\0';
5363 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5364 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5365 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5367 yyerror("<> should be quotes");
5369 if (orig_keyword == KEY_require) {
5377 PL_last_uni = PL_oldbufptr;
5378 PL_last_lop_op = OP_REQUIRE;
5380 return REPORT( (int)REQUIRE );
5386 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5390 LOP(OP_RENAME,XTERM);
5399 LOP(OP_RINDEX,XTERM);
5409 UNIDOR(OP_READLINE);
5422 LOP(OP_REVERSE,XTERM);
5425 UNIDOR(OP_READLINK);
5433 TERM(sublex_start());
5435 TOKEN(1); /* force error */
5438 checkcomma(s,PL_tokenbuf,"filehandle");
5448 LOP(OP_SELECT,XTERM);
5454 LOP(OP_SEMCTL,XTERM);
5457 LOP(OP_SEMGET,XTERM);
5460 LOP(OP_SEMOP,XTERM);
5466 LOP(OP_SETPGRP,XTERM);
5468 case KEY_setpriority:
5469 LOP(OP_SETPRIORITY,XTERM);
5471 case KEY_sethostent:
5477 case KEY_setservent:
5480 case KEY_setprotoent:
5490 LOP(OP_SEEKDIR,XTERM);
5492 case KEY_setsockopt:
5493 LOP(OP_SSOCKOPT,XTERM);
5499 LOP(OP_SHMCTL,XTERM);
5502 LOP(OP_SHMGET,XTERM);
5505 LOP(OP_SHMREAD,XTERM);
5508 LOP(OP_SHMWRITE,XTERM);
5511 LOP(OP_SHUTDOWN,XTERM);
5520 LOP(OP_SOCKET,XTERM);
5522 case KEY_socketpair:
5523 LOP(OP_SOCKPAIR,XTERM);
5526 checkcomma(s,PL_tokenbuf,"subroutine name");
5528 if (*s == ';' || *s == ')') /* probably a close */
5529 Perl_croak(aTHX_ "sort is now a reserved word");
5531 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5535 LOP(OP_SPLIT,XTERM);
5538 LOP(OP_SPRINTF,XTERM);
5541 LOP(OP_SPLICE,XTERM);
5556 LOP(OP_SUBSTR,XTERM);
5562 char tmpbuf[sizeof PL_tokenbuf];
5563 SSize_t tboffset = 0;
5564 expectation attrful;
5565 bool have_name, have_proto, bad_proto;
5566 const int key = tmp;
5570 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5571 (*s == ':' && s[1] == ':'))
5574 attrful = XATTRBLOCK;
5575 /* remember buffer pos'n for later force_word */
5576 tboffset = s - PL_oldbufptr;
5577 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5578 if (strchr(tmpbuf, ':'))
5579 sv_setpv(PL_subname, tmpbuf);
5581 sv_setsv(PL_subname,PL_curstname);
5582 sv_catpvs(PL_subname,"::");
5583 sv_catpvn(PL_subname,tmpbuf,len);
5590 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5591 PL_expect = XTERMBLOCK;
5592 attrful = XATTRTERM;
5593 sv_setpvn(PL_subname,"?",1);
5597 if (key == KEY_format) {
5599 PL_lex_formbrack = PL_lex_brackets + 1;
5601 (void) force_word(PL_oldbufptr + tboffset, WORD,
5606 /* Look for a prototype */
5610 s = scan_str(s,FALSE,FALSE);
5612 Perl_croak(aTHX_ "Prototype not terminated");
5613 /* strip spaces and check for bad characters */
5614 d = SvPVX(PL_lex_stuff);
5617 for (p = d; *p; ++p) {
5620 if (!strchr("$@%*;[]&\\", *p))
5625 if (bad_proto && ckWARN(WARN_SYNTAX))
5626 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5627 "Illegal character in prototype for %"SVf" : %s",
5629 SvCUR_set(PL_lex_stuff, tmp);
5637 if (*s == ':' && s[1] != ':')
5638 PL_expect = attrful;
5639 else if (*s != '{' && key == KEY_sub) {
5641 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5643 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5647 NEXTVAL_NEXTTOKE.opval =
5648 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5649 PL_lex_stuff = NULL;
5653 sv_setpv(PL_subname,
5654 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5657 (void) force_word(PL_oldbufptr + tboffset, WORD,
5666 LOP(OP_SYSTEM,XREF);
5669 LOP(OP_SYMLINK,XTERM);
5672 LOP(OP_SYSCALL,XTERM);
5675 LOP(OP_SYSOPEN,XTERM);
5678 LOP(OP_SYSSEEK,XTERM);
5681 LOP(OP_SYSREAD,XTERM);
5684 LOP(OP_SYSWRITE,XTERM);
5688 TERM(sublex_start());
5709 LOP(OP_TRUNCATE,XTERM);
5721 yylval.ival = CopLINE(PL_curcop);
5725 yylval.ival = CopLINE(PL_curcop);
5729 LOP(OP_UNLINK,XTERM);
5735 LOP(OP_UNPACK,XTERM);
5738 LOP(OP_UTIME,XTERM);
5744 LOP(OP_UNSHIFT,XTERM);
5747 s = tokenize_use(1, s);
5757 yylval.ival = CopLINE(PL_curcop);
5761 yylval.ival = CopLINE(PL_curcop);
5765 PL_hints |= HINT_BLOCK_SCOPE;
5772 LOP(OP_WAITPID,XTERM);
5781 ctl_l[0] = toCTRL('L');
5783 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
5786 /* Make sure $^L is defined */
5787 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
5792 if (PL_expect == XOPERATOR)
5798 yylval.ival = OP_XOR;
5803 TERM(sublex_start());
5808 #pragma segment Main
5812 S_pending_ident(pTHX)
5816 register I32 tmp = 0;
5817 /* pit holds the identifier we read and pending_ident is reset */
5818 char pit = PL_pending_ident;
5819 PL_pending_ident = 0;
5821 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5822 "### Pending identifier '%s'\n", PL_tokenbuf); });
5824 /* if we're in a my(), we can't allow dynamics here.
5825 $foo'bar has already been turned into $foo::bar, so
5826 just check for colons.
5828 if it's a legal name, the OP is a PADANY.
5831 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5832 if (strchr(PL_tokenbuf,':'))
5833 yyerror(Perl_form(aTHX_ "No package name allowed for "
5834 "variable %s in \"our\"",
5836 tmp = allocmy(PL_tokenbuf);
5839 if (strchr(PL_tokenbuf,':'))
5840 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5842 yylval.opval = newOP(OP_PADANY, 0);
5843 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5849 build the ops for accesses to a my() variable.
5851 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5852 then used in a comparison. This catches most, but not
5853 all cases. For instance, it catches
5854 sort { my($a); $a <=> $b }
5856 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5857 (although why you'd do that is anyone's guess).
5860 if (!strchr(PL_tokenbuf,':')) {
5862 tmp = pad_findmy(PL_tokenbuf);
5863 if (tmp != NOT_IN_PAD) {
5864 /* might be an "our" variable" */
5865 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
5866 /* build ops for a bareword */
5867 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5868 HEK * const stashname = HvNAME_HEK(stash);
5869 SV * const sym = newSVhek(stashname);
5870 sv_catpvs(sym, "::");
5871 sv_catpv(sym, PL_tokenbuf+1);
5872 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5873 yylval.opval->op_private = OPpCONST_ENTERED;
5876 ? (GV_ADDMULTI | GV_ADDINEVAL)
5879 ((PL_tokenbuf[0] == '$') ? SVt_PV
5880 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5885 /* if it's a sort block and they're naming $a or $b */
5886 if (PL_last_lop_op == OP_SORT &&
5887 PL_tokenbuf[0] == '$' &&
5888 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5891 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5892 d < PL_bufend && *d != '\n';
5895 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5896 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5902 yylval.opval = newOP(OP_PADANY, 0);
5903 yylval.opval->op_targ = tmp;
5909 Whine if they've said @foo in a doublequoted string,
5910 and @foo isn't a variable we can find in the symbol
5913 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5914 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5915 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5916 && ckWARN(WARN_AMBIGUOUS))
5918 /* Downgraded from fatal to warning 20000522 mjd */
5919 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5920 "Possible unintended interpolation of %s in string",
5925 /* build ops for a bareword */
5926 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5927 yylval.opval->op_private = OPpCONST_ENTERED;
5930 /* If the identifier refers to a stash, don't autovivify it.
5931 * Change 24660 had the side effect of causing symbol table
5932 * hashes to always be defined, even if they were freshly
5933 * created and the only reference in the entire program was
5934 * the single statement with the defined %foo::bar:: test.
5935 * It appears that all code in the wild doing this actually
5936 * wants to know whether sub-packages have been loaded, so
5937 * by avoiding auto-vivifying symbol tables, we ensure that
5938 * defined %foo::bar:: continues to be false, and the existing
5939 * tests still give the expected answers, even though what
5940 * they're actually testing has now changed subtly.
5942 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
5944 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
5945 ((PL_tokenbuf[0] == '$') ? SVt_PV
5946 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5952 * The following code was generated by perl_keyword.pl.
5956 Perl_keyword (pTHX_ const char *name, I32 len)
5961 case 1: /* 5 tokens of length 1 */
5993 case 2: /* 18 tokens of length 2 */
6139 case 3: /* 29 tokens of length 3 */
6143 if (name[1] == 'N' &&
6206 if (name[1] == 'i' &&
6228 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6246 if (name[1] == 'o' &&
6255 if (name[1] == 'e' &&
6264 if (name[1] == 'n' &&
6273 if (name[1] == 'o' &&
6282 if (name[1] == 'a' &&
6291 if (name[1] == 'o' &&
6353 if (name[1] == 'e' &&
6367 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6393 if (name[1] == 'i' &&
6402 if (name[1] == 's' &&
6411 if (name[1] == 'e' &&
6420 if (name[1] == 'o' &&
6432 case 4: /* 41 tokens of length 4 */
6436 if (name[1] == 'O' &&
6446 if (name[1] == 'N' &&
6456 if (name[1] == 'i' &&
6466 if (name[1] == 'h' &&
6476 if (name[1] == 'u' &&
6489 if (name[2] == 'c' &&
6498 if (name[2] == 's' &&
6507 if (name[2] == 'a' &&
6543 if (name[1] == 'o' &&
6556 if (name[2] == 't' &&
6565 if (name[2] == 'o' &&
6574 if (name[2] == 't' &&
6583 if (name[2] == 'e' &&
6596 if (name[1] == 'o' &&
6609 if (name[2] == 'y' &&
6618 if (name[2] == 'l' &&
6634 if (name[2] == 's' &&
6643 if (name[2] == 'n' &&
6652 if (name[2] == 'c' &&
6665 if (name[1] == 'e' &&
6675 if (name[1] == 'p' &&
6688 if (name[2] == 'c' &&
6697 if (name[2] == 'p' &&
6706 if (name[2] == 's' &&
6722 if (name[2] == 'n' &&
6792 if (name[2] == 'r' &&
6801 if (name[2] == 'r' &&
6810 if (name[2] == 'a' &&
6826 if (name[2] == 'l' &&
6888 if (name[2] == 'e' &&
6891 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6904 case 5: /* 38 tokens of length 5 */
6908 if (name[1] == 'E' &&
6919 if (name[1] == 'H' &&
6933 if (name[2] == 'a' &&
6943 if (name[2] == 'a' &&
6960 if (name[2] == 'e' &&
6970 if (name[2] == 'e' &&
6974 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6990 if (name[3] == 'i' &&
6999 if (name[3] == 'o' &&
7035 if (name[2] == 'o' &&
7045 if (name[2] == 'y' &&
7059 if (name[1] == 'l' &&
7073 if (name[2] == 'n' &&
7083 if (name[2] == 'o' &&
7097 if (name[1] == 'i' &&
7102 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
7111 if (name[2] == 'd' &&
7121 if (name[2] == 'c' &&
7138 if (name[2] == 'c' &&
7148 if (name[2] == 't' &&
7162 if (name[1] == 'k' &&
7173 if (name[1] == 'r' &&
7187 if (name[2] == 's' &&
7197 if (name[2] == 'd' &&
7214 if (name[2] == 'm' &&
7224 if (name[2] == 'i' &&
7234 if (name[2] == 'e' &&
7244 if (name[2] == 'l' &&
7254 if (name[2] == 'a' &&
7264 if (name[2] == 'u' &&
7278 if (name[1] == 'i' &&
7292 if (name[2] == 'a' &&
7305 if (name[3] == 'e' &&
7340 if (name[2] == 'i' &&
7357 if (name[2] == 'i' &&
7367 if (name[2] == 'i' &&
7384 case 6: /* 33 tokens of length 6 */
7388 if (name[1] == 'c' &&
7403 if (name[2] == 'l' &&
7414 if (name[2] == 'r' &&
7429 if (name[1] == 'e' &&
7444 if (name[2] == 's' &&
7449 if(ckWARN_d(WARN_SYNTAX))
7450 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7456 if (name[2] == 'i' &&
7474 if (name[2] == 'l' &&
7485 if (name[2] == 'r' &&
7500 if (name[1] == 'm' &&
7515 if (name[2] == 'n' &&
7526 if (name[2] == 's' &&
7541 if (name[1] == 's' &&
7547 if (name[4] == 't' &&
7556 if (name[4] == 'e' &&
7565 if (name[4] == 'c' &&
7574 if (name[4] == 'n' &&
7590 if (name[1] == 'r' &&
7608 if (name[3] == 'a' &&
7618 if (name[3] == 'u' &&
7632 if (name[2] == 'n' &&
7650 if (name[2] == 'a' &&
7664 if (name[3] == 'e' &&
7677 if (name[4] == 't' &&
7686 if (name[4] == 'e' &&
7708 if (name[4] == 't' &&
7717 if (name[4] == 'e' &&
7733 if (name[2] == 'c' &&
7744 if (name[2] == 'l' &&
7755 if (name[2] == 'b' &&
7766 if (name[2] == 's' &&
7789 if (name[4] == 's' &&
7798 if (name[4] == 'n' &&
7811 if (name[3] == 'a' &&
7828 if (name[1] == 'a' &&
7843 case 7: /* 29 tokens of length 7 */
7847 if (name[1] == 'E' &&
7860 if (name[1] == '_' &&
7873 if (name[1] == 'i' &&
7880 return -KEY_binmode;
7886 if (name[1] == 'o' &&
7893 return -KEY_connect;
7902 if (name[2] == 'm' &&
7908 return -KEY_dbmopen;
7919 if (name[4] == 'u' &&
7923 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7929 if (name[4] == 'n' &&
7950 if (name[1] == 'o' &&
7963 if (name[1] == 'e' &&
7970 if (name[5] == 'r' &&
7973 return -KEY_getpgrp;
7979 if (name[5] == 'i' &&
7982 return -KEY_getppid;
7995 if (name[1] == 'c' &&
8002 return -KEY_lcfirst;
8008 if (name[1] == 'p' &&
8015 return -KEY_opendir;
8021 if (name[1] == 'a' &&
8039 if (name[3] == 'd' &&
8044 return -KEY_readdir;
8050 if (name[3] == 'u' &&
8061 if (name[3] == 'e' &&
8066 return -KEY_reverse;
8085 if (name[3] == 'k' &&
8090 return -KEY_seekdir;
8096 if (name[3] == 'p' &&
8101 return -KEY_setpgrp;
8111 if (name[2] == 'm' &&
8117 return -KEY_shmread;
8123 if (name[2] == 'r' &&
8129 return -KEY_sprintf;
8138 if (name[3] == 'l' &&
8143 return -KEY_symlink;
8152 if (name[4] == 'a' &&
8156 return -KEY_syscall;
8162 if (name[4] == 'p' &&
8166 return -KEY_sysopen;
8172 if (name[4] == 'e' &&
8176 return -KEY_sysread;
8182 if (name[4] == 'e' &&
8186 return -KEY_sysseek;
8204 if (name[1] == 'e' &&
8211 return -KEY_telldir;
8220 if (name[2] == 'f' &&
8226 return -KEY_ucfirst;
8232 if (name[2] == 's' &&
8238 return -KEY_unshift;
8248 if (name[1] == 'a' &&
8255 return -KEY_waitpid;
8264 case 8: /* 26 tokens of length 8 */
8268 if (name[1] == 'U' &&
8276 return KEY_AUTOLOAD;
8287 if (name[3] == 'A' &&
8293 return KEY___DATA__;
8299 if (name[3] == 'I' &&
8305 return -KEY___FILE__;
8311 if (name[3] == 'I' &&
8317 return -KEY___LINE__;
8333 if (name[2] == 'o' &&
8340 return -KEY_closedir;
8346 if (name[2] == 'n' &&
8353 return -KEY_continue;
8363 if (name[1] == 'b' &&
8371 return -KEY_dbmclose;
8377 if (name[1] == 'n' &&
8383 if (name[4] == 'r' &&
8388 return -KEY_endgrent;
8394 if (name[4] == 'w' &&
8399 return -KEY_endpwent;
8412 if (name[1] == 'o' &&
8420 return -KEY_formline;
8426 if (name[1] == 'e' &&
8437 if (name[6] == 'n' &&
8440 return -KEY_getgrent;
8446 if (name[6] == 'i' &&
8449 return -KEY_getgrgid;
8455 if (name[6] == 'a' &&
8458 return -KEY_getgrnam;
8471 if (name[4] == 'o' &&
8476 return -KEY_getlogin;
8487 if (name[6] == 'n' &&
8490 return -KEY_getpwent;
8496 if (name[6] == 'a' &&
8499 return -KEY_getpwnam;
8505 if (name[6] == 'i' &&
8508 return -KEY_getpwuid;
8528 if (name[1] == 'e' &&
8535 if (name[5] == 'i' &&
8542 return -KEY_readline;
8547 return -KEY_readlink;
8558 if (name[5] == 'i' &&
8562 return -KEY_readpipe;
8583 if (name[4] == 'r' &&
8588 return -KEY_setgrent;
8594 if (name[4] == 'w' &&
8599 return -KEY_setpwent;
8615 if (name[3] == 'w' &&
8621 return -KEY_shmwrite;
8627 if (name[3] == 't' &&
8633 return -KEY_shutdown;
8643 if (name[2] == 's' &&
8650 return -KEY_syswrite;
8660 if (name[1] == 'r' &&
8668 return -KEY_truncate;
8677 case 9: /* 8 tokens of length 9 */
8681 if (name[1] == 'n' &&
8690 return -KEY_endnetent;
8696 if (name[1] == 'e' &&
8705 return -KEY_getnetent;
8711 if (name[1] == 'o' &&
8720 return -KEY_localtime;
8726 if (name[1] == 'r' &&
8735 return KEY_prototype;
8741 if (name[1] == 'u' &&
8750 return -KEY_quotemeta;
8756 if (name[1] == 'e' &&
8765 return -KEY_rewinddir;
8771 if (name[1] == 'e' &&
8780 return -KEY_setnetent;
8786 if (name[1] == 'a' &&
8795 return -KEY_wantarray;
8804 case 10: /* 9 tokens of length 10 */
8808 if (name[1] == 'n' &&
8814 if (name[4] == 'o' &&
8821 return -KEY_endhostent;
8827 if (name[4] == 'e' &&
8834 return -KEY_endservent;
8847 if (name[1] == 'e' &&
8853 if (name[4] == 'o' &&
8860 return -KEY_gethostent;
8869 if (name[5] == 'r' &&
8875 return -KEY_getservent;
8881 if (name[5] == 'c' &&
8887 return -KEY_getsockopt;
8912 if (name[4] == 'o' &&
8919 return -KEY_sethostent;
8928 if (name[5] == 'r' &&
8934 return -KEY_setservent;
8940 if (name[5] == 'c' &&
8946 return -KEY_setsockopt;
8963 if (name[2] == 'c' &&
8972 return -KEY_socketpair;
8985 case 11: /* 8 tokens of length 11 */
8989 if (name[1] == '_' &&
9000 return -KEY___PACKAGE__;
9006 if (name[1] == 'n' &&
9017 return -KEY_endprotoent;
9023 if (name[1] == 'e' &&
9032 if (name[5] == 'e' &&
9039 return -KEY_getpeername;
9048 if (name[6] == 'o' &&
9054 return -KEY_getpriority;
9060 if (name[6] == 't' &&
9066 return -KEY_getprotoent;
9080 if (name[4] == 'o' &&
9088 return -KEY_getsockname;
9101 if (name[1] == 'e' &&
9109 if (name[6] == 'o' &&
9115 return -KEY_setpriority;
9121 if (name[6] == 't' &&
9127 return -KEY_setprotoent;
9143 case 12: /* 2 tokens of length 12 */
9144 if (name[0] == 'g' &&
9156 if (name[9] == 'd' &&
9159 { /* getnetbyaddr */
9160 return -KEY_getnetbyaddr;
9166 if (name[9] == 'a' &&
9169 { /* getnetbyname */
9170 return -KEY_getnetbyname;
9182 case 13: /* 4 tokens of length 13 */
9183 if (name[0] == 'g' &&
9190 if (name[4] == 'o' &&
9199 if (name[10] == 'd' &&
9202 { /* gethostbyaddr */
9203 return -KEY_gethostbyaddr;
9209 if (name[10] == 'a' &&
9212 { /* gethostbyname */
9213 return -KEY_gethostbyname;
9226 if (name[4] == 'e' &&
9235 if (name[10] == 'a' &&
9238 { /* getservbyname */
9239 return -KEY_getservbyname;
9245 if (name[10] == 'o' &&
9248 { /* getservbyport */
9249 return -KEY_getservbyport;
9268 case 14: /* 1 tokens of length 14 */
9269 if (name[0] == 'g' &&
9283 { /* getprotobyname */
9284 return -KEY_getprotobyname;
9289 case 16: /* 1 tokens of length 16 */
9290 if (name[0] == 'g' &&
9306 { /* getprotobynumber */
9307 return -KEY_getprotobynumber;
9321 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9326 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9327 if (ckWARN(WARN_SYNTAX)) {
9329 for (w = s+2; *w && level; w++) {
9336 for (; *w && isSPACE(*w); w++) ;
9337 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9338 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9339 "%s (...) interpreted as function",name);
9342 while (s < PL_bufend && isSPACE(*s))
9346 while (s < PL_bufend && isSPACE(*s))
9348 if (isIDFIRST_lazy_if(s,UTF)) {
9350 while (isALNUM_lazy_if(s,UTF))
9352 while (s < PL_bufend && isSPACE(*s))
9356 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9357 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9361 Perl_croak(aTHX_ "No comma allowed after %s", what);
9366 /* Either returns sv, or mortalizes sv and returns a new SV*.
9367 Best used as sv=new_constant(..., sv, ...).
9368 If s, pv are NULL, calls subroutine with one argument,
9369 and type is used with error messages only. */
9372 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9376 HV * const table = GvHV(PL_hintgv); /* ^H */
9380 const char *why1 = "", *why2 = "", *why3 = "";
9382 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9385 why2 = strEQ(key,"charnames")
9386 ? "(possibly a missing \"use charnames ...\")"
9388 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9389 (type ? type: "undef"), why2);
9391 /* This is convoluted and evil ("goto considered harmful")
9392 * but I do not understand the intricacies of all the different
9393 * failure modes of %^H in here. The goal here is to make
9394 * the most probable error message user-friendly. --jhi */
9399 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9400 (type ? type: "undef"), why1, why2, why3);
9402 yyerror(SvPVX_const(msg));
9406 cvp = hv_fetch(table, key, strlen(key), FALSE);
9407 if (!cvp || !SvOK(*cvp)) {
9410 why3 = "} is not defined";
9413 sv_2mortal(sv); /* Parent created it permanently */
9416 pv = sv_2mortal(newSVpvn(s, len));
9418 typesv = sv_2mortal(newSVpv(type, 0));
9420 typesv = &PL_sv_undef;
9422 PUSHSTACKi(PERLSI_OVERLOAD);
9434 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9438 /* Check the eval first */
9439 if (!PL_in_eval && SvTRUE(ERRSV)) {
9440 sv_catpvs(ERRSV, "Propagated");
9441 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9443 res = SvREFCNT_inc_simple(sv);
9447 SvREFCNT_inc_simple_void(res);
9456 why1 = "Call to &{$^H{";
9458 why3 = "}} did not return a defined value";
9466 /* Returns a NUL terminated string, with the length of the string written to
9470 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9473 register char *d = dest;
9474 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9477 Perl_croak(aTHX_ ident_too_long);
9478 if (isALNUM(*s)) /* UTF handled below */
9480 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9485 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9489 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9490 char *t = s + UTF8SKIP(s);
9491 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9493 if (d + (t - s) > e)
9494 Perl_croak(aTHX_ ident_too_long);
9495 Copy(s, d, t - s, char);
9508 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9511 char *bracket = NULL;
9513 register char *d = dest;
9514 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
9519 while (isDIGIT(*s)) {
9521 Perl_croak(aTHX_ ident_too_long);
9528 Perl_croak(aTHX_ ident_too_long);
9529 if (isALNUM(*s)) /* UTF handled below */
9531 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9536 else if (*s == ':' && s[1] == ':') {
9540 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9541 char *t = s + UTF8SKIP(s);
9542 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9544 if (d + (t - s) > e)
9545 Perl_croak(aTHX_ ident_too_long);
9546 Copy(s, d, t - s, char);
9557 if (PL_lex_state != LEX_NORMAL)
9558 PL_lex_state = LEX_INTERPENDMAYBE;
9561 if (*s == '$' && s[1] &&
9562 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9575 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9580 if (isSPACE(s[-1])) {
9582 const char ch = *s++;
9583 if (!SPACE_OR_TAB(ch)) {
9589 if (isIDFIRST_lazy_if(d,UTF)) {
9593 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9594 end += UTF8SKIP(end);
9595 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9596 end += UTF8SKIP(end);
9598 Copy(s, d, end - s, char);
9603 while ((isALNUM(*s) || *s == ':') && d < e)
9606 Perl_croak(aTHX_ ident_too_long);
9609 while (s < send && SPACE_OR_TAB(*s)) s++;
9610 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9611 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9612 const char *brack = *s == '[' ? "[...]" : "{...}";
9613 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9614 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9615 funny, dest, brack, funny, dest, brack);
9618 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9622 /* Handle extended ${^Foo} variables
9623 * 1999-02-27 mjd-perl-patch@plover.com */
9624 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9628 while (isALNUM(*s) && d < e) {
9632 Perl_croak(aTHX_ ident_too_long);
9637 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9638 PL_lex_state = LEX_INTERPEND;
9643 if (PL_lex_state == LEX_NORMAL) {
9644 if (ckWARN(WARN_AMBIGUOUS) &&
9645 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9647 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9648 "Ambiguous use of %c{%s} resolved to %c%s",
9649 funny, dest, funny, dest);
9654 s = bracket; /* let the parser handle it */
9658 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9659 PL_lex_state = LEX_INTERPEND;
9664 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9666 PERL_UNUSED_CONTEXT;
9670 *pmfl |= PMf_GLOBAL;
9672 *pmfl |= PMf_CONTINUE;
9676 *pmfl |= PMf_MULTILINE;
9678 *pmfl |= PMf_SINGLELINE;
9680 *pmfl |= PMf_EXTENDED;
9684 S_scan_pat(pTHX_ char *start, I32 type)
9688 char *s = scan_str(start,FALSE,FALSE);
9689 const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
9692 const char * const delimiter = skipspace(start);
9693 Perl_croak(aTHX_ *delimiter == '?'
9694 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9695 : "Search pattern not terminated" );
9698 pm = (PMOP*)newPMOP(type, 0);
9699 if (PL_multi_open == '?')
9700 pm->op_pmflags |= PMf_ONCE;
9701 while (*s && strchr(valid_flags, *s))
9702 pmflag(&pm->op_pmflags,*s++);
9703 /* issue a warning if /c is specified,but /g is not */
9704 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9705 && ckWARN(WARN_REGEXP))
9707 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9710 pm->op_pmpermflags = pm->op_pmflags;
9712 PL_lex_op = (OP*)pm;
9713 yylval.ival = OP_MATCH;
9718 S_scan_subst(pTHX_ char *start)
9726 yylval.ival = OP_NULL;
9728 s = scan_str(start,FALSE,FALSE);
9731 Perl_croak(aTHX_ "Substitution pattern not terminated");
9733 if (s[-1] == PL_multi_open)
9736 first_start = PL_multi_start;
9737 s = scan_str(s,FALSE,FALSE);
9740 SvREFCNT_dec(PL_lex_stuff);
9741 PL_lex_stuff = NULL;
9743 Perl_croak(aTHX_ "Substitution replacement not terminated");
9745 PL_multi_start = first_start; /* so whole substitution is taken together */
9747 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9753 else if (strchr("iogcmsx", *s))
9754 pmflag(&pm->op_pmflags,*s++);
9759 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9760 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9764 SV * const repl = newSVpvs("");
9766 PL_sublex_info.super_bufptr = s;
9767 PL_sublex_info.super_bufend = PL_bufend;
9769 pm->op_pmflags |= PMf_EVAL;
9771 sv_catpv(repl, es ? "eval " : "do ");
9772 sv_catpvs(repl, "{ ");
9773 sv_catsv(repl, PL_lex_repl);
9774 sv_catpvs(repl, " }");
9776 SvREFCNT_dec(PL_lex_repl);
9780 pm->op_pmpermflags = pm->op_pmflags;
9781 PL_lex_op = (OP*)pm;
9782 yylval.ival = OP_SUBST;
9787 S_scan_trans(pTHX_ char *start)
9797 yylval.ival = OP_NULL;
9799 s = scan_str(start,FALSE,FALSE);
9801 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9802 if (s[-1] == PL_multi_open)
9805 s = scan_str(s,FALSE,FALSE);
9808 SvREFCNT_dec(PL_lex_stuff);
9809 PL_lex_stuff = NULL;
9811 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9814 complement = del = squash = 0;
9818 complement = OPpTRANS_COMPLEMENT;
9821 del = OPpTRANS_DELETE;
9824 squash = OPpTRANS_SQUASH;
9833 Newx(tbl, complement&&!del?258:256, short);
9834 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9835 o->op_private &= ~OPpTRANS_ALL;
9836 o->op_private |= del|squash|complement|
9837 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9838 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9841 yylval.ival = OP_TRANS;
9846 S_scan_heredoc(pTHX_ register char *s)
9850 I32 op_type = OP_SCALAR;
9854 const char *found_newline;
9858 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9862 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9865 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9866 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9869 s = delimcpy(d, e, s, PL_bufend, term, &len);
9879 if (!isALNUM_lazy_if(s,UTF))
9880 deprecate_old("bare << to mean <<\"\"");
9881 for (; isALNUM_lazy_if(s,UTF); s++) {
9886 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9887 Perl_croak(aTHX_ "Delimiter for here document is too long");
9890 len = d - PL_tokenbuf;
9891 #ifndef PERL_STRICT_CR
9892 d = strchr(s, '\r');
9894 char * const olds = s;
9896 while (s < PL_bufend) {
9902 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9911 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9915 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9916 herewas = newSVpvn(s,PL_bufend-s);
9920 herewas = newSVpvn(s,found_newline-s);
9922 s += SvCUR(herewas);
9925 sv_upgrade(tmpstr, SVt_PVIV);
9928 SvIV_set(tmpstr, -1);
9930 else if (term == '`') {
9931 op_type = OP_BACKTICK;
9932 SvIV_set(tmpstr, '\\');
9936 PL_multi_start = CopLINE(PL_curcop);
9937 PL_multi_open = PL_multi_close = '<';
9938 term = *PL_tokenbuf;
9939 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9940 char * const bufptr = PL_sublex_info.super_bufptr;
9941 char * const bufend = PL_sublex_info.super_bufend;
9942 char * const olds = s - SvCUR(herewas);
9943 s = strchr(bufptr, '\n');
9947 while (s < bufend &&
9948 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9950 CopLINE_inc(PL_curcop);
9953 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9954 missingterm(PL_tokenbuf);
9956 sv_setpvn(herewas,bufptr,d-bufptr+1);
9957 sv_setpvn(tmpstr,d+1,s-d);
9959 sv_catpvn(herewas,s,bufend-s);
9960 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9967 while (s < PL_bufend &&
9968 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9970 CopLINE_inc(PL_curcop);
9972 if (s >= PL_bufend) {
9973 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9974 missingterm(PL_tokenbuf);
9976 sv_setpvn(tmpstr,d+1,s-d);
9978 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9980 sv_catpvn(herewas,s,PL_bufend-s);
9981 sv_setsv(PL_linestr,herewas);
9982 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9983 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9984 PL_last_lop = PL_last_uni = NULL;
9987 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9988 while (s >= PL_bufend) { /* multiple line string? */
9990 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9991 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9992 missingterm(PL_tokenbuf);
9994 CopLINE_inc(PL_curcop);
9995 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9996 PL_last_lop = PL_last_uni = NULL;
9997 #ifndef PERL_STRICT_CR
9998 if (PL_bufend - PL_linestart >= 2) {
9999 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10000 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10002 PL_bufend[-2] = '\n';
10004 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10006 else if (PL_bufend[-1] == '\r')
10007 PL_bufend[-1] = '\n';
10009 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10010 PL_bufend[-1] = '\n';
10012 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10013 SV * const sv = newSV(0);
10015 sv_upgrade(sv, SVt_PVMG);
10016 sv_setsv(sv,PL_linestr);
10017 (void)SvIOK_on(sv);
10019 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
10021 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
10022 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
10023 *(SvPVX(PL_linestr) + off ) = ' ';
10024 sv_catsv(PL_linestr,herewas);
10025 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10026 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
10030 sv_catsv(tmpstr,PL_linestr);
10035 PL_multi_end = CopLINE(PL_curcop);
10036 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10037 SvPV_shrink_to_cur(tmpstr);
10039 SvREFCNT_dec(herewas);
10041 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10043 else if (PL_encoding)
10044 sv_recode_to_utf8(tmpstr, PL_encoding);
10046 PL_lex_stuff = tmpstr;
10047 yylval.ival = op_type;
10051 /* scan_inputsymbol
10052 takes: current position in input buffer
10053 returns: new position in input buffer
10054 side-effects: yylval and lex_op are set.
10059 <FH> read from filehandle
10060 <pkg::FH> read from package qualified filehandle
10061 <pkg'FH> read from package qualified filehandle
10062 <$fh> read from filehandle in $fh
10063 <*.h> filename glob
10068 S_scan_inputsymbol(pTHX_ char *start)
10071 register char *s = start; /* current position in buffer */
10075 char *d = PL_tokenbuf; /* start of temp holding space */
10076 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10078 end = strchr(s, '\n');
10081 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10083 /* die if we didn't have space for the contents of the <>,
10084 or if it didn't end, or if we see a newline
10087 if (len >= sizeof PL_tokenbuf)
10088 Perl_croak(aTHX_ "Excessively long <> operator");
10090 Perl_croak(aTHX_ "Unterminated <> operator");
10095 Remember, only scalar variables are interpreted as filehandles by
10096 this code. Anything more complex (e.g., <$fh{$num}>) will be
10097 treated as a glob() call.
10098 This code makes use of the fact that except for the $ at the front,
10099 a scalar variable and a filehandle look the same.
10101 if (*d == '$' && d[1]) d++;
10103 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10104 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10107 /* If we've tried to read what we allow filehandles to look like, and
10108 there's still text left, then it must be a glob() and not a getline.
10109 Use scan_str to pull out the stuff between the <> and treat it
10110 as nothing more than a string.
10113 if (d - PL_tokenbuf != len) {
10114 yylval.ival = OP_GLOB;
10116 s = scan_str(start,FALSE,FALSE);
10118 Perl_croak(aTHX_ "Glob not terminated");
10122 bool readline_overriden = FALSE;
10125 /* we're in a filehandle read situation */
10128 /* turn <> into <ARGV> */
10130 Copy("ARGV",d,5,char);
10132 /* Check whether readline() is overriden */
10133 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10135 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10137 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10138 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
10139 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10140 readline_overriden = TRUE;
10142 /* if <$fh>, create the ops to turn the variable into a
10148 /* try to find it in the pad for this block, otherwise find
10149 add symbol table ops
10151 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10152 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10153 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10154 HEK * const stashname = HvNAME_HEK(stash);
10155 SV * const sym = sv_2mortal(newSVhek(stashname));
10156 sv_catpvs(sym, "::");
10157 sv_catpv(sym, d+1);
10162 OP * const o = newOP(OP_PADSV, 0);
10164 PL_lex_op = readline_overriden
10165 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10166 append_elem(OP_LIST, o,
10167 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10168 : (OP*)newUNOP(OP_READLINE, 0, o);
10177 ? (GV_ADDMULTI | GV_ADDINEVAL)
10180 PL_lex_op = readline_overriden
10181 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10182 append_elem(OP_LIST,
10183 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10184 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10185 : (OP*)newUNOP(OP_READLINE, 0,
10186 newUNOP(OP_RV2SV, 0,
10187 newGVOP(OP_GV, 0, gv)));
10189 if (!readline_overriden)
10190 PL_lex_op->op_flags |= OPf_SPECIAL;
10191 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10192 yylval.ival = OP_NULL;
10195 /* If it's none of the above, it must be a literal filehandle
10196 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10198 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10199 PL_lex_op = readline_overriden
10200 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10201 append_elem(OP_LIST,
10202 newGVOP(OP_GV, 0, gv),
10203 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10204 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10205 yylval.ival = OP_NULL;
10214 takes: start position in buffer
10215 keep_quoted preserve \ on the embedded delimiter(s)
10216 keep_delims preserve the delimiters around the string
10217 returns: position to continue reading from buffer
10218 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10219 updates the read buffer.
10221 This subroutine pulls a string out of the input. It is called for:
10222 q single quotes q(literal text)
10223 ' single quotes 'literal text'
10224 qq double quotes qq(interpolate $here please)
10225 " double quotes "interpolate $here please"
10226 qx backticks qx(/bin/ls -l)
10227 ` backticks `/bin/ls -l`
10228 qw quote words @EXPORT_OK = qw( func() $spam )
10229 m// regexp match m/this/
10230 s/// regexp substitute s/this/that/
10231 tr/// string transliterate tr/this/that/
10232 y/// string transliterate y/this/that/
10233 ($*@) sub prototypes sub foo ($)
10234 (stuff) sub attr parameters sub foo : attr(stuff)
10235 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10237 In most of these cases (all but <>, patterns and transliterate)
10238 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10239 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10240 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10243 It skips whitespace before the string starts, and treats the first
10244 character as the delimiter. If the delimiter is one of ([{< then
10245 the corresponding "close" character )]}> is used as the closing
10246 delimiter. It allows quoting of delimiters, and if the string has
10247 balanced delimiters ([{<>}]) it allows nesting.
10249 On success, the SV with the resulting string is put into lex_stuff or,
10250 if that is already non-NULL, into lex_repl. The second case occurs only
10251 when parsing the RHS of the special constructs s/// and tr/// (y///).
10252 For convenience, the terminating delimiter character is stuffed into
10257 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10260 SV *sv; /* scalar value: string */
10261 char *tmps; /* temp string, used for delimiter matching */
10262 register char *s = start; /* current position in the buffer */
10263 register char term; /* terminating character */
10264 register char *to; /* current position in the sv's data */
10265 I32 brackets = 1; /* bracket nesting level */
10266 bool has_utf8 = FALSE; /* is there any utf8 content? */
10267 I32 termcode; /* terminating char. code */
10268 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10269 STRLEN termlen; /* length of terminating string */
10270 char *last = NULL; /* last position for nesting bracket */
10272 /* skip space before the delimiter */
10277 /* mark where we are, in case we need to report errors */
10280 /* after skipping whitespace, the next character is the terminator */
10283 termcode = termstr[0] = term;
10287 termcode = utf8_to_uvchr((U8*)s, &termlen);
10288 Copy(s, termstr, termlen, U8);
10289 if (!UTF8_IS_INVARIANT(term))
10293 /* mark where we are */
10294 PL_multi_start = CopLINE(PL_curcop);
10295 PL_multi_open = term;
10297 /* find corresponding closing delimiter */
10298 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10299 termcode = termstr[0] = term = tmps[5];
10301 PL_multi_close = term;
10303 /* create a new SV to hold the contents. 79 is the SV's initial length.
10304 What a random number. */
10306 sv_upgrade(sv, SVt_PVIV);
10307 SvIV_set(sv, termcode);
10308 (void)SvPOK_only(sv); /* validate pointer */
10310 /* move past delimiter and try to read a complete string */
10312 sv_catpvn(sv, s, termlen);
10315 if (PL_encoding && !UTF) {
10319 int offset = s - SvPVX_const(PL_linestr);
10320 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10321 &offset, (char*)termstr, termlen);
10322 const char * const ns = SvPVX_const(PL_linestr) + offset;
10323 char * const svlast = SvEND(sv) - 1;
10325 for (; s < ns; s++) {
10326 if (*s == '\n' && !PL_rsfp)
10327 CopLINE_inc(PL_curcop);
10330 goto read_more_line;
10332 /* handle quoted delimiters */
10333 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10335 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10337 if ((svlast-1 - t) % 2) {
10338 if (!keep_quoted) {
10339 *(svlast-1) = term;
10341 SvCUR_set(sv, SvCUR(sv) - 1);
10346 if (PL_multi_open == PL_multi_close) {
10354 for (t = w = last; t < svlast; w++, t++) {
10355 /* At here, all closes are "was quoted" one,
10356 so we don't check PL_multi_close. */
10358 if (!keep_quoted && *(t+1) == PL_multi_open)
10363 else if (*t == PL_multi_open)
10371 SvCUR_set(sv, w - SvPVX_const(sv));
10374 if (--brackets <= 0)
10379 if (!keep_delims) {
10380 SvCUR_set(sv, SvCUR(sv) - 1);
10386 /* extend sv if need be */
10387 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10388 /* set 'to' to the next character in the sv's string */
10389 to = SvPVX(sv)+SvCUR(sv);
10391 /* if open delimiter is the close delimiter read unbridle */
10392 if (PL_multi_open == PL_multi_close) {
10393 for (; s < PL_bufend; s++,to++) {
10394 /* embedded newlines increment the current line number */
10395 if (*s == '\n' && !PL_rsfp)
10396 CopLINE_inc(PL_curcop);
10397 /* handle quoted delimiters */
10398 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10399 if (!keep_quoted && s[1] == term)
10401 /* any other quotes are simply copied straight through */
10405 /* terminate when run out of buffer (the for() condition), or
10406 have found the terminator */
10407 else if (*s == term) {
10410 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10413 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10419 /* if the terminator isn't the same as the start character (e.g.,
10420 matched brackets), we have to allow more in the quoting, and
10421 be prepared for nested brackets.
10424 /* read until we run out of string, or we find the terminator */
10425 for (; s < PL_bufend; s++,to++) {
10426 /* embedded newlines increment the line count */
10427 if (*s == '\n' && !PL_rsfp)
10428 CopLINE_inc(PL_curcop);
10429 /* backslashes can escape the open or closing characters */
10430 if (*s == '\\' && s+1 < PL_bufend) {
10431 if (!keep_quoted &&
10432 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10437 /* allow nested opens and closes */
10438 else if (*s == PL_multi_close && --brackets <= 0)
10440 else if (*s == PL_multi_open)
10442 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10447 /* terminate the copied string and update the sv's end-of-string */
10449 SvCUR_set(sv, to - SvPVX_const(sv));
10452 * this next chunk reads more into the buffer if we're not done yet
10456 break; /* handle case where we are done yet :-) */
10458 #ifndef PERL_STRICT_CR
10459 if (to - SvPVX_const(sv) >= 2) {
10460 if ((to[-2] == '\r' && to[-1] == '\n') ||
10461 (to[-2] == '\n' && to[-1] == '\r'))
10465 SvCUR_set(sv, to - SvPVX_const(sv));
10467 else if (to[-1] == '\r')
10470 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10475 /* if we're out of file, or a read fails, bail and reset the current
10476 line marker so we can report where the unterminated string began
10479 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10481 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10484 /* we read a line, so increment our line counter */
10485 CopLINE_inc(PL_curcop);
10487 /* update debugger info */
10488 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10489 SV * const line_sv = newSV(0);
10491 sv_upgrade(line_sv, SVt_PVMG);
10492 sv_setsv(line_sv,PL_linestr);
10493 (void)SvIOK_on(line_sv);
10494 SvIV_set(line_sv, 0);
10495 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), line_sv);
10498 /* having changed the buffer, we must update PL_bufend */
10499 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10500 PL_last_lop = PL_last_uni = NULL;
10503 /* at this point, we have successfully read the delimited string */
10505 if (!PL_encoding || UTF) {
10507 sv_catpvn(sv, s, termlen);
10510 if (has_utf8 || PL_encoding)
10513 PL_multi_end = CopLINE(PL_curcop);
10515 /* if we allocated too much space, give some back */
10516 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10517 SvLEN_set(sv, SvCUR(sv) + 1);
10518 SvPV_renew(sv, SvLEN(sv));
10521 /* decide whether this is the first or second quoted string we've read
10534 takes: pointer to position in buffer
10535 returns: pointer to new position in buffer
10536 side-effects: builds ops for the constant in yylval.op
10538 Read a number in any of the formats that Perl accepts:
10540 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10541 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10544 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10546 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10549 If it reads a number without a decimal point or an exponent, it will
10550 try converting the number to an integer and see if it can do so
10551 without loss of precision.
10555 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10558 register const char *s = start; /* current position in buffer */
10559 register char *d; /* destination in temp buffer */
10560 register char *e; /* end of temp buffer */
10561 NV nv; /* number read, as a double */
10562 SV *sv = NULL; /* place to put the converted number */
10563 bool floatit; /* boolean: int or float? */
10564 const char *lastub = NULL; /* position of last underbar */
10565 static char const number_too_long[] = "Number too long";
10567 /* We use the first character to decide what type of number this is */
10571 Perl_croak(aTHX_ "panic: scan_num");
10573 /* if it starts with a 0, it could be an octal number, a decimal in
10574 0.13 disguise, or a hexadecimal number, or a binary number. */
10578 u holds the "number so far"
10579 shift the power of 2 of the base
10580 (hex == 4, octal == 3, binary == 1)
10581 overflowed was the number more than we can hold?
10583 Shift is used when we add a digit. It also serves as an "are
10584 we in octal/hex/binary?" indicator to disallow hex characters
10585 when in octal mode.
10590 bool overflowed = FALSE;
10591 bool just_zero = TRUE; /* just plain 0 or binary number? */
10592 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10593 static const char* const bases[5] =
10594 { "", "binary", "", "octal", "hexadecimal" };
10595 static const char* const Bases[5] =
10596 { "", "Binary", "", "Octal", "Hexadecimal" };
10597 static const char* const maxima[5] =
10599 "0b11111111111111111111111111111111",
10603 const char *base, *Base, *max;
10605 /* check for hex */
10610 } else if (s[1] == 'b') {
10615 /* check for a decimal in disguise */
10616 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10618 /* so it must be octal */
10625 if (ckWARN(WARN_SYNTAX))
10626 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10627 "Misplaced _ in number");
10631 base = bases[shift];
10632 Base = Bases[shift];
10633 max = maxima[shift];
10635 /* read the rest of the number */
10637 /* x is used in the overflow test,
10638 b is the digit we're adding on. */
10643 /* if we don't mention it, we're done */
10647 /* _ are ignored -- but warned about if consecutive */
10649 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10650 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10651 "Misplaced _ in number");
10655 /* 8 and 9 are not octal */
10656 case '8': case '9':
10658 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10662 case '2': case '3': case '4':
10663 case '5': case '6': case '7':
10665 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10668 case '0': case '1':
10669 b = *s++ & 15; /* ASCII digit -> value of digit */
10673 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10674 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10675 /* make sure they said 0x */
10678 b = (*s++ & 7) + 9;
10680 /* Prepare to put the digit we have onto the end
10681 of the number so far. We check for overflows.
10687 x = u << shift; /* make room for the digit */
10689 if ((x >> shift) != u
10690 && !(PL_hints & HINT_NEW_BINARY)) {
10693 if (ckWARN_d(WARN_OVERFLOW))
10694 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10695 "Integer overflow in %s number",
10698 u = x | b; /* add the digit to the end */
10701 n *= nvshift[shift];
10702 /* If an NV has not enough bits in its
10703 * mantissa to represent an UV this summing of
10704 * small low-order numbers is a waste of time
10705 * (because the NV cannot preserve the
10706 * low-order bits anyway): we could just
10707 * remember when did we overflow and in the
10708 * end just multiply n by the right
10716 /* if we get here, we had success: make a scalar value from
10721 /* final misplaced underbar check */
10722 if (s[-1] == '_') {
10723 if (ckWARN(WARN_SYNTAX))
10724 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10729 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10730 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10731 "%s number > %s non-portable",
10737 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10738 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10739 "%s number > %s non-portable",
10744 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10745 sv = new_constant(start, s - start, "integer",
10747 else if (PL_hints & HINT_NEW_BINARY)
10748 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
10753 handle decimal numbers.
10754 we're also sent here when we read a 0 as the first digit
10756 case '1': case '2': case '3': case '4': case '5':
10757 case '6': case '7': case '8': case '9': case '.':
10760 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10763 /* read next group of digits and _ and copy into d */
10764 while (isDIGIT(*s) || *s == '_') {
10765 /* skip underscores, checking for misplaced ones
10769 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10770 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10771 "Misplaced _ in number");
10775 /* check for end of fixed-length buffer */
10777 Perl_croak(aTHX_ number_too_long);
10778 /* if we're ok, copy the character */
10783 /* final misplaced underbar check */
10784 if (lastub && s == lastub + 1) {
10785 if (ckWARN(WARN_SYNTAX))
10786 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10789 /* read a decimal portion if there is one. avoid
10790 3..5 being interpreted as the number 3. followed
10793 if (*s == '.' && s[1] != '.') {
10798 if (ckWARN(WARN_SYNTAX))
10799 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10800 "Misplaced _ in number");
10804 /* copy, ignoring underbars, until we run out of digits.
10806 for (; isDIGIT(*s) || *s == '_'; s++) {
10807 /* fixed length buffer check */
10809 Perl_croak(aTHX_ number_too_long);
10811 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10812 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10813 "Misplaced _ in number");
10819 /* fractional part ending in underbar? */
10820 if (s[-1] == '_') {
10821 if (ckWARN(WARN_SYNTAX))
10822 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10823 "Misplaced _ in number");
10825 if (*s == '.' && isDIGIT(s[1])) {
10826 /* oops, it's really a v-string, but without the "v" */
10832 /* read exponent part, if present */
10833 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10837 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10838 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10840 /* stray preinitial _ */
10842 if (ckWARN(WARN_SYNTAX))
10843 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10844 "Misplaced _ in number");
10848 /* allow positive or negative exponent */
10849 if (*s == '+' || *s == '-')
10852 /* stray initial _ */
10854 if (ckWARN(WARN_SYNTAX))
10855 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10856 "Misplaced _ in number");
10860 /* read digits of exponent */
10861 while (isDIGIT(*s) || *s == '_') {
10864 Perl_croak(aTHX_ number_too_long);
10868 if (((lastub && s == lastub + 1) ||
10869 (!isDIGIT(s[1]) && s[1] != '_'))
10870 && ckWARN(WARN_SYNTAX))
10871 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10872 "Misplaced _ in number");
10879 /* make an sv from the string */
10883 We try to do an integer conversion first if no characters
10884 indicating "float" have been found.
10889 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10891 if (flags == IS_NUMBER_IN_UV) {
10893 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10896 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10897 if (uv <= (UV) IV_MIN)
10898 sv_setiv(sv, -(IV)uv);
10905 /* terminate the string */
10907 nv = Atof(PL_tokenbuf);
10911 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10912 (PL_hints & HINT_NEW_INTEGER) )
10913 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10914 (floatit ? "float" : "integer"),
10918 /* if it starts with a v, it could be a v-string */
10921 sv = newSV(5); /* preallocate storage space */
10922 s = scan_vstring(s,sv);
10926 /* make the op for the constant and return */
10929 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10931 lvalp->opval = NULL;
10937 S_scan_formline(pTHX_ register char *s)
10940 register char *eol;
10942 SV * const stuff = newSVpvs("");
10943 bool needargs = FALSE;
10944 bool eofmt = FALSE;
10946 while (!needargs) {
10948 #ifdef PERL_STRICT_CR
10949 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10951 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10953 if (*t == '\n' || t == PL_bufend) {
10958 if (PL_in_eval && !PL_rsfp) {
10959 eol = (char *) memchr(s,'\n',PL_bufend-s);
10964 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10966 for (t = s; t < eol; t++) {
10967 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10969 goto enough; /* ~~ must be first line in formline */
10971 if (*t == '@' || *t == '^')
10975 sv_catpvn(stuff, s, eol-s);
10976 #ifndef PERL_STRICT_CR
10977 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10978 char *end = SvPVX(stuff) + SvCUR(stuff);
10981 SvCUR_set(stuff, SvCUR(stuff) - 1);
10990 s = filter_gets(PL_linestr, PL_rsfp, 0);
10991 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10992 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10993 PL_last_lop = PL_last_uni = NULL;
11002 if (SvCUR(stuff)) {
11005 PL_lex_state = LEX_NORMAL;
11006 NEXTVAL_NEXTTOKE.ival = 0;
11010 PL_lex_state = LEX_FORMLINE;
11012 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11014 else if (PL_encoding)
11015 sv_recode_to_utf8(stuff, PL_encoding);
11017 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11019 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
11023 SvREFCNT_dec(stuff);
11025 PL_lex_formbrack = 0;
11037 PL_cshlen = strlen(PL_cshname);
11039 #if defined(USE_ITHREADS)
11040 PERL_UNUSED_CONTEXT;
11046 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11049 const I32 oldsavestack_ix = PL_savestack_ix;
11050 CV* const outsidecv = PL_compcv;
11053 assert(SvTYPE(PL_compcv) == SVt_PVCV);
11055 SAVEI32(PL_subline);
11056 save_item(PL_subname);
11057 SAVESPTR(PL_compcv);
11059 PL_compcv = (CV*)newSV(0);
11060 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
11061 CvFLAGS(PL_compcv) |= flags;
11063 PL_subline = CopLINE(PL_curcop);
11064 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11065 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
11066 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11068 return oldsavestack_ix;
11072 #pragma segment Perl_yylex
11075 Perl_yywarn(pTHX_ const char *s)
11078 PL_in_eval |= EVAL_WARNONLY;
11080 PL_in_eval &= ~EVAL_WARNONLY;
11085 Perl_yyerror(pTHX_ const char *s)
11088 const char *where = NULL;
11089 const char *context = NULL;
11093 if (!yychar || (yychar == ';' && !PL_rsfp))
11095 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11096 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11097 PL_oldbufptr != PL_bufptr) {
11100 The code below is removed for NetWare because it abends/crashes on NetWare
11101 when the script has error such as not having the closing quotes like:
11102 if ($var eq "value)
11103 Checking of white spaces is anyway done in NetWare code.
11106 while (isSPACE(*PL_oldoldbufptr))
11109 context = PL_oldoldbufptr;
11110 contlen = PL_bufptr - PL_oldoldbufptr;
11112 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11113 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11116 The code below is removed for NetWare because it abends/crashes on NetWare
11117 when the script has error such as not having the closing quotes like:
11118 if ($var eq "value)
11119 Checking of white spaces is anyway done in NetWare code.
11122 while (isSPACE(*PL_oldbufptr))
11125 context = PL_oldbufptr;
11126 contlen = PL_bufptr - PL_oldbufptr;
11128 else if (yychar > 255)
11129 where = "next token ???";
11130 else if (yychar == -2) { /* YYEMPTY */
11131 if (PL_lex_state == LEX_NORMAL ||
11132 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11133 where = "at end of line";
11134 else if (PL_lex_inpat)
11135 where = "within pattern";
11137 where = "within string";
11140 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
11142 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11143 else if (isPRINT_LC(yychar))
11144 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
11146 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11147 where = SvPVX_const(where_sv);
11149 msg = sv_2mortal(newSVpv(s, 0));
11150 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11151 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11153 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
11155 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
11156 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11157 Perl_sv_catpvf(aTHX_ msg,
11158 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11159 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11162 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11163 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11166 if (PL_error_count >= 10) {
11167 if (PL_in_eval && SvCUR(ERRSV))
11168 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11169 ERRSV, OutCopFILE(PL_curcop));
11171 Perl_croak(aTHX_ "%s has too many errors.\n",
11172 OutCopFILE(PL_curcop));
11175 PL_in_my_stash = NULL;
11179 #pragma segment Main
11183 S_swallow_bom(pTHX_ U8 *s)
11186 const STRLEN slen = SvCUR(PL_linestr);
11189 if (s[1] == 0xFE) {
11190 /* UTF-16 little-endian? (or UTF32-LE?) */
11191 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11192 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11193 #ifndef PERL_NO_UTF16_FILTER
11194 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11197 if (PL_bufend > (char*)s) {
11201 filter_add(utf16rev_textfilter, NULL);
11202 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11203 utf16_to_utf8_reversed(s, news,
11204 PL_bufend - (char*)s - 1,
11206 sv_setpvn(PL_linestr, (const char*)news, newlen);
11208 SvUTF8_on(PL_linestr);
11209 s = (U8*)SvPVX(PL_linestr);
11210 PL_bufend = SvPVX(PL_linestr) + newlen;
11213 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11218 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11219 #ifndef PERL_NO_UTF16_FILTER
11220 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11223 if (PL_bufend > (char *)s) {
11227 filter_add(utf16_textfilter, NULL);
11228 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11229 utf16_to_utf8(s, news,
11230 PL_bufend - (char*)s,
11232 sv_setpvn(PL_linestr, (const char*)news, newlen);
11234 SvUTF8_on(PL_linestr);
11235 s = (U8*)SvPVX(PL_linestr);
11236 PL_bufend = SvPVX(PL_linestr) + newlen;
11239 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11244 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11245 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11246 s += 3; /* UTF-8 */
11252 if (s[2] == 0xFE && s[3] == 0xFF) {
11253 /* UTF-32 big-endian */
11254 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11257 else if (s[2] == 0 && s[3] != 0) {
11260 * are a good indicator of UTF-16BE. */
11261 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11266 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11269 * are a good indicator of UTF-16LE. */
11270 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11279 * Restore a source filter.
11283 restore_rsfp(pTHX_ void *f)
11286 PerlIO * const fp = (PerlIO*)f;
11288 if (PL_rsfp == PerlIO_stdin())
11289 PerlIO_clearerr(PL_rsfp);
11290 else if (PL_rsfp && (PL_rsfp != fp))
11291 PerlIO_close(PL_rsfp);
11295 #ifndef PERL_NO_UTF16_FILTER
11297 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11300 const STRLEN old = SvCUR(sv);
11301 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11302 DEBUG_P(PerlIO_printf(Perl_debug_log,
11303 "utf16_textfilter(%p): %d %d (%d)\n",
11304 utf16_textfilter, idx, maxlen, (int) count));
11308 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11309 Copy(SvPVX_const(sv), tmps, old, char);
11310 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11311 SvCUR(sv) - old, &newlen);
11312 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11314 DEBUG_P({sv_dump(sv);});
11319 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11322 const STRLEN old = SvCUR(sv);
11323 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11324 DEBUG_P(PerlIO_printf(Perl_debug_log,
11325 "utf16rev_textfilter(%p): %d %d (%d)\n",
11326 utf16rev_textfilter, idx, maxlen, (int) count));
11330 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11331 Copy(SvPVX_const(sv), tmps, old, char);
11332 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11333 SvCUR(sv) - old, &newlen);
11334 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11336 DEBUG_P({ sv_dump(sv); });
11342 Returns a pointer to the next character after the parsed
11343 vstring, as well as updating the passed in sv.
11345 Function must be called like
11348 s = scan_vstring(s,sv);
11350 The sv should already be large enough to store the vstring
11351 passed in, for performance reasons.
11356 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11359 const char *pos = s;
11360 const char *start = s;
11361 if (*pos == 'v') pos++; /* get past 'v' */
11362 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11364 if ( *pos != '.') {
11365 /* this may not be a v-string if followed by => */
11366 const char *next = pos;
11367 while (next < PL_bufend && isSPACE(*next))
11369 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11370 /* return string not v-string */
11371 sv_setpvn(sv,(char *)s,pos-s);
11372 return (char *)pos;
11376 if (!isALPHA(*pos)) {
11377 U8 tmpbuf[UTF8_MAXBYTES+1];
11379 if (*s == 'v') s++; /* get past 'v' */
11381 sv_setpvn(sv, "", 0);
11387 /* this is atoi() that tolerates underscores */
11388 const char *end = pos;
11390 while (--end >= s) {
11395 rev += (*end - '0') * mult;
11397 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11398 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11399 "Integer overflow in decimal number");
11403 if (rev > 0x7FFFFFFF)
11404 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11406 /* Append native character for the rev point */
11407 tmpend = uvchr_to_utf8(tmpbuf, rev);
11408 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11409 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11411 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11417 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11421 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11429 * c-indentation-style: bsd
11430 * c-basic-offset: 4
11431 * indent-tabs-mode: t
11434 * ex: set ts=8 sts=4 sw=4 noet: