3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
40 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
42 # define CURMAD(slot,sv)
43 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
46 #define XFAKEBRACK 128
49 #ifdef USE_UTF8_SCRIPTS
50 # define UTF (!IN_BYTES)
52 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
55 /* In variables named $^X, these are the legal values for X.
56 * 1999-02-27 mjd-perl-patch@plover.com */
57 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
59 /* On MacOS, respect nonbreaking spaces */
60 #ifdef MACOS_TRADITIONAL
61 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
63 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
66 /* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
68 * can get by with a single comparison (if the compiler is smart enough).
71 /* #define LEX_NOTPARSING 11 is done in perl.h. */
73 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
74 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
79 /* at end of code, eg "$x" followed by: */
80 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
83 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85 #define LEX_INTERPCONST 2 /* NOT USED */
86 #define LEX_FORMLINE 1 /* expecting a format line */
87 #define LEX_KNOWNEXT 0 /* next token known; just return it */
91 static const char* const lex_state_names[] = {
110 #include "keywords.h"
112 /* CLINE is a macro that ensures PL_copline has a sane value */
117 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
120 # define SKIPSPACE0(s) skipspace0(s)
121 # define SKIPSPACE1(s) skipspace1(s)
122 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123 # define PEEKSPACE(s) skipspace2(s,0)
125 # define SKIPSPACE0(s) skipspace(s)
126 # define SKIPSPACE1(s) skipspace(s)
127 # define SKIPSPACE2(s,tsv) skipspace(s)
128 # define PEEKSPACE(s) skipspace(s)
132 * Convenience functions to return different tokens and prime the
133 * lexer for the next token. They all take an argument.
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
145 * FUN1 : not used, except for not, which isn't a UNIOP
146 * BOop : bitwise or or xor
148 * SHop : shift operator
149 * PWop : power operator
150 * PMop : pattern-matching operator
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
154 * Rop : relational operator <= != gt
156 * Also see LOP and lop() below.
159 #ifdef DEBUGGING /* Serve -DT. */
160 # define REPORT(retval) tokereport((I32)retval)
162 # define REPORT(retval) (retval)
165 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
186 /* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
191 #define UNI2(f,x) { \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
198 return REPORT( (int)FUNC1 ); \
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
202 #define UNI(f) UNI2(f,XTERM)
203 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
205 #define UNIBRACK(f) { \
208 PL_last_uni = PL_oldbufptr; \
210 return REPORT( (int)FUNC1 ); \
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
215 /* grandfather return to old style */
216 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
220 /* how to interpret the yylval associated with the token */
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
230 static struct debug_tokens {
232 enum token_type type;
234 } const debug_tokens[] =
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
301 { 0, TOKENTYPE_NONE, NULL }
304 /* dump the returned token in rv, plus any optional arg in yylval */
307 S_tokereport(pTHX_ I32 rv)
311 const char *name = NULL;
312 enum token_type type = TOKENTYPE_NONE;
313 const struct debug_tokens *p;
314 SV* const report = newSVpvs("<== ");
316 for (p = debug_tokens; p->token; p++) {
317 if (p->token == (int)rv) {
324 Perl_sv_catpv(aTHX_ report, name);
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
328 sv_catpvs(report, "EOF");
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
345 case TOKENTYPE_OPVAL:
347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
348 PL_op_name[yylval.opval->op_type]);
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
356 sv_catpvs(report, "(opval=null)");
359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
365 /* print the buffer with suitable escapes */
368 S_printbuf(pTHX_ const char* fmt, const char* s)
370 SV* const tmp = newSVpvs("");
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
385 S_ao(pTHX_ int toketype)
388 if (*PL_bufptr == '=') {
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
415 S_no_op(pTHX_ const char *what, char *s)
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
426 if (ckWARN_d(WARN_SYNTAX)) {
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
432 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
450 * Complain about missing quote/regexp/heredoc terminator.
451 * If it's called with NULL then it cauterizes the line buffer.
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
458 S_missingterm(pTHX_ char *s)
464 char * const nl = strrchr(s,'\n');
470 iscntrl(PL_multi_close)
472 PL_multi_close < 32 || PL_multi_close == 127
476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
481 *tmpbuf = (char)PL_multi_close;
485 q = strchr(s,'"') ? '\'' : '"';
486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
489 #define FEATURE_IS_ENABLED(name) \
490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
497 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
500 HV * const hinthv = GvHV(PL_hintgv);
501 char he_name[32] = "feature_";
502 (void) my_strlcpy(&he_name[8], name, 24);
504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
512 Perl_deprecate(pTHX_ const char *s)
514 if (ckWARN(WARN_DEPRECATED))
515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
519 Perl_deprecate_old(pTHX_ const char *s)
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
531 "Use of %s is deprecated", s);
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
539 #ifdef PERL_CR_FILTER
543 register const char *s = SvPVX_const(sv);
544 register const char * const e = s + SvCUR(sv);
545 /* outer loop optimized to do nothing if there are no CR-LFs */
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
552 if (*s == '\r' && s[1] == '\n')
563 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
566 if (count > 0 && !maxlen)
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
579 Perl_lex_start(pTHX_ SV *line)
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
590 SAVEVPTR(PL_lex_inpat);
591 SAVEI32(PL_lex_inwhat);
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
601 SAVEI32(PL_lasttoke);
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
614 SAVEI32(PL_curforce);
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
622 SAVEI32(PL_nexttoke);
625 SAVECOPLINE(PL_curcop);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
639 SAVEI32(PL_sublex_info.sub_inwhat);
640 SAVESPTR(PL_lex_repl);
642 SAVEINT(PL_lex_expect);
644 PL_lex_state = LEX_NORMAL;
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
651 *PL_lex_casestack = '\0';
663 PL_sublex_info.sub_inwhat = 0;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
667 s = SvPV_const(PL_linestr, len);
668 if (!len || s[len-1] != ';') {
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
671 sv_catpvs(PL_linestr, "\n;");
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
676 PL_last_lop = PL_last_uni = NULL;
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
690 PL_doextract = FALSE;
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
697 * increments the current line number in CopLINE(PL_curcop) and checks
698 * to see whether the line starts with a comment of the form
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
704 S_incline(pTHX_ char *s)
712 CopLINE_inc(PL_curcop);
715 while (SPACE_OR_TAB(*s))
717 if (strnEQ(s, "line", 4))
721 if (SPACE_OR_TAB(*s))
725 while (SPACE_OR_TAB(*s))
733 while (SPACE_OR_TAB(*s))
735 if (*s == '"' && (t = strchr(s+1, '"'))) {
745 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
747 if (*e != '\n' && *e != '\0')
748 return; /* false alarm */
754 const char * const cf = CopFILE(PL_curcop);
755 STRLEN tmplen = cf ? strlen(cf) : 0;
756 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
757 /* must copy *{"::_<(eval N)[oldfilename:L]"}
758 * to *{"::_<newfilename"} */
759 char smallbuf[256], smallbuf2[256];
760 char *tmpbuf, *tmpbuf2;
762 STRLEN tmplen2 = strlen(s);
763 if (tmplen + 3 < sizeof smallbuf)
766 Newx(tmpbuf, tmplen + 3, char);
767 if (tmplen2 + 3 < sizeof smallbuf2)
770 Newx(tmpbuf2, tmplen2 + 3, char);
771 tmpbuf[0] = tmpbuf2[0] = '_';
772 tmpbuf[1] = tmpbuf2[1] = '<';
773 memcpy(tmpbuf + 2, cf, ++tmplen);
774 memcpy(tmpbuf2 + 2, s, ++tmplen2);
776 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
778 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
780 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
781 /* adjust ${"::_<newfilename"} to store the new file name */
782 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
783 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
784 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
786 if (tmpbuf != smallbuf) Safefree(tmpbuf);
787 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
790 CopFILE_free(PL_curcop);
791 CopFILE_set(PL_curcop, s);
794 CopLINE_set(PL_curcop, atoi(n)-1);
798 /* skip space before PL_thistoken */
801 S_skipspace0(pTHX_ register char *s)
808 PL_thiswhite = newSVpvn("",0);
809 sv_catsv(PL_thiswhite, PL_skipwhite);
810 sv_free(PL_skipwhite);
813 PL_realtokenstart = s - SvPVX(PL_linestr);
817 /* skip space after PL_thistoken */
820 S_skipspace1(pTHX_ register char *s)
822 const char *start = s;
823 I32 startoff = start - SvPVX(PL_linestr);
828 start = SvPVX(PL_linestr) + startoff;
829 if (!PL_thistoken && PL_realtokenstart >= 0) {
830 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
831 PL_thistoken = newSVpvn(tstart, start - tstart);
833 PL_realtokenstart = -1;
836 PL_nextwhite = newSVpvn("",0);
837 sv_catsv(PL_nextwhite, PL_skipwhite);
838 sv_free(PL_skipwhite);
845 S_skipspace2(pTHX_ register char *s, SV **svp)
848 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
849 const I32 startoff = s - SvPVX(PL_linestr);
852 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
853 if (!PL_madskills || !svp)
855 start = SvPVX(PL_linestr) + startoff;
856 if (!PL_thistoken && PL_realtokenstart >= 0) {
857 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
858 PL_thistoken = newSVpvn(tstart, start - tstart);
859 PL_realtokenstart = -1;
863 *svp = newSVpvn("",0);
864 sv_setsv(*svp, PL_skipwhite);
865 sv_free(PL_skipwhite);
875 * Called to gobble the appropriate amount and type of whitespace.
876 * Skips comments as well.
880 S_skipspace(pTHX_ register char *s)
885 int startoff = s - SvPVX(PL_linestr);
888 sv_free(PL_skipwhite);
893 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
894 while (s < PL_bufend && SPACE_OR_TAB(*s))
904 SSize_t oldprevlen, oldoldprevlen;
905 SSize_t oldloplen = 0, oldunilen = 0;
906 while (s < PL_bufend && isSPACE(*s)) {
907 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
912 if (s < PL_bufend && *s == '#') {
913 while (s < PL_bufend && *s != '\n')
917 if (PL_in_eval && !PL_rsfp) {
924 /* only continue to recharge the buffer if we're at the end
925 * of the buffer, we're not reading from a source filter, and
926 * we're in normal lexing mode
928 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
929 PL_lex_state == LEX_FORMLINE)
936 /* try to recharge the buffer */
938 curoff = s - SvPVX(PL_linestr);
941 if ((s = filter_gets(PL_linestr, PL_rsfp,
942 (prevlen = SvCUR(PL_linestr)))) == NULL)
945 if (PL_madskills && curoff != startoff) {
947 PL_skipwhite = newSVpvn("",0);
948 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
952 /* mustn't throw out old stuff yet if madpropping */
953 SvCUR(PL_linestr) = curoff;
954 s = SvPVX(PL_linestr) + curoff;
956 if (curoff && s[-1] == '\n')
960 /* end of file. Add on the -p or -n magic */
961 /* XXX these shouldn't really be added here, can't set PL_faketokens */
965 ";}continue{print or die qq(-p destination: $!\\n);}");
968 ";}continue{print or die qq(-p destination: $!\\n);}");
970 PL_minus_n = PL_minus_p = 0;
972 else if (PL_minus_n) {
974 sv_catpvn(PL_linestr, ";}", 2);
976 sv_setpvn(PL_linestr, ";}", 2);
982 sv_catpvn(PL_linestr,";", 1);
984 sv_setpvn(PL_linestr,";", 1);
987 /* reset variables for next time we lex */
988 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
994 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
995 PL_last_lop = PL_last_uni = NULL;
997 /* Close the filehandle. Could be from -P preprocessor,
998 * STDIN, or a regular file. If we were reading code from
999 * STDIN (because the commandline held no -e or filename)
1000 * then we don't close it, we reset it so the code can
1001 * read from STDIN too.
1004 if (PL_preprocess && !PL_in_eval)
1005 (void)PerlProc_pclose(PL_rsfp);
1006 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1007 PerlIO_clearerr(PL_rsfp);
1009 (void)PerlIO_close(PL_rsfp);
1014 /* not at end of file, so we only read another line */
1015 /* make corresponding updates to old pointers, for yyerror() */
1016 oldprevlen = PL_oldbufptr - PL_bufend;
1017 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1019 oldunilen = PL_last_uni - PL_bufend;
1021 oldloplen = PL_last_lop - PL_bufend;
1022 PL_linestart = PL_bufptr = s + prevlen;
1023 PL_bufend = s + SvCUR(PL_linestr);
1025 PL_oldbufptr = s + oldprevlen;
1026 PL_oldoldbufptr = s + oldoldprevlen;
1028 PL_last_uni = s + oldunilen;
1030 PL_last_lop = s + oldloplen;
1033 /* debugger active and we're not compiling the debugger code,
1034 * so store the line into the debugger's array of lines
1036 if (PERLDB_LINE && PL_curstash != PL_debstash) {
1037 SV * const sv = newSV(0);
1039 sv_upgrade(sv, SVt_PVMG);
1040 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
1043 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
1051 PL_skipwhite = newSVpvn("",0);
1052 curoff = s - SvPVX(PL_linestr);
1053 if (curoff - startoff)
1054 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1063 * Check the unary operators to ensure there's no ambiguity in how they're
1064 * used. An ambiguous piece of code would be:
1066 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1067 * the +5 is its argument.
1077 if (PL_oldoldbufptr != PL_last_uni)
1079 while (isSPACE(*PL_last_uni))
1082 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1084 if ((t = strchr(s, '(')) && t < PL_bufptr)
1087 if (ckWARN_d(WARN_AMBIGUOUS)){
1088 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1089 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1090 (int)(s - PL_last_uni), PL_last_uni);
1095 * LOP : macro to build a list operator. Its behaviour has been replaced
1096 * with a subroutine, S_lop() for which LOP is just another name.
1099 #define LOP(f,x) return lop(f,x,s)
1103 * Build a list operator (or something that might be one). The rules:
1104 * - if we have a next token, then it's a list operator [why?]
1105 * - if the next thing is an opening paren, then it's a function
1106 * - else it's a list operator
1110 S_lop(pTHX_ I32 f, int x, char *s)
1117 PL_last_lop = PL_oldbufptr;
1118 PL_last_lop_op = (OPCODE)f;
1121 return REPORT(LSTOP);
1124 return REPORT(LSTOP);
1127 return REPORT(FUNC);
1130 return REPORT(FUNC);
1132 return REPORT(LSTOP);
1138 * Sets up for an eventual force_next(). start_force(0) basically does
1139 * an unshift, while start_force(-1) does a push. yylex removes items
1144 S_start_force(pTHX_ int where)
1148 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1149 where = PL_lasttoke;
1150 assert(PL_curforce < 0 || PL_curforce == where);
1151 if (PL_curforce != where) {
1152 for (i = PL_lasttoke; i > where; --i) {
1153 PL_nexttoke[i] = PL_nexttoke[i-1];
1157 if (PL_curforce < 0) /* in case of duplicate start_force() */
1158 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1159 PL_curforce = where;
1162 curmad('^', newSVpvn("",0));
1163 CURMAD('_', PL_nextwhite);
1168 S_curmad(pTHX_ char slot, SV *sv)
1174 if (PL_curforce < 0)
1175 where = &PL_thismad;
1177 where = &PL_nexttoke[PL_curforce].next_mad;
1180 sv_setpvn(sv, "", 0);
1183 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1185 else if (PL_encoding) {
1186 sv_recode_to_utf8(sv, PL_encoding);
1191 /* keep a slot open for the head of the list? */
1192 if (slot != '_' && *where && (*where)->mad_key == '^') {
1193 (*where)->mad_key = slot;
1194 sv_free((*where)->mad_val);
1195 (*where)->mad_val = (void*)sv;
1198 addmad(newMADsv(slot, sv), where, 0);
1201 # define start_force(where) NOOP
1202 # define curmad(slot, sv) NOOP
1207 * When the lexer realizes it knows the next token (for instance,
1208 * it is reordering tokens for the parser) then it can call S_force_next
1209 * to know what token to return the next time the lexer is called. Caller
1210 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1211 * and possibly PL_expect to ensure the lexer handles the token correctly.
1215 S_force_next(pTHX_ I32 type)
1219 if (PL_curforce < 0)
1220 start_force(PL_lasttoke);
1221 PL_nexttoke[PL_curforce].next_type = type;
1222 if (PL_lex_state != LEX_KNOWNEXT)
1223 PL_lex_defer = PL_lex_state;
1224 PL_lex_state = LEX_KNOWNEXT;
1225 PL_lex_expect = PL_expect;
1228 PL_nexttype[PL_nexttoke] = type;
1230 if (PL_lex_state != LEX_KNOWNEXT) {
1231 PL_lex_defer = PL_lex_state;
1232 PL_lex_expect = PL_expect;
1233 PL_lex_state = LEX_KNOWNEXT;
1239 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1242 SV * const sv = newSVpvn(start,len);
1243 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1250 * When the lexer knows the next thing is a word (for instance, it has
1251 * just seen -> and it knows that the next char is a word char, then
1252 * it calls S_force_word to stick the next word into the PL_next lookahead.
1255 * char *start : buffer position (must be within PL_linestr)
1256 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1257 * int check_keyword : if true, Perl checks to make sure the word isn't
1258 * a keyword (do this if the word is a label, e.g. goto FOO)
1259 * int allow_pack : if true, : characters will also be allowed (require,
1260 * use, etc. do this)
1261 * int allow_initial_tick : used by the "sub" lexer only.
1265 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1271 start = SKIPSPACE1(start);
1273 if (isIDFIRST_lazy_if(s,UTF) ||
1274 (allow_pack && *s == ':') ||
1275 (allow_initial_tick && *s == '\'') )
1277 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1278 if (check_keyword && keyword(PL_tokenbuf, len))
1280 start_force(PL_curforce);
1282 curmad('X', newSVpvn(start,s-start));
1283 if (token == METHOD) {
1288 PL_expect = XOPERATOR;
1291 NEXTVAL_NEXTTOKE.opval
1292 = (OP*)newSVOP(OP_CONST,0,
1293 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1294 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1302 * Called when the lexer wants $foo *foo &foo etc, but the program
1303 * text only contains the "foo" portion. The first argument is a pointer
1304 * to the "foo", and the second argument is the type symbol to prefix.
1305 * Forces the next token to be a "WORD".
1306 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1310 S_force_ident(pTHX_ register const char *s, int kind)
1314 const STRLEN len = strlen(s);
1315 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1316 start_force(PL_curforce);
1317 NEXTVAL_NEXTTOKE.opval = o;
1320 o->op_private = OPpCONST_ENTERED;
1321 /* XXX see note in pp_entereval() for why we forgo typo
1322 warnings if the symbol must be introduced in an eval.
1324 gv_fetchpvn_flags(s, len,
1325 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1327 kind == '$' ? SVt_PV :
1328 kind == '@' ? SVt_PVAV :
1329 kind == '%' ? SVt_PVHV :
1337 Perl_str_to_version(pTHX_ SV *sv)
1342 const char *start = SvPV_const(sv,len);
1343 const char * const end = start + len;
1344 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1345 while (start < end) {
1349 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1354 retval += ((NV)n)/nshift;
1363 * Forces the next token to be a version number.
1364 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1365 * and if "guessing" is TRUE, then no new token is created (and the caller
1366 * must use an alternative parsing method).
1370 S_force_version(pTHX_ char *s, int guessing)
1376 I32 startoff = s - SvPVX(PL_linestr);
1385 while (isDIGIT(*d) || *d == '_' || *d == '.')
1389 start_force(PL_curforce);
1390 curmad('X', newSVpvn(s,d-s));
1393 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1395 s = scan_num(s, &yylval);
1396 version = yylval.opval;
1397 ver = cSVOPx(version)->op_sv;
1398 if (SvPOK(ver) && !SvNIOK(ver)) {
1399 SvUPGRADE(ver, SVt_PVNV);
1400 SvNV_set(ver, str_to_version(ver));
1401 SvNOK_on(ver); /* hint that it is a version */
1404 else if (guessing) {
1407 sv_free(PL_nextwhite); /* let next token collect whitespace */
1409 s = SvPVX(PL_linestr) + startoff;
1417 if (PL_madskills && !version) {
1418 sv_free(PL_nextwhite); /* let next token collect whitespace */
1420 s = SvPVX(PL_linestr) + startoff;
1423 /* NOTE: The parser sees the package name and the VERSION swapped */
1424 start_force(PL_curforce);
1425 NEXTVAL_NEXTTOKE.opval = version;
1433 * Tokenize a quoted string passed in as an SV. It finds the next
1434 * chunk, up to end of string or a backslash. It may make a new
1435 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1440 S_tokeq(pTHX_ SV *sv)
1444 register char *send;
1452 s = SvPV_force(sv, len);
1453 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1456 while (s < send && *s != '\\')
1461 if ( PL_hints & HINT_NEW_STRING ) {
1462 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1468 if (s + 1 < send && (s[1] == '\\'))
1469 s++; /* all that, just for this */
1474 SvCUR_set(sv, d - SvPVX_const(sv));
1476 if ( PL_hints & HINT_NEW_STRING )
1477 return new_constant(NULL, 0, "q", sv, pv, "q");
1482 * Now come three functions related to double-quote context,
1483 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1484 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1485 * interact with PL_lex_state, and create fake ( ... ) argument lists
1486 * to handle functions and concatenation.
1487 * They assume that whoever calls them will be setting up a fake
1488 * join call, because each subthing puts a ',' after it. This lets
1491 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1493 * (I'm not sure whether the spurious commas at the end of lcfirst's
1494 * arguments and join's arguments are created or not).
1499 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1501 * Pattern matching will set PL_lex_op to the pattern-matching op to
1502 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1504 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1506 * Everything else becomes a FUNC.
1508 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1509 * had an OP_CONST or OP_READLINE). This just sets us up for a
1510 * call to S_sublex_push().
1514 S_sublex_start(pTHX)
1517 register const I32 op_type = yylval.ival;
1519 if (op_type == OP_NULL) {
1520 yylval.opval = PL_lex_op;
1524 if (op_type == OP_CONST || op_type == OP_READLINE) {
1525 SV *sv = tokeq(PL_lex_stuff);
1527 if (SvTYPE(sv) == SVt_PVIV) {
1528 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1530 const char * const p = SvPV_const(sv, len);
1531 SV * const nsv = newSVpvn(p, len);
1537 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1538 PL_lex_stuff = NULL;
1539 /* Allow <FH> // "foo" */
1540 if (op_type == OP_READLINE)
1541 PL_expect = XTERMORDORDOR;
1545 PL_sublex_info.super_state = PL_lex_state;
1546 PL_sublex_info.sub_inwhat = op_type;
1547 PL_sublex_info.sub_op = PL_lex_op;
1548 PL_lex_state = LEX_INTERPPUSH;
1552 yylval.opval = PL_lex_op;
1562 * Create a new scope to save the lexing state. The scope will be
1563 * ended in S_sublex_done. Returns a '(', starting the function arguments
1564 * to the uc, lc, etc. found before.
1565 * Sets PL_lex_state to LEX_INTERPCONCAT.
1574 PL_lex_state = PL_sublex_info.super_state;
1575 SAVEI32(PL_lex_dojoin);
1576 SAVEI32(PL_lex_brackets);
1577 SAVEI32(PL_lex_casemods);
1578 SAVEI32(PL_lex_starts);
1579 SAVEI32(PL_lex_state);
1580 SAVEVPTR(PL_lex_inpat);
1581 SAVEI32(PL_lex_inwhat);
1582 SAVECOPLINE(PL_curcop);
1583 SAVEPPTR(PL_bufptr);
1584 SAVEPPTR(PL_bufend);
1585 SAVEPPTR(PL_oldbufptr);
1586 SAVEPPTR(PL_oldoldbufptr);
1587 SAVEPPTR(PL_last_lop);
1588 SAVEPPTR(PL_last_uni);
1589 SAVEPPTR(PL_linestart);
1590 SAVESPTR(PL_linestr);
1591 SAVEGENERICPV(PL_lex_brackstack);
1592 SAVEGENERICPV(PL_lex_casestack);
1594 PL_linestr = PL_lex_stuff;
1595 PL_lex_stuff = NULL;
1597 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1598 = SvPVX(PL_linestr);
1599 PL_bufend += SvCUR(PL_linestr);
1600 PL_last_lop = PL_last_uni = NULL;
1601 SAVEFREESV(PL_linestr);
1603 PL_lex_dojoin = FALSE;
1604 PL_lex_brackets = 0;
1605 Newx(PL_lex_brackstack, 120, char);
1606 Newx(PL_lex_casestack, 12, char);
1607 PL_lex_casemods = 0;
1608 *PL_lex_casestack = '\0';
1610 PL_lex_state = LEX_INTERPCONCAT;
1611 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1613 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1614 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1615 PL_lex_inpat = PL_sublex_info.sub_op;
1617 PL_lex_inpat = NULL;
1624 * Restores lexer state after a S_sublex_push.
1631 if (!PL_lex_starts++) {
1632 SV * const sv = newSVpvs("");
1633 if (SvUTF8(PL_linestr))
1635 PL_expect = XOPERATOR;
1636 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1640 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1641 PL_lex_state = LEX_INTERPCASEMOD;
1645 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1646 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1647 PL_linestr = PL_lex_repl;
1649 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1650 PL_bufend += SvCUR(PL_linestr);
1651 PL_last_lop = PL_last_uni = NULL;
1652 SAVEFREESV(PL_linestr);
1653 PL_lex_dojoin = FALSE;
1654 PL_lex_brackets = 0;
1655 PL_lex_casemods = 0;
1656 *PL_lex_casestack = '\0';
1658 if (SvEVALED(PL_lex_repl)) {
1659 PL_lex_state = LEX_INTERPNORMAL;
1661 /* we don't clear PL_lex_repl here, so that we can check later
1662 whether this is an evalled subst; that means we rely on the
1663 logic to ensure sublex_done() is called again only via the
1664 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1667 PL_lex_state = LEX_INTERPCONCAT;
1677 PL_endwhite = newSVpvn("",0);
1678 sv_catsv(PL_endwhite, PL_thiswhite);
1682 sv_setpvn(PL_thistoken,"",0);
1684 PL_realtokenstart = -1;
1688 PL_bufend = SvPVX(PL_linestr);
1689 PL_bufend += SvCUR(PL_linestr);
1690 PL_expect = XOPERATOR;
1691 PL_sublex_info.sub_inwhat = 0;
1699 Extracts a pattern, double-quoted string, or transliteration. This
1702 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1703 processing a pattern (PL_lex_inpat is true), a transliteration
1704 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1706 Returns a pointer to the character scanned up to. If this is
1707 advanced from the start pointer supplied (i.e. if anything was
1708 successfully parsed), will leave an OP for the substring scanned
1709 in yylval. Caller must intuit reason for not parsing further
1710 by looking at the next characters herself.
1714 double-quoted style: \r and \n
1715 regexp special ones: \D \s
1718 case and quoting: \U \Q \E
1719 stops on @ and $, but not for $ as tail anchor
1721 In transliterations:
1722 characters are VERY literal, except for - not at the start or end
1723 of the string, which indicates a range. If the range is in bytes,
1724 scan_const expands the range to the full set of intermediate
1725 characters. If the range is in utf8, the hyphen is replaced with
1726 a certain range mark which will be handled by pmtrans() in op.c.
1728 In double-quoted strings:
1730 double-quoted style: \r and \n
1732 deprecated backrefs: \1 (in substitution replacements)
1733 case and quoting: \U \Q \E
1736 scan_const does *not* construct ops to handle interpolated strings.
1737 It stops processing as soon as it finds an embedded $ or @ variable
1738 and leaves it to the caller to work out what's going on.
1740 embedded arrays (whether in pattern or not) could be:
1741 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1743 $ in double-quoted strings must be the symbol of an embedded scalar.
1745 $ in pattern could be $foo or could be tail anchor. Assumption:
1746 it's a tail anchor if $ is the last thing in the string, or if it's
1747 followed by one of "()| \r\n\t"
1749 \1 (backreferences) are turned into $1
1751 The structure of the code is
1752 while (there's a character to process) {
1753 handle transliteration ranges
1754 skip regexp comments /(?#comment)/ and codes /(?{code})/
1755 skip #-initiated comments in //x patterns
1756 check for embedded arrays
1757 check for embedded scalars
1759 leave intact backslashes from leaveit (below)
1760 deprecate \1 in substitution replacements
1761 handle string-changing backslashes \l \U \Q \E, etc.
1762 switch (what was escaped) {
1763 handle \- in a transliteration (becomes a literal -)
1764 handle \132 (octal characters)
1765 handle \x15 and \x{1234} (hex characters)
1766 handle \N{name} (named characters)
1767 handle \cV (control characters)
1768 handle printf-style backslashes (\f, \r, \n, etc)
1770 } (end if backslash)
1771 } (end while character to read)
1776 S_scan_const(pTHX_ char *start)
1779 register char *send = PL_bufend; /* end of the constant */
1780 SV *sv = newSV(send - start); /* sv for the constant */
1781 register char *s = start; /* start of the constant */
1782 register char *d = SvPVX(sv); /* destination for copies */
1783 bool dorange = FALSE; /* are we in a translit range? */
1784 bool didrange = FALSE; /* did we just finish a range? */
1785 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1786 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1789 UV literal_endpoint = 0;
1790 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1793 const char * const leaveit = /* set of acceptably-backslashed characters */
1795 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
1798 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1799 /* If we are doing a trans and we know we want UTF8 set expectation */
1800 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1801 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1805 while (s < send || dorange) {
1806 /* get transliterations out of the way (they're most literal) */
1807 if (PL_lex_inwhat == OP_TRANS) {
1808 /* expand a range A-Z to the full set of characters. AIE! */
1810 I32 i; /* current expanded character */
1811 I32 min; /* first character in range */
1812 I32 max; /* last character in range */
1823 char * const c = (char*)utf8_hop((U8*)d, -1);
1827 *c = (char)UTF_TO_NATIVE(0xff);
1828 /* mark the range as done, and continue */
1834 i = d - SvPVX_const(sv); /* remember current offset */
1837 SvLEN(sv) + (has_utf8 ?
1838 (512 - UTF_CONTINUATION_MARK +
1841 /* How many two-byte within 0..255: 128 in UTF-8,
1842 * 96 in UTF-8-mod. */
1844 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1846 d = SvPVX(sv) + i; /* refresh d after realloc */
1850 for (j = 0; j <= 1; j++) {
1851 char * const c = (char*)utf8_hop((U8*)d, -1);
1852 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1858 max = (U8)0xff; /* only to \xff */
1859 uvmax = uv; /* \x{100} to uvmax */
1861 d = c; /* eat endpoint chars */
1866 d -= 2; /* eat the first char and the - */
1867 min = (U8)*d; /* first char in range */
1868 max = (U8)d[1]; /* last char in range */
1875 "Invalid range \"%c-%c\" in transliteration operator",
1876 (char)min, (char)max);
1880 if (literal_endpoint == 2 &&
1881 ((isLOWER(min) && isLOWER(max)) ||
1882 (isUPPER(min) && isUPPER(max)))) {
1884 for (i = min; i <= max; i++)
1886 *d++ = NATIVE_TO_NEED(has_utf8,i);
1888 for (i = min; i <= max; i++)
1890 *d++ = NATIVE_TO_NEED(has_utf8,i);
1895 for (i = min; i <= max; i++)
1898 const U8 ch = (U8)NATIVE_TO_UTF(i);
1899 if (UNI_IS_INVARIANT(ch))
1902 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1903 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1912 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1914 *d++ = (char)UTF_TO_NATIVE(0xff);
1916 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1920 /* mark the range as done, and continue */
1924 literal_endpoint = 0;
1929 /* range begins (ignore - as first or last char) */
1930 else if (*s == '-' && s+1 < send && s != start) {
1932 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1939 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1949 literal_endpoint = 0;
1950 native_range = TRUE;
1955 /* if we get here, we're not doing a transliteration */
1957 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1958 except for the last char, which will be done separately. */
1959 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1961 while (s+1 < send && *s != ')')
1962 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1964 else if (s[2] == '{' /* This should match regcomp.c */
1965 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1968 char *regparse = s + (s[2] == '{' ? 3 : 4);
1971 while (count && (c = *regparse)) {
1972 if (c == '\\' && regparse[1])
1980 if (*regparse != ')')
1981 regparse--; /* Leave one char for continuation. */
1982 while (s < regparse)
1983 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1987 /* likewise skip #-initiated comments in //x patterns */
1988 else if (*s == '#' && PL_lex_inpat &&
1989 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1990 while (s+1 < send && *s != '\n')
1991 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1994 /* check for embedded arrays
1995 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1997 else if (*s == '@' && s[1]) {
1998 if (isALNUM_lazy_if(s+1,UTF))
2000 if (strchr(":'{$", s[1]))
2002 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2003 break; /* in regexp, neither @+ nor @- are interpolated */
2006 /* check for embedded scalars. only stop if we're sure it's a
2009 else if (*s == '$') {
2010 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2012 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
2013 break; /* in regexp, $ might be tail anchor */
2016 /* End of else if chain - OP_TRANS rejoin rest */
2019 if (*s == '\\' && s+1 < send) {
2022 /* some backslashes we leave behind */
2023 if (*leaveit && *s && strchr(leaveit, *s)) {
2024 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2025 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2029 /* deprecate \1 in strings and substitution replacements */
2030 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2031 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2033 if (ckWARN(WARN_SYNTAX))
2034 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2039 /* string-change backslash escapes */
2040 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2045 /* if we get here, it's either a quoted -, or a digit */
2048 /* quoted - in transliterations */
2050 if (PL_lex_inwhat == OP_TRANS) {
2057 if ((isALPHA(*s) || isDIGIT(*s)) &&
2059 Perl_warner(aTHX_ packWARN(WARN_MISC),
2060 "Unrecognized escape \\%c passed through",
2062 /* default action is to copy the quoted character */
2063 goto default_action;
2066 /* \132 indicates an octal constant */
2067 case '0': case '1': case '2': case '3':
2068 case '4': case '5': case '6': case '7':
2072 uv = grok_oct(s, &len, &flags, NULL);
2075 goto NUM_ESCAPE_INSERT;
2077 /* \x24 indicates a hex constant */
2081 char* const e = strchr(s, '}');
2082 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2083 PERL_SCAN_DISALLOW_PREFIX;
2088 yyerror("Missing right brace on \\x{}");
2092 uv = grok_hex(s, &len, &flags, NULL);
2098 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2099 uv = grok_hex(s, &len, &flags, NULL);
2105 /* Insert oct or hex escaped character.
2106 * There will always enough room in sv since such
2107 * escapes will be longer than any UTF-8 sequence
2108 * they can end up as. */
2110 /* We need to map to chars to ASCII before doing the tests
2113 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2114 if (!has_utf8 && uv > 255) {
2115 /* Might need to recode whatever we have
2116 * accumulated so far if it contains any
2119 * (Can't we keep track of that and avoid
2120 * this rescan? --jhi)
2124 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2125 if (!NATIVE_IS_INVARIANT(*c)) {
2130 const STRLEN offset = d - SvPVX_const(sv);
2132 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2136 while (src >= (const U8 *)SvPVX_const(sv)) {
2137 if (!NATIVE_IS_INVARIANT(*src)) {
2138 const U8 ch = NATIVE_TO_ASCII(*src);
2139 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2140 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2150 if (has_utf8 || uv > 255) {
2151 d = (char*)uvchr_to_utf8((U8*)d, uv);
2153 if (PL_lex_inwhat == OP_TRANS &&
2154 PL_sublex_info.sub_op) {
2155 PL_sublex_info.sub_op->op_private |=
2156 (PL_lex_repl ? OPpTRANS_FROM_UTF
2160 if (uv > 255 && !dorange)
2161 native_range = FALSE;
2173 /* \N{LATIN SMALL LETTER A} is a named character */
2177 char* e = strchr(s, '}');
2183 yyerror("Missing right brace on \\N{}");
2187 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2189 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2190 PERL_SCAN_DISALLOW_PREFIX;
2193 uv = grok_hex(s, &len, &flags, NULL);
2195 goto NUM_ESCAPE_INSERT;
2197 res = newSVpvn(s + 1, e - s - 1);
2198 res = new_constant( NULL, 0, "charnames",
2199 res, NULL, "\\N{...}" );
2201 sv_utf8_upgrade(res);
2202 str = SvPV_const(res,len);
2203 #ifdef EBCDIC_NEVER_MIND
2204 /* charnames uses pack U and that has been
2205 * recently changed to do the below uni->native
2206 * mapping, so this would be redundant (and wrong,
2207 * the code point would be doubly converted).
2208 * But leave this in just in case the pack U change
2209 * gets revoked, but the semantics is still
2210 * desireable for charnames. --jhi */
2212 UV uv = utf8_to_uvchr((const U8*)str, 0);
2215 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2217 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2218 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2219 str = SvPV_const(res, len);
2223 if (!has_utf8 && SvUTF8(res)) {
2224 const char * const ostart = SvPVX_const(sv);
2225 SvCUR_set(sv, d - ostart);
2228 sv_utf8_upgrade(sv);
2229 /* this just broke our allocation above... */
2230 SvGROW(sv, (STRLEN)(send - start));
2231 d = SvPVX(sv) + SvCUR(sv);
2234 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2235 const char * const odest = SvPVX_const(sv);
2237 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2238 d = SvPVX(sv) + (d - odest);
2242 native_range = FALSE; /* \N{} is guessed to be Unicode */
2244 Copy(str, d, len, char);
2251 yyerror("Missing braces on \\N{}");
2254 /* \c is a control character */
2263 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2266 yyerror("Missing control char name in \\c");
2270 /* printf-style backslashes, formfeeds, newlines, etc */
2272 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2275 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2278 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2281 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2284 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2287 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2290 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2296 } /* end if (backslash) */
2303 /* If we started with encoded form, or already know we want it
2304 and then encode the next character */
2305 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2307 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2308 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2311 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2312 const STRLEN off = d - SvPVX_const(sv);
2313 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2315 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2318 if (uv > 255 && !dorange)
2319 native_range = FALSE;
2323 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2325 } /* while loop to process each character */
2327 /* terminate the string and set up the sv */
2329 SvCUR_set(sv, d - SvPVX_const(sv));
2330 if (SvCUR(sv) >= SvLEN(sv))
2331 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2334 if (PL_encoding && !has_utf8) {
2335 sv_recode_to_utf8(sv, PL_encoding);
2341 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2342 PL_sublex_info.sub_op->op_private |=
2343 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2347 /* shrink the sv if we allocated more than we used */
2348 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2349 SvPV_shrink_to_cur(sv);
2352 /* return the substring (via yylval) only if we parsed anything */
2353 if (s > PL_bufptr) {
2354 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2355 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
2357 ( PL_lex_inwhat == OP_TRANS
2359 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2362 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2369 * Returns TRUE if there's more to the expression (e.g., a subscript),
2372 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2374 * ->[ and ->{ return TRUE
2375 * { and [ outside a pattern are always subscripts, so return TRUE
2376 * if we're outside a pattern and it's not { or [, then return FALSE
2377 * if we're in a pattern and the first char is a {
2378 * {4,5} (any digits around the comma) returns FALSE
2379 * if we're in a pattern and the first char is a [
2381 * [SOMETHING] has a funky algorithm to decide whether it's a
2382 * character class or not. It has to deal with things like
2383 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2384 * anything else returns TRUE
2387 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2390 S_intuit_more(pTHX_ register char *s)
2393 if (PL_lex_brackets)
2395 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2397 if (*s != '{' && *s != '[')
2402 /* In a pattern, so maybe we have {n,m}. */
2419 /* On the other hand, maybe we have a character class */
2422 if (*s == ']' || *s == '^')
2425 /* this is terrifying, and it works */
2426 int weight = 2; /* let's weigh the evidence */
2428 unsigned char un_char = 255, last_un_char;
2429 const char * const send = strchr(s,']');
2430 char tmpbuf[sizeof PL_tokenbuf * 4];
2432 if (!send) /* has to be an expression */
2435 Zero(seen,256,char);
2438 else if (isDIGIT(*s)) {
2440 if (isDIGIT(s[1]) && s[2] == ']')
2446 for (; s < send; s++) {
2447 last_un_char = un_char;
2448 un_char = (unsigned char)*s;
2453 weight -= seen[un_char] * 10;
2454 if (isALNUM_lazy_if(s+1,UTF)) {
2456 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2457 len = (int)strlen(tmpbuf);
2458 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2463 else if (*s == '$' && s[1] &&
2464 strchr("[#!%*<>()-=",s[1])) {
2465 if (/*{*/ strchr("])} =",s[2]))
2474 if (strchr("wds]",s[1]))
2476 else if (seen['\''] || seen['"'])
2478 else if (strchr("rnftbxcav",s[1]))
2480 else if (isDIGIT(s[1])) {
2482 while (s[1] && isDIGIT(s[1]))
2492 if (strchr("aA01! ",last_un_char))
2494 if (strchr("zZ79~",s[1]))
2496 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2497 weight -= 5; /* cope with negative subscript */
2500 if (!isALNUM(last_un_char)
2501 && !(last_un_char == '$' || last_un_char == '@'
2502 || last_un_char == '&')
2503 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2508 if (keyword(tmpbuf, d - tmpbuf))
2511 if (un_char == last_un_char + 1)
2513 weight -= seen[un_char];
2518 if (weight >= 0) /* probably a character class */
2528 * Does all the checking to disambiguate
2530 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2531 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2533 * First argument is the stuff after the first token, e.g. "bar".
2535 * Not a method if bar is a filehandle.
2536 * Not a method if foo is a subroutine prototyped to take a filehandle.
2537 * Not a method if it's really "Foo $bar"
2538 * Method if it's "foo $bar"
2539 * Not a method if it's really "print foo $bar"
2540 * Method if it's really "foo package::" (interpreted as package->foo)
2541 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2542 * Not a method if bar is a filehandle or package, but is quoted with
2547 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2550 char *s = start + (*start == '$');
2551 char tmpbuf[sizeof PL_tokenbuf];
2559 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2563 const char *proto = SvPVX_const(cv);
2574 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2575 /* start is the beginning of the possible filehandle/object,
2576 * and s is the end of it
2577 * tmpbuf is a copy of it
2580 if (*start == '$') {
2581 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2584 len = start - SvPVX(PL_linestr);
2588 start = SvPVX(PL_linestr) + len;
2592 return *s == '(' ? FUNCMETH : METHOD;
2594 if (!keyword(tmpbuf, len)) {
2595 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2599 soff = s - SvPVX(PL_linestr);
2603 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2604 if (indirgv && GvCVu(indirgv))
2606 /* filehandle or package name makes it a method */
2607 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2609 soff = s - SvPVX(PL_linestr);
2612 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2613 return 0; /* no assumptions -- "=>" quotes bearword */
2615 start_force(PL_curforce);
2616 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2617 newSVpvn(tmpbuf,len));
2618 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2620 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2625 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2627 return *s == '(' ? FUNCMETH : METHOD;
2635 * Return a string of Perl code to load the debugger. If PERL5DB
2636 * is set, it will return the contents of that, otherwise a
2637 * compile-time require of perl5db.pl.
2645 const char * const pdb = PerlEnv_getenv("PERL5DB");
2649 SETERRNO(0,SS_NORMAL);
2650 return "BEGIN { require 'perl5db.pl' }";
2656 /* Encoded script support. filter_add() effectively inserts a
2657 * 'pre-processing' function into the current source input stream.
2658 * Note that the filter function only applies to the current source file
2659 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2661 * The datasv parameter (which may be NULL) can be used to pass
2662 * private data to this instance of the filter. The filter function
2663 * can recover the SV using the FILTER_DATA macro and use it to
2664 * store private buffers and state information.
2666 * The supplied datasv parameter is upgraded to a PVIO type
2667 * and the IoDIRP/IoANY field is used to store the function pointer,
2668 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2669 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2670 * private use must be set using malloc'd pointers.
2674 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2680 if (!PL_rsfp_filters)
2681 PL_rsfp_filters = newAV();
2684 SvUPGRADE(datasv, SVt_PVIO);
2685 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2686 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2687 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2688 FPTR2DPTR(void *, IoANY(datasv)),
2689 SvPV_nolen(datasv)));
2690 av_unshift(PL_rsfp_filters, 1);
2691 av_store(PL_rsfp_filters, 0, datasv) ;
2696 /* Delete most recently added instance of this filter function. */
2698 Perl_filter_del(pTHX_ filter_t funcp)
2704 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2705 FPTR2DPTR(void*, funcp)));
2707 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2709 /* if filter is on top of stack (usual case) just pop it off */
2710 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2711 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2712 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2713 IoANY(datasv) = (void *)NULL;
2714 sv_free(av_pop(PL_rsfp_filters));
2718 /* we need to search for the correct entry and clear it */
2719 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2723 /* Invoke the idxth filter function for the current rsfp. */
2724 /* maxlen 0 = read one text line */
2726 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2731 /* This API is bad. It should have been using unsigned int for maxlen.
2732 Not sure if we want to change the API, but if not we should sanity
2733 check the value here. */
2734 const unsigned int correct_length
2743 if (!PL_rsfp_filters)
2745 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2746 /* Provide a default input filter to make life easy. */
2747 /* Note that we append to the line. This is handy. */
2748 DEBUG_P(PerlIO_printf(Perl_debug_log,
2749 "filter_read %d: from rsfp\n", idx));
2750 if (correct_length) {
2753 const int old_len = SvCUR(buf_sv);
2755 /* ensure buf_sv is large enough */
2756 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2757 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2758 correct_length)) <= 0) {
2759 if (PerlIO_error(PL_rsfp))
2760 return -1; /* error */
2762 return 0 ; /* end of file */
2764 SvCUR_set(buf_sv, old_len + len) ;
2767 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2768 if (PerlIO_error(PL_rsfp))
2769 return -1; /* error */
2771 return 0 ; /* end of file */
2774 return SvCUR(buf_sv);
2776 /* Skip this filter slot if filter has been deleted */
2777 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2778 DEBUG_P(PerlIO_printf(Perl_debug_log,
2779 "filter_read %d: skipped (filter deleted)\n",
2781 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2783 /* Get function pointer hidden within datasv */
2784 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2785 DEBUG_P(PerlIO_printf(Perl_debug_log,
2786 "filter_read %d: via function %p (%s)\n",
2787 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2788 /* Call function. The function is expected to */
2789 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2790 /* Return: <0:error, =0:eof, >0:not eof */
2791 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2795 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2798 #ifdef PERL_CR_FILTER
2799 if (!PL_rsfp_filters) {
2800 filter_add(S_cr_textfilter,NULL);
2803 if (PL_rsfp_filters) {
2805 SvCUR_set(sv, 0); /* start with empty line */
2806 if (FILTER_READ(0, sv, 0) > 0)
2807 return ( SvPVX(sv) ) ;
2812 return (sv_gets(sv, fp, append));
2816 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2821 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2825 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2826 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2828 return GvHV(gv); /* Foo:: */
2831 /* use constant CLASS => 'MyClass' */
2832 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2833 if (gv && GvCV(gv)) {
2834 SV * const sv = cv_const_sv(GvCV(gv));
2836 pkgname = SvPV_nolen_const(sv);
2839 return gv_stashpv(pkgname, FALSE);
2845 * The intent of this yylex wrapper is to minimize the changes to the
2846 * tokener when we aren't interested in collecting madprops. It remains
2847 * to be seen how successful this strategy will be...
2854 char *s = PL_bufptr;
2856 /* make sure PL_thiswhite is initialized */
2860 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2861 if (PL_pending_ident)
2862 return S_pending_ident(aTHX);
2864 /* previous token ate up our whitespace? */
2865 if (!PL_lasttoke && PL_nextwhite) {
2866 PL_thiswhite = PL_nextwhite;
2870 /* isolate the token, and figure out where it is without whitespace */
2871 PL_realtokenstart = -1;
2875 assert(PL_curforce < 0);
2877 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2878 if (!PL_thistoken) {
2879 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2880 PL_thistoken = newSVpvn("",0);
2882 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2883 PL_thistoken = newSVpvn(tstart, s - tstart);
2886 if (PL_thismad) /* install head */
2887 CURMAD('X', PL_thistoken);
2890 /* last whitespace of a sublex? */
2891 if (optype == ')' && PL_endwhite) {
2892 CURMAD('X', PL_endwhite);
2897 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2898 if (!PL_thiswhite && !PL_endwhite && !optype) {
2899 sv_free(PL_thistoken);
2904 /* put off final whitespace till peg */
2905 if (optype == ';' && !PL_rsfp) {
2906 PL_nextwhite = PL_thiswhite;
2909 else if (PL_thisopen) {
2910 CURMAD('q', PL_thisopen);
2912 sv_free(PL_thistoken);
2916 /* Store actual token text as madprop X */
2917 CURMAD('X', PL_thistoken);
2921 /* add preceding whitespace as madprop _ */
2922 CURMAD('_', PL_thiswhite);
2926 /* add quoted material as madprop = */
2927 CURMAD('=', PL_thisstuff);
2931 /* add terminating quote as madprop Q */
2932 CURMAD('Q', PL_thisclose);
2936 /* special processing based on optype */
2940 /* opval doesn't need a TOKEN since it can already store mp */
2951 append_madprops(PL_thismad, yylval.opval, 0);
2959 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2968 /* remember any fake bracket that lexer is about to discard */
2969 if (PL_lex_brackets == 1 &&
2970 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2973 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2976 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2977 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2980 break; /* don't bother looking for trailing comment */
2989 /* attach a trailing comment to its statement instead of next token */
2993 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2995 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2997 if (*s == '\n' || *s == '#') {
2998 while (s < PL_bufend && *s != '\n')
3002 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3003 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3020 /* Create new token struct. Note: opvals return early above. */
3021 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3028 S_tokenize_use(pTHX_ int is_use, char *s) {
3030 if (PL_expect != XSTATE)
3031 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3032 is_use ? "use" : "no"));
3034 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3035 s = force_version(s, TRUE);
3036 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3037 start_force(PL_curforce);
3038 NEXTVAL_NEXTTOKE.opval = NULL;
3041 else if (*s == 'v') {
3042 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3043 s = force_version(s, FALSE);
3047 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3048 s = force_version(s, FALSE);
3050 yylval.ival = is_use;
3054 static const char* const exp_name[] =
3055 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3056 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3063 Works out what to call the token just pulled out of the input
3064 stream. The yacc parser takes care of taking the ops we return and
3065 stitching them into a tree.
3071 if read an identifier
3072 if we're in a my declaration
3073 croak if they tried to say my($foo::bar)
3074 build the ops for a my() declaration
3075 if it's an access to a my() variable
3076 are we in a sort block?
3077 croak if my($a); $a <=> $b
3078 build ops for access to a my() variable
3079 if in a dq string, and they've said @foo and we can't find @foo
3081 build ops for a bareword
3082 if we already built the token before, use it.
3087 #pragma segment Perl_yylex
3093 register char *s = PL_bufptr;
3099 SV* tmp = newSVpvs("");
3100 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3101 (IV)CopLINE(PL_curcop),
3102 lex_state_names[PL_lex_state],
3103 exp_name[PL_expect],
3104 pv_display(tmp, s, strlen(s), 0, 60));
3107 /* check if there's an identifier for us to look at */
3108 if (PL_pending_ident)
3109 return REPORT(S_pending_ident(aTHX));
3111 /* no identifier pending identification */
3113 switch (PL_lex_state) {
3115 case LEX_NORMAL: /* Some compilers will produce faster */
3116 case LEX_INTERPNORMAL: /* code if we comment these out. */
3120 /* when we've already built the next token, just pull it out of the queue */
3124 yylval = PL_nexttoke[PL_lasttoke].next_val;
3126 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3127 PL_nexttoke[PL_lasttoke].next_mad = 0;
3128 if (PL_thismad && PL_thismad->mad_key == '_') {
3129 PL_thiswhite = (SV*)PL_thismad->mad_val;
3130 PL_thismad->mad_val = 0;
3131 mad_free(PL_thismad);
3136 PL_lex_state = PL_lex_defer;
3137 PL_expect = PL_lex_expect;
3138 PL_lex_defer = LEX_NORMAL;
3139 if (!PL_nexttoke[PL_lasttoke].next_type)
3144 yylval = PL_nextval[PL_nexttoke];
3146 PL_lex_state = PL_lex_defer;
3147 PL_expect = PL_lex_expect;
3148 PL_lex_defer = LEX_NORMAL;
3152 /* FIXME - can these be merged? */
3153 return(PL_nexttoke[PL_lasttoke].next_type);
3155 return REPORT(PL_nexttype[PL_nexttoke]);
3158 /* interpolated case modifiers like \L \U, including \Q and \E.
3159 when we get here, PL_bufptr is at the \
3161 case LEX_INTERPCASEMOD:
3163 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3164 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3166 /* handle \E or end of string */
3167 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3169 if (PL_lex_casemods) {
3170 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3171 PL_lex_casestack[PL_lex_casemods] = '\0';
3173 if (PL_bufptr != PL_bufend
3174 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3176 PL_lex_state = LEX_INTERPCONCAT;
3179 PL_thistoken = newSVpvn("\\E",2);
3185 while (PL_bufptr != PL_bufend &&
3186 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3188 PL_thiswhite = newSVpvn("",0);
3189 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3193 if (PL_bufptr != PL_bufend)
3196 PL_lex_state = LEX_INTERPCONCAT;
3200 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3201 "### Saw case modifier\n"); });
3203 if (s[1] == '\\' && s[2] == 'E') {
3206 PL_thiswhite = newSVpvn("",0);
3207 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3210 PL_lex_state = LEX_INTERPCONCAT;
3215 if (!PL_madskills) /* when just compiling don't need correct */
3216 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3217 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3218 if ((*s == 'L' || *s == 'U') &&
3219 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3220 PL_lex_casestack[--PL_lex_casemods] = '\0';
3223 if (PL_lex_casemods > 10)
3224 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3225 PL_lex_casestack[PL_lex_casemods++] = *s;
3226 PL_lex_casestack[PL_lex_casemods] = '\0';
3227 PL_lex_state = LEX_INTERPCONCAT;
3228 start_force(PL_curforce);
3229 NEXTVAL_NEXTTOKE.ival = 0;
3231 start_force(PL_curforce);
3233 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3235 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3237 NEXTVAL_NEXTTOKE.ival = OP_LC;
3239 NEXTVAL_NEXTTOKE.ival = OP_UC;
3241 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3243 Perl_croak(aTHX_ "panic: yylex");
3245 SV* const tmpsv = newSVpvn("",0);
3246 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3252 if (PL_lex_starts) {
3258 sv_free(PL_thistoken);
3259 PL_thistoken = newSVpvn("",0);
3262 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3263 if (PL_lex_casemods == 1 && PL_lex_inpat)
3272 case LEX_INTERPPUSH:
3273 return REPORT(sublex_push());
3275 case LEX_INTERPSTART:
3276 if (PL_bufptr == PL_bufend)
3277 return REPORT(sublex_done());
3278 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3279 "### Interpolated variable\n"); });
3281 PL_lex_dojoin = (*PL_bufptr == '@');
3282 PL_lex_state = LEX_INTERPNORMAL;
3283 if (PL_lex_dojoin) {
3284 start_force(PL_curforce);
3285 NEXTVAL_NEXTTOKE.ival = 0;
3287 start_force(PL_curforce);
3288 force_ident("\"", '$');
3289 start_force(PL_curforce);
3290 NEXTVAL_NEXTTOKE.ival = 0;
3292 start_force(PL_curforce);
3293 NEXTVAL_NEXTTOKE.ival = 0;
3295 start_force(PL_curforce);
3296 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3299 if (PL_lex_starts++) {
3304 sv_free(PL_thistoken);
3305 PL_thistoken = newSVpvn("",0);
3308 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3309 if (!PL_lex_casemods && PL_lex_inpat)
3316 case LEX_INTERPENDMAYBE:
3317 if (intuit_more(PL_bufptr)) {
3318 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3324 if (PL_lex_dojoin) {
3325 PL_lex_dojoin = FALSE;
3326 PL_lex_state = LEX_INTERPCONCAT;
3330 sv_free(PL_thistoken);
3331 PL_thistoken = newSVpvn("",0);
3336 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3337 && SvEVALED(PL_lex_repl))
3339 if (PL_bufptr != PL_bufend)
3340 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3344 case LEX_INTERPCONCAT:
3346 if (PL_lex_brackets)
3347 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3349 if (PL_bufptr == PL_bufend)
3350 return REPORT(sublex_done());
3352 if (SvIVX(PL_linestr) == '\'') {
3353 SV *sv = newSVsv(PL_linestr);
3356 else if ( PL_hints & HINT_NEW_RE )
3357 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3358 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3362 s = scan_const(PL_bufptr);
3364 PL_lex_state = LEX_INTERPCASEMOD;
3366 PL_lex_state = LEX_INTERPSTART;
3369 if (s != PL_bufptr) {
3370 start_force(PL_curforce);
3372 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3374 NEXTVAL_NEXTTOKE = yylval;
3377 if (PL_lex_starts++) {
3381 sv_free(PL_thistoken);
3382 PL_thistoken = newSVpvn("",0);
3385 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3386 if (!PL_lex_casemods && PL_lex_inpat)
3399 PL_lex_state = LEX_NORMAL;
3400 s = scan_formline(PL_bufptr);
3401 if (!PL_lex_formbrack)
3407 PL_oldoldbufptr = PL_oldbufptr;
3413 sv_free(PL_thistoken);
3416 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3420 if (isIDFIRST_lazy_if(s,UTF))
3422 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3425 goto fake_eof; /* emulate EOF on ^D or ^Z */
3434 if (PL_lex_brackets) {
3435 yyerror(PL_lex_formbrack
3436 ? "Format not terminated"
3437 : "Missing right curly or square bracket");
3439 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3440 "### Tokener got EOF\n");
3444 if (s++ < PL_bufend)
3445 goto retry; /* ignore stray nulls */
3448 if (!PL_in_eval && !PL_preambled) {
3449 PL_preambled = TRUE;
3454 sv_setpv(PL_linestr,incl_perldb());
3455 if (SvCUR(PL_linestr))
3456 sv_catpvs(PL_linestr,";");
3458 while(AvFILLp(PL_preambleav) >= 0) {
3459 SV *tmpsv = av_shift(PL_preambleav);
3460 sv_catsv(PL_linestr, tmpsv);
3461 sv_catpvs(PL_linestr, ";");
3464 sv_free((SV*)PL_preambleav);
3465 PL_preambleav = NULL;
3467 if (PL_minus_n || PL_minus_p) {
3468 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3470 sv_catpvs(PL_linestr,"chomp;");
3473 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3474 || *PL_splitstr == '"')
3475 && strchr(PL_splitstr + 1, *PL_splitstr))
3476 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3478 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3479 bytes can be used as quoting characters. :-) */
3480 const char *splits = PL_splitstr;
3481 sv_catpvs(PL_linestr, "our @F=split(q\0");
3484 if (*splits == '\\')
3485 sv_catpvn(PL_linestr, splits, 1);
3486 sv_catpvn(PL_linestr, splits, 1);
3487 } while (*splits++);
3488 /* This loop will embed the trailing NUL of
3489 PL_linestr as the last thing it does before
3491 sv_catpvs(PL_linestr, ");");
3495 sv_catpvs(PL_linestr,"our @F=split(' ');");
3499 sv_catpvs(PL_linestr,"use feature ':5.10';");
3500 sv_catpvs(PL_linestr, "\n");
3501 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3502 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3503 PL_last_lop = PL_last_uni = NULL;
3504 if (PERLDB_LINE && PL_curstash != PL_debstash) {
3505 SV * const sv = newSV(0);
3507 sv_upgrade(sv, SVt_PVMG);
3508 sv_setsv(sv,PL_linestr);
3511 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
3516 bof = PL_rsfp ? TRUE : FALSE;
3517 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3520 PL_realtokenstart = -1;
3523 if (PL_preprocess && !PL_in_eval)
3524 (void)PerlProc_pclose(PL_rsfp);
3525 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3526 PerlIO_clearerr(PL_rsfp);
3528 (void)PerlIO_close(PL_rsfp);
3530 PL_doextract = FALSE;
3532 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3537 sv_setpv(PL_linestr,PL_minus_p
3538 ? ";}continue{print;}" : ";}");
3539 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3540 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3541 PL_last_lop = PL_last_uni = NULL;
3542 PL_minus_n = PL_minus_p = 0;
3545 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3546 PL_last_lop = PL_last_uni = NULL;
3547 sv_setpvn(PL_linestr,"",0);
3548 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3550 /* If it looks like the start of a BOM or raw UTF-16,
3551 * check if it in fact is. */
3557 #ifdef PERLIO_IS_STDIO
3558 # ifdef __GNU_LIBRARY__
3559 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3560 # define FTELL_FOR_PIPE_IS_BROKEN
3564 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3565 # define FTELL_FOR_PIPE_IS_BROKEN
3570 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3571 /* This loses the possibility to detect the bof
3572 * situation on perl -P when the libc5 is being used.
3573 * Workaround? Maybe attach some extra state to PL_rsfp?
3576 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3578 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3581 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3582 s = swallow_bom((U8*)s);
3586 /* Incest with pod. */
3589 sv_catsv(PL_thiswhite, PL_linestr);
3591 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3592 sv_setpvn(PL_linestr, "", 0);
3593 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3594 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3595 PL_last_lop = PL_last_uni = NULL;
3596 PL_doextract = FALSE;
3600 } while (PL_doextract);
3601 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3602 if (PERLDB_LINE && PL_curstash != PL_debstash) {
3603 SV * const sv = newSV(0);
3605 sv_upgrade(sv, SVt_PVMG);
3606 sv_setsv(sv,PL_linestr);
3609 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
3611 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3612 PL_last_lop = PL_last_uni = NULL;
3613 if (CopLINE(PL_curcop) == 1) {
3614 while (s < PL_bufend && isSPACE(*s))
3616 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3620 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3624 if (*s == '#' && *(s+1) == '!')
3626 #ifdef ALTERNATE_SHEBANG
3628 static char const as[] = ALTERNATE_SHEBANG;
3629 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3630 d = s + (sizeof(as) - 1);
3632 #endif /* ALTERNATE_SHEBANG */
3641 while (*d && !isSPACE(*d))
3645 #ifdef ARG_ZERO_IS_SCRIPT
3646 if (ipathend > ipath) {
3648 * HP-UX (at least) sets argv[0] to the script name,
3649 * which makes $^X incorrect. And Digital UNIX and Linux,
3650 * at least, set argv[0] to the basename of the Perl
3651 * interpreter. So, having found "#!", we'll set it right.
3653 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3655 assert(SvPOK(x) || SvGMAGICAL(x));
3656 if (sv_eq(x, CopFILESV(PL_curcop))) {
3657 sv_setpvn(x, ipath, ipathend - ipath);
3663 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3664 const char * const lstart = SvPV_const(x,llen);
3666 bstart += blen - llen;
3667 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3668 sv_setpvn(x, ipath, ipathend - ipath);
3673 TAINT_NOT; /* $^X is always tainted, but that's OK */
3675 #endif /* ARG_ZERO_IS_SCRIPT */
3680 d = instr(s,"perl -");
3682 d = instr(s,"perl");
3684 /* avoid getting into infinite loops when shebang
3685 * line contains "Perl" rather than "perl" */
3687 for (d = ipathend-4; d >= ipath; --d) {
3688 if ((*d == 'p' || *d == 'P')
3689 && !ibcmp(d, "perl", 4))
3699 #ifdef ALTERNATE_SHEBANG
3701 * If the ALTERNATE_SHEBANG on this system starts with a
3702 * character that can be part of a Perl expression, then if
3703 * we see it but not "perl", we're probably looking at the
3704 * start of Perl code, not a request to hand off to some
3705 * other interpreter. Similarly, if "perl" is there, but
3706 * not in the first 'word' of the line, we assume the line
3707 * contains the start of the Perl program.
3709 if (d && *s != '#') {
3710 const char *c = ipath;
3711 while (*c && !strchr("; \t\r\n\f\v#", *c))
3714 d = NULL; /* "perl" not in first word; ignore */
3716 *s = '#'; /* Don't try to parse shebang line */
3718 #endif /* ALTERNATE_SHEBANG */
3719 #ifndef MACOS_TRADITIONAL
3724 !instr(s,"indir") &&
3725 instr(PL_origargv[0],"perl"))
3732 while (s < PL_bufend && isSPACE(*s))
3734 if (s < PL_bufend) {
3735 Newxz(newargv,PL_origargc+3,char*);
3737 while (s < PL_bufend && !isSPACE(*s))
3740 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3743 newargv = PL_origargv;
3746 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3748 Perl_croak(aTHX_ "Can't exec %s", ipath);
3752 while (*d && !isSPACE(*d))
3754 while (SPACE_OR_TAB(*d))
3758 const bool switches_done = PL_doswitches;
3759 const U32 oldpdb = PL_perldb;
3760 const bool oldn = PL_minus_n;
3761 const bool oldp = PL_minus_p;
3764 if (*d == 'M' || *d == 'm' || *d == 'C') {
3765 const char * const m = d;
3766 while (*d && !isSPACE(*d))
3768 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3771 d = moreswitches(d);
3773 if (PL_doswitches && !switches_done) {
3774 int argc = PL_origargc;
3775 char **argv = PL_origargv;
3778 } while (argc && argv[0][0] == '-' && argv[0][1]);
3779 init_argv_symbols(argc,argv);
3781 if ((PERLDB_LINE && !oldpdb) ||
3782 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3783 /* if we have already added "LINE: while (<>) {",
3784 we must not do it again */
3786 sv_setpvn(PL_linestr, "", 0);
3787 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3788 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3789 PL_last_lop = PL_last_uni = NULL;
3790 PL_preambled = FALSE;
3792 (void)gv_fetchfile(PL_origfilename);
3799 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3801 PL_lex_state = LEX_FORMLINE;
3806 #ifdef PERL_STRICT_CR
3807 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3809 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3811 case ' ': case '\t': case '\f': case 013:
3812 #ifdef MACOS_TRADITIONAL
3816 PL_realtokenstart = -1;
3825 PL_realtokenstart = -1;
3829 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3830 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3831 /* handle eval qq[#line 1 "foo"\n ...] */
3832 CopLINE_dec(PL_curcop);
3835 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3837 if (!PL_in_eval || PL_rsfp)
3842 while (d < PL_bufend && *d != '\n')
3846 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3847 Perl_croak(aTHX_ "panic: input overflow");
3850 PL_thiswhite = newSVpvn(s, d - s);
3855 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3857 PL_lex_state = LEX_FORMLINE;
3863 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3864 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3867 TOKEN(PEG); /* make sure any #! line is accessible */
3872 /* if (PL_madskills && PL_lex_formbrack) { */
3874 while (d < PL_bufend && *d != '\n')
3878 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3879 Perl_croak(aTHX_ "panic: input overflow");
3880 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3882 PL_thiswhite = newSVpvn("",0);
3883 if (CopLINE(PL_curcop) == 1) {
3884 sv_setpvn(PL_thiswhite, "", 0);
3887 sv_catpvn(PL_thiswhite, s, d - s);
3901 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3909 while (s < PL_bufend && SPACE_OR_TAB(*s))
3912 if (strnEQ(s,"=>",2)) {
3913 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3914 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
3915 OPERATOR('-'); /* unary minus */
3917 PL_last_uni = PL_oldbufptr;
3919 case 'r': ftst = OP_FTEREAD; break;
3920 case 'w': ftst = OP_FTEWRITE; break;
3921 case 'x': ftst = OP_FTEEXEC; break;
3922 case 'o': ftst = OP_FTEOWNED; break;
3923 case 'R': ftst = OP_FTRREAD; break;
3924 case 'W': ftst = OP_FTRWRITE; break;
3925 case 'X': ftst = OP_FTREXEC; break;
3926 case 'O': ftst = OP_FTROWNED; break;
3927 case 'e': ftst = OP_FTIS; break;
3928 case 'z': ftst = OP_FTZERO; break;
3929 case 's': ftst = OP_FTSIZE; break;
3930 case 'f': ftst = OP_FTFILE; break;
3931 case 'd': ftst = OP_FTDIR; break;
3932 case 'l': ftst = OP_FTLINK; break;
3933 case 'p': ftst = OP_FTPIPE; break;
3934 case 'S': ftst = OP_FTSOCK; break;
3935 case 'u': ftst = OP_FTSUID; break;
3936 case 'g': ftst = OP_FTSGID; break;
3937 case 'k': ftst = OP_FTSVTX; break;
3938 case 'b': ftst = OP_FTBLK; break;
3939 case 'c': ftst = OP_FTCHR; break;
3940 case 't': ftst = OP_FTTTY; break;
3941 case 'T': ftst = OP_FTTEXT; break;
3942 case 'B': ftst = OP_FTBINARY; break;
3943 case 'M': case 'A': case 'C':
3944 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
3946 case 'M': ftst = OP_FTMTIME; break;
3947 case 'A': ftst = OP_FTATIME; break;
3948 case 'C': ftst = OP_FTCTIME; break;
3956 PL_last_lop_op = (OPCODE)ftst;
3957 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3958 "### Saw file test %c\n", (int)tmp);
3963 /* Assume it was a minus followed by a one-letter named
3964 * subroutine call (or a -bareword), then. */
3965 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3966 "### '-%c' looked like a file test but was not\n",
3973 const char tmp = *s++;
3976 if (PL_expect == XOPERATOR)
3981 else if (*s == '>') {
3984 if (isIDFIRST_lazy_if(s,UTF)) {
3985 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3993 if (PL_expect == XOPERATOR)
3996 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3998 OPERATOR('-'); /* unary minus */
4004 const char tmp = *s++;
4007 if (PL_expect == XOPERATOR)
4012 if (PL_expect == XOPERATOR)
4015 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4022 if (PL_expect != XOPERATOR) {
4023 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4024 PL_expect = XOPERATOR;
4025 force_ident(PL_tokenbuf, '*');
4038 if (PL_expect == XOPERATOR) {
4042 PL_tokenbuf[0] = '%';
4043 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
4044 if (!PL_tokenbuf[1]) {
4047 PL_pending_ident = '%';
4058 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
4059 && FEATURE_IS_ENABLED("~~"))
4066 const char tmp = *s++;
4072 goto just_a_word_zero_gv;
4075 switch (PL_expect) {
4081 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4083 PL_bufptr = s; /* update in case we back off */
4089 PL_expect = XTERMBLOCK;
4092 stuffstart = s - SvPVX(PL_linestr) - 1;
4096 while (isIDFIRST_lazy_if(s,UTF)) {
4099 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4100 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
4101 if (tmp < 0) tmp = -tmp;
4116 sv = newSVpvn(s, len);
4118 d = scan_str(d,TRUE,TRUE);
4120 /* MUST advance bufptr here to avoid bogus
4121 "at end of line" context messages from yyerror().
4123 PL_bufptr = s + len;
4124 yyerror("Unterminated attribute parameter in attribute list");
4128 return REPORT(0); /* EOF indicator */
4132 sv_catsv(sv, PL_lex_stuff);
4133 attrs = append_elem(OP_LIST, attrs,
4134 newSVOP(OP_CONST, 0, sv));
4135 SvREFCNT_dec(PL_lex_stuff);
4136 PL_lex_stuff = NULL;
4139 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4141 if (PL_in_my == KEY_our) {
4143 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4145 /* skip to avoid loading attributes.pm */
4147 deprecate(":unique");
4150 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4153 /* NOTE: any CV attrs applied here need to be part of
4154 the CVf_BUILTIN_ATTRS define in cv.h! */
4155 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4157 CvLVALUE_on(PL_compcv);
4159 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4161 CvLOCKED_on(PL_compcv);
4163 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4165 CvMETHOD_on(PL_compcv);
4167 else if (!PL_in_my && len == 9 && strnEQ(SvPVX(sv), "assertion", len)) {
4169 CvASSERTION_on(PL_compcv);
4171 /* After we've set the flags, it could be argued that
4172 we don't need to do the attributes.pm-based setting
4173 process, and shouldn't bother appending recognized
4174 flags. To experiment with that, uncomment the
4175 following "else". (Note that's already been
4176 uncommented. That keeps the above-applied built-in
4177 attributes from being intercepted (and possibly
4178 rejected) by a package's attribute routines, but is
4179 justified by the performance win for the common case
4180 of applying only built-in attributes.) */
4182 attrs = append_elem(OP_LIST, attrs,
4183 newSVOP(OP_CONST, 0,
4187 if (*s == ':' && s[1] != ':')
4190 break; /* require real whitespace or :'s */
4191 /* XXX losing whitespace on sequential attributes here */
4195 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4196 if (*s != ';' && *s != '}' && *s != tmp
4197 && (tmp != '=' || *s != ')')) {
4198 const char q = ((*s == '\'') ? '"' : '\'');
4199 /* If here for an expression, and parsed no attrs, back
4201 if (tmp == '=' && !attrs) {
4205 /* MUST advance bufptr here to avoid bogus "at end of line"
4206 context messages from yyerror().
4210 ? Perl_form(aTHX_ "Invalid separator character "
4211 "%c%c%c in attribute list", q, *s, q)
4212 : "Unterminated attribute list" );
4220 start_force(PL_curforce);
4221 NEXTVAL_NEXTTOKE.opval = attrs;
4222 CURMAD('_', PL_nextwhite);
4227 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4228 (s - SvPVX(PL_linestr)) - stuffstart);
4236 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4237 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4245 const char tmp = *s++;
4250 const char tmp = *s++;
4258 if (PL_lex_brackets <= 0)
4259 yyerror("Unmatched right square bracket");
4262 if (PL_lex_state == LEX_INTERPNORMAL) {
4263 if (PL_lex_brackets == 0) {
4264 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4265 PL_lex_state = LEX_INTERPEND;
4272 if (PL_lex_brackets > 100) {
4273 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4275 switch (PL_expect) {
4277 if (PL_lex_formbrack) {
4281 if (PL_oldoldbufptr == PL_last_lop)
4282 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4284 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4285 OPERATOR(HASHBRACK);
4287 while (s < PL_bufend && SPACE_OR_TAB(*s))
4290 PL_tokenbuf[0] = '\0';
4291 if (d < PL_bufend && *d == '-') {
4292 PL_tokenbuf[0] = '-';
4294 while (d < PL_bufend && SPACE_OR_TAB(*d))
4297 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4298 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4300 while (d < PL_bufend && SPACE_OR_TAB(*d))
4303 const char minus = (PL_tokenbuf[0] == '-');
4304 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4312 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4317 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4322 if (PL_oldoldbufptr == PL_last_lop)
4323 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4325 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4328 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4330 /* This hack is to get the ${} in the message. */
4332 yyerror("syntax error");
4335 OPERATOR(HASHBRACK);
4337 /* This hack serves to disambiguate a pair of curlies
4338 * as being a block or an anon hash. Normally, expectation
4339 * determines that, but in cases where we're not in a
4340 * position to expect anything in particular (like inside
4341 * eval"") we have to resolve the ambiguity. This code
4342 * covers the case where the first term in the curlies is a
4343 * quoted string. Most other cases need to be explicitly
4344 * disambiguated by prepending a "+" before the opening
4345 * curly in order to force resolution as an anon hash.
4347 * XXX should probably propagate the outer expectation
4348 * into eval"" to rely less on this hack, but that could
4349 * potentially break current behavior of eval"".
4353 if (*s == '\'' || *s == '"' || *s == '`') {
4354 /* common case: get past first string, handling escapes */
4355 for (t++; t < PL_bufend && *t != *s;)
4356 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4360 else if (*s == 'q') {
4363 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4366 /* skip q//-like construct */
4368 char open, close, term;
4371 while (t < PL_bufend && isSPACE(*t))
4373 /* check for q => */
4374 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4375 OPERATOR(HASHBRACK);
4379 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4383 for (t++; t < PL_bufend; t++) {
4384 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4386 else if (*t == open)
4390 for (t++; t < PL_bufend; t++) {
4391 if (*t == '\\' && t+1 < PL_bufend)
4393 else if (*t == close && --brackets <= 0)
4395 else if (*t == open)
4402 /* skip plain q word */
4403 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4406 else if (isALNUM_lazy_if(t,UTF)) {
4408 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4411 while (t < PL_bufend && isSPACE(*t))
4413 /* if comma follows first term, call it an anon hash */
4414 /* XXX it could be a comma expression with loop modifiers */
4415 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4416 || (*t == '=' && t[1] == '>')))
4417 OPERATOR(HASHBRACK);
4418 if (PL_expect == XREF)
4421 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4427 yylval.ival = CopLINE(PL_curcop);
4428 if (isSPACE(*s) || *s == '#')
4429 PL_copline = NOLINE; /* invalidate current command line number */
4434 if (PL_lex_brackets <= 0)
4435 yyerror("Unmatched right curly bracket");
4437 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4438 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4439 PL_lex_formbrack = 0;
4440 if (PL_lex_state == LEX_INTERPNORMAL) {
4441 if (PL_lex_brackets == 0) {
4442 if (PL_expect & XFAKEBRACK) {
4443 PL_expect &= XENUMMASK;
4444 PL_lex_state = LEX_INTERPEND;
4449 PL_thiswhite = newSVpvn("",0);
4450 sv_catpvn(PL_thiswhite,"}",1);
4453 return yylex(); /* ignore fake brackets */
4455 if (*s == '-' && s[1] == '>')
4456 PL_lex_state = LEX_INTERPENDMAYBE;
4457 else if (*s != '[' && *s != '{')
4458 PL_lex_state = LEX_INTERPEND;
4461 if (PL_expect & XFAKEBRACK) {
4462 PL_expect &= XENUMMASK;
4464 return yylex(); /* ignore fake brackets */
4466 start_force(PL_curforce);
4468 curmad('X', newSVpvn(s-1,1));
4469 CURMAD('_', PL_thiswhite);
4474 PL_thistoken = newSVpvn("",0);
4482 if (PL_expect == XOPERATOR) {
4483 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4484 && isIDFIRST_lazy_if(s,UTF))
4486 CopLINE_dec(PL_curcop);
4487 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4488 CopLINE_inc(PL_curcop);
4493 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4495 PL_expect = XOPERATOR;
4496 force_ident(PL_tokenbuf, '&');
4500 yylval.ival = (OPpENTERSUB_AMPER<<8);
4512 const char tmp = *s++;
4519 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4520 && strchr("+-*/%.^&|<",tmp))
4521 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4522 "Reversed %c= operator",(int)tmp);
4524 if (PL_expect == XSTATE && isALPHA(tmp) &&
4525 (s == PL_linestart+1 || s[-2] == '\n') )
4527 if (PL_in_eval && !PL_rsfp) {
4532 if (strnEQ(s,"=cut",4)) {
4548 PL_thiswhite = newSVpvn("",0);
4549 sv_catpvn(PL_thiswhite, PL_linestart,
4550 PL_bufend - PL_linestart);
4554 PL_doextract = TRUE;
4558 if (PL_lex_brackets < PL_lex_formbrack) {
4560 #ifdef PERL_STRICT_CR
4561 while (SPACE_OR_TAB(*t))
4563 while (SPACE_OR_TAB(*t) || *t == '\r')
4566 if (*t == '\n' || *t == '#') {
4577 const char tmp = *s++;
4579 /* was this !=~ where !~ was meant?
4580 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4582 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4583 const char *t = s+1;
4585 while (t < PL_bufend && isSPACE(*t))
4588 if (*t == '/' || *t == '?' ||
4589 ((*t == 'm' || *t == 's' || *t == 'y')
4590 && !isALNUM(t[1])) ||
4591 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4592 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4593 "!=~ should be !~");
4603 if (PL_expect != XOPERATOR) {
4604 if (s[1] != '<' && !strchr(s,'>'))
4607 s = scan_heredoc(s);
4609 s = scan_inputsymbol(s);
4610 TERM(sublex_start());
4616 SHop(OP_LEFT_SHIFT);
4630 const char tmp = *s++;
4632 SHop(OP_RIGHT_SHIFT);
4633 else if (tmp == '=')
4642 if (PL_expect == XOPERATOR) {
4643 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4645 deprecate_old(commaless_variable_list);
4646 return REPORT(','); /* grandfather non-comma-format format */
4650 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4651 PL_tokenbuf[0] = '@';
4652 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4653 sizeof PL_tokenbuf - 1, FALSE);
4654 if (PL_expect == XOPERATOR)
4655 no_op("Array length", s);
4656 if (!PL_tokenbuf[1])
4658 PL_expect = XOPERATOR;
4659 PL_pending_ident = '#';
4663 PL_tokenbuf[0] = '$';
4664 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4665 sizeof PL_tokenbuf - 1, FALSE);
4666 if (PL_expect == XOPERATOR)
4668 if (!PL_tokenbuf[1]) {
4670 yyerror("Final $ should be \\$ or $name");
4674 /* This kludge not intended to be bulletproof. */
4675 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4676 yylval.opval = newSVOP(OP_CONST, 0,
4677 newSViv(CopARYBASE_get(&PL_compiling)));
4678 yylval.opval->op_private = OPpCONST_ARYBASE;
4684 const char tmp = *s;
4685 if (PL_lex_state == LEX_NORMAL)
4688 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4689 && intuit_more(s)) {
4691 PL_tokenbuf[0] = '@';
4692 if (ckWARN(WARN_SYNTAX)) {
4695 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4698 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4699 while (t < PL_bufend && *t != ']')
4701 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4702 "Multidimensional syntax %.*s not supported",
4703 (int)((t - PL_bufptr) + 1), PL_bufptr);
4707 else if (*s == '{') {
4709 PL_tokenbuf[0] = '%';
4710 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4711 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4713 char tmpbuf[sizeof PL_tokenbuf];
4716 } while (isSPACE(*t));
4717 if (isIDFIRST_lazy_if(t,UTF)) {
4719 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4723 if (*t == ';' && get_cv(tmpbuf, FALSE))
4724 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4725 "You need to quote \"%s\"",
4732 PL_expect = XOPERATOR;
4733 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4734 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4735 if (!islop || PL_last_lop_op == OP_GREPSTART)
4736 PL_expect = XOPERATOR;
4737 else if (strchr("$@\"'`q", *s))
4738 PL_expect = XTERM; /* e.g. print $fh "foo" */
4739 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4740 PL_expect = XTERM; /* e.g. print $fh &sub */
4741 else if (isIDFIRST_lazy_if(s,UTF)) {
4742 char tmpbuf[sizeof PL_tokenbuf];
4744 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4745 if ((t2 = keyword(tmpbuf, len))) {
4746 /* binary operators exclude handle interpretations */
4758 PL_expect = XTERM; /* e.g. print $fh length() */
4763 PL_expect = XTERM; /* e.g. print $fh subr() */
4766 else if (isDIGIT(*s))
4767 PL_expect = XTERM; /* e.g. print $fh 3 */
4768 else if (*s == '.' && isDIGIT(s[1]))
4769 PL_expect = XTERM; /* e.g. print $fh .3 */
4770 else if ((*s == '?' || *s == '-' || *s == '+')
4771 && !isSPACE(s[1]) && s[1] != '=')
4772 PL_expect = XTERM; /* e.g. print $fh -1 */
4773 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4775 PL_expect = XTERM; /* e.g. print $fh /.../
4776 XXX except DORDOR operator
4778 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4780 PL_expect = XTERM; /* print $fh <<"EOF" */
4783 PL_pending_ident = '$';
4787 if (PL_expect == XOPERATOR)
4789 PL_tokenbuf[0] = '@';
4790 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4791 if (!PL_tokenbuf[1]) {
4794 if (PL_lex_state == LEX_NORMAL)
4796 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4798 PL_tokenbuf[0] = '%';
4800 /* Warn about @ where they meant $. */
4801 if (*s == '[' || *s == '{') {
4802 if (ckWARN(WARN_SYNTAX)) {
4803 const char *t = s + 1;
4804 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4806 if (*t == '}' || *t == ']') {
4808 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4809 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4810 "Scalar value %.*s better written as $%.*s",
4811 (int)(t-PL_bufptr), PL_bufptr,
4812 (int)(t-PL_bufptr-1), PL_bufptr+1);
4817 PL_pending_ident = '@';
4820 case '/': /* may be division, defined-or, or pattern */
4821 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4825 case '?': /* may either be conditional or pattern */
4826 if(PL_expect == XOPERATOR) {
4834 /* A // operator. */
4844 /* Disable warning on "study /blah/" */
4845 if (PL_oldoldbufptr == PL_last_uni
4846 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4847 || memNE(PL_last_uni, "study", 5)
4848 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4851 s = scan_pat(s,OP_MATCH);
4852 TERM(sublex_start());
4856 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4857 #ifdef PERL_STRICT_CR
4860 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4862 && (s == PL_linestart || s[-1] == '\n') )
4864 PL_lex_formbrack = 0;
4868 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4874 yylval.ival = OPf_SPECIAL;
4880 if (PL_expect != XOPERATOR)
4885 case '0': case '1': case '2': case '3': case '4':
4886 case '5': case '6': case '7': case '8': case '9':
4887 s = scan_num(s, &yylval);
4888 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
4889 if (PL_expect == XOPERATOR)
4894 s = scan_str(s,!!PL_madskills,FALSE);
4895 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4896 if (PL_expect == XOPERATOR) {
4897 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4899 deprecate_old(commaless_variable_list);
4900 return REPORT(','); /* grandfather non-comma-format format */
4907 yylval.ival = OP_CONST;
4908 TERM(sublex_start());
4911 s = scan_str(s,!!PL_madskills,FALSE);
4912 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4913 if (PL_expect == XOPERATOR) {
4914 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4916 deprecate_old(commaless_variable_list);
4917 return REPORT(','); /* grandfather non-comma-format format */
4924 yylval.ival = OP_CONST;
4925 /* FIXME. I think that this can be const if char *d is replaced by
4926 more localised variables. */
4927 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4928 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4929 yylval.ival = OP_STRINGIFY;
4933 TERM(sublex_start());
4936 s = scan_str(s,!!PL_madskills,FALSE);
4937 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
4938 if (PL_expect == XOPERATOR)
4939 no_op("Backticks",s);
4942 yylval.ival = OP_BACKTICK;
4944 TERM(sublex_start());
4948 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4949 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4951 if (PL_expect == XOPERATOR)
4952 no_op("Backslash",s);
4956 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4957 char *start = s + 2;
4958 while (isDIGIT(*start) || *start == '_')
4960 if (*start == '.' && isDIGIT(start[1])) {
4961 s = scan_num(s, &yylval);
4964 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4965 else if (!isALPHA(*start) && (PL_expect == XTERM
4966 || PL_expect == XREF || PL_expect == XSTATE
4967 || PL_expect == XTERMORDORDOR)) {
4968 /* XXX Use gv_fetchpvn rather than stomping on a const string */
4969 const char c = *start;
4972 gv = gv_fetchpv(s, 0, SVt_PVCV);
4975 s = scan_num(s, &yylval);
4982 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5018 I32 orig_keyword = 0;
5023 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5025 /* Some keywords can be followed by any delimiter, including ':' */
5026 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5027 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5028 (PL_tokenbuf[0] == 'q' &&
5029 strchr("qwxr", PL_tokenbuf[1])))));
5031 /* x::* is just a word, unless x is "CORE" */
5032 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5036 while (d < PL_bufend && isSPACE(*d))
5037 d++; /* no comments skipped here, or s### is misparsed */
5039 /* Is this a label? */
5040 if (!tmp && PL_expect == XSTATE
5041 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5043 yylval.pval = savepv(PL_tokenbuf);
5048 /* Check for keywords */
5049 tmp = keyword(PL_tokenbuf, len);
5051 /* Is this a word before a => operator? */
5052 if (*d == '=' && d[1] == '>') {
5055 = (OP*)newSVOP(OP_CONST, 0,
5056 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5057 yylval.opval->op_private = OPpCONST_BARE;
5061 if (tmp < 0) { /* second-class keyword? */
5062 GV *ogv = NULL; /* override (winner) */
5063 GV *hgv = NULL; /* hidden (loser) */
5064 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5066 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5069 if (GvIMPORTED_CV(gv))
5071 else if (! CvMETHOD(cv))
5075 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5076 (gv = *gvp) != (GV*)&PL_sv_undef &&
5077 GvCVu(gv) && GvIMPORTED_CV(gv))
5084 tmp = 0; /* overridden by import or by GLOBAL */
5087 && -tmp==KEY_lock /* XXX generalizable kludge */
5089 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
5091 tmp = 0; /* any sub overrides "weak" keyword */
5093 else { /* no override */
5095 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
5096 Perl_warner(aTHX_ packWARN(WARN_MISC),
5097 "dump() better written as CORE::dump()");
5101 if (hgv && tmp != KEY_x && tmp != KEY_CORE
5102 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
5103 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5104 "Ambiguous call resolved as CORE::%s(), %s",
5105 GvENAME(hgv), "qualify as such or use &");
5112 default: /* not a keyword */
5113 /* Trade off - by using this evil construction we can pull the
5114 variable gv into the block labelled keylookup. If not, then
5115 we have to give it function scope so that the goto from the
5116 earlier ':' case doesn't bypass the initialisation. */
5118 just_a_word_zero_gv:
5126 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5129 SV *nextPL_nextwhite = 0;
5133 /* Get the rest if it looks like a package qualifier */
5135 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5137 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5140 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5141 *s == '\'' ? "'" : "::");
5146 if (PL_expect == XOPERATOR) {
5147 if (PL_bufptr == PL_linestart) {
5148 CopLINE_dec(PL_curcop);
5149 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5150 CopLINE_inc(PL_curcop);
5153 no_op("Bareword",s);
5156 /* Look for a subroutine with this name in current package,
5157 unless name is "Foo::", in which case Foo is a bearword
5158 (and a package name). */
5160 if (len > 2 && !PL_madskills &&
5161 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5163 if (ckWARN(WARN_BAREWORD)
5164 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5165 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5166 "Bareword \"%s\" refers to nonexistent package",
5169 PL_tokenbuf[len] = '\0';
5175 /* Mustn't actually add anything to a symbol table.
5176 But also don't want to "initialise" any placeholder
5177 constants that might already be there into full
5178 blown PVGVs with attached PVCV. */
5179 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5180 GV_NOADD_NOINIT, SVt_PVCV);
5185 /* if we saw a global override before, get the right name */
5188 sv = newSVpvs("CORE::GLOBAL::");
5189 sv_catpv(sv,PL_tokenbuf);
5192 /* If len is 0, newSVpv does strlen(), which is correct.
5193 If len is non-zero, then it will be the true length,
5194 and so the scalar will be created correctly. */
5195 sv = newSVpv(PL_tokenbuf,len);
5198 if (PL_madskills && !PL_thistoken) {
5199 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5200 PL_thistoken = newSVpv(start,s - start);
5201 PL_realtokenstart = s - SvPVX(PL_linestr);
5205 /* Presume this is going to be a bareword of some sort. */
5208 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5209 yylval.opval->op_private = OPpCONST_BARE;
5210 /* UTF-8 package name? */
5211 if (UTF && !IN_BYTES &&
5212 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5215 /* And if "Foo::", then that's what it certainly is. */
5220 /* Do the explicit type check so that we don't need to force
5221 the initialisation of the symbol table to have a real GV.
5222 Beware - gv may not really be a PVGV, cv may not really be
5223 a PVCV, (because of the space optimisations that gv_init
5224 understands) But they're true if for this symbol there is
5225 respectively a typeglob and a subroutine.
5227 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5228 /* Real typeglob, so get the real subroutine: */
5230 /* A proxy for a subroutine in this package? */
5231 : SvOK(gv) ? (CV *) gv : NULL)
5234 /* See if it's the indirect object for a list operator. */
5236 if (PL_oldoldbufptr &&
5237 PL_oldoldbufptr < PL_bufptr &&
5238 (PL_oldoldbufptr == PL_last_lop
5239 || PL_oldoldbufptr == PL_last_uni) &&
5240 /* NO SKIPSPACE BEFORE HERE! */
5241 (PL_expect == XREF ||
5242 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5244 bool immediate_paren = *s == '(';
5246 /* (Now we can afford to cross potential line boundary.) */
5247 s = SKIPSPACE2(s,nextPL_nextwhite);
5249 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5252 /* Two barewords in a row may indicate method call. */
5254 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5255 (tmp = intuit_method(s, gv, cv)))
5258 /* If not a declared subroutine, it's an indirect object. */
5259 /* (But it's an indir obj regardless for sort.) */
5260 /* Also, if "_" follows a filetest operator, it's a bareword */
5263 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5265 (PL_last_lop_op != OP_MAPSTART &&
5266 PL_last_lop_op != OP_GREPSTART))))
5267 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5268 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5271 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5276 PL_expect = XOPERATOR;
5279 s = SKIPSPACE2(s,nextPL_nextwhite);
5280 PL_nextwhite = nextPL_nextwhite;
5285 /* Is this a word before a => operator? */
5286 if (*s == '=' && s[1] == '>' && !pkgname) {
5288 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5289 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5290 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5294 /* If followed by a paren, it's certainly a subroutine. */
5299 while (SPACE_OR_TAB(*d))
5301 if (*d == ')' && (sv = gv_const_sv(gv))) {
5305 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5306 sv_catpvn(PL_thistoken, par, s - par);
5308 sv_free(PL_nextwhite);
5318 PL_nextwhite = PL_thiswhite;
5321 start_force(PL_curforce);
5323 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5324 PL_expect = XOPERATOR;
5327 PL_nextwhite = nextPL_nextwhite;
5328 curmad('X', PL_thistoken);
5329 PL_thistoken = newSVpvn("",0);
5337 /* If followed by var or block, call it a method (unless sub) */
5339 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5340 PL_last_lop = PL_oldbufptr;
5341 PL_last_lop_op = OP_METHOD;
5345 /* If followed by a bareword, see if it looks like indir obj. */
5348 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5349 && (tmp = intuit_method(s, gv, cv)))
5352 /* Not a method, so call it a subroutine (if defined) */
5355 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5356 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5357 "Ambiguous use of -%s resolved as -&%s()",
5358 PL_tokenbuf, PL_tokenbuf);
5359 /* Check for a constant sub */
5360 if ((sv = gv_const_sv(gv))) {
5362 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5363 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5364 yylval.opval->op_private = 0;
5368 /* Resolve to GV now. */
5369 if (SvTYPE(gv) != SVt_PVGV) {
5370 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5371 assert (SvTYPE(gv) == SVt_PVGV);
5372 /* cv must have been some sort of placeholder, so
5373 now needs replacing with a real code reference. */
5377 op_free(yylval.opval);
5378 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5379 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5380 PL_last_lop = PL_oldbufptr;
5381 PL_last_lop_op = OP_ENTERSUB;
5382 /* Is there a prototype? */
5389 const char *proto = SvPV_const((SV*)cv, protolen);
5392 if (*proto == '$' && proto[1] == '\0')
5394 while (*proto == ';')
5396 if (*proto == '&' && *s == '{') {
5397 sv_setpv(PL_subname, PL_curstash ?
5398 "__ANON__" : "__ANON__::__ANON__");
5405 PL_nextwhite = PL_thiswhite;
5408 start_force(PL_curforce);
5409 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5412 PL_nextwhite = nextPL_nextwhite;
5413 curmad('X', PL_thistoken);
5414 PL_thistoken = newSVpvn("",0);
5421 /* Guess harder when madskills require "best effort". */
5422 if (PL_madskills && (!gv || !GvCVu(gv))) {
5423 int probable_sub = 0;
5424 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5426 else if (isALPHA(*s)) {
5430 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5431 if (!keyword(tmpbuf,tmplen))
5434 while (d < PL_bufend && isSPACE(*d))
5436 if (*d == '=' && d[1] == '>')
5441 gv = gv_fetchpv(PL_tokenbuf, TRUE, SVt_PVCV);
5442 op_free(yylval.opval);
5443 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5444 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5445 PL_last_lop = PL_oldbufptr;
5446 PL_last_lop_op = OP_ENTERSUB;
5447 PL_nextwhite = PL_thiswhite;
5449 start_force(PL_curforce);
5450 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5452 PL_nextwhite = nextPL_nextwhite;
5453 curmad('X', PL_thistoken);
5454 PL_thistoken = newSVpvn("",0);
5459 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5466 /* Call it a bare word */
5468 if (PL_hints & HINT_STRICT_SUBS)
5469 yylval.opval->op_private |= OPpCONST_STRICT;
5472 if (lastchar != '-') {
5473 if (ckWARN(WARN_RESERVED)) {
5477 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
5478 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5485 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5486 && ckWARN_d(WARN_AMBIGUOUS)) {
5487 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5488 "Operator or semicolon missing before %c%s",
5489 lastchar, PL_tokenbuf);
5490 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5491 "Ambiguous use of %c resolved as operator %c",
5492 lastchar, lastchar);
5498 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5499 newSVpv(CopFILE(PL_curcop),0));
5503 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5504 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5507 case KEY___PACKAGE__:
5508 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5510 ? newSVhek(HvNAME_HEK(PL_curstash))
5517 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5518 const char *pname = "main";
5519 if (PL_tokenbuf[2] == 'D')
5520 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5521 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5525 GvIOp(gv) = newIO();
5526 IoIFP(GvIOp(gv)) = PL_rsfp;
5527 #if defined(HAS_FCNTL) && defined(F_SETFD)
5529 const int fd = PerlIO_fileno(PL_rsfp);
5530 fcntl(fd,F_SETFD,fd >= 3);
5533 /* Mark this internal pseudo-handle as clean */
5534 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5536 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5537 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5538 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5540 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5541 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5542 /* if the script was opened in binmode, we need to revert
5543 * it to text mode for compatibility; but only iff it has CRs
5544 * XXX this is a questionable hack at best. */
5545 if (PL_bufend-PL_bufptr > 2
5546 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5549 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5550 loc = PerlIO_tell(PL_rsfp);
5551 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5554 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5556 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5557 #endif /* NETWARE */
5558 #ifdef PERLIO_IS_STDIO /* really? */
5559 # if defined(__BORLANDC__)
5560 /* XXX see note in do_binmode() */
5561 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5565 PerlIO_seek(PL_rsfp, loc, 0);
5569 #ifdef PERLIO_LAYERS
5572 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5573 else if (PL_encoding) {
5580 XPUSHs(PL_encoding);
5582 call_method("name", G_SCALAR);
5586 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5587 Perl_form(aTHX_ ":encoding(%"SVf")",
5596 if (PL_realtokenstart >= 0) {
5597 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5599 PL_endwhite = newSVpvn("",0);
5600 sv_catsv(PL_endwhite, PL_thiswhite);
5602 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5603 PL_realtokenstart = -1;
5605 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5606 SvCUR(PL_endwhite))) != Nullch) ;
5620 if (PL_expect == XSTATE) {
5627 if (*s == ':' && s[1] == ':') {
5630 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5631 if (!(tmp = keyword(PL_tokenbuf, len)))
5632 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5635 else if (tmp == KEY_require || tmp == KEY_do)
5636 /* that's a way to remember we saw "CORE::" */
5649 LOP(OP_ACCEPT,XTERM);
5655 LOP(OP_ATAN2,XTERM);
5661 LOP(OP_BINMODE,XTERM);
5664 LOP(OP_BLESS,XTERM);
5673 /* When 'use switch' is in effect, continue has a dual
5674 life as a control operator. */
5676 if (!FEATURE_IS_ENABLED("switch"))
5679 /* We have to disambiguate the two senses of
5680 "continue". If the next token is a '{' then
5681 treat it as the start of a continue block;
5682 otherwise treat it as a control operator.
5694 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5711 if (!PL_cryptseen) {
5712 PL_cryptseen = TRUE;
5716 LOP(OP_CRYPT,XTERM);
5719 LOP(OP_CHMOD,XTERM);
5722 LOP(OP_CHOWN,XTERM);
5725 LOP(OP_CONNECT,XTERM);
5744 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5745 if (orig_keyword == KEY_do) {
5754 PL_hints |= HINT_BLOCK_SCOPE;
5764 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5765 LOP(OP_DBMOPEN,XTERM);
5771 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5778 yylval.ival = CopLINE(PL_curcop);
5794 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5795 UNIBRACK(OP_ENTEREVAL);
5813 case KEY_endhostent:
5819 case KEY_endservent:
5822 case KEY_endprotoent:
5833 yylval.ival = CopLINE(PL_curcop);
5835 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5838 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5841 if ((PL_bufend - p) >= 3 &&
5842 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5844 else if ((PL_bufend - p) >= 4 &&
5845 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5848 if (isIDFIRST_lazy_if(p,UTF)) {
5849 p = scan_ident(p, PL_bufend,
5850 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5854 Perl_croak(aTHX_ "Missing $ on loop variable");
5856 s = SvPVX(PL_linestr) + soff;
5862 LOP(OP_FORMLINE,XTERM);
5868 LOP(OP_FCNTL,XTERM);
5874 LOP(OP_FLOCK,XTERM);
5883 LOP(OP_GREPSTART, XREF);
5886 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5901 case KEY_getpriority:
5902 LOP(OP_GETPRIORITY,XTERM);
5904 case KEY_getprotobyname:
5907 case KEY_getprotobynumber:
5908 LOP(OP_GPBYNUMBER,XTERM);
5910 case KEY_getprotoent:
5922 case KEY_getpeername:
5923 UNI(OP_GETPEERNAME);
5925 case KEY_gethostbyname:
5928 case KEY_gethostbyaddr:
5929 LOP(OP_GHBYADDR,XTERM);
5931 case KEY_gethostent:
5934 case KEY_getnetbyname:
5937 case KEY_getnetbyaddr:
5938 LOP(OP_GNBYADDR,XTERM);
5943 case KEY_getservbyname:
5944 LOP(OP_GSBYNAME,XTERM);
5946 case KEY_getservbyport:
5947 LOP(OP_GSBYPORT,XTERM);
5949 case KEY_getservent:
5952 case KEY_getsockname:
5953 UNI(OP_GETSOCKNAME);
5955 case KEY_getsockopt:
5956 LOP(OP_GSOCKOPT,XTERM);
5971 yylval.ival = CopLINE(PL_curcop);
5982 yylval.ival = CopLINE(PL_curcop);
5986 LOP(OP_INDEX,XTERM);
5992 LOP(OP_IOCTL,XTERM);
6004 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6036 LOP(OP_LISTEN,XTERM);
6045 s = scan_pat(s,OP_MATCH);
6046 TERM(sublex_start());
6049 LOP(OP_MAPSTART, XREF);
6052 LOP(OP_MKDIR,XTERM);
6055 LOP(OP_MSGCTL,XTERM);
6058 LOP(OP_MSGGET,XTERM);
6061 LOP(OP_MSGRCV,XTERM);
6064 LOP(OP_MSGSND,XTERM);
6071 if (isIDFIRST_lazy_if(s,UTF)) {
6075 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6076 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6078 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6079 if (!PL_in_my_stash) {
6082 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6086 if (PL_madskills) { /* just add type to declarator token */
6087 sv_catsv(PL_thistoken, PL_nextwhite);
6089 sv_catpvn(PL_thistoken, start, s - start);
6097 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6104 s = tokenize_use(0, s);
6108 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6115 if (isIDFIRST_lazy_if(s,UTF)) {
6117 for (d = s; isALNUM_lazy_if(d,UTF);)
6119 for (t=d; isSPACE(*t);)
6121 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6123 && !(t[0] == '=' && t[1] == '>')
6125 int parms_len = (int)(d-s);
6126 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6127 "Precedence problem: open %.*s should be open(%.*s)",
6128 parms_len, s, parms_len, s);
6134 yylval.ival = OP_OR;
6144 LOP(OP_OPEN_DIR,XTERM);
6147 checkcomma(s,PL_tokenbuf,"filehandle");
6151 checkcomma(s,PL_tokenbuf,"filehandle");
6170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6174 LOP(OP_PIPE_OP,XTERM);
6177 s = scan_str(s,!!PL_madskills,FALSE);
6180 yylval.ival = OP_CONST;
6181 TERM(sublex_start());
6187 s = scan_str(s,!!PL_madskills,FALSE);
6190 PL_expect = XOPERATOR;
6192 if (SvCUR(PL_lex_stuff)) {
6195 d = SvPV_force(PL_lex_stuff, len);
6197 for (; isSPACE(*d) && len; --len, ++d)
6202 if (!warned && ckWARN(WARN_QW)) {
6203 for (; !isSPACE(*d) && len; --len, ++d) {
6205 Perl_warner(aTHX_ packWARN(WARN_QW),
6206 "Possible attempt to separate words with commas");
6209 else if (*d == '#') {
6210 Perl_warner(aTHX_ packWARN(WARN_QW),
6211 "Possible attempt to put comments in qw() list");
6217 for (; !isSPACE(*d) && len; --len, ++d)
6220 sv = newSVpvn(b, d-b);
6221 if (DO_UTF8(PL_lex_stuff))
6223 words = append_elem(OP_LIST, words,
6224 newSVOP(OP_CONST, 0, tokeq(sv)));
6228 start_force(PL_curforce);
6229 NEXTVAL_NEXTTOKE.opval = words;
6234 SvREFCNT_dec(PL_lex_stuff);
6235 PL_lex_stuff = NULL;
6241 s = scan_str(s,!!PL_madskills,FALSE);
6244 yylval.ival = OP_STRINGIFY;
6245 if (SvIVX(PL_lex_stuff) == '\'')
6246 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6247 TERM(sublex_start());
6250 s = scan_pat(s,OP_QR);
6251 TERM(sublex_start());
6254 s = scan_str(s,!!PL_madskills,FALSE);
6257 yylval.ival = OP_BACKTICK;
6259 TERM(sublex_start());
6267 s = force_version(s, FALSE);
6269 else if (*s != 'v' || !isDIGIT(s[1])
6270 || (s = force_version(s, TRUE), *s == 'v'))
6272 *PL_tokenbuf = '\0';
6273 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6274 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6275 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
6277 yyerror("<> should be quotes");
6279 if (orig_keyword == KEY_require) {
6287 PL_last_uni = PL_oldbufptr;
6288 PL_last_lop_op = OP_REQUIRE;
6290 return REPORT( (int)REQUIRE );
6296 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6300 LOP(OP_RENAME,XTERM);
6309 LOP(OP_RINDEX,XTERM);
6319 UNIDOR(OP_READLINE);
6332 LOP(OP_REVERSE,XTERM);
6335 UNIDOR(OP_READLINK);
6343 TERM(sublex_start());
6345 TOKEN(1); /* force error */
6348 checkcomma(s,PL_tokenbuf,"filehandle");
6358 LOP(OP_SELECT,XTERM);
6364 LOP(OP_SEMCTL,XTERM);
6367 LOP(OP_SEMGET,XTERM);
6370 LOP(OP_SEMOP,XTERM);
6376 LOP(OP_SETPGRP,XTERM);
6378 case KEY_setpriority:
6379 LOP(OP_SETPRIORITY,XTERM);
6381 case KEY_sethostent:
6387 case KEY_setservent:
6390 case KEY_setprotoent:
6400 LOP(OP_SEEKDIR,XTERM);
6402 case KEY_setsockopt:
6403 LOP(OP_SSOCKOPT,XTERM);
6409 LOP(OP_SHMCTL,XTERM);
6412 LOP(OP_SHMGET,XTERM);
6415 LOP(OP_SHMREAD,XTERM);
6418 LOP(OP_SHMWRITE,XTERM);
6421 LOP(OP_SHUTDOWN,XTERM);
6430 LOP(OP_SOCKET,XTERM);
6432 case KEY_socketpair:
6433 LOP(OP_SOCKPAIR,XTERM);
6436 checkcomma(s,PL_tokenbuf,"subroutine name");
6438 if (*s == ';' || *s == ')') /* probably a close */
6439 Perl_croak(aTHX_ "sort is now a reserved word");
6441 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6445 LOP(OP_SPLIT,XTERM);
6448 LOP(OP_SPRINTF,XTERM);
6451 LOP(OP_SPLICE,XTERM);
6466 LOP(OP_SUBSTR,XTERM);
6472 char tmpbuf[sizeof PL_tokenbuf];
6473 SSize_t tboffset = 0;
6474 expectation attrful;
6475 bool have_name, have_proto, bad_proto;
6476 const int key = tmp;
6481 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6482 SV *subtoken = newSVpvn(tstart, s - tstart);
6486 s = SKIPSPACE2(s,tmpwhite);
6491 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6492 (*s == ':' && s[1] == ':'))
6499 attrful = XATTRBLOCK;
6500 /* remember buffer pos'n for later force_word */
6501 tboffset = s - PL_oldbufptr;
6502 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6505 nametoke = newSVpvn(s, d - s);
6507 if (strchr(tmpbuf, ':'))
6508 sv_setpv(PL_subname, tmpbuf);
6510 sv_setsv(PL_subname,PL_curstname);
6511 sv_catpvs(PL_subname,"::");
6512 sv_catpvn(PL_subname,tmpbuf,len);
6519 CURMAD('X', nametoke);
6520 CURMAD('_', tmpwhite);
6521 (void) force_word(PL_oldbufptr + tboffset, WORD,
6524 s = SKIPSPACE2(d,tmpwhite);
6531 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6532 PL_expect = XTERMBLOCK;
6533 attrful = XATTRTERM;
6534 sv_setpvn(PL_subname,"?",1);
6538 if (key == KEY_format) {
6540 PL_lex_formbrack = PL_lex_brackets + 1;
6542 PL_thistoken = subtoken;
6546 (void) force_word(PL_oldbufptr + tboffset, WORD,
6552 /* Look for a prototype */
6556 s = scan_str(s,!!PL_madskills,FALSE);
6558 Perl_croak(aTHX_ "Prototype not terminated");
6559 /* strip spaces and check for bad characters */
6560 d = SvPVX(PL_lex_stuff);
6563 for (p = d; *p; ++p) {
6566 if (!strchr("$@%*;[]&\\", *p))
6571 if (bad_proto && ckWARN(WARN_SYNTAX))
6572 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6573 "Illegal character in prototype for %"SVf" : %s",
6574 (void*)PL_subname, d);
6575 SvCUR_set(PL_lex_stuff, tmp);
6580 CURMAD('q', PL_thisopen);
6581 CURMAD('_', tmpwhite);
6582 CURMAD('=', PL_thisstuff);
6583 CURMAD('Q', PL_thisclose);
6584 NEXTVAL_NEXTTOKE.opval =
6585 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6586 PL_lex_stuff = Nullsv;
6589 s = SKIPSPACE2(s,tmpwhite);
6597 if (*s == ':' && s[1] != ':')
6598 PL_expect = attrful;
6599 else if (*s != '{' && key == KEY_sub) {
6601 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6603 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, (void*)PL_subname);
6610 curmad('^', newSVpvn("",0));
6611 CURMAD('_', tmpwhite);
6615 PL_thistoken = subtoken;
6618 NEXTVAL_NEXTTOKE.opval =
6619 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6620 PL_lex_stuff = NULL;
6625 sv_setpv(PL_subname,
6626 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
6630 (void) force_word(PL_oldbufptr + tboffset, WORD,
6640 LOP(OP_SYSTEM,XREF);
6643 LOP(OP_SYMLINK,XTERM);
6646 LOP(OP_SYSCALL,XTERM);
6649 LOP(OP_SYSOPEN,XTERM);
6652 LOP(OP_SYSSEEK,XTERM);
6655 LOP(OP_SYSREAD,XTERM);
6658 LOP(OP_SYSWRITE,XTERM);
6662 TERM(sublex_start());
6683 LOP(OP_TRUNCATE,XTERM);
6695 yylval.ival = CopLINE(PL_curcop);
6699 yylval.ival = CopLINE(PL_curcop);
6703 LOP(OP_UNLINK,XTERM);
6709 LOP(OP_UNPACK,XTERM);
6712 LOP(OP_UTIME,XTERM);
6718 LOP(OP_UNSHIFT,XTERM);
6721 s = tokenize_use(1, s);
6731 yylval.ival = CopLINE(PL_curcop);
6735 yylval.ival = CopLINE(PL_curcop);
6739 PL_hints |= HINT_BLOCK_SCOPE;
6746 LOP(OP_WAITPID,XTERM);
6755 ctl_l[0] = toCTRL('L');
6757 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6760 /* Make sure $^L is defined */
6761 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6766 if (PL_expect == XOPERATOR)
6772 yylval.ival = OP_XOR;
6777 TERM(sublex_start());
6782 #pragma segment Main
6786 S_pending_ident(pTHX)
6791 /* pit holds the identifier we read and pending_ident is reset */
6792 char pit = PL_pending_ident;
6793 PL_pending_ident = 0;
6795 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6796 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6797 "### Pending identifier '%s'\n", PL_tokenbuf); });
6799 /* if we're in a my(), we can't allow dynamics here.
6800 $foo'bar has already been turned into $foo::bar, so
6801 just check for colons.
6803 if it's a legal name, the OP is a PADANY.
6806 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6807 if (strchr(PL_tokenbuf,':'))
6808 yyerror(Perl_form(aTHX_ "No package name allowed for "
6809 "variable %s in \"our\"",
6811 tmp = allocmy(PL_tokenbuf);
6814 if (strchr(PL_tokenbuf,':'))
6815 yyerror(Perl_form(aTHX_ PL_no_myglob,
6816 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
6818 yylval.opval = newOP(OP_PADANY, 0);
6819 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6825 build the ops for accesses to a my() variable.
6827 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6828 then used in a comparison. This catches most, but not
6829 all cases. For instance, it catches
6830 sort { my($a); $a <=> $b }
6832 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6833 (although why you'd do that is anyone's guess).
6836 if (!strchr(PL_tokenbuf,':')) {
6838 tmp = pad_findmy(PL_tokenbuf);
6839 if (tmp != NOT_IN_PAD) {
6840 /* might be an "our" variable" */
6841 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6842 /* build ops for a bareword */
6843 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6844 HEK * const stashname = HvNAME_HEK(stash);
6845 SV * const sym = newSVhek(stashname);
6846 sv_catpvs(sym, "::");
6847 sv_catpv(sym, PL_tokenbuf+1);
6848 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6849 yylval.opval->op_private = OPpCONST_ENTERED;
6852 ? (GV_ADDMULTI | GV_ADDINEVAL)
6855 ((PL_tokenbuf[0] == '$') ? SVt_PV
6856 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6861 /* if it's a sort block and they're naming $a or $b */
6862 if (PL_last_lop_op == OP_SORT &&
6863 PL_tokenbuf[0] == '$' &&
6864 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6867 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6868 d < PL_bufend && *d != '\n';
6871 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6872 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6878 yylval.opval = newOP(OP_PADANY, 0);
6879 yylval.opval->op_targ = tmp;
6885 Whine if they've said @foo in a doublequoted string,
6886 and @foo isn't a variable we can find in the symbol
6889 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
6890 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
6891 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
6892 && ckWARN(WARN_AMBIGUOUS))
6894 /* Downgraded from fatal to warning 20000522 mjd */
6895 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6896 "Possible unintended interpolation of %s in string",
6901 /* build ops for a bareword */
6902 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
6903 yylval.opval->op_private = OPpCONST_ENTERED;
6906 /* If the identifier refers to a stash, don't autovivify it.
6907 * Change 24660 had the side effect of causing symbol table
6908 * hashes to always be defined, even if they were freshly
6909 * created and the only reference in the entire program was
6910 * the single statement with the defined %foo::bar:: test.
6911 * It appears that all code in the wild doing this actually
6912 * wants to know whether sub-packages have been loaded, so
6913 * by avoiding auto-vivifying symbol tables, we ensure that
6914 * defined %foo::bar:: continues to be false, and the existing
6915 * tests still give the expected answers, even though what
6916 * they're actually testing has now changed subtly.
6918 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
6920 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
6921 ((PL_tokenbuf[0] == '$') ? SVt_PV
6922 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6928 * The following code was generated by perl_keyword.pl.
6932 Perl_keyword (pTHX_ const char *name, I32 len)
6937 case 1: /* 5 tokens of length 1 */
6969 case 2: /* 18 tokens of length 2 */
7115 case 3: /* 29 tokens of length 3 */
7119 if (name[1] == 'N' &&
7182 if (name[1] == 'i' &&
7204 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7222 if (name[1] == 'o' &&
7231 if (name[1] == 'e' &&
7240 if (name[1] == 'n' &&
7249 if (name[1] == 'o' &&
7258 if (name[1] == 'a' &&
7267 if (name[1] == 'o' &&
7329 if (name[1] == 'e' &&
7343 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
7369 if (name[1] == 'i' &&
7378 if (name[1] == 's' &&
7387 if (name[1] == 'e' &&
7396 if (name[1] == 'o' &&
7408 case 4: /* 41 tokens of length 4 */
7412 if (name[1] == 'O' &&
7422 if (name[1] == 'N' &&
7432 if (name[1] == 'i' &&
7442 if (name[1] == 'h' &&
7452 if (name[1] == 'u' &&
7465 if (name[2] == 'c' &&
7474 if (name[2] == 's' &&
7483 if (name[2] == 'a' &&
7519 if (name[1] == 'o' &&
7532 if (name[2] == 't' &&
7541 if (name[2] == 'o' &&
7550 if (name[2] == 't' &&
7559 if (name[2] == 'e' &&
7572 if (name[1] == 'o' &&
7585 if (name[2] == 'y' &&
7594 if (name[2] == 'l' &&
7610 if (name[2] == 's' &&
7619 if (name[2] == 'n' &&
7628 if (name[2] == 'c' &&
7641 if (name[1] == 'e' &&
7651 if (name[1] == 'p' &&
7664 if (name[2] == 'c' &&
7673 if (name[2] == 'p' &&
7682 if (name[2] == 's' &&
7698 if (name[2] == 'n' &&
7768 if (name[2] == 'r' &&
7777 if (name[2] == 'r' &&
7786 if (name[2] == 'a' &&
7802 if (name[2] == 'l' &&
7864 if (name[2] == 'e' &&
7867 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7880 case 5: /* 39 tokens of length 5 */
7884 if (name[1] == 'E' &&
7895 if (name[1] == 'H' &&
7909 if (name[2] == 'a' &&
7919 if (name[2] == 'a' &&
7936 if (name[2] == 'e' &&
7946 if (name[2] == 'e' &&
7950 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
7966 if (name[3] == 'i' &&
7975 if (name[3] == 'o' &&
8011 if (name[2] == 'o' &&
8021 if (name[2] == 'y' &&
8035 if (name[1] == 'l' &&
8049 if (name[2] == 'n' &&
8059 if (name[2] == 'o' &&
8073 if (name[1] == 'i' &&
8078 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8087 if (name[2] == 'd' &&
8097 if (name[2] == 'c' &&
8114 if (name[2] == 'c' &&
8124 if (name[2] == 't' &&
8138 if (name[1] == 'k' &&
8149 if (name[1] == 'r' &&
8163 if (name[2] == 's' &&
8173 if (name[2] == 'd' &&
8190 if (name[2] == 'm' &&
8200 if (name[2] == 'i' &&
8210 if (name[2] == 'e' &&
8220 if (name[2] == 'l' &&
8230 if (name[2] == 'a' &&
8243 if (name[3] == 't' &&
8246 return (FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8252 if (name[3] == 'd' &&
8269 if (name[1] == 'i' &&
8283 if (name[2] == 'a' &&
8296 if (name[3] == 'e' &&
8331 if (name[2] == 'i' &&
8348 if (name[2] == 'i' &&
8358 if (name[2] == 'i' &&
8375 case 6: /* 33 tokens of length 6 */
8379 if (name[1] == 'c' &&
8394 if (name[2] == 'l' &&
8405 if (name[2] == 'r' &&
8420 if (name[1] == 'e' &&
8435 if (name[2] == 's' &&
8440 if(ckWARN_d(WARN_SYNTAX))
8441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8447 if (name[2] == 'i' &&
8465 if (name[2] == 'l' &&
8476 if (name[2] == 'r' &&
8491 if (name[1] == 'm' &&
8506 if (name[2] == 'n' &&
8517 if (name[2] == 's' &&
8532 if (name[1] == 's' &&
8538 if (name[4] == 't' &&
8547 if (name[4] == 'e' &&
8556 if (name[4] == 'c' &&
8565 if (name[4] == 'n' &&
8581 if (name[1] == 'r' &&
8599 if (name[3] == 'a' &&
8609 if (name[3] == 'u' &&
8623 if (name[2] == 'n' &&
8641 if (name[2] == 'a' &&
8655 if (name[3] == 'e' &&
8668 if (name[4] == 't' &&
8677 if (name[4] == 'e' &&
8699 if (name[4] == 't' &&
8708 if (name[4] == 'e' &&
8724 if (name[2] == 'c' &&
8735 if (name[2] == 'l' &&
8746 if (name[2] == 'b' &&
8757 if (name[2] == 's' &&
8780 if (name[4] == 's' &&
8789 if (name[4] == 'n' &&
8802 if (name[3] == 'a' &&
8819 if (name[1] == 'a' &&
8834 case 7: /* 29 tokens of length 7 */
8838 if (name[1] == 'E' &&
8851 if (name[1] == '_' &&
8864 if (name[1] == 'i' &&
8871 return -KEY_binmode;
8877 if (name[1] == 'o' &&
8884 return -KEY_connect;
8893 if (name[2] == 'm' &&
8899 return -KEY_dbmopen;
8910 if (name[4] == 'u' &&
8914 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
8920 if (name[4] == 'n' &&
8941 if (name[1] == 'o' &&
8954 if (name[1] == 'e' &&
8961 if (name[5] == 'r' &&
8964 return -KEY_getpgrp;
8970 if (name[5] == 'i' &&
8973 return -KEY_getppid;
8986 if (name[1] == 'c' &&
8993 return -KEY_lcfirst;
8999 if (name[1] == 'p' &&
9006 return -KEY_opendir;
9012 if (name[1] == 'a' &&
9030 if (name[3] == 'd' &&
9035 return -KEY_readdir;
9041 if (name[3] == 'u' &&
9052 if (name[3] == 'e' &&
9057 return -KEY_reverse;
9076 if (name[3] == 'k' &&
9081 return -KEY_seekdir;
9087 if (name[3] == 'p' &&
9092 return -KEY_setpgrp;
9102 if (name[2] == 'm' &&
9108 return -KEY_shmread;
9114 if (name[2] == 'r' &&
9120 return -KEY_sprintf;
9129 if (name[3] == 'l' &&
9134 return -KEY_symlink;
9143 if (name[4] == 'a' &&
9147 return -KEY_syscall;
9153 if (name[4] == 'p' &&
9157 return -KEY_sysopen;
9163 if (name[4] == 'e' &&
9167 return -KEY_sysread;
9173 if (name[4] == 'e' &&
9177 return -KEY_sysseek;
9195 if (name[1] == 'e' &&
9202 return -KEY_telldir;
9211 if (name[2] == 'f' &&
9217 return -KEY_ucfirst;
9223 if (name[2] == 's' &&
9229 return -KEY_unshift;
9239 if (name[1] == 'a' &&
9246 return -KEY_waitpid;
9255 case 8: /* 26 tokens of length 8 */
9259 if (name[1] == 'U' &&
9267 return KEY_AUTOLOAD;
9278 if (name[3] == 'A' &&
9284 return KEY___DATA__;
9290 if (name[3] == 'I' &&
9296 return -KEY___FILE__;
9302 if (name[3] == 'I' &&
9308 return -KEY___LINE__;
9324 if (name[2] == 'o' &&
9331 return -KEY_closedir;
9337 if (name[2] == 'n' &&
9344 return -KEY_continue;
9354 if (name[1] == 'b' &&
9362 return -KEY_dbmclose;
9368 if (name[1] == 'n' &&
9374 if (name[4] == 'r' &&
9379 return -KEY_endgrent;
9385 if (name[4] == 'w' &&
9390 return -KEY_endpwent;
9403 if (name[1] == 'o' &&
9411 return -KEY_formline;
9417 if (name[1] == 'e' &&
9428 if (name[6] == 'n' &&
9431 return -KEY_getgrent;
9437 if (name[6] == 'i' &&
9440 return -KEY_getgrgid;
9446 if (name[6] == 'a' &&
9449 return -KEY_getgrnam;
9462 if (name[4] == 'o' &&
9467 return -KEY_getlogin;
9478 if (name[6] == 'n' &&
9481 return -KEY_getpwent;
9487 if (name[6] == 'a' &&
9490 return -KEY_getpwnam;
9496 if (name[6] == 'i' &&
9499 return -KEY_getpwuid;
9519 if (name[1] == 'e' &&
9526 if (name[5] == 'i' &&
9533 return -KEY_readline;
9538 return -KEY_readlink;
9549 if (name[5] == 'i' &&
9553 return -KEY_readpipe;
9574 if (name[4] == 'r' &&
9579 return -KEY_setgrent;
9585 if (name[4] == 'w' &&
9590 return -KEY_setpwent;
9606 if (name[3] == 'w' &&
9612 return -KEY_shmwrite;
9618 if (name[3] == 't' &&
9624 return -KEY_shutdown;
9634 if (name[2] == 's' &&
9641 return -KEY_syswrite;
9651 if (name[1] == 'r' &&
9659 return -KEY_truncate;
9668 case 9: /* 8 tokens of length 9 */
9672 if (name[1] == 'n' &&
9681 return -KEY_endnetent;
9687 if (name[1] == 'e' &&
9696 return -KEY_getnetent;
9702 if (name[1] == 'o' &&
9711 return -KEY_localtime;
9717 if (name[1] == 'r' &&
9726 return KEY_prototype;
9732 if (name[1] == 'u' &&
9741 return -KEY_quotemeta;
9747 if (name[1] == 'e' &&
9756 return -KEY_rewinddir;
9762 if (name[1] == 'e' &&
9771 return -KEY_setnetent;
9777 if (name[1] == 'a' &&
9786 return -KEY_wantarray;
9795 case 10: /* 9 tokens of length 10 */
9799 if (name[1] == 'n' &&
9805 if (name[4] == 'o' &&
9812 return -KEY_endhostent;
9818 if (name[4] == 'e' &&
9825 return -KEY_endservent;
9838 if (name[1] == 'e' &&
9844 if (name[4] == 'o' &&
9851 return -KEY_gethostent;
9860 if (name[5] == 'r' &&
9866 return -KEY_getservent;
9872 if (name[5] == 'c' &&
9878 return -KEY_getsockopt;
9903 if (name[4] == 'o' &&
9910 return -KEY_sethostent;
9919 if (name[5] == 'r' &&
9925 return -KEY_setservent;
9931 if (name[5] == 'c' &&
9937 return -KEY_setsockopt;
9954 if (name[2] == 'c' &&
9963 return -KEY_socketpair;
9976 case 11: /* 8 tokens of length 11 */
9980 if (name[1] == '_' &&
9991 return -KEY___PACKAGE__;
9997 if (name[1] == 'n' &&
10007 { /* endprotoent */
10008 return -KEY_endprotoent;
10014 if (name[1] == 'e' &&
10023 if (name[5] == 'e' &&
10029 { /* getpeername */
10030 return -KEY_getpeername;
10039 if (name[6] == 'o' &&
10044 { /* getpriority */
10045 return -KEY_getpriority;
10051 if (name[6] == 't' &&
10056 { /* getprotoent */
10057 return -KEY_getprotoent;
10071 if (name[4] == 'o' &&
10078 { /* getsockname */
10079 return -KEY_getsockname;
10092 if (name[1] == 'e' &&
10100 if (name[6] == 'o' &&
10105 { /* setpriority */
10106 return -KEY_setpriority;
10112 if (name[6] == 't' &&
10117 { /* setprotoent */
10118 return -KEY_setprotoent;
10134 case 12: /* 2 tokens of length 12 */
10135 if (name[0] == 'g' &&
10147 if (name[9] == 'd' &&
10150 { /* getnetbyaddr */
10151 return -KEY_getnetbyaddr;
10157 if (name[9] == 'a' &&
10160 { /* getnetbyname */
10161 return -KEY_getnetbyname;
10173 case 13: /* 4 tokens of length 13 */
10174 if (name[0] == 'g' &&
10181 if (name[4] == 'o' &&
10190 if (name[10] == 'd' &&
10193 { /* gethostbyaddr */
10194 return -KEY_gethostbyaddr;
10200 if (name[10] == 'a' &&
10203 { /* gethostbyname */
10204 return -KEY_gethostbyname;
10217 if (name[4] == 'e' &&
10226 if (name[10] == 'a' &&
10229 { /* getservbyname */
10230 return -KEY_getservbyname;
10236 if (name[10] == 'o' &&
10239 { /* getservbyport */
10240 return -KEY_getservbyport;
10259 case 14: /* 1 tokens of length 14 */
10260 if (name[0] == 'g' &&
10274 { /* getprotobyname */
10275 return -KEY_getprotobyname;
10280 case 16: /* 1 tokens of length 16 */
10281 if (name[0] == 'g' &&
10297 { /* getprotobynumber */
10298 return -KEY_getprotobynumber;
10312 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10316 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10317 if (ckWARN(WARN_SYNTAX)) {
10320 for (w = s+2; *w && level; w++) {
10323 else if (*w == ')')
10326 while (isSPACE(*w))
10328 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
10329 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10330 "%s (...) interpreted as function",name);
10333 while (s < PL_bufend && isSPACE(*s))
10337 while (s < PL_bufend && isSPACE(*s))
10339 if (isIDFIRST_lazy_if(s,UTF)) {
10340 const char * const w = s++;
10341 while (isALNUM_lazy_if(s,UTF))
10343 while (s < PL_bufend && isSPACE(*s))
10347 if (keyword(w, s - w))
10350 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10351 if (gv && GvCVu(gv))
10353 Perl_croak(aTHX_ "No comma allowed after %s", what);
10358 /* Either returns sv, or mortalizes sv and returns a new SV*.
10359 Best used as sv=new_constant(..., sv, ...).
10360 If s, pv are NULL, calls subroutine with one argument,
10361 and type is used with error messages only. */
10364 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10368 HV * const table = GvHV(PL_hintgv); /* ^H */
10372 const char *why1 = "", *why2 = "", *why3 = "";
10374 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10377 why2 = strEQ(key,"charnames")
10378 ? "(possibly a missing \"use charnames ...\")"
10380 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10381 (type ? type: "undef"), why2);
10383 /* This is convoluted and evil ("goto considered harmful")
10384 * but I do not understand the intricacies of all the different
10385 * failure modes of %^H in here. The goal here is to make
10386 * the most probable error message user-friendly. --jhi */
10391 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10392 (type ? type: "undef"), why1, why2, why3);
10394 yyerror(SvPVX_const(msg));
10398 cvp = hv_fetch(table, key, strlen(key), FALSE);
10399 if (!cvp || !SvOK(*cvp)) {
10402 why3 = "} is not defined";
10405 sv_2mortal(sv); /* Parent created it permanently */
10408 pv = sv_2mortal(newSVpvn(s, len));
10410 typesv = sv_2mortal(newSVpv(type, 0));
10412 typesv = &PL_sv_undef;
10414 PUSHSTACKi(PERLSI_OVERLOAD);
10426 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10430 /* Check the eval first */
10431 if (!PL_in_eval && SvTRUE(ERRSV)) {
10432 sv_catpvs(ERRSV, "Propagated");
10433 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10435 res = SvREFCNT_inc_simple(sv);
10439 SvREFCNT_inc_simple_void(res);
10448 why1 = "Call to &{$^H{";
10450 why3 = "}} did not return a defined value";
10458 /* Returns a NUL terminated string, with the length of the string written to
10462 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10465 register char *d = dest;
10466 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10469 Perl_croak(aTHX_ ident_too_long);
10470 if (isALNUM(*s)) /* UTF handled below */
10472 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10477 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10481 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10482 char *t = s + UTF8SKIP(s);
10484 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10488 Perl_croak(aTHX_ ident_too_long);
10489 Copy(s, d, len, char);
10502 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10505 char *bracket = NULL;
10507 register char *d = dest;
10508 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10513 while (isDIGIT(*s)) {
10515 Perl_croak(aTHX_ ident_too_long);
10522 Perl_croak(aTHX_ ident_too_long);
10523 if (isALNUM(*s)) /* UTF handled below */
10525 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10530 else if (*s == ':' && s[1] == ':') {
10534 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10535 char *t = s + UTF8SKIP(s);
10536 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10538 if (d + (t - s) > e)
10539 Perl_croak(aTHX_ ident_too_long);
10540 Copy(s, d, t - s, char);
10551 if (PL_lex_state != LEX_NORMAL)
10552 PL_lex_state = LEX_INTERPENDMAYBE;
10555 if (*s == '$' && s[1] &&
10556 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10569 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10574 if (isSPACE(s[-1])) {
10576 const char ch = *s++;
10577 if (!SPACE_OR_TAB(ch)) {
10583 if (isIDFIRST_lazy_if(d,UTF)) {
10587 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10588 end += UTF8SKIP(end);
10589 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10590 end += UTF8SKIP(end);
10592 Copy(s, d, end - s, char);
10597 while ((isALNUM(*s) || *s == ':') && d < e)
10600 Perl_croak(aTHX_ ident_too_long);
10603 while (s < send && SPACE_OR_TAB(*s))
10605 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10606 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
10607 const char * const brack = (*s == '[') ? "[...]" : "{...}";
10608 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10609 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10610 funny, dest, brack, funny, dest, brack);
10613 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10617 /* Handle extended ${^Foo} variables
10618 * 1999-02-27 mjd-perl-patch@plover.com */
10619 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10623 while (isALNUM(*s) && d < e) {
10627 Perl_croak(aTHX_ ident_too_long);
10632 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10633 PL_lex_state = LEX_INTERPEND;
10636 if (PL_lex_state == LEX_NORMAL) {
10637 if (ckWARN(WARN_AMBIGUOUS) &&
10638 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
10642 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10643 "Ambiguous use of %c{%s} resolved to %c%s",
10644 funny, dest, funny, dest);
10649 s = bracket; /* let the parser handle it */
10653 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10654 PL_lex_state = LEX_INTERPEND;
10659 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10661 PERL_UNUSED_CONTEXT;
10664 else if (ch == 'g')
10665 *pmfl |= PMf_GLOBAL;
10666 else if (ch == 'c')
10667 *pmfl |= PMf_CONTINUE;
10668 else if (ch == 'o')
10670 else if (ch == 'm')
10671 *pmfl |= PMf_MULTILINE;
10672 else if (ch == 's')
10673 *pmfl |= PMf_SINGLELINE;
10674 else if (ch == 'x')
10675 *pmfl |= PMf_EXTENDED;
10679 S_scan_pat(pTHX_ char *start, I32 type)
10683 char *s = scan_str(start,!!PL_madskills,FALSE);
10684 const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
10691 const char * const delimiter = skipspace(start);
10692 Perl_croak(aTHX_ *delimiter == '?'
10693 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10694 : "Search pattern not terminated" );
10697 pm = (PMOP*)newPMOP(type, 0);
10698 if (PL_multi_open == '?')
10699 pm->op_pmflags |= PMf_ONCE;
10703 while (*s && strchr(valid_flags, *s))
10704 pmflag(&pm->op_pmflags,*s++);
10706 if (PL_madskills && modstart != s) {
10707 SV* tmptoken = newSVpvn(modstart, s - modstart);
10708 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10711 /* issue a warning if /c is specified,but /g is not */
10712 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10713 && ckWARN(WARN_REGEXP))
10715 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
10718 pm->op_pmpermflags = pm->op_pmflags;
10720 PL_lex_op = (OP*)pm;
10721 yylval.ival = OP_MATCH;
10726 S_scan_subst(pTHX_ char *start)
10737 yylval.ival = OP_NULL;
10739 s = scan_str(start,!!PL_madskills,FALSE);
10742 Perl_croak(aTHX_ "Substitution pattern not terminated");
10744 if (s[-1] == PL_multi_open)
10747 if (PL_madskills) {
10748 CURMAD('q', PL_thisopen);
10749 CURMAD('_', PL_thiswhite);
10750 CURMAD('E', PL_thisstuff);
10751 CURMAD('Q', PL_thisclose);
10752 PL_realtokenstart = s - SvPVX(PL_linestr);
10756 first_start = PL_multi_start;
10757 s = scan_str(s,!!PL_madskills,FALSE);
10759 if (PL_lex_stuff) {
10760 SvREFCNT_dec(PL_lex_stuff);
10761 PL_lex_stuff = NULL;
10763 Perl_croak(aTHX_ "Substitution replacement not terminated");
10765 PL_multi_start = first_start; /* so whole substitution is taken together */
10767 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10770 if (PL_madskills) {
10771 CURMAD('z', PL_thisopen);
10772 CURMAD('R', PL_thisstuff);
10773 CURMAD('Z', PL_thisclose);
10783 else if (strchr("iogcmsx", *s))
10784 pmflag(&pm->op_pmflags,*s++);
10790 if (PL_madskills) {
10792 curmad('m', newSVpvn(modstart, s - modstart));
10793 append_madprops(PL_thismad, (OP*)pm, 0);
10797 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10798 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10802 SV * const repl = newSVpvs("");
10804 PL_sublex_info.super_bufptr = s;
10805 PL_sublex_info.super_bufend = PL_bufend;
10807 pm->op_pmflags |= PMf_EVAL;
10809 sv_catpv(repl, es ? "eval " : "do ");
10810 sv_catpvs(repl, "{");
10811 sv_catsv(repl, PL_lex_repl);
10812 if (strchr(SvPVX(PL_lex_repl), '#'))
10813 sv_catpvs(repl, "\n");
10814 sv_catpvs(repl, "}");
10816 SvREFCNT_dec(PL_lex_repl);
10817 PL_lex_repl = repl;
10820 pm->op_pmpermflags = pm->op_pmflags;
10821 PL_lex_op = (OP*)pm;
10822 yylval.ival = OP_SUBST;
10827 S_scan_trans(pTHX_ char *start)
10840 yylval.ival = OP_NULL;
10842 s = scan_str(start,!!PL_madskills,FALSE);
10844 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10846 if (s[-1] == PL_multi_open)
10849 if (PL_madskills) {
10850 CURMAD('q', PL_thisopen);
10851 CURMAD('_', PL_thiswhite);
10852 CURMAD('E', PL_thisstuff);
10853 CURMAD('Q', PL_thisclose);
10854 PL_realtokenstart = s - SvPVX(PL_linestr);
10858 s = scan_str(s,!!PL_madskills,FALSE);
10860 if (PL_lex_stuff) {
10861 SvREFCNT_dec(PL_lex_stuff);
10862 PL_lex_stuff = NULL;
10864 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10866 if (PL_madskills) {
10867 CURMAD('z', PL_thisopen);
10868 CURMAD('R', PL_thisstuff);
10869 CURMAD('Z', PL_thisclose);
10872 complement = del = squash = 0;
10879 complement = OPpTRANS_COMPLEMENT;
10882 del = OPpTRANS_DELETE;
10885 squash = OPpTRANS_SQUASH;
10894 Newx(tbl, complement&&!del?258:256, short);
10895 o = newPVOP(OP_TRANS, 0, (char*)tbl);
10896 o->op_private &= ~OPpTRANS_ALL;
10897 o->op_private |= del|squash|complement|
10898 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
10899 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
10902 yylval.ival = OP_TRANS;
10905 if (PL_madskills) {
10907 curmad('m', newSVpvn(modstart, s - modstart));
10908 append_madprops(PL_thismad, o, 0);
10917 S_scan_heredoc(pTHX_ register char *s)
10921 I32 op_type = OP_SCALAR;
10925 const char *found_newline;
10929 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
10931 I32 stuffstart = s - SvPVX(PL_linestr);
10934 PL_realtokenstart = -1;
10939 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10943 while (SPACE_OR_TAB(*peek))
10945 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10948 s = delimcpy(d, e, s, PL_bufend, term, &len);
10958 if (!isALNUM_lazy_if(s,UTF))
10959 deprecate_old("bare << to mean <<\"\"");
10960 for (; isALNUM_lazy_if(s,UTF); s++) {
10965 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10966 Perl_croak(aTHX_ "Delimiter for here document is too long");
10969 len = d - PL_tokenbuf;
10972 if (PL_madskills) {
10973 tstart = PL_tokenbuf + !outer;
10974 PL_thisclose = newSVpvn(tstart, len - !outer);
10975 tstart = SvPVX(PL_linestr) + stuffstart;
10976 PL_thisopen = newSVpvn(tstart, s - tstart);
10977 stuffstart = s - SvPVX(PL_linestr);
10980 #ifndef PERL_STRICT_CR
10981 d = strchr(s, '\r');
10983 char * const olds = s;
10985 while (s < PL_bufend) {
10991 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11000 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11007 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
11008 herewas = newSVpvn(s,PL_bufend-s);
11012 herewas = newSVpvn(s-1,found_newline-s+1);
11015 herewas = newSVpvn(s,found_newline-s);
11019 if (PL_madskills) {
11020 tstart = SvPVX(PL_linestr) + stuffstart;
11022 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11024 PL_thisstuff = newSVpvn(tstart, s - tstart);
11027 s += SvCUR(herewas);
11030 stuffstart = s - SvPVX(PL_linestr);
11036 tmpstr = newSV(79);
11037 sv_upgrade(tmpstr, SVt_PVIV);
11038 if (term == '\'') {
11039 op_type = OP_CONST;
11040 SvIV_set(tmpstr, -1);
11042 else if (term == '`') {
11043 op_type = OP_BACKTICK;
11044 SvIV_set(tmpstr, '\\');
11048 PL_multi_start = CopLINE(PL_curcop);
11049 PL_multi_open = PL_multi_close = '<';
11050 term = *PL_tokenbuf;
11051 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11052 char * const bufptr = PL_sublex_info.super_bufptr;
11053 char * const bufend = PL_sublex_info.super_bufend;
11054 char * const olds = s - SvCUR(herewas);
11055 s = strchr(bufptr, '\n');
11059 while (s < bufend &&
11060 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11062 CopLINE_inc(PL_curcop);
11065 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11066 missingterm(PL_tokenbuf);
11068 sv_setpvn(herewas,bufptr,d-bufptr+1);
11069 sv_setpvn(tmpstr,d+1,s-d);
11071 sv_catpvn(herewas,s,bufend-s);
11072 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11079 while (s < PL_bufend &&
11080 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11082 CopLINE_inc(PL_curcop);
11084 if (s >= PL_bufend) {
11085 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11086 missingterm(PL_tokenbuf);
11088 sv_setpvn(tmpstr,d+1,s-d);
11090 if (PL_madskills) {
11092 sv_catpvn(PL_thisstuff, d + 1, s - d);
11094 PL_thisstuff = newSVpvn(d + 1, s - d);
11095 stuffstart = s - SvPVX(PL_linestr);
11099 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11101 sv_catpvn(herewas,s,PL_bufend-s);
11102 sv_setsv(PL_linestr,herewas);
11103 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11104 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11105 PL_last_lop = PL_last_uni = NULL;
11108 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
11109 while (s >= PL_bufend) { /* multiple line string? */
11111 if (PL_madskills) {
11112 tstart = SvPVX(PL_linestr) + stuffstart;
11114 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11116 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11120 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11121 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11122 missingterm(PL_tokenbuf);
11125 stuffstart = s - SvPVX(PL_linestr);
11127 CopLINE_inc(PL_curcop);
11128 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11129 PL_last_lop = PL_last_uni = NULL;
11130 #ifndef PERL_STRICT_CR
11131 if (PL_bufend - PL_linestart >= 2) {
11132 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11133 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11135 PL_bufend[-2] = '\n';
11137 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11139 else if (PL_bufend[-1] == '\r')
11140 PL_bufend[-1] = '\n';
11142 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11143 PL_bufend[-1] = '\n';
11145 if (PERLDB_LINE && PL_curstash != PL_debstash) {
11146 SV * const sv = newSV(0);
11148 sv_upgrade(sv, SVt_PVMG);
11149 sv_setsv(sv,PL_linestr);
11150 (void)SvIOK_on(sv);
11152 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
11154 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11155 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11156 *(SvPVX(PL_linestr) + off ) = ' ';
11157 sv_catsv(PL_linestr,herewas);
11158 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11159 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11163 sv_catsv(tmpstr,PL_linestr);
11168 PL_multi_end = CopLINE(PL_curcop);
11169 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11170 SvPV_shrink_to_cur(tmpstr);
11172 SvREFCNT_dec(herewas);
11174 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11176 else if (PL_encoding)
11177 sv_recode_to_utf8(tmpstr, PL_encoding);
11179 PL_lex_stuff = tmpstr;
11180 yylval.ival = op_type;
11184 /* scan_inputsymbol
11185 takes: current position in input buffer
11186 returns: new position in input buffer
11187 side-effects: yylval and lex_op are set.
11192 <FH> read from filehandle
11193 <pkg::FH> read from package qualified filehandle
11194 <pkg'FH> read from package qualified filehandle
11195 <$fh> read from filehandle in $fh
11196 <*.h> filename glob
11201 S_scan_inputsymbol(pTHX_ char *start)
11204 register char *s = start; /* current position in buffer */
11208 char *d = PL_tokenbuf; /* start of temp holding space */
11209 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11211 end = strchr(s, '\n');
11214 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11216 /* die if we didn't have space for the contents of the <>,
11217 or if it didn't end, or if we see a newline
11220 if (len >= (I32)sizeof PL_tokenbuf)
11221 Perl_croak(aTHX_ "Excessively long <> operator");
11223 Perl_croak(aTHX_ "Unterminated <> operator");
11228 Remember, only scalar variables are interpreted as filehandles by
11229 this code. Anything more complex (e.g., <$fh{$num}>) will be
11230 treated as a glob() call.
11231 This code makes use of the fact that except for the $ at the front,
11232 a scalar variable and a filehandle look the same.
11234 if (*d == '$' && d[1]) d++;
11236 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11237 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11240 /* If we've tried to read what we allow filehandles to look like, and
11241 there's still text left, then it must be a glob() and not a getline.
11242 Use scan_str to pull out the stuff between the <> and treat it
11243 as nothing more than a string.
11246 if (d - PL_tokenbuf != len) {
11247 yylval.ival = OP_GLOB;
11249 s = scan_str(start,!!PL_madskills,FALSE);
11251 Perl_croak(aTHX_ "Glob not terminated");
11255 bool readline_overriden = FALSE;
11258 /* we're in a filehandle read situation */
11261 /* turn <> into <ARGV> */
11263 Copy("ARGV",d,5,char);
11265 /* Check whether readline() is overriden */
11266 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11268 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11270 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11271 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
11272 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11273 readline_overriden = TRUE;
11275 /* if <$fh>, create the ops to turn the variable into a
11279 /* try to find it in the pad for this block, otherwise find
11280 add symbol table ops
11282 const PADOFFSET tmp = pad_findmy(d);
11283 if (tmp != NOT_IN_PAD) {
11284 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11285 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11286 HEK * const stashname = HvNAME_HEK(stash);
11287 SV * const sym = sv_2mortal(newSVhek(stashname));
11288 sv_catpvs(sym, "::");
11289 sv_catpv(sym, d+1);
11294 OP * const o = newOP(OP_PADSV, 0);
11296 PL_lex_op = readline_overriden
11297 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11298 append_elem(OP_LIST, o,
11299 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11300 : (OP*)newUNOP(OP_READLINE, 0, o);
11309 ? (GV_ADDMULTI | GV_ADDINEVAL)
11312 PL_lex_op = readline_overriden
11313 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11314 append_elem(OP_LIST,
11315 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11316 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11317 : (OP*)newUNOP(OP_READLINE, 0,
11318 newUNOP(OP_RV2SV, 0,
11319 newGVOP(OP_GV, 0, gv)));
11321 if (!readline_overriden)
11322 PL_lex_op->op_flags |= OPf_SPECIAL;
11323 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11324 yylval.ival = OP_NULL;
11327 /* If it's none of the above, it must be a literal filehandle
11328 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11330 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11331 PL_lex_op = readline_overriden
11332 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11333 append_elem(OP_LIST,
11334 newGVOP(OP_GV, 0, gv),
11335 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11336 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11337 yylval.ival = OP_NULL;
11346 takes: start position in buffer
11347 keep_quoted preserve \ on the embedded delimiter(s)
11348 keep_delims preserve the delimiters around the string
11349 returns: position to continue reading from buffer
11350 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11351 updates the read buffer.
11353 This subroutine pulls a string out of the input. It is called for:
11354 q single quotes q(literal text)
11355 ' single quotes 'literal text'
11356 qq double quotes qq(interpolate $here please)
11357 " double quotes "interpolate $here please"
11358 qx backticks qx(/bin/ls -l)
11359 ` backticks `/bin/ls -l`
11360 qw quote words @EXPORT_OK = qw( func() $spam )
11361 m// regexp match m/this/
11362 s/// regexp substitute s/this/that/
11363 tr/// string transliterate tr/this/that/
11364 y/// string transliterate y/this/that/
11365 ($*@) sub prototypes sub foo ($)
11366 (stuff) sub attr parameters sub foo : attr(stuff)
11367 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11369 In most of these cases (all but <>, patterns and transliterate)
11370 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11371 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11372 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11375 It skips whitespace before the string starts, and treats the first
11376 character as the delimiter. If the delimiter is one of ([{< then
11377 the corresponding "close" character )]}> is used as the closing
11378 delimiter. It allows quoting of delimiters, and if the string has
11379 balanced delimiters ([{<>}]) it allows nesting.
11381 On success, the SV with the resulting string is put into lex_stuff or,
11382 if that is already non-NULL, into lex_repl. The second case occurs only
11383 when parsing the RHS of the special constructs s/// and tr/// (y///).
11384 For convenience, the terminating delimiter character is stuffed into
11389 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11392 SV *sv; /* scalar value: string */
11393 char *tmps; /* temp string, used for delimiter matching */
11394 register char *s = start; /* current position in the buffer */
11395 register char term; /* terminating character */
11396 register char *to; /* current position in the sv's data */
11397 I32 brackets = 1; /* bracket nesting level */
11398 bool has_utf8 = FALSE; /* is there any utf8 content? */
11399 I32 termcode; /* terminating char. code */
11400 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11401 STRLEN termlen; /* length of terminating string */
11402 char *last = NULL; /* last position for nesting bracket */
11408 /* skip space before the delimiter */
11414 if (PL_realtokenstart >= 0) {
11415 stuffstart = PL_realtokenstart;
11416 PL_realtokenstart = -1;
11419 stuffstart = start - SvPVX(PL_linestr);
11421 /* mark where we are, in case we need to report errors */
11424 /* after skipping whitespace, the next character is the terminator */
11427 termcode = termstr[0] = term;
11431 termcode = utf8_to_uvchr((U8*)s, &termlen);
11432 Copy(s, termstr, termlen, U8);
11433 if (!UTF8_IS_INVARIANT(term))
11437 /* mark where we are */
11438 PL_multi_start = CopLINE(PL_curcop);
11439 PL_multi_open = term;
11441 /* find corresponding closing delimiter */
11442 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11443 termcode = termstr[0] = term = tmps[5];
11445 PL_multi_close = term;
11447 /* create a new SV to hold the contents. 79 is the SV's initial length.
11448 What a random number. */
11450 sv_upgrade(sv, SVt_PVIV);
11451 SvIV_set(sv, termcode);
11452 (void)SvPOK_only(sv); /* validate pointer */
11454 /* move past delimiter and try to read a complete string */
11456 sv_catpvn(sv, s, termlen);
11459 tstart = SvPVX(PL_linestr) + stuffstart;
11460 if (!PL_thisopen && !keep_delims) {
11461 PL_thisopen = newSVpvn(tstart, s - tstart);
11462 stuffstart = s - SvPVX(PL_linestr);
11466 if (PL_encoding && !UTF) {
11470 int offset = s - SvPVX_const(PL_linestr);
11471 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11472 &offset, (char*)termstr, termlen);
11473 const char * const ns = SvPVX_const(PL_linestr) + offset;
11474 char * const svlast = SvEND(sv) - 1;
11476 for (; s < ns; s++) {
11477 if (*s == '\n' && !PL_rsfp)
11478 CopLINE_inc(PL_curcop);
11481 goto read_more_line;
11483 /* handle quoted delimiters */
11484 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11486 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11488 if ((svlast-1 - t) % 2) {
11489 if (!keep_quoted) {
11490 *(svlast-1) = term;
11492 SvCUR_set(sv, SvCUR(sv) - 1);
11497 if (PL_multi_open == PL_multi_close) {
11505 for (t = w = last; t < svlast; w++, t++) {
11506 /* At here, all closes are "was quoted" one,
11507 so we don't check PL_multi_close. */
11509 if (!keep_quoted && *(t+1) == PL_multi_open)
11514 else if (*t == PL_multi_open)
11522 SvCUR_set(sv, w - SvPVX_const(sv));
11525 if (--brackets <= 0)
11530 if (!keep_delims) {
11531 SvCUR_set(sv, SvCUR(sv) - 1);
11537 /* extend sv if need be */
11538 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11539 /* set 'to' to the next character in the sv's string */
11540 to = SvPVX(sv)+SvCUR(sv);
11542 /* if open delimiter is the close delimiter read unbridle */
11543 if (PL_multi_open == PL_multi_close) {
11544 for (; s < PL_bufend; s++,to++) {
11545 /* embedded newlines increment the current line number */
11546 if (*s == '\n' && !PL_rsfp)
11547 CopLINE_inc(PL_curcop);
11548 /* handle quoted delimiters */
11549 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11550 if (!keep_quoted && s[1] == term)
11552 /* any other quotes are simply copied straight through */
11556 /* terminate when run out of buffer (the for() condition), or
11557 have found the terminator */
11558 else if (*s == term) {
11561 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11564 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11570 /* if the terminator isn't the same as the start character (e.g.,
11571 matched brackets), we have to allow more in the quoting, and
11572 be prepared for nested brackets.
11575 /* read until we run out of string, or we find the terminator */
11576 for (; s < PL_bufend; s++,to++) {
11577 /* embedded newlines increment the line count */
11578 if (*s == '\n' && !PL_rsfp)
11579 CopLINE_inc(PL_curcop);
11580 /* backslashes can escape the open or closing characters */
11581 if (*s == '\\' && s+1 < PL_bufend) {
11582 if (!keep_quoted &&
11583 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11588 /* allow nested opens and closes */
11589 else if (*s == PL_multi_close && --brackets <= 0)
11591 else if (*s == PL_multi_open)
11593 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11598 /* terminate the copied string and update the sv's end-of-string */
11600 SvCUR_set(sv, to - SvPVX_const(sv));
11603 * this next chunk reads more into the buffer if we're not done yet
11607 break; /* handle case where we are done yet :-) */
11609 #ifndef PERL_STRICT_CR
11610 if (to - SvPVX_const(sv) >= 2) {
11611 if ((to[-2] == '\r' && to[-1] == '\n') ||
11612 (to[-2] == '\n' && to[-1] == '\r'))
11616 SvCUR_set(sv, to - SvPVX_const(sv));
11618 else if (to[-1] == '\r')
11621 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11626 /* if we're out of file, or a read fails, bail and reset the current
11627 line marker so we can report where the unterminated string began
11630 if (PL_madskills) {
11631 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11633 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11635 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11639 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11641 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11647 /* we read a line, so increment our line counter */
11648 CopLINE_inc(PL_curcop);
11650 /* update debugger info */
11651 if (PERLDB_LINE && PL_curstash != PL_debstash) {
11652 SV * const line_sv = newSV(0);
11654 sv_upgrade(line_sv, SVt_PVMG);
11655 sv_setsv(line_sv,PL_linestr);
11656 (void)SvIOK_on(line_sv);
11657 SvIV_set(line_sv, 0);
11658 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), line_sv);
11661 /* having changed the buffer, we must update PL_bufend */
11662 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11663 PL_last_lop = PL_last_uni = NULL;
11666 /* at this point, we have successfully read the delimited string */
11668 if (!PL_encoding || UTF) {
11670 if (PL_madskills) {
11671 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11672 const int len = s - start;
11674 sv_catpvn(PL_thisstuff, tstart, len);
11676 PL_thisstuff = newSVpvn(tstart, len);
11677 if (!PL_thisclose && !keep_delims)
11678 PL_thisclose = newSVpvn(s,termlen);
11683 sv_catpvn(sv, s, termlen);
11688 if (PL_madskills) {
11689 char * const tstart = SvPVX(PL_linestr) + stuffstart;
11690 const int len = s - tstart - termlen;
11692 sv_catpvn(PL_thisstuff, tstart, len);
11694 PL_thisstuff = newSVpvn(tstart, len);
11695 if (!PL_thisclose && !keep_delims)
11696 PL_thisclose = newSVpvn(s - termlen,termlen);
11700 if (has_utf8 || PL_encoding)
11703 PL_multi_end = CopLINE(PL_curcop);
11705 /* if we allocated too much space, give some back */
11706 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11707 SvLEN_set(sv, SvCUR(sv) + 1);
11708 SvPV_renew(sv, SvLEN(sv));
11711 /* decide whether this is the first or second quoted string we've read
11724 takes: pointer to position in buffer
11725 returns: pointer to new position in buffer
11726 side-effects: builds ops for the constant in yylval.op
11728 Read a number in any of the formats that Perl accepts:
11730 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11731 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11734 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11736 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11739 If it reads a number without a decimal point or an exponent, it will
11740 try converting the number to an integer and see if it can do so
11741 without loss of precision.
11745 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11748 register const char *s = start; /* current position in buffer */
11749 register char *d; /* destination in temp buffer */
11750 register char *e; /* end of temp buffer */
11751 NV nv; /* number read, as a double */
11752 SV *sv = NULL; /* place to put the converted number */
11753 bool floatit; /* boolean: int or float? */
11754 const char *lastub = NULL; /* position of last underbar */
11755 static char const number_too_long[] = "Number too long";
11757 /* We use the first character to decide what type of number this is */
11761 Perl_croak(aTHX_ "panic: scan_num");
11763 /* if it starts with a 0, it could be an octal number, a decimal in
11764 0.13 disguise, or a hexadecimal number, or a binary number. */
11768 u holds the "number so far"
11769 shift the power of 2 of the base
11770 (hex == 4, octal == 3, binary == 1)
11771 overflowed was the number more than we can hold?
11773 Shift is used when we add a digit. It also serves as an "are
11774 we in octal/hex/binary?" indicator to disallow hex characters
11775 when in octal mode.
11780 bool overflowed = FALSE;
11781 bool just_zero = TRUE; /* just plain 0 or binary number? */
11782 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11783 static const char* const bases[5] =
11784 { "", "binary", "", "octal", "hexadecimal" };
11785 static const char* const Bases[5] =
11786 { "", "Binary", "", "Octal", "Hexadecimal" };
11787 static const char* const maxima[5] =
11789 "0b11111111111111111111111111111111",
11793 const char *base, *Base, *max;
11795 /* check for hex */
11800 } else if (s[1] == 'b') {
11805 /* check for a decimal in disguise */
11806 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11808 /* so it must be octal */
11815 if (ckWARN(WARN_SYNTAX))
11816 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11817 "Misplaced _ in number");
11821 base = bases[shift];
11822 Base = Bases[shift];
11823 max = maxima[shift];
11825 /* read the rest of the number */
11827 /* x is used in the overflow test,
11828 b is the digit we're adding on. */
11833 /* if we don't mention it, we're done */
11837 /* _ are ignored -- but warned about if consecutive */
11839 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11840 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11841 "Misplaced _ in number");
11845 /* 8 and 9 are not octal */
11846 case '8': case '9':
11848 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11852 case '2': case '3': case '4':
11853 case '5': case '6': case '7':
11855 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11858 case '0': case '1':
11859 b = *s++ & 15; /* ASCII digit -> value of digit */
11863 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11864 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11865 /* make sure they said 0x */
11868 b = (*s++ & 7) + 9;
11870 /* Prepare to put the digit we have onto the end
11871 of the number so far. We check for overflows.
11877 x = u << shift; /* make room for the digit */
11879 if ((x >> shift) != u
11880 && !(PL_hints & HINT_NEW_BINARY)) {
11883 if (ckWARN_d(WARN_OVERFLOW))
11884 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11885 "Integer overflow in %s number",
11888 u = x | b; /* add the digit to the end */
11891 n *= nvshift[shift];
11892 /* If an NV has not enough bits in its
11893 * mantissa to represent an UV this summing of
11894 * small low-order numbers is a waste of time
11895 * (because the NV cannot preserve the
11896 * low-order bits anyway): we could just
11897 * remember when did we overflow and in the
11898 * end just multiply n by the right
11906 /* if we get here, we had success: make a scalar value from
11911 /* final misplaced underbar check */
11912 if (s[-1] == '_') {
11913 if (ckWARN(WARN_SYNTAX))
11914 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11919 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
11920 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
11921 "%s number > %s non-portable",
11927 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
11928 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
11929 "%s number > %s non-portable",
11934 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11935 sv = new_constant(start, s - start, "integer",
11937 else if (PL_hints & HINT_NEW_BINARY)
11938 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
11943 handle decimal numbers.
11944 we're also sent here when we read a 0 as the first digit
11946 case '1': case '2': case '3': case '4': case '5':
11947 case '6': case '7': case '8': case '9': case '.':
11950 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11953 /* read next group of digits and _ and copy into d */
11954 while (isDIGIT(*s) || *s == '_') {
11955 /* skip underscores, checking for misplaced ones
11959 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11960 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11961 "Misplaced _ in number");
11965 /* check for end of fixed-length buffer */
11967 Perl_croak(aTHX_ number_too_long);
11968 /* if we're ok, copy the character */
11973 /* final misplaced underbar check */
11974 if (lastub && s == lastub + 1) {
11975 if (ckWARN(WARN_SYNTAX))
11976 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11979 /* read a decimal portion if there is one. avoid
11980 3..5 being interpreted as the number 3. followed
11983 if (*s == '.' && s[1] != '.') {
11988 if (ckWARN(WARN_SYNTAX))
11989 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11990 "Misplaced _ in number");
11994 /* copy, ignoring underbars, until we run out of digits.
11996 for (; isDIGIT(*s) || *s == '_'; s++) {
11997 /* fixed length buffer check */
11999 Perl_croak(aTHX_ number_too_long);
12001 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
12002 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12003 "Misplaced _ in number");
12009 /* fractional part ending in underbar? */
12010 if (s[-1] == '_') {
12011 if (ckWARN(WARN_SYNTAX))
12012 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12013 "Misplaced _ in number");
12015 if (*s == '.' && isDIGIT(s[1])) {
12016 /* oops, it's really a v-string, but without the "v" */
12022 /* read exponent part, if present */
12023 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12027 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12028 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
12030 /* stray preinitial _ */
12032 if (ckWARN(WARN_SYNTAX))
12033 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12034 "Misplaced _ in number");
12038 /* allow positive or negative exponent */
12039 if (*s == '+' || *s == '-')
12042 /* stray initial _ */
12044 if (ckWARN(WARN_SYNTAX))
12045 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12046 "Misplaced _ in number");
12050 /* read digits of exponent */
12051 while (isDIGIT(*s) || *s == '_') {
12054 Perl_croak(aTHX_ number_too_long);
12058 if (((lastub && s == lastub + 1) ||
12059 (!isDIGIT(s[1]) && s[1] != '_'))
12060 && ckWARN(WARN_SYNTAX))
12061 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
12062 "Misplaced _ in number");
12069 /* make an sv from the string */
12073 We try to do an integer conversion first if no characters
12074 indicating "float" have been found.
12079 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12081 if (flags == IS_NUMBER_IN_UV) {
12083 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12086 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12087 if (uv <= (UV) IV_MIN)
12088 sv_setiv(sv, -(IV)uv);
12095 /* terminate the string */
12097 nv = Atof(PL_tokenbuf);
12101 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
12102 (PL_hints & HINT_NEW_INTEGER) )
12103 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
12104 (floatit ? "float" : "integer"),
12108 /* if it starts with a v, it could be a v-string */
12111 sv = newSV(5); /* preallocate storage space */
12112 s = scan_vstring(s,sv);
12116 /* make the op for the constant and return */
12119 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12121 lvalp->opval = NULL;
12127 S_scan_formline(pTHX_ register char *s)
12130 register char *eol;
12132 SV * const stuff = newSVpvs("");
12133 bool needargs = FALSE;
12134 bool eofmt = FALSE;
12136 char *tokenstart = s;
12139 if (PL_madskills) {
12140 savewhite = PL_thiswhite;
12145 while (!needargs) {
12148 #ifdef PERL_STRICT_CR
12149 while (SPACE_OR_TAB(*t))
12152 while (SPACE_OR_TAB(*t) || *t == '\r')
12155 if (*t == '\n' || t == PL_bufend) {
12160 if (PL_in_eval && !PL_rsfp) {
12161 eol = (char *) memchr(s,'\n',PL_bufend-s);
12166 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12168 for (t = s; t < eol; t++) {
12169 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12171 goto enough; /* ~~ must be first line in formline */
12173 if (*t == '@' || *t == '^')
12177 sv_catpvn(stuff, s, eol-s);
12178 #ifndef PERL_STRICT_CR
12179 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12180 char *end = SvPVX(stuff) + SvCUR(stuff);
12183 SvCUR_set(stuff, SvCUR(stuff) - 1);
12193 if (PL_madskills) {
12195 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12197 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12200 s = filter_gets(PL_linestr, PL_rsfp, 0);
12202 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12204 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12206 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12207 PL_last_lop = PL_last_uni = NULL;
12216 if (SvCUR(stuff)) {
12219 PL_lex_state = LEX_NORMAL;
12220 start_force(PL_curforce);
12221 NEXTVAL_NEXTTOKE.ival = 0;
12225 PL_lex_state = LEX_FORMLINE;
12227 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12229 else if (PL_encoding)
12230 sv_recode_to_utf8(stuff, PL_encoding);
12232 start_force(PL_curforce);
12233 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12235 start_force(PL_curforce);
12236 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12240 SvREFCNT_dec(stuff);
12242 PL_lex_formbrack = 0;
12246 if (PL_madskills) {
12248 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12250 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12251 PL_thiswhite = savewhite;
12263 PL_cshlen = strlen(PL_cshname);
12265 #if defined(USE_ITHREADS)
12266 PERL_UNUSED_CONTEXT;
12272 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12275 const I32 oldsavestack_ix = PL_savestack_ix;
12276 CV* const outsidecv = PL_compcv;
12279 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12281 SAVEI32(PL_subline);
12282 save_item(PL_subname);
12283 SAVESPTR(PL_compcv);
12285 PL_compcv = (CV*)newSV(0);
12286 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
12287 CvFLAGS(PL_compcv) |= flags;
12289 PL_subline = CopLINE(PL_curcop);
12290 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12291 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12292 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12294 return oldsavestack_ix;
12298 #pragma segment Perl_yylex
12301 Perl_yywarn(pTHX_ const char *s)
12304 PL_in_eval |= EVAL_WARNONLY;
12306 PL_in_eval &= ~EVAL_WARNONLY;
12311 Perl_yyerror(pTHX_ const char *s)
12314 const char *where = NULL;
12315 const char *context = NULL;
12319 if (!yychar || (yychar == ';' && !PL_rsfp))
12321 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12322 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12323 PL_oldbufptr != PL_bufptr) {
12326 The code below is removed for NetWare because it abends/crashes on NetWare
12327 when the script has error such as not having the closing quotes like:
12328 if ($var eq "value)
12329 Checking of white spaces is anyway done in NetWare code.
12332 while (isSPACE(*PL_oldoldbufptr))
12335 context = PL_oldoldbufptr;
12336 contlen = PL_bufptr - PL_oldoldbufptr;
12338 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12339 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12342 The code below is removed for NetWare because it abends/crashes on NetWare
12343 when the script has error such as not having the closing quotes like:
12344 if ($var eq "value)
12345 Checking of white spaces is anyway done in NetWare code.
12348 while (isSPACE(*PL_oldbufptr))
12351 context = PL_oldbufptr;
12352 contlen = PL_bufptr - PL_oldbufptr;
12354 else if (yychar > 255)
12355 where = "next token ???";
12356 else if (yychar == -2) { /* YYEMPTY */
12357 if (PL_lex_state == LEX_NORMAL ||
12358 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12359 where = "at end of line";
12360 else if (PL_lex_inpat)
12361 where = "within pattern";
12363 where = "within string";
12366 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12368 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12369 else if (isPRINT_LC(yychar))
12370 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12372 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12373 where = SvPVX_const(where_sv);
12375 msg = sv_2mortal(newSVpv(s, 0));
12376 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12377 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12379 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12381 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12382 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12383 Perl_sv_catpvf(aTHX_ msg,
12384 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12385 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12388 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12389 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, (void*)msg);
12392 if (PL_error_count >= 10) {
12393 if (PL_in_eval && SvCUR(ERRSV))
12394 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12395 (void*)ERRSV, OutCopFILE(PL_curcop));
12397 Perl_croak(aTHX_ "%s has too many errors.\n",
12398 OutCopFILE(PL_curcop));
12401 PL_in_my_stash = NULL;
12405 #pragma segment Main
12409 S_swallow_bom(pTHX_ U8 *s)
12412 const STRLEN slen = SvCUR(PL_linestr);
12415 if (s[1] == 0xFE) {
12416 /* UTF-16 little-endian? (or UTF32-LE?) */
12417 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12418 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12419 #ifndef PERL_NO_UTF16_FILTER
12420 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12423 if (PL_bufend > (char*)s) {
12427 filter_add(utf16rev_textfilter, NULL);
12428 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12429 utf16_to_utf8_reversed(s, news,
12430 PL_bufend - (char*)s - 1,
12432 sv_setpvn(PL_linestr, (const char*)news, newlen);
12434 s = (U8*)SvPVX(PL_linestr);
12435 Copy(news, s, newlen, U8);
12439 SvUTF8_on(PL_linestr);
12440 s = (U8*)SvPVX(PL_linestr);
12442 /* FIXME - is this a general bug fix? */
12445 PL_bufend = SvPVX(PL_linestr) + newlen;
12448 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12453 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12454 #ifndef PERL_NO_UTF16_FILTER
12455 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12458 if (PL_bufend > (char *)s) {
12462 filter_add(utf16_textfilter, NULL);
12463 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12464 utf16_to_utf8(s, news,
12465 PL_bufend - (char*)s,
12467 sv_setpvn(PL_linestr, (const char*)news, newlen);
12469 SvUTF8_on(PL_linestr);
12470 s = (U8*)SvPVX(PL_linestr);
12471 PL_bufend = SvPVX(PL_linestr) + newlen;
12474 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12479 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12480 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12481 s += 3; /* UTF-8 */
12487 if (s[2] == 0xFE && s[3] == 0xFF) {
12488 /* UTF-32 big-endian */
12489 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12492 else if (s[2] == 0 && s[3] != 0) {
12495 * are a good indicator of UTF-16BE. */
12496 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12502 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12503 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12504 s += 4; /* UTF-8 */
12510 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12513 * are a good indicator of UTF-16LE. */
12514 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12523 * Restore a source filter.
12527 restore_rsfp(pTHX_ void *f)
12530 PerlIO * const fp = (PerlIO*)f;
12532 if (PL_rsfp == PerlIO_stdin())
12533 PerlIO_clearerr(PL_rsfp);
12534 else if (PL_rsfp && (PL_rsfp != fp))
12535 PerlIO_close(PL_rsfp);
12539 #ifndef PERL_NO_UTF16_FILTER
12541 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12544 const STRLEN old = SvCUR(sv);
12545 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12546 DEBUG_P(PerlIO_printf(Perl_debug_log,
12547 "utf16_textfilter(%p): %d %d (%d)\n",
12548 FPTR2DPTR(void *, utf16_textfilter),
12549 idx, maxlen, (int) count));
12553 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12554 Copy(SvPVX_const(sv), tmps, old, char);
12555 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12556 SvCUR(sv) - old, &newlen);
12557 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12559 DEBUG_P({sv_dump(sv);});
12564 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12567 const STRLEN old = SvCUR(sv);
12568 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12569 DEBUG_P(PerlIO_printf(Perl_debug_log,
12570 "utf16rev_textfilter(%p): %d %d (%d)\n",
12571 FPTR2DPTR(void *, utf16rev_textfilter),
12572 idx, maxlen, (int) count));
12576 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12577 Copy(SvPVX_const(sv), tmps, old, char);
12578 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12579 SvCUR(sv) - old, &newlen);
12580 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12582 DEBUG_P({ sv_dump(sv); });
12588 Returns a pointer to the next character after the parsed
12589 vstring, as well as updating the passed in sv.
12591 Function must be called like
12594 s = scan_vstring(s,sv);
12596 The sv should already be large enough to store the vstring
12597 passed in, for performance reasons.
12602 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
12605 const char *pos = s;
12606 const char *start = s;
12607 if (*pos == 'v') pos++; /* get past 'v' */
12608 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12610 if ( *pos != '.') {
12611 /* this may not be a v-string if followed by => */
12612 const char *next = pos;
12613 while (next < PL_bufend && isSPACE(*next))
12615 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
12616 /* return string not v-string */
12617 sv_setpvn(sv,(char *)s,pos-s);
12618 return (char *)pos;
12622 if (!isALPHA(*pos)) {
12623 U8 tmpbuf[UTF8_MAXBYTES+1];
12626 s++; /* get past 'v' */
12628 sv_setpvn(sv, "", 0);
12631 /* this is atoi() that tolerates underscores */
12634 const char *end = pos;
12636 while (--end >= s) {
12638 const UV orev = rev;
12639 rev += (*end - '0') * mult;
12641 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12642 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12643 "Integer overflow in decimal number");
12647 if (rev > 0x7FFFFFFF)
12648 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12650 /* Append native character for the rev point */
12651 tmpend = uvchr_to_utf8(tmpbuf, rev);
12652 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12653 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12655 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
12661 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12665 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12673 * c-indentation-style: bsd
12674 * c-basic-offset: 4
12675 * indent-tabs-mode: t
12678 * ex: set ts=8 sts=4 sw=4 noet: