3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] =
30 "Identifier too long";
31 static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33 static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
36 static void restore_rsfp(pTHX_ void *f);
37 #ifndef PERL_NO_UTF16_FILTER
38 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
42 #define XFAKEBRACK 128
45 #ifdef USE_UTF8_SCRIPTS
46 # define UTF (!IN_BYTES)
48 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
51 /* In variables named $^X, these are the legal values for X.
52 * 1999-02-27 mjd-perl-patch@plover.com */
53 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
55 /* On MacOS, respect nonbreaking spaces */
56 #ifdef MACOS_TRADITIONAL
57 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
59 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
62 /* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
64 * can get by with a single comparison (if the compiler is smart enough).
67 /* #define LEX_NOTPARSING 11 is done in perl.h. */
70 #define LEX_INTERPNORMAL 9
71 #define LEX_INTERPCASEMOD 8
72 #define LEX_INTERPPUSH 7
73 #define LEX_INTERPSTART 6
74 #define LEX_INTERPEND 5
75 #define LEX_INTERPENDMAYBE 4
76 #define LEX_INTERPCONCAT 3
77 #define LEX_INTERPCONST 2
78 #define LEX_FORMLINE 1
79 #define LEX_KNOWNEXT 0
82 static const char* const lex_state_names[] = {
101 #include "keywords.h"
103 /* CLINE is a macro that ensures PL_copline has a sane value */
108 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
110 /* According to some strict interpretations of ANSI C89 one cannot
111 * cast void pointers to code pointers or vice versa (as filter_add(),
112 * filter_del(), and filter_read() will want to do). We should still
113 * be able to use a union for sneaky "casting". */
120 * Convenience functions to return different tokens and prime the
121 * lexer for the next token. They all take an argument.
123 * TOKEN : generic token (used for '(', DOLSHARP, etc)
124 * OPERATOR : generic operator
125 * AOPERATOR : assignment operator
126 * PREBLOCK : beginning the block after an if, while, foreach, ...
127 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
128 * PREREF : *EXPR where EXPR is not a simple identifier
129 * TERM : expression term
130 * LOOPX : loop exiting command (goto, last, dump, etc)
131 * FTST : file test operator
132 * FUN0 : zero-argument function
133 * FUN1 : not used, except for not, which isn't a UNIOP
134 * BOop : bitwise or or xor
136 * SHop : shift operator
137 * PWop : power operator
138 * PMop : pattern-matching operator
139 * Aop : addition-level operator
140 * Mop : multiplication-level operator
141 * Eop : equality-testing operator
142 * Rop : relational operator <= != gt
144 * Also see LOP and lop() below.
147 #ifdef DEBUGGING /* Serve -DT. */
148 # define REPORT(retval) tokereport(s,(int)retval)
150 # define REPORT(retval) (retval)
153 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
154 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
155 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
156 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
157 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
158 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
159 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
160 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
161 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
162 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
163 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
164 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
165 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
166 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
167 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
168 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
169 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
170 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
171 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
172 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
174 /* This bit of chicanery makes a unary function followed by
175 * a parenthesis into a function with one argument, highest precedence.
176 * The UNIDOR macro is for unary functions that can be followed by the //
177 * operator (such as C<shift // 0>).
179 #define UNI2(f,x) { \
183 PL_last_uni = PL_oldbufptr; \
184 PL_last_lop_op = f; \
186 return REPORT( (int)FUNC1 ); \
188 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
190 #define UNI(f) UNI2(f,XTERM)
191 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
193 #define UNIBRACK(f) { \
196 PL_last_uni = PL_oldbufptr; \
198 return REPORT( (int)FUNC1 ); \
200 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
203 /* grandfather return to old style */
204 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
208 /* how to interpret the yylval associated with the token */
212 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
218 static struct debug_tokens { const int token, type; const char *name; }
219 const debug_tokens[] =
221 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
222 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
223 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
224 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
225 { ARROW, TOKENTYPE_NONE, "ARROW" },
226 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
227 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
228 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
229 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
230 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
231 { DO, TOKENTYPE_NONE, "DO" },
232 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
233 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
234 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
235 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
236 { ELSE, TOKENTYPE_NONE, "ELSE" },
237 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
238 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
239 { FOR, TOKENTYPE_IVAL, "FOR" },
240 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
241 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
242 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
243 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
244 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
245 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
246 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
247 { IF, TOKENTYPE_IVAL, "IF" },
248 { LABEL, TOKENTYPE_PVAL, "LABEL" },
249 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
250 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
251 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
252 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
253 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
254 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
255 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
256 { MY, TOKENTYPE_IVAL, "MY" },
257 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
258 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
259 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
260 { OROP, TOKENTYPE_IVAL, "OROP" },
261 { OROR, TOKENTYPE_NONE, "OROR" },
262 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
263 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
264 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
265 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
266 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
267 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
268 { PREINC, TOKENTYPE_NONE, "PREINC" },
269 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
270 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
271 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
272 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
273 { SUB, TOKENTYPE_NONE, "SUB" },
274 { THING, TOKENTYPE_OPVAL, "THING" },
275 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
276 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
277 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
278 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
279 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
280 { USE, TOKENTYPE_IVAL, "USE" },
281 { WHILE, TOKENTYPE_IVAL, "WHILE" },
282 { WORD, TOKENTYPE_OPVAL, "WORD" },
283 { 0, TOKENTYPE_NONE, 0 }
286 /* dump the returned token in rv, plus any optional arg in yylval */
289 S_tokereport(pTHX_ const char* s, I32 rv)
292 const char *name = Nullch;
293 enum token_type type = TOKENTYPE_NONE;
294 const struct debug_tokens *p;
295 SV* report = newSVpvn("<== ", 4);
297 for (p = debug_tokens; p->token; p++) {
298 if (p->token == (int)rv) {
305 Perl_sv_catpv(aTHX_ report, name);
306 else if ((char)rv > ' ' && (char)rv < '~')
307 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
309 Perl_sv_catpv(aTHX_ report, "EOF");
311 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
314 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
317 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
319 case TOKENTYPE_OPNUM:
320 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
321 PL_op_name[yylval.ival]);
324 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
326 case TOKENTYPE_OPVAL:
328 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
329 PL_op_name[yylval.opval->op_type]);
331 Perl_sv_catpv(aTHX_ report, "(opval=null)");
334 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
335 if (s - PL_bufptr > 0)
336 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
338 if (PL_oldbufptr && *PL_oldbufptr)
339 sv_catpv(report, PL_tokenbuf);
341 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
351 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
352 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
356 S_ao(pTHX_ int toketype)
358 if (*PL_bufptr == '=') {
360 if (toketype == ANDAND)
361 yylval.ival = OP_ANDASSIGN;
362 else if (toketype == OROR)
363 yylval.ival = OP_ORASSIGN;
364 else if (toketype == DORDOR)
365 yylval.ival = OP_DORASSIGN;
373 * When Perl expects an operator and finds something else, no_op
374 * prints the warning. It always prints "<something> found where
375 * operator expected. It prints "Missing semicolon on previous line?"
376 * if the surprise occurs at the start of the line. "do you need to
377 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
378 * where the compiler doesn't know if foo is a method call or a function.
379 * It prints "Missing operator before end of line" if there's nothing
380 * after the missing operator, or "... before <...>" if there is something
381 * after the missing operator.
385 S_no_op(pTHX_ const char *what, char *s)
387 char *oldbp = PL_bufptr;
388 bool is_first = (PL_oldbufptr == PL_linestart);
394 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
395 if (ckWARN_d(WARN_SYNTAX)) {
397 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
398 "\t(Missing semicolon on previous line?)\n");
399 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
401 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
402 if (t < PL_bufptr && isSPACE(*t))
403 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
404 "\t(Do you need to predeclare %.*s?)\n",
405 t - PL_oldoldbufptr, PL_oldoldbufptr);
409 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
410 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
418 * Complain about missing quote/regexp/heredoc terminator.
419 * If it's called with (char *)NULL then it cauterizes the line buffer.
420 * If we're in a delimited string and the delimiter is a control
421 * character, it's reformatted into a two-char sequence like ^C.
426 S_missingterm(pTHX_ char *s)
431 char *nl = strrchr(s,'\n');
437 iscntrl(PL_multi_close)
439 PL_multi_close < 32 || PL_multi_close == 127
443 tmpbuf[1] = toCTRL(PL_multi_close);
448 *tmpbuf = (char)PL_multi_close;
452 q = strchr(s,'"') ? '\'' : '"';
453 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
461 Perl_deprecate(pTHX_ const char *s)
463 if (ckWARN(WARN_DEPRECATED))
464 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
468 Perl_deprecate_old(pTHX_ const char *s)
470 /* This function should NOT be called for any new deprecated warnings */
471 /* Use Perl_deprecate instead */
473 /* It is here to maintain backward compatibility with the pre-5.8 */
474 /* warnings category hierarchy. The "deprecated" category used to */
475 /* live under the "syntax" category. It is now a top-level category */
476 /* in its own right. */
478 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
479 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
480 "Use of %s is deprecated", s);
485 * Deprecate a comma-less variable list.
491 deprecate_old("comma-less variable list");
495 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
496 * utf16-to-utf8-reversed.
499 #ifdef PERL_CR_FILTER
503 register const char *s = SvPVX_const(sv);
504 register const char *e = s + SvCUR(sv);
505 /* outer loop optimized to do nothing if there are no CR-LFs */
507 if (*s++ == '\r' && *s == '\n') {
508 /* hit a CR-LF, need to copy the rest */
509 register char *d = s - 1;
512 if (*s == '\r' && s[1] == '\n')
523 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
525 const I32 count = FILTER_READ(idx+1, sv, maxlen);
526 if (count > 0 && !maxlen)
534 * Initialize variables. Uses the Perl save_stack to save its state (for
535 * recursive calls to the parser).
539 Perl_lex_start(pTHX_ SV *line)
544 SAVEI32(PL_lex_dojoin);
545 SAVEI32(PL_lex_brackets);
546 SAVEI32(PL_lex_casemods);
547 SAVEI32(PL_lex_starts);
548 SAVEI32(PL_lex_state);
549 SAVEVPTR(PL_lex_inpat);
550 SAVEI32(PL_lex_inwhat);
551 if (PL_lex_state == LEX_KNOWNEXT) {
552 I32 toke = PL_nexttoke;
553 while (--toke >= 0) {
554 SAVEI32(PL_nexttype[toke]);
555 SAVEVPTR(PL_nextval[toke]);
557 SAVEI32(PL_nexttoke);
559 SAVECOPLINE(PL_curcop);
562 SAVEPPTR(PL_oldbufptr);
563 SAVEPPTR(PL_oldoldbufptr);
564 SAVEPPTR(PL_last_lop);
565 SAVEPPTR(PL_last_uni);
566 SAVEPPTR(PL_linestart);
567 SAVESPTR(PL_linestr);
568 SAVEGENERICPV(PL_lex_brackstack);
569 SAVEGENERICPV(PL_lex_casestack);
570 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
571 SAVESPTR(PL_lex_stuff);
572 SAVEI32(PL_lex_defer);
573 SAVEI32(PL_sublex_info.sub_inwhat);
574 SAVESPTR(PL_lex_repl);
576 SAVEINT(PL_lex_expect);
578 PL_lex_state = LEX_NORMAL;
582 New(899, PL_lex_brackstack, 120, char);
583 New(899, PL_lex_casestack, 12, char);
585 *PL_lex_casestack = '\0';
588 PL_lex_stuff = Nullsv;
589 PL_lex_repl = Nullsv;
593 PL_sublex_info.sub_inwhat = 0;
595 if (SvREADONLY(PL_linestr))
596 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
597 s = SvPV(PL_linestr, len);
598 if (!len || s[len-1] != ';') {
599 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
600 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
601 sv_catpvn(PL_linestr, "\n;", 2);
603 SvTEMP_off(PL_linestr);
604 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
605 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
606 PL_last_lop = PL_last_uni = Nullch;
612 * Finalizer for lexing operations. Must be called when the parser is
613 * done with the lexer.
619 PL_doextract = FALSE;
624 * This subroutine has nothing to do with tilting, whether at windmills
625 * or pinball tables. Its name is short for "increment line". It
626 * increments the current line number in CopLINE(PL_curcop) and checks
627 * to see whether the line starts with a comment of the form
628 * # line 500 "foo.pm"
629 * If so, it sets the current line number and file to the values in the comment.
633 S_incline(pTHX_ char *s)
640 CopLINE_inc(PL_curcop);
643 while (SPACE_OR_TAB(*s)) s++;
644 if (strnEQ(s, "line", 4))
648 if (SPACE_OR_TAB(*s))
652 while (SPACE_OR_TAB(*s)) s++;
658 while (SPACE_OR_TAB(*s))
660 if (*s == '"' && (t = strchr(s+1, '"'))) {
665 for (t = s; !isSPACE(*t); t++) ;
668 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
670 if (*e != '\n' && *e != '\0')
671 return; /* false alarm */
676 CopFILE_free(PL_curcop);
677 CopFILE_set(PL_curcop, s);
680 CopLINE_set(PL_curcop, atoi(n)-1);
685 * Called to gobble the appropriate amount and type of whitespace.
686 * Skips comments as well.
690 S_skipspace(pTHX_ register char *s)
692 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
693 while (s < PL_bufend && SPACE_OR_TAB(*s))
699 SSize_t oldprevlen, oldoldprevlen;
700 SSize_t oldloplen = 0, oldunilen = 0;
701 while (s < PL_bufend && isSPACE(*s)) {
702 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
707 if (s < PL_bufend && *s == '#') {
708 while (s < PL_bufend && *s != '\n')
712 if (PL_in_eval && !PL_rsfp) {
719 /* only continue to recharge the buffer if we're at the end
720 * of the buffer, we're not reading from a source filter, and
721 * we're in normal lexing mode
723 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
724 PL_lex_state == LEX_FORMLINE)
727 /* try to recharge the buffer */
728 if ((s = filter_gets(PL_linestr, PL_rsfp,
729 (prevlen = SvCUR(PL_linestr)))) == Nullch)
731 /* end of file. Add on the -p or -n magic */
734 ";}continue{print or die qq(-p destination: $!\\n);}");
735 PL_minus_n = PL_minus_p = 0;
737 else if (PL_minus_n) {
738 sv_setpvn(PL_linestr, ";}", 2);
742 sv_setpvn(PL_linestr,";", 1);
744 /* reset variables for next time we lex */
745 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
747 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
748 PL_last_lop = PL_last_uni = Nullch;
750 /* Close the filehandle. Could be from -P preprocessor,
751 * STDIN, or a regular file. If we were reading code from
752 * STDIN (because the commandline held no -e or filename)
753 * then we don't close it, we reset it so the code can
754 * read from STDIN too.
757 if (PL_preprocess && !PL_in_eval)
758 (void)PerlProc_pclose(PL_rsfp);
759 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
760 PerlIO_clearerr(PL_rsfp);
762 (void)PerlIO_close(PL_rsfp);
767 /* not at end of file, so we only read another line */
768 /* make corresponding updates to old pointers, for yyerror() */
769 oldprevlen = PL_oldbufptr - PL_bufend;
770 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
772 oldunilen = PL_last_uni - PL_bufend;
774 oldloplen = PL_last_lop - PL_bufend;
775 PL_linestart = PL_bufptr = s + prevlen;
776 PL_bufend = s + SvCUR(PL_linestr);
778 PL_oldbufptr = s + oldprevlen;
779 PL_oldoldbufptr = s + oldoldprevlen;
781 PL_last_uni = s + oldunilen;
783 PL_last_lop = s + oldloplen;
786 /* debugger active and we're not compiling the debugger code,
787 * so store the line into the debugger's array of lines
789 if (PERLDB_LINE && PL_curstash != PL_debstash) {
790 SV *sv = NEWSV(85,0);
792 sv_upgrade(sv, SVt_PVMG);
793 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
796 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
803 * Check the unary operators to ensure there's no ambiguity in how they're
804 * used. An ambiguous piece of code would be:
806 * This doesn't mean rand() + 5. Because rand() is a unary operator,
807 * the +5 is its argument.
816 if (PL_oldoldbufptr != PL_last_uni)
818 while (isSPACE(*PL_last_uni))
820 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
821 if ((t = strchr(s, '(')) && t < PL_bufptr)
823 if (ckWARN_d(WARN_AMBIGUOUS)){
826 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
827 "Warning: Use of \"%s\" without parentheses is ambiguous",
834 * LOP : macro to build a list operator. Its behaviour has been replaced
835 * with a subroutine, S_lop() for which LOP is just another name.
838 #define LOP(f,x) return lop(f,x,s)
842 * Build a list operator (or something that might be one). The rules:
843 * - if we have a next token, then it's a list operator [why?]
844 * - if the next thing is an opening paren, then it's a function
845 * - else it's a list operator
849 S_lop(pTHX_ I32 f, int x, char *s)
855 PL_last_lop = PL_oldbufptr;
856 PL_last_lop_op = (OPCODE)f;
858 return REPORT(LSTOP);
865 return REPORT(LSTOP);
870 * When the lexer realizes it knows the next token (for instance,
871 * it is reordering tokens for the parser) then it can call S_force_next
872 * to know what token to return the next time the lexer is called. Caller
873 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
874 * handles the token correctly.
878 S_force_next(pTHX_ I32 type)
880 PL_nexttype[PL_nexttoke] = type;
882 if (PL_lex_state != LEX_KNOWNEXT) {
883 PL_lex_defer = PL_lex_state;
884 PL_lex_expect = PL_expect;
885 PL_lex_state = LEX_KNOWNEXT;
890 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
892 SV *sv = newSVpvn(start,len);
893 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
900 * When the lexer knows the next thing is a word (for instance, it has
901 * just seen -> and it knows that the next char is a word char, then
902 * it calls S_force_word to stick the next word into the PL_next lookahead.
905 * char *start : buffer position (must be within PL_linestr)
906 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
907 * int check_keyword : if true, Perl checks to make sure the word isn't
908 * a keyword (do this if the word is a label, e.g. goto FOO)
909 * int allow_pack : if true, : characters will also be allowed (require,
911 * int allow_initial_tick : used by the "sub" lexer only.
915 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
920 start = skipspace(start);
922 if (isIDFIRST_lazy_if(s,UTF) ||
923 (allow_pack && *s == ':') ||
924 (allow_initial_tick && *s == '\'') )
926 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
927 if (check_keyword && keyword(PL_tokenbuf, len))
929 if (token == METHOD) {
934 PL_expect = XOPERATOR;
937 PL_nextval[PL_nexttoke].opval
938 = (OP*)newSVOP(OP_CONST,0,
939 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
940 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
948 * Called when the lexer wants $foo *foo &foo etc, but the program
949 * text only contains the "foo" portion. The first argument is a pointer
950 * to the "foo", and the second argument is the type symbol to prefix.
951 * Forces the next token to be a "WORD".
952 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
956 S_force_ident(pTHX_ register const char *s, int kind)
959 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
960 PL_nextval[PL_nexttoke].opval = o;
963 o->op_private = OPpCONST_ENTERED;
964 /* XXX see note in pp_entereval() for why we forgo typo
965 warnings if the symbol must be introduced in an eval.
967 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
968 kind == '$' ? SVt_PV :
969 kind == '@' ? SVt_PVAV :
970 kind == '%' ? SVt_PVHV :
978 Perl_str_to_version(pTHX_ SV *sv)
983 const char *start = SvPVx_const(sv,len);
984 const char *end = start + len;
985 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
986 while (start < end) {
990 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
995 retval += ((NV)n)/nshift;
1004 * Forces the next token to be a version number.
1005 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1006 * and if "guessing" is TRUE, then no new token is created (and the caller
1007 * must use an alternative parsing method).
1011 S_force_version(pTHX_ char *s, int guessing)
1013 OP *version = Nullop;
1022 while (isDIGIT(*d) || *d == '_' || *d == '.')
1024 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1026 s = scan_num(s, &yylval);
1027 version = yylval.opval;
1028 ver = cSVOPx(version)->op_sv;
1029 if (SvPOK(ver) && !SvNIOK(ver)) {
1030 SvUPGRADE(ver, SVt_PVNV);
1031 SvNV_set(ver, str_to_version(ver));
1032 SvNOK_on(ver); /* hint that it is a version */
1039 /* NOTE: The parser sees the package name and the VERSION swapped */
1040 PL_nextval[PL_nexttoke].opval = version;
1048 * Tokenize a quoted string passed in as an SV. It finds the next
1049 * chunk, up to end of string or a backslash. It may make a new
1050 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1055 S_tokeq(pTHX_ SV *sv)
1058 register char *send;
1066 s = SvPV_force(sv, len);
1067 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1070 while (s < send && *s != '\\')
1075 if ( PL_hints & HINT_NEW_STRING ) {
1076 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1082 if (s + 1 < send && (s[1] == '\\'))
1083 s++; /* all that, just for this */
1088 SvCUR_set(sv, d - SvPVX_const(sv));
1090 if ( PL_hints & HINT_NEW_STRING )
1091 return new_constant(NULL, 0, "q", sv, pv, "q");
1096 * Now come three functions related to double-quote context,
1097 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1098 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1099 * interact with PL_lex_state, and create fake ( ... ) argument lists
1100 * to handle functions and concatenation.
1101 * They assume that whoever calls them will be setting up a fake
1102 * join call, because each subthing puts a ',' after it. This lets
1105 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1107 * (I'm not sure whether the spurious commas at the end of lcfirst's
1108 * arguments and join's arguments are created or not).
1113 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1115 * Pattern matching will set PL_lex_op to the pattern-matching op to
1116 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1118 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1120 * Everything else becomes a FUNC.
1122 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1123 * had an OP_CONST or OP_READLINE). This just sets us up for a
1124 * call to S_sublex_push().
1128 S_sublex_start(pTHX)
1130 const register I32 op_type = yylval.ival;
1132 if (op_type == OP_NULL) {
1133 yylval.opval = PL_lex_op;
1137 if (op_type == OP_CONST || op_type == OP_READLINE) {
1138 SV *sv = tokeq(PL_lex_stuff);
1140 if (SvTYPE(sv) == SVt_PVIV) {
1141 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1143 const char *p = SvPV(sv, len);
1144 SV * const nsv = newSVpvn(p, len);
1150 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1151 PL_lex_stuff = Nullsv;
1152 /* Allow <FH> // "foo" */
1153 if (op_type == OP_READLINE)
1154 PL_expect = XTERMORDORDOR;
1158 PL_sublex_info.super_state = PL_lex_state;
1159 PL_sublex_info.sub_inwhat = op_type;
1160 PL_sublex_info.sub_op = PL_lex_op;
1161 PL_lex_state = LEX_INTERPPUSH;
1165 yylval.opval = PL_lex_op;
1175 * Create a new scope to save the lexing state. The scope will be
1176 * ended in S_sublex_done. Returns a '(', starting the function arguments
1177 * to the uc, lc, etc. found before.
1178 * Sets PL_lex_state to LEX_INTERPCONCAT.
1187 PL_lex_state = PL_sublex_info.super_state;
1188 SAVEI32(PL_lex_dojoin);
1189 SAVEI32(PL_lex_brackets);
1190 SAVEI32(PL_lex_casemods);
1191 SAVEI32(PL_lex_starts);
1192 SAVEI32(PL_lex_state);
1193 SAVEVPTR(PL_lex_inpat);
1194 SAVEI32(PL_lex_inwhat);
1195 SAVECOPLINE(PL_curcop);
1196 SAVEPPTR(PL_bufptr);
1197 SAVEPPTR(PL_bufend);
1198 SAVEPPTR(PL_oldbufptr);
1199 SAVEPPTR(PL_oldoldbufptr);
1200 SAVEPPTR(PL_last_lop);
1201 SAVEPPTR(PL_last_uni);
1202 SAVEPPTR(PL_linestart);
1203 SAVESPTR(PL_linestr);
1204 SAVEGENERICPV(PL_lex_brackstack);
1205 SAVEGENERICPV(PL_lex_casestack);
1207 PL_linestr = PL_lex_stuff;
1208 PL_lex_stuff = Nullsv;
1210 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1211 = SvPVX(PL_linestr);
1212 PL_bufend += SvCUR(PL_linestr);
1213 PL_last_lop = PL_last_uni = Nullch;
1214 SAVEFREESV(PL_linestr);
1216 PL_lex_dojoin = FALSE;
1217 PL_lex_brackets = 0;
1218 New(899, PL_lex_brackstack, 120, char);
1219 New(899, PL_lex_casestack, 12, char);
1220 PL_lex_casemods = 0;
1221 *PL_lex_casestack = '\0';
1223 PL_lex_state = LEX_INTERPCONCAT;
1224 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1226 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1227 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1228 PL_lex_inpat = PL_sublex_info.sub_op;
1230 PL_lex_inpat = Nullop;
1237 * Restores lexer state after a S_sublex_push.
1244 if (!PL_lex_starts++) {
1245 SV *sv = newSVpvn("",0);
1246 if (SvUTF8(PL_linestr))
1248 PL_expect = XOPERATOR;
1249 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1253 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1254 PL_lex_state = LEX_INTERPCASEMOD;
1258 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1259 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1260 PL_linestr = PL_lex_repl;
1262 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1263 PL_bufend += SvCUR(PL_linestr);
1264 PL_last_lop = PL_last_uni = Nullch;
1265 SAVEFREESV(PL_linestr);
1266 PL_lex_dojoin = FALSE;
1267 PL_lex_brackets = 0;
1268 PL_lex_casemods = 0;
1269 *PL_lex_casestack = '\0';
1271 if (SvEVALED(PL_lex_repl)) {
1272 PL_lex_state = LEX_INTERPNORMAL;
1274 /* we don't clear PL_lex_repl here, so that we can check later
1275 whether this is an evalled subst; that means we rely on the
1276 logic to ensure sublex_done() is called again only via the
1277 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1280 PL_lex_state = LEX_INTERPCONCAT;
1281 PL_lex_repl = Nullsv;
1287 PL_bufend = SvPVX(PL_linestr);
1288 PL_bufend += SvCUR(PL_linestr);
1289 PL_expect = XOPERATOR;
1290 PL_sublex_info.sub_inwhat = 0;
1298 Extracts a pattern, double-quoted string, or transliteration. This
1301 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1302 processing a pattern (PL_lex_inpat is true), a transliteration
1303 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1305 Returns a pointer to the character scanned up to. Iff this is
1306 advanced from the start pointer supplied (ie if anything was
1307 successfully parsed), will leave an OP for the substring scanned
1308 in yylval. Caller must intuit reason for not parsing further
1309 by looking at the next characters herself.
1313 double-quoted style: \r and \n
1314 regexp special ones: \D \s
1316 backrefs: \1 (deprecated in substitution replacements)
1317 case and quoting: \U \Q \E
1318 stops on @ and $, but not for $ as tail anchor
1320 In transliterations:
1321 characters are VERY literal, except for - not at the start or end
1322 of the string, which indicates a range. scan_const expands the
1323 range to the full set of intermediate characters.
1325 In double-quoted strings:
1327 double-quoted style: \r and \n
1329 backrefs: \1 (deprecated)
1330 case and quoting: \U \Q \E
1333 scan_const does *not* construct ops to handle interpolated strings.
1334 It stops processing as soon as it finds an embedded $ or @ variable
1335 and leaves it to the caller to work out what's going on.
1337 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1339 $ in pattern could be $foo or could be tail anchor. Assumption:
1340 it's a tail anchor if $ is the last thing in the string, or if it's
1341 followed by one of ")| \n\t"
1343 \1 (backreferences) are turned into $1
1345 The structure of the code is
1346 while (there's a character to process) {
1347 handle transliteration ranges
1348 skip regexp comments
1349 skip # initiated comments in //x patterns
1350 check for embedded @foo
1351 check for embedded scalars
1353 leave intact backslashes from leave (below)
1354 deprecate \1 in strings and sub replacements
1355 handle string-changing backslashes \l \U \Q \E, etc.
1356 switch (what was escaped) {
1357 handle - in a transliteration (becomes a literal -)
1358 handle \132 octal characters
1359 handle 0x15 hex characters
1360 handle \cV (control V)
1361 handle printf backslashes (\f, \r, \n, etc)
1363 } (end if backslash)
1364 } (end while character to read)
1369 S_scan_const(pTHX_ char *start)
1371 register char *send = PL_bufend; /* end of the constant */
1372 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1373 register char *s = start; /* start of the constant */
1374 register char *d = SvPVX(sv); /* destination for copies */
1375 bool dorange = FALSE; /* are we in a translit range? */
1376 bool didrange = FALSE; /* did we just finish a range? */
1377 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1378 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1381 const char *leaveit = /* set of acceptably-backslashed characters */
1383 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1386 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1387 /* If we are doing a trans and we know we want UTF8 set expectation */
1388 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1389 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1393 while (s < send || dorange) {
1394 /* get transliterations out of the way (they're most literal) */
1395 if (PL_lex_inwhat == OP_TRANS) {
1396 /* expand a range A-Z to the full set of characters. AIE! */
1398 I32 i; /* current expanded character */
1399 I32 min; /* first character in range */
1400 I32 max; /* last character in range */
1403 char *c = (char*)utf8_hop((U8*)d, -1);
1407 *c = (char)UTF_TO_NATIVE(0xff);
1408 /* mark the range as done, and continue */
1414 i = d - SvPVX_const(sv); /* remember current offset */
1415 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1416 d = SvPVX(sv) + i; /* refresh d after realloc */
1417 d -= 2; /* eat the first char and the - */
1419 min = (U8)*d; /* first char in range */
1420 max = (U8)d[1]; /* last char in range */
1424 "Invalid range \"%c-%c\" in transliteration operator",
1425 (char)min, (char)max);
1429 if ((isLOWER(min) && isLOWER(max)) ||
1430 (isUPPER(min) && isUPPER(max))) {
1432 for (i = min; i <= max; i++)
1434 *d++ = NATIVE_TO_NEED(has_utf8,i);
1436 for (i = min; i <= max; i++)
1438 *d++ = NATIVE_TO_NEED(has_utf8,i);
1443 for (i = min; i <= max; i++)
1446 /* mark the range as done, and continue */
1452 /* range begins (ignore - as first or last char) */
1453 else if (*s == '-' && s+1 < send && s != start) {
1455 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1458 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1470 /* if we get here, we're not doing a transliteration */
1472 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1473 except for the last char, which will be done separately. */
1474 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1476 while (s+1 < send && *s != ')')
1477 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1479 else if (s[2] == '{' /* This should match regcomp.c */
1480 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1483 char *regparse = s + (s[2] == '{' ? 3 : 4);
1486 while (count && (c = *regparse)) {
1487 if (c == '\\' && regparse[1])
1495 if (*regparse != ')')
1496 regparse--; /* Leave one char for continuation. */
1497 while (s < regparse)
1498 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1502 /* likewise skip #-initiated comments in //x patterns */
1503 else if (*s == '#' && PL_lex_inpat &&
1504 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1505 while (s+1 < send && *s != '\n')
1506 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1509 /* check for embedded arrays
1510 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1512 else if (*s == '@' && s[1]
1513 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1516 /* check for embedded scalars. only stop if we're sure it's a
1519 else if (*s == '$') {
1520 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1522 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1523 break; /* in regexp, $ might be tail anchor */
1526 /* End of else if chain - OP_TRANS rejoin rest */
1529 if (*s == '\\' && s+1 < send) {
1532 /* some backslashes we leave behind */
1533 if (*leaveit && *s && strchr(leaveit, *s)) {
1534 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1535 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1539 /* deprecate \1 in strings and substitution replacements */
1540 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1541 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1543 if (ckWARN(WARN_SYNTAX))
1544 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1549 /* string-change backslash escapes */
1550 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1555 /* if we get here, it's either a quoted -, or a digit */
1558 /* quoted - in transliterations */
1560 if (PL_lex_inwhat == OP_TRANS) {
1567 if (ckWARN(WARN_MISC) &&
1570 Perl_warner(aTHX_ packWARN(WARN_MISC),
1571 "Unrecognized escape \\%c passed through",
1573 /* default action is to copy the quoted character */
1574 goto default_action;
1577 /* \132 indicates an octal constant */
1578 case '0': case '1': case '2': case '3':
1579 case '4': case '5': case '6': case '7':
1583 uv = grok_oct(s, &len, &flags, NULL);
1586 goto NUM_ESCAPE_INSERT;
1588 /* \x24 indicates a hex constant */
1592 char* e = strchr(s, '}');
1593 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1594 PERL_SCAN_DISALLOW_PREFIX;
1599 yyerror("Missing right brace on \\x{}");
1603 uv = grok_hex(s, &len, &flags, NULL);
1609 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1610 uv = grok_hex(s, &len, &flags, NULL);
1616 /* Insert oct or hex escaped character.
1617 * There will always enough room in sv since such
1618 * escapes will be longer than any UTF-8 sequence
1619 * they can end up as. */
1621 /* We need to map to chars to ASCII before doing the tests
1624 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1625 if (!has_utf8 && uv > 255) {
1626 /* Might need to recode whatever we have
1627 * accumulated so far if it contains any
1630 * (Can't we keep track of that and avoid
1631 * this rescan? --jhi)
1635 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1636 if (!NATIVE_IS_INVARIANT(*c)) {
1641 STRLEN offset = d - SvPVX_const(sv);
1643 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1647 while (src >= (U8 *)SvPVX(sv)) {
1648 if (!NATIVE_IS_INVARIANT(*src)) {
1649 U8 ch = NATIVE_TO_ASCII(*src);
1650 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1651 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1661 if (has_utf8 || uv > 255) {
1662 d = (char*)uvchr_to_utf8((U8*)d, uv);
1664 if (PL_lex_inwhat == OP_TRANS &&
1665 PL_sublex_info.sub_op) {
1666 PL_sublex_info.sub_op->op_private |=
1667 (PL_lex_repl ? OPpTRANS_FROM_UTF
1680 /* \N{LATIN SMALL LETTER A} is a named character */
1684 char* e = strchr(s, '}');
1690 yyerror("Missing right brace on \\N{}");
1694 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1696 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1697 PERL_SCAN_DISALLOW_PREFIX;
1700 uv = grok_hex(s, &len, &flags, NULL);
1702 goto NUM_ESCAPE_INSERT;
1704 res = newSVpvn(s + 1, e - s - 1);
1705 res = new_constant( Nullch, 0, "charnames",
1706 res, Nullsv, "\\N{...}" );
1708 sv_utf8_upgrade(res);
1709 str = SvPV(res,len);
1710 #ifdef EBCDIC_NEVER_MIND
1711 /* charnames uses pack U and that has been
1712 * recently changed to do the below uni->native
1713 * mapping, so this would be redundant (and wrong,
1714 * the code point would be doubly converted).
1715 * But leave this in just in case the pack U change
1716 * gets revoked, but the semantics is still
1717 * desireable for charnames. --jhi */
1719 UV uv = utf8_to_uvchr((U8*)str, 0);
1722 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1724 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1725 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1726 str = SvPV(res, len);
1730 if (!has_utf8 && SvUTF8(res)) {
1731 const char *ostart = SvPVX_const(sv);
1732 SvCUR_set(sv, d - ostart);
1735 sv_utf8_upgrade(sv);
1736 /* this just broke our allocation above... */
1737 SvGROW(sv, (STRLEN)(send - start));
1738 d = SvPVX(sv) + SvCUR(sv);
1741 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1742 const char *odest = SvPVX_const(sv);
1744 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1745 d = SvPVX(sv) + (d - odest);
1747 Copy(str, d, len, char);
1754 yyerror("Missing braces on \\N{}");
1757 /* \c is a control character */
1766 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1769 yyerror("Missing control char name in \\c");
1773 /* printf-style backslashes, formfeeds, newlines, etc */
1775 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1778 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1781 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1784 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1787 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1790 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1793 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1799 } /* end if (backslash) */
1802 /* If we started with encoded form, or already know we want it
1803 and then encode the next character */
1804 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1806 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1807 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1810 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1811 STRLEN off = d - SvPVX_const(sv);
1812 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1814 d = (char*)uvchr_to_utf8((U8*)d, uv);
1818 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1820 } /* while loop to process each character */
1822 /* terminate the string and set up the sv */
1824 SvCUR_set(sv, d - SvPVX_const(sv));
1825 if (SvCUR(sv) >= SvLEN(sv))
1826 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1829 if (PL_encoding && !has_utf8) {
1830 sv_recode_to_utf8(sv, PL_encoding);
1836 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1837 PL_sublex_info.sub_op->op_private |=
1838 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1842 /* shrink the sv if we allocated more than we used */
1843 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1844 SvPV_shrink_to_cur(sv);
1847 /* return the substring (via yylval) only if we parsed anything */
1848 if (s > PL_bufptr) {
1849 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1850 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1852 ( PL_lex_inwhat == OP_TRANS
1854 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1857 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1864 * Returns TRUE if there's more to the expression (e.g., a subscript),
1867 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1869 * ->[ and ->{ return TRUE
1870 * { and [ outside a pattern are always subscripts, so return TRUE
1871 * if we're outside a pattern and it's not { or [, then return FALSE
1872 * if we're in a pattern and the first char is a {
1873 * {4,5} (any digits around the comma) returns FALSE
1874 * if we're in a pattern and the first char is a [
1876 * [SOMETHING] has a funky algorithm to decide whether it's a
1877 * character class or not. It has to deal with things like
1878 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1879 * anything else returns TRUE
1882 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1885 S_intuit_more(pTHX_ register char *s)
1887 if (PL_lex_brackets)
1889 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1891 if (*s != '{' && *s != '[')
1896 /* In a pattern, so maybe we have {n,m}. */
1913 /* On the other hand, maybe we have a character class */
1916 if (*s == ']' || *s == '^')
1919 /* this is terrifying, and it works */
1920 int weight = 2; /* let's weigh the evidence */
1922 unsigned char un_char = 255, last_un_char;
1923 const char *send = strchr(s,']');
1924 char tmpbuf[sizeof PL_tokenbuf * 4];
1926 if (!send) /* has to be an expression */
1929 Zero(seen,256,char);
1932 else if (isDIGIT(*s)) {
1934 if (isDIGIT(s[1]) && s[2] == ']')
1940 for (; s < send; s++) {
1941 last_un_char = un_char;
1942 un_char = (unsigned char)*s;
1947 weight -= seen[un_char] * 10;
1948 if (isALNUM_lazy_if(s+1,UTF)) {
1949 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1950 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1955 else if (*s == '$' && s[1] &&
1956 strchr("[#!%*<>()-=",s[1])) {
1957 if (/*{*/ strchr("])} =",s[2]))
1966 if (strchr("wds]",s[1]))
1968 else if (seen['\''] || seen['"'])
1970 else if (strchr("rnftbxcav",s[1]))
1972 else if (isDIGIT(s[1])) {
1974 while (s[1] && isDIGIT(s[1]))
1984 if (strchr("aA01! ",last_un_char))
1986 if (strchr("zZ79~",s[1]))
1988 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1989 weight -= 5; /* cope with negative subscript */
1992 if (!isALNUM(last_un_char)
1993 && !(last_un_char == '$' || last_un_char == '@'
1994 || last_un_char == '&')
1995 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2000 if (keyword(tmpbuf, d - tmpbuf))
2003 if (un_char == last_un_char + 1)
2005 weight -= seen[un_char];
2010 if (weight >= 0) /* probably a character class */
2020 * Does all the checking to disambiguate
2022 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2023 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2025 * First argument is the stuff after the first token, e.g. "bar".
2027 * Not a method if bar is a filehandle.
2028 * Not a method if foo is a subroutine prototyped to take a filehandle.
2029 * Not a method if it's really "Foo $bar"
2030 * Method if it's "foo $bar"
2031 * Not a method if it's really "print foo $bar"
2032 * Method if it's really "foo package::" (interpreted as package->foo)
2033 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2034 * Not a method if bar is a filehandle or package, but is quoted with
2039 S_intuit_method(pTHX_ char *start, GV *gv)
2041 char *s = start + (*start == '$');
2042 char tmpbuf[sizeof PL_tokenbuf];
2050 if ((cv = GvCVu(gv))) {
2051 const char *proto = SvPVX_const(cv);
2061 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2062 /* start is the beginning of the possible filehandle/object,
2063 * and s is the end of it
2064 * tmpbuf is a copy of it
2067 if (*start == '$') {
2068 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2073 return *s == '(' ? FUNCMETH : METHOD;
2075 if (!keyword(tmpbuf, len)) {
2076 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2081 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
2082 if (indirgv && GvCVu(indirgv))
2084 /* filehandle or package name makes it a method */
2085 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2087 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2088 return 0; /* no assumptions -- "=>" quotes bearword */
2090 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2091 newSVpvn(tmpbuf,len));
2092 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2096 return *s == '(' ? FUNCMETH : METHOD;
2104 * Return a string of Perl code to load the debugger. If PERL5DB
2105 * is set, it will return the contents of that, otherwise a
2106 * compile-time require of perl5db.pl.
2113 const char *pdb = PerlEnv_getenv("PERL5DB");
2117 SETERRNO(0,SS_NORMAL);
2118 return "BEGIN { require 'perl5db.pl' }";
2124 /* Encoded script support. filter_add() effectively inserts a
2125 * 'pre-processing' function into the current source input stream.
2126 * Note that the filter function only applies to the current source file
2127 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2129 * The datasv parameter (which may be NULL) can be used to pass
2130 * private data to this instance of the filter. The filter function
2131 * can recover the SV using the FILTER_DATA macro and use it to
2132 * store private buffers and state information.
2134 * The supplied datasv parameter is upgraded to a PVIO type
2135 * and the IoDIRP/IoANY field is used to store the function pointer,
2136 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2137 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2138 * private use must be set using malloc'd pointers.
2142 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2149 if (!PL_rsfp_filters)
2150 PL_rsfp_filters = newAV();
2152 datasv = NEWSV(255,0);
2153 SvUPGRADE(datasv, SVt_PVIO);
2155 IoANY(datasv) = u.iop; /* stash funcp into spare field */
2156 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2157 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2158 (void*)u.iop, SvPV_nolen(datasv)));
2159 av_unshift(PL_rsfp_filters, 1);
2160 av_store(PL_rsfp_filters, 0, datasv) ;
2165 /* Delete most recently added instance of this filter function. */
2167 Perl_filter_del(pTHX_ filter_t funcp)
2174 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)u.iop));
2176 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2178 /* if filter is on top of stack (usual case) just pop it off */
2179 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2180 u.iop = IoANY(datasv);
2181 if (u.filter == funcp) {
2182 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2183 IoANY(datasv) = (void *)NULL;
2184 sv_free(av_pop(PL_rsfp_filters));
2188 /* we need to search for the correct entry and clear it */
2189 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2193 /* Invoke the idxth filter function for the current rsfp. */
2194 /* maxlen 0 = read one text line */
2196 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2202 if (!PL_rsfp_filters)
2204 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2205 /* Provide a default input filter to make life easy. */
2206 /* Note that we append to the line. This is handy. */
2207 DEBUG_P(PerlIO_printf(Perl_debug_log,
2208 "filter_read %d: from rsfp\n", idx));
2212 const int old_len = SvCUR(buf_sv);
2214 /* ensure buf_sv is large enough */
2215 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2216 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2217 if (PerlIO_error(PL_rsfp))
2218 return -1; /* error */
2220 return 0 ; /* end of file */
2222 SvCUR_set(buf_sv, old_len + len) ;
2225 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2226 if (PerlIO_error(PL_rsfp))
2227 return -1; /* error */
2229 return 0 ; /* end of file */
2232 return SvCUR(buf_sv);
2234 /* Skip this filter slot if filter has been deleted */
2235 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2236 DEBUG_P(PerlIO_printf(Perl_debug_log,
2237 "filter_read %d: skipped (filter deleted)\n",
2239 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2241 /* Get function pointer hidden within datasv */
2242 u.iop = IoANY(datasv);
2244 DEBUG_P(PerlIO_printf(Perl_debug_log,
2245 "filter_read %d: via function %p (%s)\n",
2246 idx, (void*)u.iop, SvPV_nolen(datasv)));
2247 /* Call function. The function is expected to */
2248 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2249 /* Return: <0:error, =0:eof, >0:not eof */
2250 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2254 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2256 #ifdef PERL_CR_FILTER
2257 if (!PL_rsfp_filters) {
2258 filter_add(S_cr_textfilter,NULL);
2261 if (PL_rsfp_filters) {
2263 SvCUR_set(sv, 0); /* start with empty line */
2264 if (FILTER_READ(0, sv, 0) > 0)
2265 return ( SvPVX(sv) ) ;
2270 return (sv_gets(sv, fp, append));
2274 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2278 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2282 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2283 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2285 return GvHV(gv); /* Foo:: */
2288 /* use constant CLASS => 'MyClass' */
2289 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2291 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2292 pkgname = SvPV_nolen(sv);
2296 return gv_stashpv(pkgname, FALSE);
2300 static const char* const exp_name[] =
2301 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2302 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2309 Works out what to call the token just pulled out of the input
2310 stream. The yacc parser takes care of taking the ops we return and
2311 stitching them into a tree.
2317 if read an identifier
2318 if we're in a my declaration
2319 croak if they tried to say my($foo::bar)
2320 build the ops for a my() declaration
2321 if it's an access to a my() variable
2322 are we in a sort block?
2323 croak if my($a); $a <=> $b
2324 build ops for access to a my() variable
2325 if in a dq string, and they've said @foo and we can't find @foo
2327 build ops for a bareword
2328 if we already built the token before, use it.
2333 #pragma segment Perl_yylex
2338 register char *s = PL_bufptr;
2345 I32 orig_keyword = 0;
2348 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2349 lex_state_names[PL_lex_state]);
2351 /* check if there's an identifier for us to look at */
2352 if (PL_pending_ident)
2353 return REPORT(S_pending_ident(aTHX));
2355 /* no identifier pending identification */
2357 switch (PL_lex_state) {
2359 case LEX_NORMAL: /* Some compilers will produce faster */
2360 case LEX_INTERPNORMAL: /* code if we comment these out. */
2364 /* when we've already built the next token, just pull it out of the queue */
2367 yylval = PL_nextval[PL_nexttoke];
2369 PL_lex_state = PL_lex_defer;
2370 PL_expect = PL_lex_expect;
2371 PL_lex_defer = LEX_NORMAL;
2373 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2374 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2375 (IV)PL_nexttype[PL_nexttoke]); });
2377 return REPORT(PL_nexttype[PL_nexttoke]);
2379 /* interpolated case modifiers like \L \U, including \Q and \E.
2380 when we get here, PL_bufptr is at the \
2382 case LEX_INTERPCASEMOD:
2384 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2385 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2387 /* handle \E or end of string */
2388 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2390 if (PL_lex_casemods) {
2391 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2392 PL_lex_casestack[PL_lex_casemods] = '\0';
2394 if (PL_bufptr != PL_bufend
2395 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2397 PL_lex_state = LEX_INTERPCONCAT;
2401 if (PL_bufptr != PL_bufend)
2403 PL_lex_state = LEX_INTERPCONCAT;
2407 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2408 "### Saw case modifier at '%s'\n", PL_bufptr); });
2410 if (s[1] == '\\' && s[2] == 'E') {
2412 PL_lex_state = LEX_INTERPCONCAT;
2416 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2417 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2418 if ((*s == 'L' || *s == 'U') &&
2419 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2420 PL_lex_casestack[--PL_lex_casemods] = '\0';
2423 if (PL_lex_casemods > 10)
2424 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2425 PL_lex_casestack[PL_lex_casemods++] = *s;
2426 PL_lex_casestack[PL_lex_casemods] = '\0';
2427 PL_lex_state = LEX_INTERPCONCAT;
2428 PL_nextval[PL_nexttoke].ival = 0;
2431 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2433 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2435 PL_nextval[PL_nexttoke].ival = OP_LC;
2437 PL_nextval[PL_nexttoke].ival = OP_UC;
2439 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2441 Perl_croak(aTHX_ "panic: yylex");
2445 if (PL_lex_starts) {
2448 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2449 if (PL_lex_casemods == 1 && PL_lex_inpat)
2458 case LEX_INTERPPUSH:
2459 return REPORT(sublex_push());
2461 case LEX_INTERPSTART:
2462 if (PL_bufptr == PL_bufend)
2463 return REPORT(sublex_done());
2464 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2465 "### Interpolated variable at '%s'\n", PL_bufptr); });
2467 PL_lex_dojoin = (*PL_bufptr == '@');
2468 PL_lex_state = LEX_INTERPNORMAL;
2469 if (PL_lex_dojoin) {
2470 PL_nextval[PL_nexttoke].ival = 0;
2472 force_ident("\"", '$');
2473 PL_nextval[PL_nexttoke].ival = 0;
2475 PL_nextval[PL_nexttoke].ival = 0;
2477 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2480 if (PL_lex_starts++) {
2482 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2483 if (!PL_lex_casemods && PL_lex_inpat)
2490 case LEX_INTERPENDMAYBE:
2491 if (intuit_more(PL_bufptr)) {
2492 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2498 if (PL_lex_dojoin) {
2499 PL_lex_dojoin = FALSE;
2500 PL_lex_state = LEX_INTERPCONCAT;
2503 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2504 && SvEVALED(PL_lex_repl))
2506 if (PL_bufptr != PL_bufend)
2507 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2508 PL_lex_repl = Nullsv;
2511 case LEX_INTERPCONCAT:
2513 if (PL_lex_brackets)
2514 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2516 if (PL_bufptr == PL_bufend)
2517 return REPORT(sublex_done());
2519 if (SvIVX(PL_linestr) == '\'') {
2520 SV *sv = newSVsv(PL_linestr);
2523 else if ( PL_hints & HINT_NEW_RE )
2524 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2525 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2529 s = scan_const(PL_bufptr);
2531 PL_lex_state = LEX_INTERPCASEMOD;
2533 PL_lex_state = LEX_INTERPSTART;
2536 if (s != PL_bufptr) {
2537 PL_nextval[PL_nexttoke] = yylval;
2540 if (PL_lex_starts++) {
2541 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2542 if (!PL_lex_casemods && PL_lex_inpat)
2555 PL_lex_state = LEX_NORMAL;
2556 s = scan_formline(PL_bufptr);
2557 if (!PL_lex_formbrack)
2563 PL_oldoldbufptr = PL_oldbufptr;
2566 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
2567 exp_name[PL_expect], s);
2573 if (isIDFIRST_lazy_if(s,UTF))
2575 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2578 goto fake_eof; /* emulate EOF on ^D or ^Z */
2583 if (PL_lex_brackets) {
2584 if (PL_lex_formbrack)
2585 yyerror("Format not terminated");
2587 yyerror("Missing right curly or square bracket");
2589 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2590 "### Tokener got EOF\n");
2594 if (s++ < PL_bufend)
2595 goto retry; /* ignore stray nulls */
2598 if (!PL_in_eval && !PL_preambled) {
2599 PL_preambled = TRUE;
2600 sv_setpv(PL_linestr,incl_perldb());
2601 if (SvCUR(PL_linestr))
2602 sv_catpvn(PL_linestr,";", 1);
2604 while(AvFILLp(PL_preambleav) >= 0) {
2605 SV *tmpsv = av_shift(PL_preambleav);
2606 sv_catsv(PL_linestr, tmpsv);
2607 sv_catpvn(PL_linestr, ";", 1);
2610 sv_free((SV*)PL_preambleav);
2611 PL_preambleav = NULL;
2613 if (PL_minus_n || PL_minus_p) {
2614 sv_catpv(PL_linestr, "LINE: while (<>) {");
2616 sv_catpv(PL_linestr,"chomp;");
2619 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2620 || *PL_splitstr == '"')
2621 && strchr(PL_splitstr + 1, *PL_splitstr))
2622 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2624 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2625 bytes can be used as quoting characters. :-) */
2626 /* The count here deliberately includes the NUL
2627 that terminates the C string constant. This
2628 embeds the opening NUL into the string. */
2629 const char *splits = PL_splitstr;
2630 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2633 if (*splits == '\\')
2634 sv_catpvn(PL_linestr, splits, 1);
2635 sv_catpvn(PL_linestr, splits, 1);
2636 } while (*splits++);
2637 /* This loop will embed the trailing NUL of
2638 PL_linestr as the last thing it does before
2640 sv_catpvn(PL_linestr, ");", 2);
2644 sv_catpv(PL_linestr,"our @F=split(' ');");
2647 sv_catpvn(PL_linestr, "\n", 1);
2648 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2649 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2650 PL_last_lop = PL_last_uni = Nullch;
2651 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2652 SV *sv = NEWSV(85,0);
2654 sv_upgrade(sv, SVt_PVMG);
2655 sv_setsv(sv,PL_linestr);
2658 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2663 bof = PL_rsfp ? TRUE : FALSE;
2664 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2667 if (PL_preprocess && !PL_in_eval)
2668 (void)PerlProc_pclose(PL_rsfp);
2669 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2670 PerlIO_clearerr(PL_rsfp);
2672 (void)PerlIO_close(PL_rsfp);
2674 PL_doextract = FALSE;
2676 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2677 sv_setpv(PL_linestr,PL_minus_p
2678 ? ";}continue{print;}" : ";}");
2679 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2680 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2681 PL_last_lop = PL_last_uni = Nullch;
2682 PL_minus_n = PL_minus_p = 0;
2685 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2686 PL_last_lop = PL_last_uni = Nullch;
2687 sv_setpvn(PL_linestr,"",0);
2688 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2690 /* If it looks like the start of a BOM or raw UTF-16,
2691 * check if it in fact is. */
2697 #ifdef PERLIO_IS_STDIO
2698 # ifdef __GNU_LIBRARY__
2699 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2700 # define FTELL_FOR_PIPE_IS_BROKEN
2704 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2705 # define FTELL_FOR_PIPE_IS_BROKEN
2710 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2711 /* This loses the possibility to detect the bof
2712 * situation on perl -P when the libc5 is being used.
2713 * Workaround? Maybe attach some extra state to PL_rsfp?
2716 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2718 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2721 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2722 s = swallow_bom((U8*)s);
2726 /* Incest with pod. */
2727 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2728 sv_setpvn(PL_linestr, "", 0);
2729 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2731 PL_last_lop = PL_last_uni = Nullch;
2732 PL_doextract = FALSE;
2736 } while (PL_doextract);
2737 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2738 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2739 SV *sv = NEWSV(85,0);
2741 sv_upgrade(sv, SVt_PVMG);
2742 sv_setsv(sv,PL_linestr);
2745 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2747 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2748 PL_last_lop = PL_last_uni = Nullch;
2749 if (CopLINE(PL_curcop) == 1) {
2750 while (s < PL_bufend && isSPACE(*s))
2752 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2756 if (*s == '#' && *(s+1) == '!')
2758 #ifdef ALTERNATE_SHEBANG
2760 static char const as[] = ALTERNATE_SHEBANG;
2761 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2762 d = s + (sizeof(as) - 1);
2764 #endif /* ALTERNATE_SHEBANG */
2773 while (*d && !isSPACE(*d))
2777 #ifdef ARG_ZERO_IS_SCRIPT
2778 if (ipathend > ipath) {
2780 * HP-UX (at least) sets argv[0] to the script name,
2781 * which makes $^X incorrect. And Digital UNIX and Linux,
2782 * at least, set argv[0] to the basename of the Perl
2783 * interpreter. So, having found "#!", we'll set it right.
2785 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2786 assert(SvPOK(x) || SvGMAGICAL(x));
2787 if (sv_eq(x, CopFILESV(PL_curcop))) {
2788 sv_setpvn(x, ipath, ipathend - ipath);
2794 const char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2795 const char *lstart = SvPV(x,llen);
2797 bstart += blen - llen;
2798 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2799 sv_setpvn(x, ipath, ipathend - ipath);
2804 TAINT_NOT; /* $^X is always tainted, but that's OK */
2806 #endif /* ARG_ZERO_IS_SCRIPT */
2811 d = instr(s,"perl -");
2813 d = instr(s,"perl");
2815 /* avoid getting into infinite loops when shebang
2816 * line contains "Perl" rather than "perl" */
2818 for (d = ipathend-4; d >= ipath; --d) {
2819 if ((*d == 'p' || *d == 'P')
2820 && !ibcmp(d, "perl", 4))
2830 #ifdef ALTERNATE_SHEBANG
2832 * If the ALTERNATE_SHEBANG on this system starts with a
2833 * character that can be part of a Perl expression, then if
2834 * we see it but not "perl", we're probably looking at the
2835 * start of Perl code, not a request to hand off to some
2836 * other interpreter. Similarly, if "perl" is there, but
2837 * not in the first 'word' of the line, we assume the line
2838 * contains the start of the Perl program.
2840 if (d && *s != '#') {
2841 const char *c = ipath;
2842 while (*c && !strchr("; \t\r\n\f\v#", *c))
2845 d = Nullch; /* "perl" not in first word; ignore */
2847 *s = '#'; /* Don't try to parse shebang line */
2849 #endif /* ALTERNATE_SHEBANG */
2850 #ifndef MACOS_TRADITIONAL
2855 !instr(s,"indir") &&
2856 instr(PL_origargv[0],"perl"))
2863 while (s < PL_bufend && isSPACE(*s))
2865 if (s < PL_bufend) {
2866 Newz(899,newargv,PL_origargc+3,char*);
2868 while (s < PL_bufend && !isSPACE(*s))
2871 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2874 newargv = PL_origargv;
2877 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2879 Perl_croak(aTHX_ "Can't exec %s", ipath);
2883 const U32 oldpdb = PL_perldb;
2884 const bool oldn = PL_minus_n;
2885 const bool oldp = PL_minus_p;
2887 while (*d && !isSPACE(*d)) d++;
2888 while (SPACE_OR_TAB(*d)) d++;
2891 const bool switches_done = PL_doswitches;
2893 if (*d == 'M' || *d == 'm' || *d == 'C') {
2895 while (*d && !isSPACE(*d)) d++;
2896 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2899 d = moreswitches(d);
2901 if (PL_doswitches && !switches_done) {
2902 int argc = PL_origargc;
2903 char **argv = PL_origargv;
2906 } while (argc && argv[0][0] == '-' && argv[0][1]);
2907 init_argv_symbols(argc,argv);
2909 if ((PERLDB_LINE && !oldpdb) ||
2910 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2911 /* if we have already added "LINE: while (<>) {",
2912 we must not do it again */
2914 sv_setpvn(PL_linestr, "", 0);
2915 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2916 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2917 PL_last_lop = PL_last_uni = Nullch;
2918 PL_preambled = FALSE;
2920 (void)gv_fetchfile(PL_origfilename);
2923 if (PL_doswitches && !switches_done) {
2924 int argc = PL_origargc;
2925 char **argv = PL_origargv;
2928 } while (argc && argv[0][0] == '-' && argv[0][1]);
2929 init_argv_symbols(argc,argv);
2935 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2937 PL_lex_state = LEX_FORMLINE;
2942 #ifdef PERL_STRICT_CR
2943 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2945 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2947 case ' ': case '\t': case '\f': case 013:
2948 #ifdef MACOS_TRADITIONAL
2955 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2956 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2957 /* handle eval qq[#line 1 "foo"\n ...] */
2958 CopLINE_dec(PL_curcop);
2962 while (s < d && *s != '\n')
2966 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2967 Perl_croak(aTHX_ "panic: input overflow");
2969 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2971 PL_lex_state = LEX_FORMLINE;
2981 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2988 while (s < PL_bufend && SPACE_OR_TAB(*s))
2991 if (strnEQ(s,"=>",2)) {
2992 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2993 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2994 "### Saw unary minus before =>, forcing word '%s'\n", s);
2996 OPERATOR('-'); /* unary minus */
2998 PL_last_uni = PL_oldbufptr;
3000 case 'r': ftst = OP_FTEREAD; break;
3001 case 'w': ftst = OP_FTEWRITE; break;
3002 case 'x': ftst = OP_FTEEXEC; break;
3003 case 'o': ftst = OP_FTEOWNED; break;
3004 case 'R': ftst = OP_FTRREAD; break;
3005 case 'W': ftst = OP_FTRWRITE; break;
3006 case 'X': ftst = OP_FTREXEC; break;
3007 case 'O': ftst = OP_FTROWNED; break;
3008 case 'e': ftst = OP_FTIS; break;
3009 case 'z': ftst = OP_FTZERO; break;
3010 case 's': ftst = OP_FTSIZE; break;
3011 case 'f': ftst = OP_FTFILE; break;
3012 case 'd': ftst = OP_FTDIR; break;
3013 case 'l': ftst = OP_FTLINK; break;
3014 case 'p': ftst = OP_FTPIPE; break;
3015 case 'S': ftst = OP_FTSOCK; break;
3016 case 'u': ftst = OP_FTSUID; break;
3017 case 'g': ftst = OP_FTSGID; break;
3018 case 'k': ftst = OP_FTSVTX; break;
3019 case 'b': ftst = OP_FTBLK; break;
3020 case 'c': ftst = OP_FTCHR; break;
3021 case 't': ftst = OP_FTTTY; break;
3022 case 'T': ftst = OP_FTTEXT; break;
3023 case 'B': ftst = OP_FTBINARY; break;
3024 case 'M': case 'A': case 'C':
3025 gv_fetchpv("\024",TRUE, SVt_PV);
3027 case 'M': ftst = OP_FTMTIME; break;
3028 case 'A': ftst = OP_FTATIME; break;
3029 case 'C': ftst = OP_FTCTIME; break;
3037 PL_last_lop_op = (OPCODE)ftst;
3038 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3039 "### Saw file test %c\n", (int)ftst);
3044 /* Assume it was a minus followed by a one-letter named
3045 * subroutine call (or a -bareword), then. */
3046 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3047 "### '-%c' looked like a file test but was not\n",
3056 if (PL_expect == XOPERATOR)
3061 else if (*s == '>') {
3064 if (isIDFIRST_lazy_if(s,UTF)) {
3065 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3073 if (PL_expect == XOPERATOR)
3076 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3078 OPERATOR('-'); /* unary minus */
3085 if (PL_expect == XOPERATOR)
3090 if (PL_expect == XOPERATOR)
3093 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3099 if (PL_expect != XOPERATOR) {
3100 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3101 PL_expect = XOPERATOR;
3102 force_ident(PL_tokenbuf, '*');
3115 if (PL_expect == XOPERATOR) {
3119 PL_tokenbuf[0] = '%';
3120 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3121 if (!PL_tokenbuf[1]) {
3124 PL_pending_ident = '%';
3143 switch (PL_expect) {
3146 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3148 PL_bufptr = s; /* update in case we back off */
3154 PL_expect = XTERMBLOCK;
3158 while (isIDFIRST_lazy_if(s,UTF)) {
3159 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3160 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3161 if (tmp < 0) tmp = -tmp;
3177 d = scan_str(d,TRUE,TRUE);
3179 /* MUST advance bufptr here to avoid bogus
3180 "at end of line" context messages from yyerror().
3182 PL_bufptr = s + len;
3183 yyerror("Unterminated attribute parameter in attribute list");
3186 return REPORT(0); /* EOF indicator */
3190 SV *sv = newSVpvn(s, len);
3191 sv_catsv(sv, PL_lex_stuff);
3192 attrs = append_elem(OP_LIST, attrs,
3193 newSVOP(OP_CONST, 0, sv));
3194 SvREFCNT_dec(PL_lex_stuff);
3195 PL_lex_stuff = Nullsv;
3198 if (len == 6 && strnEQ(s, "unique", len)) {
3199 if (PL_in_my == KEY_our)
3201 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3203 ; /* skip to avoid loading attributes.pm */
3206 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3209 /* NOTE: any CV attrs applied here need to be part of
3210 the CVf_BUILTIN_ATTRS define in cv.h! */
3211 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3212 CvLVALUE_on(PL_compcv);
3213 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3214 CvLOCKED_on(PL_compcv);
3215 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3216 CvMETHOD_on(PL_compcv);
3217 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3218 CvASSERTION_on(PL_compcv);
3219 /* After we've set the flags, it could be argued that
3220 we don't need to do the attributes.pm-based setting
3221 process, and shouldn't bother appending recognized
3222 flags. To experiment with that, uncomment the
3223 following "else". (Note that's already been
3224 uncommented. That keeps the above-applied built-in
3225 attributes from being intercepted (and possibly
3226 rejected) by a package's attribute routines, but is
3227 justified by the performance win for the common case
3228 of applying only built-in attributes.) */
3230 attrs = append_elem(OP_LIST, attrs,
3231 newSVOP(OP_CONST, 0,
3235 if (*s == ':' && s[1] != ':')
3238 break; /* require real whitespace or :'s */
3240 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3241 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3242 const char q = ((*s == '\'') ? '"' : '\'');
3243 /* If here for an expression, and parsed no attrs, back off. */
3244 if (tmp == '=' && !attrs) {
3248 /* MUST advance bufptr here to avoid bogus "at end of line"
3249 context messages from yyerror().
3253 yyerror("Unterminated attribute list");
3255 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3263 PL_nextval[PL_nexttoke].opval = attrs;
3271 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3272 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3289 if (PL_lex_brackets <= 0)
3290 yyerror("Unmatched right square bracket");
3293 if (PL_lex_state == LEX_INTERPNORMAL) {
3294 if (PL_lex_brackets == 0) {
3295 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3296 PL_lex_state = LEX_INTERPEND;
3303 if (PL_lex_brackets > 100) {
3304 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3306 switch (PL_expect) {
3308 if (PL_lex_formbrack) {
3312 if (PL_oldoldbufptr == PL_last_lop)
3313 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3315 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3316 OPERATOR(HASHBRACK);
3318 while (s < PL_bufend && SPACE_OR_TAB(*s))
3321 PL_tokenbuf[0] = '\0';
3322 if (d < PL_bufend && *d == '-') {
3323 PL_tokenbuf[0] = '-';
3325 while (d < PL_bufend && SPACE_OR_TAB(*d))
3328 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3329 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3331 while (d < PL_bufend && SPACE_OR_TAB(*d))
3334 const char minus = (PL_tokenbuf[0] == '-');
3335 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3343 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3348 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3353 if (PL_oldoldbufptr == PL_last_lop)
3354 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3356 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3359 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3361 /* This hack is to get the ${} in the message. */
3363 yyerror("syntax error");
3366 OPERATOR(HASHBRACK);
3368 /* This hack serves to disambiguate a pair of curlies
3369 * as being a block or an anon hash. Normally, expectation
3370 * determines that, but in cases where we're not in a
3371 * position to expect anything in particular (like inside
3372 * eval"") we have to resolve the ambiguity. This code
3373 * covers the case where the first term in the curlies is a
3374 * quoted string. Most other cases need to be explicitly
3375 * disambiguated by prepending a "+" before the opening
3376 * curly in order to force resolution as an anon hash.
3378 * XXX should probably propagate the outer expectation
3379 * into eval"" to rely less on this hack, but that could
3380 * potentially break current behavior of eval"".
3384 if (*s == '\'' || *s == '"' || *s == '`') {
3385 /* common case: get past first string, handling escapes */
3386 for (t++; t < PL_bufend && *t != *s;)
3387 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3391 else if (*s == 'q') {
3394 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3397 /* skip q//-like construct */
3399 char open, close, term;
3402 while (t < PL_bufend && isSPACE(*t))
3404 /* check for q => */
3405 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3406 OPERATOR(HASHBRACK);
3410 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3414 for (t++; t < PL_bufend; t++) {
3415 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3417 else if (*t == open)
3421 for (t++; t < PL_bufend; t++) {
3422 if (*t == '\\' && t+1 < PL_bufend)
3424 else if (*t == close && --brackets <= 0)
3426 else if (*t == open)
3433 /* skip plain q word */
3434 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3437 else if (isALNUM_lazy_if(t,UTF)) {
3439 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3442 while (t < PL_bufend && isSPACE(*t))
3444 /* if comma follows first term, call it an anon hash */
3445 /* XXX it could be a comma expression with loop modifiers */
3446 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3447 || (*t == '=' && t[1] == '>')))
3448 OPERATOR(HASHBRACK);
3449 if (PL_expect == XREF)
3452 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3458 yylval.ival = CopLINE(PL_curcop);
3459 if (isSPACE(*s) || *s == '#')
3460 PL_copline = NOLINE; /* invalidate current command line number */
3465 if (PL_lex_brackets <= 0)
3466 yyerror("Unmatched right curly bracket");
3468 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3469 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3470 PL_lex_formbrack = 0;
3471 if (PL_lex_state == LEX_INTERPNORMAL) {
3472 if (PL_lex_brackets == 0) {
3473 if (PL_expect & XFAKEBRACK) {
3474 PL_expect &= XENUMMASK;
3475 PL_lex_state = LEX_INTERPEND;
3477 return yylex(); /* ignore fake brackets */
3479 if (*s == '-' && s[1] == '>')
3480 PL_lex_state = LEX_INTERPENDMAYBE;
3481 else if (*s != '[' && *s != '{')
3482 PL_lex_state = LEX_INTERPEND;
3485 if (PL_expect & XFAKEBRACK) {
3486 PL_expect &= XENUMMASK;
3488 return yylex(); /* ignore fake brackets */
3498 if (PL_expect == XOPERATOR) {
3499 if (ckWARN(WARN_SEMICOLON)
3500 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3502 CopLINE_dec(PL_curcop);
3503 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3504 CopLINE_inc(PL_curcop);
3509 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3511 PL_expect = XOPERATOR;
3512 force_ident(PL_tokenbuf, '&');
3516 yylval.ival = (OPpENTERSUB_AMPER<<8);
3535 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3538 if (PL_expect == XSTATE && isALPHA(tmp) &&
3539 (s == PL_linestart+1 || s[-2] == '\n') )
3541 if (PL_in_eval && !PL_rsfp) {
3546 if (strnEQ(s,"=cut",4)) {
3560 PL_doextract = TRUE;
3563 if (PL_lex_brackets < PL_lex_formbrack) {
3565 #ifdef PERL_STRICT_CR
3566 for (t = s; SPACE_OR_TAB(*t); t++) ;
3568 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3570 if (*t == '\n' || *t == '#') {
3582 /* was this !=~ where !~ was meant?
3583 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3585 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3586 const char *t = s+1;
3588 while (t < PL_bufend && isSPACE(*t))
3591 if (*t == '/' || *t == '?' ||
3592 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3593 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3594 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3595 "!=~ should be !~");
3604 if (PL_expect != XOPERATOR) {
3605 if (s[1] != '<' && !strchr(s,'>'))
3608 s = scan_heredoc(s);
3610 s = scan_inputsymbol(s);
3611 TERM(sublex_start());
3616 SHop(OP_LEFT_SHIFT);
3630 SHop(OP_RIGHT_SHIFT);
3639 if (PL_expect == XOPERATOR) {
3640 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3643 return REPORT(','); /* grandfather non-comma-format format */
3647 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3648 PL_tokenbuf[0] = '@';
3649 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3650 sizeof PL_tokenbuf - 1, FALSE);
3651 if (PL_expect == XOPERATOR)
3652 no_op("Array length", s);
3653 if (!PL_tokenbuf[1])
3655 PL_expect = XOPERATOR;
3656 PL_pending_ident = '#';
3660 PL_tokenbuf[0] = '$';
3661 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3662 sizeof PL_tokenbuf - 1, FALSE);
3663 if (PL_expect == XOPERATOR)
3665 if (!PL_tokenbuf[1]) {
3667 yyerror("Final $ should be \\$ or $name");
3671 /* This kludge not intended to be bulletproof. */
3672 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3673 yylval.opval = newSVOP(OP_CONST, 0,
3674 newSViv(PL_compiling.cop_arybase));
3675 yylval.opval->op_private = OPpCONST_ARYBASE;
3681 if (PL_lex_state == LEX_NORMAL)
3684 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3687 PL_tokenbuf[0] = '@';
3688 if (ckWARN(WARN_SYNTAX)) {
3690 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3693 PL_bufptr = skipspace(PL_bufptr);
3694 while (t < PL_bufend && *t != ']')
3696 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3697 "Multidimensional syntax %.*s not supported",
3698 (t - PL_bufptr) + 1, PL_bufptr);
3702 else if (*s == '{') {
3703 PL_tokenbuf[0] = '%';
3704 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3705 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3707 char tmpbuf[sizeof PL_tokenbuf];
3708 for (t++; isSPACE(*t); t++) ;
3709 if (isIDFIRST_lazy_if(t,UTF)) {
3711 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3712 for (; isSPACE(*t); t++) ;
3713 if (*t == ';' && get_cv(tmpbuf, FALSE))
3714 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3715 "You need to quote \"%s\"", tmpbuf);
3721 PL_expect = XOPERATOR;
3722 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3723 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3724 if (!islop || PL_last_lop_op == OP_GREPSTART)
3725 PL_expect = XOPERATOR;
3726 else if (strchr("$@\"'`q", *s))
3727 PL_expect = XTERM; /* e.g. print $fh "foo" */
3728 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3729 PL_expect = XTERM; /* e.g. print $fh &sub */
3730 else if (isIDFIRST_lazy_if(s,UTF)) {
3731 char tmpbuf[sizeof PL_tokenbuf];
3732 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3733 if ((tmp = keyword(tmpbuf, len))) {
3734 /* binary operators exclude handle interpretations */
3746 PL_expect = XTERM; /* e.g. print $fh length() */
3751 PL_expect = XTERM; /* e.g. print $fh subr() */
3754 else if (isDIGIT(*s))
3755 PL_expect = XTERM; /* e.g. print $fh 3 */
3756 else if (*s == '.' && isDIGIT(s[1]))
3757 PL_expect = XTERM; /* e.g. print $fh .3 */
3758 else if ((*s == '?' || *s == '-' || *s == '+')
3759 && !isSPACE(s[1]) && s[1] != '=')
3760 PL_expect = XTERM; /* e.g. print $fh -1 */
3761 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3762 PL_expect = XTERM; /* e.g. print $fh /.../
3763 XXX except DORDOR operator */
3764 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3765 PL_expect = XTERM; /* print $fh <<"EOF" */
3767 PL_pending_ident = '$';
3771 if (PL_expect == XOPERATOR)
3773 PL_tokenbuf[0] = '@';
3774 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3775 if (!PL_tokenbuf[1]) {
3778 if (PL_lex_state == LEX_NORMAL)
3780 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3782 PL_tokenbuf[0] = '%';
3784 /* Warn about @ where they meant $. */
3785 if (ckWARN(WARN_SYNTAX)) {
3786 if (*s == '[' || *s == '{') {
3787 const char *t = s + 1;
3788 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3790 if (*t == '}' || *t == ']') {
3792 PL_bufptr = skipspace(PL_bufptr);
3793 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3794 "Scalar value %.*s better written as $%.*s",
3795 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3800 PL_pending_ident = '@';
3803 case '/': /* may be division, defined-or, or pattern */
3804 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3808 case '?': /* may either be conditional or pattern */
3809 if(PL_expect == XOPERATOR) {
3817 /* A // operator. */
3827 /* Disable warning on "study /blah/" */
3828 if (PL_oldoldbufptr == PL_last_uni
3829 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3830 || memNE(PL_last_uni, "study", 5)
3831 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3834 s = scan_pat(s,OP_MATCH);
3835 TERM(sublex_start());
3839 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3840 #ifdef PERL_STRICT_CR
3843 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3845 && (s == PL_linestart || s[-1] == '\n') )
3847 PL_lex_formbrack = 0;
3851 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3857 yylval.ival = OPf_SPECIAL;
3863 if (PL_expect != XOPERATOR)
3868 case '0': case '1': case '2': case '3': case '4':
3869 case '5': case '6': case '7': case '8': case '9':
3870 s = scan_num(s, &yylval);
3871 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3872 "### Saw number in '%s'\n", s);
3874 if (PL_expect == XOPERATOR)
3879 s = scan_str(s,FALSE,FALSE);
3880 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3881 "### Saw string before '%s'\n", s);
3883 if (PL_expect == XOPERATOR) {
3884 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3887 return REPORT(','); /* grandfather non-comma-format format */
3893 missingterm((char*)0);
3894 yylval.ival = OP_CONST;
3895 TERM(sublex_start());
3898 s = scan_str(s,FALSE,FALSE);
3899 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3900 "### Saw string before '%s'\n", s);
3902 if (PL_expect == XOPERATOR) {
3903 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3906 return REPORT(','); /* grandfather non-comma-format format */
3912 missingterm((char*)0);
3913 yylval.ival = OP_CONST;
3914 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3915 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3916 yylval.ival = OP_STRINGIFY;
3920 TERM(sublex_start());
3923 s = scan_str(s,FALSE,FALSE);
3924 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3925 "### Saw backtick string before '%s'\n", s);
3927 if (PL_expect == XOPERATOR)
3928 no_op("Backticks",s);
3930 missingterm((char*)0);
3931 yylval.ival = OP_BACKTICK;
3933 TERM(sublex_start());
3937 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3938 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3940 if (PL_expect == XOPERATOR)
3941 no_op("Backslash",s);
3945 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3946 char *start = s + 2;
3947 while (isDIGIT(*start) || *start == '_')
3949 if (*start == '.' && isDIGIT(start[1])) {
3950 s = scan_num(s, &yylval);
3953 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3954 else if (!isALPHA(*start) && (PL_expect == XTERM
3955 || PL_expect == XREF || PL_expect == XSTATE
3956 || PL_expect == XTERMORDORDOR)) {
3957 const char c = *start;
3960 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3963 s = scan_num(s, &yylval);
3970 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4010 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4012 /* Some keywords can be followed by any delimiter, including ':' */
4013 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4014 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4015 (PL_tokenbuf[0] == 'q' &&
4016 strchr("qwxr", PL_tokenbuf[1])))));
4018 /* x::* is just a word, unless x is "CORE" */
4019 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4023 while (d < PL_bufend && isSPACE(*d))
4024 d++; /* no comments skipped here, or s### is misparsed */
4026 /* Is this a label? */
4027 if (!tmp && PL_expect == XSTATE
4028 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4030 yylval.pval = savepv(PL_tokenbuf);
4035 /* Check for keywords */
4036 tmp = keyword(PL_tokenbuf, len);
4038 /* Is this a word before a => operator? */
4039 if (*d == '=' && d[1] == '>') {
4042 = (OP*)newSVOP(OP_CONST, 0,
4043 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4044 yylval.opval->op_private = OPpCONST_BARE;
4048 if (tmp < 0) { /* second-class keyword? */
4049 GV *ogv = Nullgv; /* override (winner) */
4050 GV *hgv = Nullgv; /* hidden (loser) */
4051 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4053 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
4056 if (GvIMPORTED_CV(gv))
4058 else if (! CvMETHOD(cv))
4062 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4063 (gv = *gvp) != (GV*)&PL_sv_undef &&
4064 GvCVu(gv) && GvIMPORTED_CV(gv))
4071 tmp = 0; /* overridden by import or by GLOBAL */
4074 && -tmp==KEY_lock /* XXX generalizable kludge */
4076 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4078 tmp = 0; /* any sub overrides "weak" keyword */
4083 && PL_expect != XOPERATOR
4084 && PL_expect != XTERMORDORDOR)
4086 /* any sub overrides the "err" keyword, except when really an
4087 * operator is expected */
4090 else { /* no override */
4092 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4093 Perl_warner(aTHX_ packWARN(WARN_MISC),
4094 "dump() better written as CORE::dump()");
4098 if (ckWARN(WARN_AMBIGUOUS) && hgv
4099 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
4100 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4101 "Ambiguous call resolved as CORE::%s(), %s",
4102 GvENAME(hgv), "qualify as such or use &");
4109 default: /* not a keyword */
4113 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4115 /* Get the rest if it looks like a package qualifier */
4117 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4119 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4122 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4123 *s == '\'' ? "'" : "::");
4128 if (PL_expect == XOPERATOR) {
4129 if (PL_bufptr == PL_linestart) {
4130 CopLINE_dec(PL_curcop);
4131 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4132 CopLINE_inc(PL_curcop);
4135 no_op("Bareword",s);
4138 /* Look for a subroutine with this name in current package,
4139 unless name is "Foo::", in which case Foo is a bearword
4140 (and a package name). */
4143 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4145 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4146 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4147 "Bareword \"%s\" refers to nonexistent package",
4150 PL_tokenbuf[len] = '\0';
4157 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
4160 /* if we saw a global override before, get the right name */
4163 sv = newSVpvn("CORE::GLOBAL::",14);
4164 sv_catpv(sv,PL_tokenbuf);
4167 /* If len is 0, newSVpv does strlen(), which is correct.
4168 If len is non-zero, then it will be the true length,
4169 and so the scalar will be created correctly. */
4170 sv = newSVpv(PL_tokenbuf,len);
4173 /* Presume this is going to be a bareword of some sort. */
4176 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4177 yylval.opval->op_private = OPpCONST_BARE;
4178 /* UTF-8 package name? */
4179 if (UTF && !IN_BYTES &&
4180 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4183 /* And if "Foo::", then that's what it certainly is. */
4188 /* See if it's the indirect object for a list operator. */
4190 if (PL_oldoldbufptr &&
4191 PL_oldoldbufptr < PL_bufptr &&
4192 (PL_oldoldbufptr == PL_last_lop
4193 || PL_oldoldbufptr == PL_last_uni) &&
4194 /* NO SKIPSPACE BEFORE HERE! */
4195 (PL_expect == XREF ||
4196 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4198 bool immediate_paren = *s == '(';
4200 /* (Now we can afford to cross potential line boundary.) */
4203 /* Two barewords in a row may indicate method call. */
4205 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
4208 /* If not a declared subroutine, it's an indirect object. */
4209 /* (But it's an indir obj regardless for sort.) */
4211 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4212 ((!gv || !GvCVu(gv)) &&
4213 (PL_last_lop_op != OP_MAPSTART &&
4214 PL_last_lop_op != OP_GREPSTART))))
4216 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4221 PL_expect = XOPERATOR;
4224 /* Is this a word before a => operator? */
4225 if (*s == '=' && s[1] == '>' && !pkgname) {
4227 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4228 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4229 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4233 /* If followed by a paren, it's certainly a subroutine. */
4236 if (gv && GvCVu(gv)) {
4237 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4238 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4243 PL_nextval[PL_nexttoke].opval = yylval.opval;
4244 PL_expect = XOPERATOR;
4250 /* If followed by var or block, call it a method (unless sub) */
4252 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4253 PL_last_lop = PL_oldbufptr;
4254 PL_last_lop_op = OP_METHOD;
4258 /* If followed by a bareword, see if it looks like indir obj. */
4261 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4262 && (tmp = intuit_method(s,gv)))
4265 /* Not a method, so call it a subroutine (if defined) */
4267 if (gv && GvCVu(gv)) {
4269 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4270 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4271 "Ambiguous use of -%s resolved as -&%s()",
4272 PL_tokenbuf, PL_tokenbuf);
4273 /* Check for a constant sub */
4275 if ((sv = cv_const_sv(cv))) {
4277 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4278 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4279 yylval.opval->op_private = 0;
4283 /* Resolve to GV now. */
4284 op_free(yylval.opval);
4285 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4286 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4287 PL_last_lop = PL_oldbufptr;
4288 PL_last_lop_op = OP_ENTERSUB;
4289 /* Is there a prototype? */
4292 char *proto = SvPV((SV*)cv, len);
4295 if (*proto == '$' && proto[1] == '\0')
4297 while (*proto == ';')
4299 if (*proto == '&' && *s == '{') {
4300 sv_setpv(PL_subname, PL_curstash ?
4301 "__ANON__" : "__ANON__::__ANON__");
4305 PL_nextval[PL_nexttoke].opval = yylval.opval;
4311 /* Call it a bare word */
4313 if (PL_hints & HINT_STRICT_SUBS)
4314 yylval.opval->op_private |= OPpCONST_STRICT;
4317 if (ckWARN(WARN_RESERVED)) {
4318 if (lastchar != '-') {
4319 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4320 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4321 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4328 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4329 && ckWARN_d(WARN_AMBIGUOUS)) {
4330 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4331 "Operator or semicolon missing before %c%s",
4332 lastchar, PL_tokenbuf);
4333 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4334 "Ambiguous use of %c resolved as operator %c",
4335 lastchar, lastchar);
4341 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4342 newSVpv(CopFILE(PL_curcop),0));
4346 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4347 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4350 case KEY___PACKAGE__:
4351 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4353 ? newSVhek(HvNAME_HEK(PL_curstash))
4362 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4363 const char *pname = "main";
4364 if (PL_tokenbuf[2] == 'D')
4365 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4366 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4369 GvIOp(gv) = newIO();
4370 IoIFP(GvIOp(gv)) = PL_rsfp;
4371 #if defined(HAS_FCNTL) && defined(F_SETFD)
4373 const int fd = PerlIO_fileno(PL_rsfp);
4374 fcntl(fd,F_SETFD,fd >= 3);
4377 /* Mark this internal pseudo-handle as clean */
4378 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4380 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4381 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4382 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4384 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4385 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4386 /* if the script was opened in binmode, we need to revert
4387 * it to text mode for compatibility; but only iff it has CRs
4388 * XXX this is a questionable hack at best. */
4389 if (PL_bufend-PL_bufptr > 2
4390 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4393 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4394 loc = PerlIO_tell(PL_rsfp);
4395 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4398 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4400 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4401 #endif /* NETWARE */
4402 #ifdef PERLIO_IS_STDIO /* really? */
4403 # if defined(__BORLANDC__)
4404 /* XXX see note in do_binmode() */
4405 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4409 PerlIO_seek(PL_rsfp, loc, 0);
4413 #ifdef PERLIO_LAYERS
4416 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4417 else if (PL_encoding) {
4424 XPUSHs(PL_encoding);
4426 call_method("name", G_SCALAR);
4430 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4431 Perl_form(aTHX_ ":encoding(%"SVf")",
4449 if (PL_expect == XSTATE) {
4456 if (*s == ':' && s[1] == ':') {
4459 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4460 if (!(tmp = keyword(PL_tokenbuf, len)))
4461 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4475 LOP(OP_ACCEPT,XTERM);
4481 LOP(OP_ATAN2,XTERM);
4487 LOP(OP_BINMODE,XTERM);
4490 LOP(OP_BLESS,XTERM);
4499 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4516 if (!PL_cryptseen) {
4517 PL_cryptseen = TRUE;
4521 LOP(OP_CRYPT,XTERM);
4524 LOP(OP_CHMOD,XTERM);
4527 LOP(OP_CHOWN,XTERM);
4530 LOP(OP_CONNECT,XTERM);
4546 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4550 PL_hints |= HINT_BLOCK_SCOPE;
4560 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4561 LOP(OP_DBMOPEN,XTERM);
4567 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4574 yylval.ival = CopLINE(PL_curcop);
4588 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4589 UNIBRACK(OP_ENTEREVAL);
4607 case KEY_endhostent:
4613 case KEY_endservent:
4616 case KEY_endprotoent:
4627 yylval.ival = CopLINE(PL_curcop);
4629 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4631 if ((PL_bufend - p) >= 3 &&
4632 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4634 else if ((PL_bufend - p) >= 4 &&
4635 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4638 if (isIDFIRST_lazy_if(p,UTF)) {
4639 p = scan_ident(p, PL_bufend,
4640 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4644 Perl_croak(aTHX_ "Missing $ on loop variable");
4649 LOP(OP_FORMLINE,XTERM);
4655 LOP(OP_FCNTL,XTERM);
4661 LOP(OP_FLOCK,XTERM);
4670 LOP(OP_GREPSTART, XREF);
4673 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4688 case KEY_getpriority:
4689 LOP(OP_GETPRIORITY,XTERM);
4691 case KEY_getprotobyname:
4694 case KEY_getprotobynumber:
4695 LOP(OP_GPBYNUMBER,XTERM);
4697 case KEY_getprotoent:
4709 case KEY_getpeername:
4710 UNI(OP_GETPEERNAME);
4712 case KEY_gethostbyname:
4715 case KEY_gethostbyaddr:
4716 LOP(OP_GHBYADDR,XTERM);
4718 case KEY_gethostent:
4721 case KEY_getnetbyname:
4724 case KEY_getnetbyaddr:
4725 LOP(OP_GNBYADDR,XTERM);
4730 case KEY_getservbyname:
4731 LOP(OP_GSBYNAME,XTERM);
4733 case KEY_getservbyport:
4734 LOP(OP_GSBYPORT,XTERM);
4736 case KEY_getservent:
4739 case KEY_getsockname:
4740 UNI(OP_GETSOCKNAME);
4742 case KEY_getsockopt:
4743 LOP(OP_GSOCKOPT,XTERM);
4765 yylval.ival = CopLINE(PL_curcop);
4769 LOP(OP_INDEX,XTERM);
4775 LOP(OP_IOCTL,XTERM);
4787 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4819 LOP(OP_LISTEN,XTERM);
4828 s = scan_pat(s,OP_MATCH);
4829 TERM(sublex_start());
4832 LOP(OP_MAPSTART, XREF);
4835 LOP(OP_MKDIR,XTERM);
4838 LOP(OP_MSGCTL,XTERM);
4841 LOP(OP_MSGGET,XTERM);
4844 LOP(OP_MSGRCV,XTERM);
4847 LOP(OP_MSGSND,XTERM);
4853 if (isIDFIRST_lazy_if(s,UTF)) {
4854 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4855 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4857 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4858 if (!PL_in_my_stash) {
4861 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4869 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4876 if (PL_expect != XSTATE)
4877 yyerror("\"no\" not allowed in expression");
4878 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4879 s = force_version(s, FALSE);
4884 if (*s == '(' || (s = skipspace(s), *s == '('))
4891 if (isIDFIRST_lazy_if(s,UTF)) {
4893 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4894 for (t=d; *t && isSPACE(*t); t++) ;
4895 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4897 && !(t[0] == '=' && t[1] == '>')
4899 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4900 "Precedence problem: open %.*s should be open(%.*s)",
4901 d - s, s, d - s, s);
4907 yylval.ival = OP_OR;
4917 LOP(OP_OPEN_DIR,XTERM);
4920 checkcomma(s,PL_tokenbuf,"filehandle");
4924 checkcomma(s,PL_tokenbuf,"filehandle");
4943 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4947 LOP(OP_PIPE_OP,XTERM);
4950 s = scan_str(s,FALSE,FALSE);
4952 missingterm((char*)0);
4953 yylval.ival = OP_CONST;
4954 TERM(sublex_start());
4960 s = scan_str(s,FALSE,FALSE);
4962 missingterm((char*)0);
4963 PL_expect = XOPERATOR;
4965 if (SvCUR(PL_lex_stuff)) {
4968 d = SvPV_force(PL_lex_stuff, len);
4971 for (; isSPACE(*d) && len; --len, ++d) ;
4974 if (!warned && ckWARN(WARN_QW)) {
4975 for (; !isSPACE(*d) && len; --len, ++d) {
4977 Perl_warner(aTHX_ packWARN(WARN_QW),
4978 "Possible attempt to separate words with commas");
4981 else if (*d == '#') {
4982 Perl_warner(aTHX_ packWARN(WARN_QW),
4983 "Possible attempt to put comments in qw() list");
4989 for (; !isSPACE(*d) && len; --len, ++d) ;
4991 sv = newSVpvn(b, d-b);
4992 if (DO_UTF8(PL_lex_stuff))
4994 words = append_elem(OP_LIST, words,
4995 newSVOP(OP_CONST, 0, tokeq(sv)));
4999 PL_nextval[PL_nexttoke].opval = words;
5004 SvREFCNT_dec(PL_lex_stuff);
5005 PL_lex_stuff = Nullsv;
5011 s = scan_str(s,FALSE,FALSE);
5013 missingterm((char*)0);
5014 yylval.ival = OP_STRINGIFY;
5015 if (SvIVX(PL_lex_stuff) == '\'')
5016 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5017 TERM(sublex_start());
5020 s = scan_pat(s,OP_QR);
5021 TERM(sublex_start());
5024 s = scan_str(s,FALSE,FALSE);
5026 missingterm((char*)0);
5027 yylval.ival = OP_BACKTICK;
5029 TERM(sublex_start());
5037 s = force_version(s, FALSE);
5039 else if (*s != 'v' || !isDIGIT(s[1])
5040 || (s = force_version(s, TRUE), *s == 'v'))
5042 *PL_tokenbuf = '\0';
5043 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5044 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5045 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5047 yyerror("<> should be quotes");
5055 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5059 LOP(OP_RENAME,XTERM);
5068 LOP(OP_RINDEX,XTERM);
5078 UNIDOR(OP_READLINE);
5091 LOP(OP_REVERSE,XTERM);
5094 UNIDOR(OP_READLINK);
5102 TERM(sublex_start());
5104 TOKEN(1); /* force error */
5113 LOP(OP_SELECT,XTERM);
5119 LOP(OP_SEMCTL,XTERM);
5122 LOP(OP_SEMGET,XTERM);
5125 LOP(OP_SEMOP,XTERM);
5131 LOP(OP_SETPGRP,XTERM);
5133 case KEY_setpriority:
5134 LOP(OP_SETPRIORITY,XTERM);
5136 case KEY_sethostent:
5142 case KEY_setservent:
5145 case KEY_setprotoent:
5155 LOP(OP_SEEKDIR,XTERM);
5157 case KEY_setsockopt:
5158 LOP(OP_SSOCKOPT,XTERM);
5164 LOP(OP_SHMCTL,XTERM);
5167 LOP(OP_SHMGET,XTERM);
5170 LOP(OP_SHMREAD,XTERM);
5173 LOP(OP_SHMWRITE,XTERM);
5176 LOP(OP_SHUTDOWN,XTERM);
5185 LOP(OP_SOCKET,XTERM);
5187 case KEY_socketpair:
5188 LOP(OP_SOCKPAIR,XTERM);
5191 checkcomma(s,PL_tokenbuf,"subroutine name");
5193 if (*s == ';' || *s == ')') /* probably a close */
5194 Perl_croak(aTHX_ "sort is now a reserved word");
5196 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5200 LOP(OP_SPLIT,XTERM);
5203 LOP(OP_SPRINTF,XTERM);
5206 LOP(OP_SPLICE,XTERM);
5221 LOP(OP_SUBSTR,XTERM);
5227 char tmpbuf[sizeof PL_tokenbuf];
5228 SSize_t tboffset = 0;
5229 expectation attrful;
5230 bool have_name, have_proto, bad_proto;
5231 const int key = tmp;
5235 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5236 (*s == ':' && s[1] == ':'))
5239 attrful = XATTRBLOCK;
5240 /* remember buffer pos'n for later force_word */
5241 tboffset = s - PL_oldbufptr;
5242 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5243 if (strchr(tmpbuf, ':'))
5244 sv_setpv(PL_subname, tmpbuf);
5246 sv_setsv(PL_subname,PL_curstname);
5247 sv_catpvn(PL_subname,"::",2);
5248 sv_catpvn(PL_subname,tmpbuf,len);
5255 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5256 PL_expect = XTERMBLOCK;
5257 attrful = XATTRTERM;
5258 sv_setpvn(PL_subname,"?",1);
5262 if (key == KEY_format) {
5264 PL_lex_formbrack = PL_lex_brackets + 1;
5266 (void) force_word(PL_oldbufptr + tboffset, WORD,
5271 /* Look for a prototype */
5275 s = scan_str(s,FALSE,FALSE);
5277 Perl_croak(aTHX_ "Prototype not terminated");
5278 /* strip spaces and check for bad characters */
5279 d = SvPVX(PL_lex_stuff);
5282 for (p = d; *p; ++p) {
5285 if (!strchr("$@%*;[]&\\", *p))
5290 if (bad_proto && ckWARN(WARN_SYNTAX))
5291 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5292 "Illegal character in prototype for %"SVf" : %s",
5294 SvCUR_set(PL_lex_stuff, tmp);
5302 if (*s == ':' && s[1] != ':')
5303 PL_expect = attrful;
5304 else if (*s != '{' && key == KEY_sub) {
5306 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5308 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5312 PL_nextval[PL_nexttoke].opval =
5313 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5314 PL_lex_stuff = Nullsv;
5318 sv_setpv(PL_subname,
5319 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5322 (void) force_word(PL_oldbufptr + tboffset, WORD,
5331 LOP(OP_SYSTEM,XREF);
5334 LOP(OP_SYMLINK,XTERM);
5337 LOP(OP_SYSCALL,XTERM);
5340 LOP(OP_SYSOPEN,XTERM);
5343 LOP(OP_SYSSEEK,XTERM);
5346 LOP(OP_SYSREAD,XTERM);
5349 LOP(OP_SYSWRITE,XTERM);
5353 TERM(sublex_start());
5374 LOP(OP_TRUNCATE,XTERM);
5386 yylval.ival = CopLINE(PL_curcop);
5390 yylval.ival = CopLINE(PL_curcop);
5394 LOP(OP_UNLINK,XTERM);
5400 LOP(OP_UNPACK,XTERM);
5403 LOP(OP_UTIME,XTERM);
5409 LOP(OP_UNSHIFT,XTERM);
5412 if (PL_expect != XSTATE)
5413 yyerror("\"use\" not allowed in expression");
5415 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5416 s = force_version(s, TRUE);
5417 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5418 PL_nextval[PL_nexttoke].opval = Nullop;
5421 else if (*s == 'v') {
5422 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5423 s = force_version(s, FALSE);
5427 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5428 s = force_version(s, FALSE);
5440 yylval.ival = CopLINE(PL_curcop);
5444 PL_hints |= HINT_BLOCK_SCOPE;
5451 LOP(OP_WAITPID,XTERM);
5460 ctl_l[0] = toCTRL('L');
5462 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5465 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5470 if (PL_expect == XOPERATOR)
5476 yylval.ival = OP_XOR;
5481 TERM(sublex_start());
5486 #pragma segment Main
5490 S_pending_ident(pTHX)
5493 register I32 tmp = 0;
5494 /* pit holds the identifier we read and pending_ident is reset */
5495 char pit = PL_pending_ident;
5496 PL_pending_ident = 0;
5498 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5499 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5501 /* if we're in a my(), we can't allow dynamics here.
5502 $foo'bar has already been turned into $foo::bar, so
5503 just check for colons.
5505 if it's a legal name, the OP is a PADANY.
5508 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5509 if (strchr(PL_tokenbuf,':'))
5510 yyerror(Perl_form(aTHX_ "No package name allowed for "
5511 "variable %s in \"our\"",
5513 tmp = allocmy(PL_tokenbuf);
5516 if (strchr(PL_tokenbuf,':'))
5517 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5519 yylval.opval = newOP(OP_PADANY, 0);
5520 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5526 build the ops for accesses to a my() variable.
5528 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5529 then used in a comparison. This catches most, but not
5530 all cases. For instance, it catches
5531 sort { my($a); $a <=> $b }
5533 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5534 (although why you'd do that is anyone's guess).
5537 if (!strchr(PL_tokenbuf,':')) {
5539 tmp = pad_findmy(PL_tokenbuf);
5540 if (tmp != NOT_IN_PAD) {
5541 /* might be an "our" variable" */
5542 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5543 /* build ops for a bareword */
5544 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
5545 HEK *stashname = HvNAME_HEK(stash);
5546 SV *sym = newSVhek(stashname);
5547 sv_catpvn(sym, "::", 2);
5548 sv_catpv(sym, PL_tokenbuf+1);
5549 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5550 yylval.opval->op_private = OPpCONST_ENTERED;
5553 ? (GV_ADDMULTI | GV_ADDINEVAL)
5556 ((PL_tokenbuf[0] == '$') ? SVt_PV
5557 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5562 /* if it's a sort block and they're naming $a or $b */
5563 if (PL_last_lop_op == OP_SORT &&
5564 PL_tokenbuf[0] == '$' &&
5565 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5568 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5569 d < PL_bufend && *d != '\n';
5572 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5573 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5579 yylval.opval = newOP(OP_PADANY, 0);
5580 yylval.opval->op_targ = tmp;
5586 Whine if they've said @foo in a doublequoted string,
5587 and @foo isn't a variable we can find in the symbol
5590 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5591 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5592 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5593 && ckWARN(WARN_AMBIGUOUS))
5595 /* Downgraded from fatal to warning 20000522 mjd */
5596 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5597 "Possible unintended interpolation of %s in string",
5602 /* build ops for a bareword */
5603 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5604 yylval.opval->op_private = OPpCONST_ENTERED;
5605 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5606 ((PL_tokenbuf[0] == '$') ? SVt_PV
5607 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5613 * The following code was generated by perl_keyword.pl.
5617 Perl_keyword (pTHX_ const char *name, I32 len)
5621 case 1: /* 5 tokens of length 1 */
5653 case 2: /* 18 tokens of length 2 */
5799 case 3: /* 28 tokens of length 3 */
5803 if (name[1] == 'N' &&
5866 if (name[1] == 'i' &&
5906 if (name[1] == 'o' &&
5915 if (name[1] == 'e' &&
5924 if (name[1] == 'n' &&
5933 if (name[1] == 'o' &&
5942 if (name[1] == 'a' &&
5951 if (name[1] == 'o' &&
6013 if (name[1] == 'e' &&
6045 if (name[1] == 'i' &&
6054 if (name[1] == 's' &&
6063 if (name[1] == 'e' &&
6072 if (name[1] == 'o' &&
6084 case 4: /* 40 tokens of length 4 */
6088 if (name[1] == 'O' &&
6098 if (name[1] == 'N' &&
6108 if (name[1] == 'i' &&
6118 if (name[1] == 'h' &&
6128 if (name[1] == 'u' &&
6141 if (name[2] == 'c' &&
6150 if (name[2] == 's' &&
6159 if (name[2] == 'a' &&
6195 if (name[1] == 'o' &&
6208 if (name[2] == 't' &&
6217 if (name[2] == 'o' &&
6226 if (name[2] == 't' &&
6235 if (name[2] == 'e' &&
6248 if (name[1] == 'o' &&
6261 if (name[2] == 'y' &&
6270 if (name[2] == 'l' &&
6286 if (name[2] == 's' &&
6295 if (name[2] == 'n' &&
6304 if (name[2] == 'c' &&
6317 if (name[1] == 'e' &&
6327 if (name[1] == 'p' &&
6340 if (name[2] == 'c' &&
6349 if (name[2] == 'p' &&
6358 if (name[2] == 's' &&
6374 if (name[2] == 'n' &&
6444 if (name[2] == 'r' &&
6453 if (name[2] == 'r' &&
6462 if (name[2] == 'a' &&
6478 if (name[2] == 'l' &&
6545 case 5: /* 36 tokens of length 5 */
6549 if (name[1] == 'E' &&
6560 if (name[1] == 'H' &&
6574 if (name[2] == 'a' &&
6584 if (name[2] == 'a' &&
6598 if (name[1] == 'l' &&
6615 if (name[3] == 'i' &&
6624 if (name[3] == 'o' &&
6660 if (name[2] == 'o' &&
6670 if (name[2] == 'y' &&
6684 if (name[1] == 'l' &&
6698 if (name[2] == 'n' &&
6708 if (name[2] == 'o' &&
6725 if (name[2] == 'd' &&
6735 if (name[2] == 'c' &&
6752 if (name[2] == 'c' &&
6762 if (name[2] == 't' &&
6776 if (name[1] == 'k' &&
6787 if (name[1] == 'r' &&
6801 if (name[2] == 's' &&
6811 if (name[2] == 'd' &&
6828 if (name[2] == 'm' &&
6838 if (name[2] == 'i' &&
6848 if (name[2] == 'e' &&
6858 if (name[2] == 'l' &&
6868 if (name[2] == 'a' &&
6878 if (name[2] == 'u' &&
6892 if (name[1] == 'i' &&
6906 if (name[2] == 'a' &&
6919 if (name[3] == 'e' &&
6954 if (name[2] == 'i' &&
6971 if (name[2] == 'i' &&
6981 if (name[2] == 'i' &&
6998 case 6: /* 33 tokens of length 6 */
7002 if (name[1] == 'c' &&
7017 if (name[2] == 'l' &&
7028 if (name[2] == 'r' &&
7043 if (name[1] == 'e' &&
7058 if (name[2] == 's' &&
7063 if(ckWARN_d(WARN_SYNTAX))
7064 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7070 if (name[2] == 'i' &&
7088 if (name[2] == 'l' &&
7099 if (name[2] == 'r' &&
7114 if (name[1] == 'm' &&
7129 if (name[2] == 'n' &&
7140 if (name[2] == 's' &&
7155 if (name[1] == 's' &&
7161 if (name[4] == 't' &&
7170 if (name[4] == 'e' &&
7179 if (name[4] == 'c' &&
7188 if (name[4] == 'n' &&
7204 if (name[1] == 'r' &&
7222 if (name[3] == 'a' &&
7232 if (name[3] == 'u' &&
7246 if (name[2] == 'n' &&
7264 if (name[2] == 'a' &&
7278 if (name[3] == 'e' &&
7291 if (name[4] == 't' &&
7300 if (name[4] == 'e' &&
7322 if (name[4] == 't' &&
7331 if (name[4] == 'e' &&
7347 if (name[2] == 'c' &&
7358 if (name[2] == 'l' &&
7369 if (name[2] == 'b' &&
7380 if (name[2] == 's' &&
7403 if (name[4] == 's' &&
7412 if (name[4] == 'n' &&
7425 if (name[3] == 'a' &&
7442 if (name[1] == 'a' &&
7457 case 7: /* 28 tokens of length 7 */
7461 if (name[1] == 'E' &&
7474 if (name[1] == '_' &&
7487 if (name[1] == 'i' &&
7494 return -KEY_binmode;
7500 if (name[1] == 'o' &&
7507 return -KEY_connect;
7516 if (name[2] == 'm' &&
7522 return -KEY_dbmopen;
7528 if (name[2] == 'f' &&
7544 if (name[1] == 'o' &&
7557 if (name[1] == 'e' &&
7564 if (name[5] == 'r' &&
7567 return -KEY_getpgrp;
7573 if (name[5] == 'i' &&
7576 return -KEY_getppid;
7589 if (name[1] == 'c' &&
7596 return -KEY_lcfirst;
7602 if (name[1] == 'p' &&
7609 return -KEY_opendir;
7615 if (name[1] == 'a' &&
7633 if (name[3] == 'd' &&
7638 return -KEY_readdir;
7644 if (name[3] == 'u' &&
7655 if (name[3] == 'e' &&
7660 return -KEY_reverse;
7679 if (name[3] == 'k' &&
7684 return -KEY_seekdir;
7690 if (name[3] == 'p' &&
7695 return -KEY_setpgrp;
7705 if (name[2] == 'm' &&
7711 return -KEY_shmread;
7717 if (name[2] == 'r' &&
7723 return -KEY_sprintf;
7732 if (name[3] == 'l' &&
7737 return -KEY_symlink;
7746 if (name[4] == 'a' &&
7750 return -KEY_syscall;
7756 if (name[4] == 'p' &&
7760 return -KEY_sysopen;
7766 if (name[4] == 'e' &&
7770 return -KEY_sysread;
7776 if (name[4] == 'e' &&
7780 return -KEY_sysseek;
7798 if (name[1] == 'e' &&
7805 return -KEY_telldir;
7814 if (name[2] == 'f' &&
7820 return -KEY_ucfirst;
7826 if (name[2] == 's' &&
7832 return -KEY_unshift;
7842 if (name[1] == 'a' &&
7849 return -KEY_waitpid;
7858 case 8: /* 26 tokens of length 8 */
7862 if (name[1] == 'U' &&
7870 return KEY_AUTOLOAD;
7881 if (name[3] == 'A' &&
7887 return KEY___DATA__;
7893 if (name[3] == 'I' &&
7899 return -KEY___FILE__;
7905 if (name[3] == 'I' &&
7911 return -KEY___LINE__;
7927 if (name[2] == 'o' &&
7934 return -KEY_closedir;
7940 if (name[2] == 'n' &&
7947 return -KEY_continue;
7957 if (name[1] == 'b' &&
7965 return -KEY_dbmclose;
7971 if (name[1] == 'n' &&
7977 if (name[4] == 'r' &&
7982 return -KEY_endgrent;
7988 if (name[4] == 'w' &&
7993 return -KEY_endpwent;
8006 if (name[1] == 'o' &&
8014 return -KEY_formline;
8020 if (name[1] == 'e' &&
8031 if (name[6] == 'n' &&
8034 return -KEY_getgrent;
8040 if (name[6] == 'i' &&
8043 return -KEY_getgrgid;
8049 if (name[6] == 'a' &&
8052 return -KEY_getgrnam;
8065 if (name[4] == 'o' &&
8070 return -KEY_getlogin;
8081 if (name[6] == 'n' &&
8084 return -KEY_getpwent;
8090 if (name[6] == 'a' &&
8093 return -KEY_getpwnam;
8099 if (name[6] == 'i' &&
8102 return -KEY_getpwuid;
8122 if (name[1] == 'e' &&
8129 if (name[5] == 'i' &&
8136 return -KEY_readline;
8141 return -KEY_readlink;
8152 if (name[5] == 'i' &&
8156 return -KEY_readpipe;
8177 if (name[4] == 'r' &&
8182 return -KEY_setgrent;
8188 if (name[4] == 'w' &&
8193 return -KEY_setpwent;
8209 if (name[3] == 'w' &&
8215 return -KEY_shmwrite;
8221 if (name[3] == 't' &&
8227 return -KEY_shutdown;
8237 if (name[2] == 's' &&
8244 return -KEY_syswrite;
8254 if (name[1] == 'r' &&
8262 return -KEY_truncate;
8271 case 9: /* 8 tokens of length 9 */
8275 if (name[1] == 'n' &&
8284 return -KEY_endnetent;
8290 if (name[1] == 'e' &&
8299 return -KEY_getnetent;
8305 if (name[1] == 'o' &&
8314 return -KEY_localtime;
8320 if (name[1] == 'r' &&
8329 return KEY_prototype;
8335 if (name[1] == 'u' &&
8344 return -KEY_quotemeta;
8350 if (name[1] == 'e' &&
8359 return -KEY_rewinddir;
8365 if (name[1] == 'e' &&
8374 return -KEY_setnetent;
8380 if (name[1] == 'a' &&
8389 return -KEY_wantarray;
8398 case 10: /* 9 tokens of length 10 */
8402 if (name[1] == 'n' &&
8408 if (name[4] == 'o' &&
8415 return -KEY_endhostent;
8421 if (name[4] == 'e' &&
8428 return -KEY_endservent;
8441 if (name[1] == 'e' &&
8447 if (name[4] == 'o' &&
8454 return -KEY_gethostent;
8463 if (name[5] == 'r' &&
8469 return -KEY_getservent;
8475 if (name[5] == 'c' &&
8481 return -KEY_getsockopt;
8506 if (name[4] == 'o' &&
8513 return -KEY_sethostent;
8522 if (name[5] == 'r' &&
8528 return -KEY_setservent;
8534 if (name[5] == 'c' &&
8540 return -KEY_setsockopt;
8557 if (name[2] == 'c' &&
8566 return -KEY_socketpair;
8579 case 11: /* 8 tokens of length 11 */
8583 if (name[1] == '_' &&
8594 return -KEY___PACKAGE__;
8600 if (name[1] == 'n' &&
8611 return -KEY_endprotoent;
8617 if (name[1] == 'e' &&
8626 if (name[5] == 'e' &&
8633 return -KEY_getpeername;
8642 if (name[6] == 'o' &&
8648 return -KEY_getpriority;
8654 if (name[6] == 't' &&
8660 return -KEY_getprotoent;
8674 if (name[4] == 'o' &&
8682 return -KEY_getsockname;
8695 if (name[1] == 'e' &&
8703 if (name[6] == 'o' &&
8709 return -KEY_setpriority;
8715 if (name[6] == 't' &&
8721 return -KEY_setprotoent;
8737 case 12: /* 2 tokens of length 12 */
8738 if (name[0] == 'g' &&
8750 if (name[9] == 'd' &&
8753 { /* getnetbyaddr */
8754 return -KEY_getnetbyaddr;
8760 if (name[9] == 'a' &&
8763 { /* getnetbyname */
8764 return -KEY_getnetbyname;
8776 case 13: /* 4 tokens of length 13 */
8777 if (name[0] == 'g' &&
8784 if (name[4] == 'o' &&
8793 if (name[10] == 'd' &&
8796 { /* gethostbyaddr */
8797 return -KEY_gethostbyaddr;
8803 if (name[10] == 'a' &&
8806 { /* gethostbyname */
8807 return -KEY_gethostbyname;
8820 if (name[4] == 'e' &&
8829 if (name[10] == 'a' &&
8832 { /* getservbyname */
8833 return -KEY_getservbyname;
8839 if (name[10] == 'o' &&
8842 { /* getservbyport */
8843 return -KEY_getservbyport;
8862 case 14: /* 1 tokens of length 14 */
8863 if (name[0] == 'g' &&
8877 { /* getprotobyname */
8878 return -KEY_getprotobyname;
8883 case 16: /* 1 tokens of length 16 */
8884 if (name[0] == 'g' &&
8900 { /* getprotobynumber */
8901 return -KEY_getprotobynumber;
8915 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
8919 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8920 if (ckWARN(WARN_SYNTAX)) {
8922 for (w = s+2; *w && level; w++) {
8929 for (; *w && isSPACE(*w); w++) ;
8930 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
8931 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8932 "%s (...) interpreted as function",name);
8935 while (s < PL_bufend && isSPACE(*s))
8939 while (s < PL_bufend && isSPACE(*s))
8941 if (isIDFIRST_lazy_if(s,UTF)) {
8943 while (isALNUM_lazy_if(s,UTF))
8945 while (s < PL_bufend && isSPACE(*s))
8949 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
8950 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
8954 Perl_croak(aTHX_ "No comma allowed after %s", what);
8959 /* Either returns sv, or mortalizes sv and returns a new SV*.
8960 Best used as sv=new_constant(..., sv, ...).
8961 If s, pv are NULL, calls subroutine with one argument,
8962 and type is used with error messages only. */
8965 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
8969 HV *table = GvHV(PL_hintgv); /* ^H */
8973 const char *why1, *why2, *why3;
8975 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8978 why2 = strEQ(key,"charnames")
8979 ? "(possibly a missing \"use charnames ...\")"
8981 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8982 (type ? type: "undef"), why2);
8984 /* This is convoluted and evil ("goto considered harmful")
8985 * but I do not understand the intricacies of all the different
8986 * failure modes of %^H in here. The goal here is to make
8987 * the most probable error message user-friendly. --jhi */
8992 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8993 (type ? type: "undef"), why1, why2, why3);
8995 yyerror(SvPVX_const(msg));
8999 cvp = hv_fetch(table, key, strlen(key), FALSE);
9000 if (!cvp || !SvOK(*cvp)) {
9003 why3 = "} is not defined";
9006 sv_2mortal(sv); /* Parent created it permanently */
9009 pv = sv_2mortal(newSVpvn(s, len));
9011 typesv = sv_2mortal(newSVpv(type, 0));
9013 typesv = &PL_sv_undef;
9015 PUSHSTACKi(PERLSI_OVERLOAD);
9027 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9031 /* Check the eval first */
9032 if (!PL_in_eval && SvTRUE(ERRSV)) {
9034 sv_catpv(ERRSV, "Propagated");
9035 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
9037 res = SvREFCNT_inc(sv);
9041 (void)SvREFCNT_inc(res);
9050 why1 = "Call to &{$^H{";
9052 why3 = "}} did not return a defined value";
9060 /* Returns a NUL terminated string, with the length of the string written to
9064 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9066 register char *d = dest;
9067 register char *e = d + destlen - 3; /* two-character token, ending NUL */
9070 Perl_croak(aTHX_ ident_too_long);
9071 if (isALNUM(*s)) /* UTF handled below */
9073 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9078 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9082 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9083 char *t = s + UTF8SKIP(s);
9084 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9086 if (d + (t - s) > e)
9087 Perl_croak(aTHX_ ident_too_long);
9088 Copy(s, d, t - s, char);
9101 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9111 e = d + destlen - 3; /* two-character token, ending NUL */
9113 while (isDIGIT(*s)) {
9115 Perl_croak(aTHX_ ident_too_long);
9122 Perl_croak(aTHX_ ident_too_long);
9123 if (isALNUM(*s)) /* UTF handled below */
9125 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9130 else if (*s == ':' && s[1] == ':') {
9134 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9135 char *t = s + UTF8SKIP(s);
9136 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9138 if (d + (t - s) > e)
9139 Perl_croak(aTHX_ ident_too_long);
9140 Copy(s, d, t - s, char);
9151 if (PL_lex_state != LEX_NORMAL)
9152 PL_lex_state = LEX_INTERPENDMAYBE;
9155 if (*s == '$' && s[1] &&
9156 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9169 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9174 if (isSPACE(s[-1])) {
9176 const char ch = *s++;
9177 if (!SPACE_OR_TAB(ch)) {
9183 if (isIDFIRST_lazy_if(d,UTF)) {
9187 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9189 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9192 Copy(s, d, e - s, char);
9197 while ((isALNUM(*s) || *s == ':') && d < e)
9200 Perl_croak(aTHX_ ident_too_long);
9203 while (s < send && SPACE_OR_TAB(*s)) s++;
9204 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9205 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9206 const char *brack = *s == '[' ? "[...]" : "{...}";
9207 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9208 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9209 funny, dest, brack, funny, dest, brack);
9212 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9216 /* Handle extended ${^Foo} variables
9217 * 1999-02-27 mjd-perl-patch@plover.com */
9218 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9222 while (isALNUM(*s) && d < e) {
9226 Perl_croak(aTHX_ ident_too_long);
9231 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9232 PL_lex_state = LEX_INTERPEND;
9237 if (PL_lex_state == LEX_NORMAL) {
9238 if (ckWARN(WARN_AMBIGUOUS) &&
9239 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9241 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9242 "Ambiguous use of %c{%s} resolved to %c%s",
9243 funny, dest, funny, dest);
9248 s = bracket; /* let the parser handle it */
9252 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9253 PL_lex_state = LEX_INTERPEND;
9258 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9263 *pmfl |= PMf_GLOBAL;
9265 *pmfl |= PMf_CONTINUE;
9269 *pmfl |= PMf_MULTILINE;
9271 *pmfl |= PMf_SINGLELINE;
9273 *pmfl |= PMf_EXTENDED;
9277 S_scan_pat(pTHX_ char *start, I32 type)
9280 char *s = scan_str(start,FALSE,FALSE);
9283 Perl_croak(aTHX_ "Search pattern not terminated");
9285 pm = (PMOP*)newPMOP(type, 0);
9286 if (PL_multi_open == '?')
9287 pm->op_pmflags |= PMf_ONCE;
9289 while (*s && strchr("iomsx", *s))
9290 pmflag(&pm->op_pmflags,*s++);
9293 while (*s && strchr("iogcmsx", *s))
9294 pmflag(&pm->op_pmflags,*s++);
9296 /* issue a warning if /c is specified,but /g is not */
9297 if (ckWARN(WARN_REGEXP) &&
9298 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9300 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
9303 pm->op_pmpermflags = pm->op_pmflags;
9305 PL_lex_op = (OP*)pm;
9306 yylval.ival = OP_MATCH;
9311 S_scan_subst(pTHX_ char *start)
9319 yylval.ival = OP_NULL;
9321 s = scan_str(start,FALSE,FALSE);
9324 Perl_croak(aTHX_ "Substitution pattern not terminated");
9326 if (s[-1] == PL_multi_open)
9329 first_start = PL_multi_start;
9330 s = scan_str(s,FALSE,FALSE);
9333 SvREFCNT_dec(PL_lex_stuff);
9334 PL_lex_stuff = Nullsv;
9336 Perl_croak(aTHX_ "Substitution replacement not terminated");
9338 PL_multi_start = first_start; /* so whole substitution is taken together */
9340 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9346 else if (strchr("iogcmsx", *s))
9347 pmflag(&pm->op_pmflags,*s++);
9352 /* /c is not meaningful with s/// */
9353 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
9355 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
9360 PL_sublex_info.super_bufptr = s;
9361 PL_sublex_info.super_bufend = PL_bufend;
9363 pm->op_pmflags |= PMf_EVAL;
9364 repl = newSVpvn("",0);
9366 sv_catpv(repl, es ? "eval " : "do ");
9367 sv_catpvn(repl, "{ ", 2);
9368 sv_catsv(repl, PL_lex_repl);
9369 sv_catpvn(repl, " };", 2);
9371 SvREFCNT_dec(PL_lex_repl);
9375 pm->op_pmpermflags = pm->op_pmflags;
9376 PL_lex_op = (OP*)pm;
9377 yylval.ival = OP_SUBST;
9382 S_scan_trans(pTHX_ char *start)
9391 yylval.ival = OP_NULL;
9393 s = scan_str(start,FALSE,FALSE);
9395 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9396 if (s[-1] == PL_multi_open)
9399 s = scan_str(s,FALSE,FALSE);
9402 SvREFCNT_dec(PL_lex_stuff);
9403 PL_lex_stuff = Nullsv;
9405 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9408 complement = del = squash = 0;
9412 complement = OPpTRANS_COMPLEMENT;
9415 del = OPpTRANS_DELETE;
9418 squash = OPpTRANS_SQUASH;
9427 New(803, tbl, complement&&!del?258:256, short);
9428 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9429 o->op_private &= ~OPpTRANS_ALL;
9430 o->op_private |= del|squash|complement|
9431 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9432 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9435 yylval.ival = OP_TRANS;
9440 S_scan_heredoc(pTHX_ register char *s)
9443 I32 op_type = OP_SCALAR;
9447 const char newline[] = "\n";
9448 const char *found_newline;
9452 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9456 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9459 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9460 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9463 s = delimcpy(d, e, s, PL_bufend, term, &len);
9473 if (!isALNUM_lazy_if(s,UTF))
9474 deprecate_old("bare << to mean <<\"\"");
9475 for (; isALNUM_lazy_if(s,UTF); s++) {
9480 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9481 Perl_croak(aTHX_ "Delimiter for here document is too long");
9484 len = d - PL_tokenbuf;
9485 #ifndef PERL_STRICT_CR
9486 d = strchr(s, '\r');
9488 char * const olds = s;
9490 while (s < PL_bufend) {
9496 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9505 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9509 if ( outer || !(found_newline = ninstr(s,PL_bufend,newline,newline+1)) ) {
9510 herewas = newSVpvn(s,PL_bufend-s);
9514 herewas = newSVpvn(s,found_newline-s);
9516 s += SvCUR(herewas);
9518 tmpstr = NEWSV(87,79);
9519 sv_upgrade(tmpstr, SVt_PVIV);
9522 SvIV_set(tmpstr, -1);
9524 else if (term == '`') {
9525 op_type = OP_BACKTICK;
9526 SvIV_set(tmpstr, '\\');
9530 PL_multi_start = CopLINE(PL_curcop);
9531 PL_multi_open = PL_multi_close = '<';
9532 term = *PL_tokenbuf;
9533 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9534 char *bufptr = PL_sublex_info.super_bufptr;
9535 char *bufend = PL_sublex_info.super_bufend;
9536 char * const olds = s - SvCUR(herewas);
9537 s = strchr(bufptr, '\n');
9541 while (s < bufend &&
9542 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9544 CopLINE_inc(PL_curcop);
9547 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9548 missingterm(PL_tokenbuf);
9550 sv_setpvn(herewas,bufptr,d-bufptr+1);
9551 sv_setpvn(tmpstr,d+1,s-d);
9553 sv_catpvn(herewas,s,bufend-s);
9554 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9561 while (s < PL_bufend &&
9562 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9564 CopLINE_inc(PL_curcop);
9566 if (s >= PL_bufend) {
9567 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9568 missingterm(PL_tokenbuf);
9570 sv_setpvn(tmpstr,d+1,s-d);
9572 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9574 sv_catpvn(herewas,s,PL_bufend-s);
9575 sv_setsv(PL_linestr,herewas);
9576 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9577 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9578 PL_last_lop = PL_last_uni = Nullch;
9581 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9582 while (s >= PL_bufend) { /* multiple line string? */
9584 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9585 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9586 missingterm(PL_tokenbuf);
9588 CopLINE_inc(PL_curcop);
9589 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9590 PL_last_lop = PL_last_uni = Nullch;
9591 #ifndef PERL_STRICT_CR
9592 if (PL_bufend - PL_linestart >= 2) {
9593 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9594 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9596 PL_bufend[-2] = '\n';
9598 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9600 else if (PL_bufend[-1] == '\r')
9601 PL_bufend[-1] = '\n';
9603 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9604 PL_bufend[-1] = '\n';
9606 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9607 SV *sv = NEWSV(88,0);
9609 sv_upgrade(sv, SVt_PVMG);
9610 sv_setsv(sv,PL_linestr);
9613 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9615 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9616 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9617 *(SvPVX(PL_linestr) + off ) = ' ';
9618 sv_catsv(PL_linestr,herewas);
9619 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9620 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9624 sv_catsv(tmpstr,PL_linestr);
9629 PL_multi_end = CopLINE(PL_curcop);
9630 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9631 SvPV_shrink_to_cur(tmpstr);
9633 SvREFCNT_dec(herewas);
9635 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9637 else if (PL_encoding)
9638 sv_recode_to_utf8(tmpstr, PL_encoding);
9640 PL_lex_stuff = tmpstr;
9641 yylval.ival = op_type;
9646 takes: current position in input buffer
9647 returns: new position in input buffer
9648 side-effects: yylval and lex_op are set.
9653 <FH> read from filehandle
9654 <pkg::FH> read from package qualified filehandle
9655 <pkg'FH> read from package qualified filehandle
9656 <$fh> read from filehandle in $fh
9662 S_scan_inputsymbol(pTHX_ char *start)
9664 register char *s = start; /* current position in buffer */
9670 d = PL_tokenbuf; /* start of temp holding space */
9671 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9672 end = strchr(s, '\n');
9675 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9677 /* die if we didn't have space for the contents of the <>,
9678 or if it didn't end, or if we see a newline
9681 if (len >= sizeof PL_tokenbuf)
9682 Perl_croak(aTHX_ "Excessively long <> operator");
9684 Perl_croak(aTHX_ "Unterminated <> operator");
9689 Remember, only scalar variables are interpreted as filehandles by
9690 this code. Anything more complex (e.g., <$fh{$num}>) will be
9691 treated as a glob() call.
9692 This code makes use of the fact that except for the $ at the front,
9693 a scalar variable and a filehandle look the same.
9695 if (*d == '$' && d[1]) d++;
9697 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9698 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9701 /* If we've tried to read what we allow filehandles to look like, and
9702 there's still text left, then it must be a glob() and not a getline.
9703 Use scan_str to pull out the stuff between the <> and treat it
9704 as nothing more than a string.
9707 if (d - PL_tokenbuf != len) {
9708 yylval.ival = OP_GLOB;
9710 s = scan_str(start,FALSE,FALSE);
9712 Perl_croak(aTHX_ "Glob not terminated");
9716 bool readline_overriden = FALSE;
9717 GV *gv_readline = Nullgv;
9719 /* we're in a filehandle read situation */
9722 /* turn <> into <ARGV> */
9724 Copy("ARGV",d,5,char);
9726 /* Check whether readline() is overriden */
9727 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
9728 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9730 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9731 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9732 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9733 readline_overriden = TRUE;
9735 /* if <$fh>, create the ops to turn the variable into a
9741 /* try to find it in the pad for this block, otherwise find
9742 add symbol table ops
9744 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
9745 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
9746 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
9747 HEK *stashname = HvNAME_HEK(stash);
9748 SV *sym = sv_2mortal(newSVhek(stashname));
9749 sv_catpvn(sym, "::", 2);
9755 OP *o = newOP(OP_PADSV, 0);
9757 PL_lex_op = readline_overriden
9758 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9759 append_elem(OP_LIST, o,
9760 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9761 : (OP*)newUNOP(OP_READLINE, 0, o);
9770 ? (GV_ADDMULTI | GV_ADDINEVAL)
9773 PL_lex_op = readline_overriden
9774 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9775 append_elem(OP_LIST,
9776 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9777 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9778 : (OP*)newUNOP(OP_READLINE, 0,
9779 newUNOP(OP_RV2SV, 0,
9780 newGVOP(OP_GV, 0, gv)));
9782 if (!readline_overriden)
9783 PL_lex_op->op_flags |= OPf_SPECIAL;
9784 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
9785 yylval.ival = OP_NULL;
9788 /* If it's none of the above, it must be a literal filehandle
9789 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9791 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9792 PL_lex_op = readline_overriden
9793 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9794 append_elem(OP_LIST,
9795 newGVOP(OP_GV, 0, gv),
9796 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9797 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9798 yylval.ival = OP_NULL;
9807 takes: start position in buffer
9808 keep_quoted preserve \ on the embedded delimiter(s)
9809 keep_delims preserve the delimiters around the string
9810 returns: position to continue reading from buffer
9811 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9812 updates the read buffer.
9814 This subroutine pulls a string out of the input. It is called for:
9815 q single quotes q(literal text)
9816 ' single quotes 'literal text'
9817 qq double quotes qq(interpolate $here please)
9818 " double quotes "interpolate $here please"
9819 qx backticks qx(/bin/ls -l)
9820 ` backticks `/bin/ls -l`
9821 qw quote words @EXPORT_OK = qw( func() $spam )
9822 m// regexp match m/this/
9823 s/// regexp substitute s/this/that/
9824 tr/// string transliterate tr/this/that/
9825 y/// string transliterate y/this/that/
9826 ($*@) sub prototypes sub foo ($)
9827 (stuff) sub attr parameters sub foo : attr(stuff)
9828 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9830 In most of these cases (all but <>, patterns and transliterate)
9831 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9832 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9833 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9836 It skips whitespace before the string starts, and treats the first
9837 character as the delimiter. If the delimiter is one of ([{< then
9838 the corresponding "close" character )]}> is used as the closing
9839 delimiter. It allows quoting of delimiters, and if the string has
9840 balanced delimiters ([{<>}]) it allows nesting.
9842 On success, the SV with the resulting string is put into lex_stuff or,
9843 if that is already non-NULL, into lex_repl. The second case occurs only
9844 when parsing the RHS of the special constructs s/// and tr/// (y///).
9845 For convenience, the terminating delimiter character is stuffed into
9850 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9852 SV *sv; /* scalar value: string */
9853 char *tmps; /* temp string, used for delimiter matching */
9854 register char *s = start; /* current position in the buffer */
9855 register char term; /* terminating character */
9856 register char *to; /* current position in the sv's data */
9857 I32 brackets = 1; /* bracket nesting level */
9858 bool has_utf8 = FALSE; /* is there any utf8 content? */
9859 I32 termcode; /* terminating char. code */
9860 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9861 STRLEN termlen; /* length of terminating string */
9862 char *last = NULL; /* last position for nesting bracket */
9864 /* skip space before the delimiter */
9868 /* mark where we are, in case we need to report errors */
9871 /* after skipping whitespace, the next character is the terminator */
9874 termcode = termstr[0] = term;
9878 termcode = utf8_to_uvchr((U8*)s, &termlen);
9879 Copy(s, termstr, termlen, U8);
9880 if (!UTF8_IS_INVARIANT(term))
9884 /* mark where we are */
9885 PL_multi_start = CopLINE(PL_curcop);
9886 PL_multi_open = term;
9888 /* find corresponding closing delimiter */
9889 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9890 termcode = termstr[0] = term = tmps[5];
9892 PL_multi_close = term;
9894 /* create a new SV to hold the contents. 87 is leak category, I'm
9895 assuming. 79 is the SV's initial length. What a random number. */
9897 sv_upgrade(sv, SVt_PVIV);
9898 SvIV_set(sv, termcode);
9899 (void)SvPOK_only(sv); /* validate pointer */
9901 /* move past delimiter and try to read a complete string */
9903 sv_catpvn(sv, s, termlen);
9906 if (PL_encoding && !UTF) {
9910 int offset = s - SvPVX_const(PL_linestr);
9911 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9912 &offset, (char*)termstr, termlen);
9913 const char *ns = SvPVX_const(PL_linestr) + offset;
9914 char *svlast = SvEND(sv) - 1;
9916 for (; s < ns; s++) {
9917 if (*s == '\n' && !PL_rsfp)
9918 CopLINE_inc(PL_curcop);
9921 goto read_more_line;
9923 /* handle quoted delimiters */
9924 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9926 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9928 if ((svlast-1 - t) % 2) {
9932 SvCUR_set(sv, SvCUR(sv) - 1);
9937 if (PL_multi_open == PL_multi_close) {
9945 for (t = w = last; t < svlast; w++, t++) {
9946 /* At here, all closes are "was quoted" one,
9947 so we don't check PL_multi_close. */
9949 if (!keep_quoted && *(t+1) == PL_multi_open)
9954 else if (*t == PL_multi_open)
9962 SvCUR_set(sv, w - SvPVX_const(sv));
9965 if (--brackets <= 0)
9971 SvCUR_set(sv, SvCUR(sv) - 1);
9977 /* extend sv if need be */
9978 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9979 /* set 'to' to the next character in the sv's string */
9980 to = SvPVX(sv)+SvCUR(sv);
9982 /* if open delimiter is the close delimiter read unbridle */
9983 if (PL_multi_open == PL_multi_close) {
9984 for (; s < PL_bufend; s++,to++) {
9985 /* embedded newlines increment the current line number */
9986 if (*s == '\n' && !PL_rsfp)
9987 CopLINE_inc(PL_curcop);
9988 /* handle quoted delimiters */
9989 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9990 if (!keep_quoted && s[1] == term)
9992 /* any other quotes are simply copied straight through */
9996 /* terminate when run out of buffer (the for() condition), or
9997 have found the terminator */
9998 else if (*s == term) {
10001 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10004 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10010 /* if the terminator isn't the same as the start character (e.g.,
10011 matched brackets), we have to allow more in the quoting, and
10012 be prepared for nested brackets.
10015 /* read until we run out of string, or we find the terminator */
10016 for (; s < PL_bufend; s++,to++) {
10017 /* embedded newlines increment the line count */
10018 if (*s == '\n' && !PL_rsfp)
10019 CopLINE_inc(PL_curcop);
10020 /* backslashes can escape the open or closing characters */
10021 if (*s == '\\' && s+1 < PL_bufend) {
10022 if (!keep_quoted &&
10023 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10028 /* allow nested opens and closes */
10029 else if (*s == PL_multi_close && --brackets <= 0)
10031 else if (*s == PL_multi_open)
10033 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10038 /* terminate the copied string and update the sv's end-of-string */
10040 SvCUR_set(sv, to - SvPVX_const(sv));
10043 * this next chunk reads more into the buffer if we're not done yet
10047 break; /* handle case where we are done yet :-) */
10049 #ifndef PERL_STRICT_CR
10050 if (to - SvPVX_const(sv) >= 2) {
10051 if ((to[-2] == '\r' && to[-1] == '\n') ||
10052 (to[-2] == '\n' && to[-1] == '\r'))
10056 SvCUR_set(sv, to - SvPVX_const(sv));
10058 else if (to[-1] == '\r')
10061 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10066 /* if we're out of file, or a read fails, bail and reset the current
10067 line marker so we can report where the unterminated string began
10070 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10072 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10075 /* we read a line, so increment our line counter */
10076 CopLINE_inc(PL_curcop);
10078 /* update debugger info */
10079 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10080 SV *sv = NEWSV(88,0);
10082 sv_upgrade(sv, SVt_PVMG);
10083 sv_setsv(sv,PL_linestr);
10084 (void)SvIOK_on(sv);
10086 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10089 /* having changed the buffer, we must update PL_bufend */
10090 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10091 PL_last_lop = PL_last_uni = Nullch;
10094 /* at this point, we have successfully read the delimited string */
10096 if (!PL_encoding || UTF) {
10098 sv_catpvn(sv, s, termlen);
10101 if (has_utf8 || PL_encoding)
10104 PL_multi_end = CopLINE(PL_curcop);
10106 /* if we allocated too much space, give some back */
10107 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10108 SvLEN_set(sv, SvCUR(sv) + 1);
10109 SvPV_renew(sv, SvLEN(sv));
10112 /* decide whether this is the first or second quoted string we've read
10125 takes: pointer to position in buffer
10126 returns: pointer to new position in buffer
10127 side-effects: builds ops for the constant in yylval.op
10129 Read a number in any of the formats that Perl accepts:
10131 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10132 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10135 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10137 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10140 If it reads a number without a decimal point or an exponent, it will
10141 try converting the number to an integer and see if it can do so
10142 without loss of precision.
10146 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10148 register const char *s = start; /* current position in buffer */
10149 register char *d; /* destination in temp buffer */
10150 register char *e; /* end of temp buffer */
10151 NV nv; /* number read, as a double */
10152 SV *sv = Nullsv; /* place to put the converted number */
10153 bool floatit; /* boolean: int or float? */
10154 const char *lastub = 0; /* position of last underbar */
10155 static char const number_too_long[] = "Number too long";
10157 /* We use the first character to decide what type of number this is */
10161 Perl_croak(aTHX_ "panic: scan_num");
10163 /* if it starts with a 0, it could be an octal number, a decimal in
10164 0.13 disguise, or a hexadecimal number, or a binary number. */
10168 u holds the "number so far"
10169 shift the power of 2 of the base
10170 (hex == 4, octal == 3, binary == 1)
10171 overflowed was the number more than we can hold?
10173 Shift is used when we add a digit. It also serves as an "are
10174 we in octal/hex/binary?" indicator to disallow hex characters
10175 when in octal mode.
10180 bool overflowed = FALSE;
10181 bool just_zero = TRUE; /* just plain 0 or binary number? */
10182 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10183 static const char* const bases[5] =
10184 { "", "binary", "", "octal", "hexadecimal" };
10185 static const char* const Bases[5] =
10186 { "", "Binary", "", "Octal", "Hexadecimal" };
10187 static const char* const maxima[5] =
10189 "0b11111111111111111111111111111111",
10193 const char *base, *Base, *max;
10195 /* check for hex */
10200 } else if (s[1] == 'b') {
10205 /* check for a decimal in disguise */
10206 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10208 /* so it must be octal */
10215 if (ckWARN(WARN_SYNTAX))
10216 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10217 "Misplaced _ in number");
10221 base = bases[shift];
10222 Base = Bases[shift];
10223 max = maxima[shift];
10225 /* read the rest of the number */
10227 /* x is used in the overflow test,
10228 b is the digit we're adding on. */
10233 /* if we don't mention it, we're done */
10237 /* _ are ignored -- but warned about if consecutive */
10239 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10240 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10241 "Misplaced _ in number");
10245 /* 8 and 9 are not octal */
10246 case '8': case '9':
10248 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10252 case '2': case '3': case '4':
10253 case '5': case '6': case '7':
10255 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10258 case '0': case '1':
10259 b = *s++ & 15; /* ASCII digit -> value of digit */
10263 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10264 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10265 /* make sure they said 0x */
10268 b = (*s++ & 7) + 9;
10270 /* Prepare to put the digit we have onto the end
10271 of the number so far. We check for overflows.
10277 x = u << shift; /* make room for the digit */
10279 if ((x >> shift) != u
10280 && !(PL_hints & HINT_NEW_BINARY)) {
10283 if (ckWARN_d(WARN_OVERFLOW))
10284 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10285 "Integer overflow in %s number",
10288 u = x | b; /* add the digit to the end */
10291 n *= nvshift[shift];
10292 /* If an NV has not enough bits in its
10293 * mantissa to represent an UV this summing of
10294 * small low-order numbers is a waste of time
10295 * (because the NV cannot preserve the
10296 * low-order bits anyway): we could just
10297 * remember when did we overflow and in the
10298 * end just multiply n by the right
10306 /* if we get here, we had success: make a scalar value from
10311 /* final misplaced underbar check */
10312 if (s[-1] == '_') {
10313 if (ckWARN(WARN_SYNTAX))
10314 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10319 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
10320 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10321 "%s number > %s non-portable",
10327 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
10328 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10329 "%s number > %s non-portable",
10334 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10335 sv = new_constant(start, s - start, "integer",
10337 else if (PL_hints & HINT_NEW_BINARY)
10338 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10343 handle decimal numbers.
10344 we're also sent here when we read a 0 as the first digit
10346 case '1': case '2': case '3': case '4': case '5':
10347 case '6': case '7': case '8': case '9': case '.':
10350 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10353 /* read next group of digits and _ and copy into d */
10354 while (isDIGIT(*s) || *s == '_') {
10355 /* skip underscores, checking for misplaced ones
10359 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10360 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10361 "Misplaced _ in number");
10365 /* check for end of fixed-length buffer */
10367 Perl_croak(aTHX_ number_too_long);
10368 /* if we're ok, copy the character */
10373 /* final misplaced underbar check */
10374 if (lastub && s == lastub + 1) {
10375 if (ckWARN(WARN_SYNTAX))
10376 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10379 /* read a decimal portion if there is one. avoid
10380 3..5 being interpreted as the number 3. followed
10383 if (*s == '.' && s[1] != '.') {
10388 if (ckWARN(WARN_SYNTAX))
10389 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10390 "Misplaced _ in number");
10394 /* copy, ignoring underbars, until we run out of digits.
10396 for (; isDIGIT(*s) || *s == '_'; s++) {
10397 /* fixed length buffer check */
10399 Perl_croak(aTHX_ number_too_long);
10401 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10402 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10403 "Misplaced _ in number");
10409 /* fractional part ending in underbar? */
10410 if (s[-1] == '_') {
10411 if (ckWARN(WARN_SYNTAX))
10412 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10413 "Misplaced _ in number");
10415 if (*s == '.' && isDIGIT(s[1])) {
10416 /* oops, it's really a v-string, but without the "v" */
10422 /* read exponent part, if present */
10423 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10427 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10428 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10430 /* stray preinitial _ */
10432 if (ckWARN(WARN_SYNTAX))
10433 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10434 "Misplaced _ in number");
10438 /* allow positive or negative exponent */
10439 if (*s == '+' || *s == '-')
10442 /* stray initial _ */
10444 if (ckWARN(WARN_SYNTAX))
10445 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10446 "Misplaced _ in number");
10450 /* read digits of exponent */
10451 while (isDIGIT(*s) || *s == '_') {
10454 Perl_croak(aTHX_ number_too_long);
10458 if (ckWARN(WARN_SYNTAX) &&
10459 ((lastub && s == lastub + 1) ||
10460 (!isDIGIT(s[1]) && s[1] != '_')))
10461 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10462 "Misplaced _ in number");
10469 /* make an sv from the string */
10473 We try to do an integer conversion first if no characters
10474 indicating "float" have been found.
10479 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10481 if (flags == IS_NUMBER_IN_UV) {
10483 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10486 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10487 if (uv <= (UV) IV_MIN)
10488 sv_setiv(sv, -(IV)uv);
10495 /* terminate the string */
10497 nv = Atof(PL_tokenbuf);
10501 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10502 (PL_hints & HINT_NEW_INTEGER) )
10503 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10504 (floatit ? "float" : "integer"),
10508 /* if it starts with a v, it could be a v-string */
10511 sv = NEWSV(92,5); /* preallocate storage space */
10512 s = scan_vstring(s,sv);
10516 /* make the op for the constant and return */
10519 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10521 lvalp->opval = Nullop;
10527 S_scan_formline(pTHX_ register char *s)
10529 register char *eol;
10531 SV *stuff = newSVpvn("",0);
10532 bool needargs = FALSE;
10533 bool eofmt = FALSE;
10535 while (!needargs) {
10538 #ifdef PERL_STRICT_CR
10539 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10541 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10543 if (*t == '\n' || t == PL_bufend) {
10548 if (PL_in_eval && !PL_rsfp) {
10549 eol = (char *) memchr(s,'\n',PL_bufend-s);
10554 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10556 for (t = s; t < eol; t++) {
10557 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10559 goto enough; /* ~~ must be first line in formline */
10561 if (*t == '@' || *t == '^')
10565 sv_catpvn(stuff, s, eol-s);
10566 #ifndef PERL_STRICT_CR
10567 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10568 char *end = SvPVX(stuff) + SvCUR(stuff);
10571 SvCUR_set(stuff, SvCUR(stuff) - 1);
10580 s = filter_gets(PL_linestr, PL_rsfp, 0);
10581 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10582 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10583 PL_last_lop = PL_last_uni = Nullch;
10592 if (SvCUR(stuff)) {
10595 PL_lex_state = LEX_NORMAL;
10596 PL_nextval[PL_nexttoke].ival = 0;
10600 PL_lex_state = LEX_FORMLINE;
10602 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10604 else if (PL_encoding)
10605 sv_recode_to_utf8(stuff, PL_encoding);
10607 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10609 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10613 SvREFCNT_dec(stuff);
10615 PL_lex_formbrack = 0;
10626 PL_cshlen = strlen(PL_cshname);
10631 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10633 const I32 oldsavestack_ix = PL_savestack_ix;
10634 CV* outsidecv = PL_compcv;
10637 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10639 SAVEI32(PL_subline);
10640 save_item(PL_subname);
10641 SAVESPTR(PL_compcv);
10643 PL_compcv = (CV*)NEWSV(1104,0);
10644 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10645 CvFLAGS(PL_compcv) |= flags;
10647 PL_subline = CopLINE(PL_curcop);
10648 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10649 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10650 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10652 return oldsavestack_ix;
10656 #pragma segment Perl_yylex
10659 Perl_yywarn(pTHX_ const char *s)
10661 PL_in_eval |= EVAL_WARNONLY;
10663 PL_in_eval &= ~EVAL_WARNONLY;
10668 Perl_yyerror(pTHX_ const char *s)
10670 const char *where = NULL;
10671 const char *context = NULL;
10675 if (!yychar || (yychar == ';' && !PL_rsfp))
10677 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10678 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10679 PL_oldbufptr != PL_bufptr) {
10682 The code below is removed for NetWare because it abends/crashes on NetWare
10683 when the script has error such as not having the closing quotes like:
10684 if ($var eq "value)
10685 Checking of white spaces is anyway done in NetWare code.
10688 while (isSPACE(*PL_oldoldbufptr))
10691 context = PL_oldoldbufptr;
10692 contlen = PL_bufptr - PL_oldoldbufptr;
10694 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10695 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10698 The code below is removed for NetWare because it abends/crashes on NetWare
10699 when the script has error such as not having the closing quotes like:
10700 if ($var eq "value)
10701 Checking of white spaces is anyway done in NetWare code.
10704 while (isSPACE(*PL_oldbufptr))
10707 context = PL_oldbufptr;
10708 contlen = PL_bufptr - PL_oldbufptr;
10710 else if (yychar > 255)
10711 where = "next token ???";
10712 else if (yychar == -2) { /* YYEMPTY */
10713 if (PL_lex_state == LEX_NORMAL ||
10714 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10715 where = "at end of line";
10716 else if (PL_lex_inpat)
10717 where = "within pattern";
10719 where = "within string";
10722 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
10724 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10725 else if (isPRINT_LC(yychar))
10726 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10728 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10729 where = SvPVX_const(where_sv);
10731 msg = sv_2mortal(newSVpv(s, 0));
10732 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10733 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10735 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10737 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10738 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10739 Perl_sv_catpvf(aTHX_ msg,
10740 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10741 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10744 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
10745 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
10748 if (PL_error_count >= 10) {
10749 if (PL_in_eval && SvCUR(ERRSV))
10750 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10751 ERRSV, OutCopFILE(PL_curcop));
10753 Perl_croak(aTHX_ "%s has too many errors.\n",
10754 OutCopFILE(PL_curcop));
10757 PL_in_my_stash = Nullhv;
10761 #pragma segment Main
10765 S_swallow_bom(pTHX_ U8 *s)
10767 const STRLEN slen = SvCUR(PL_linestr);
10770 if (s[1] == 0xFE) {
10771 /* UTF-16 little-endian? (or UTF32-LE?) */
10772 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10773 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
10774 #ifndef PERL_NO_UTF16_FILTER
10775 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
10778 if (PL_bufend > (char*)s) {
10782 filter_add(utf16rev_textfilter, NULL);
10783 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10784 utf16_to_utf8_reversed(s, news,
10785 PL_bufend - (char*)s - 1,
10787 sv_setpvn(PL_linestr, (const char*)news, newlen);
10789 SvUTF8_on(PL_linestr);
10790 s = (U8*)SvPVX(PL_linestr);
10791 PL_bufend = SvPVX(PL_linestr) + newlen;
10794 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
10799 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10800 #ifndef PERL_NO_UTF16_FILTER
10801 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10804 if (PL_bufend > (char *)s) {
10808 filter_add(utf16_textfilter, NULL);
10809 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10810 utf16_to_utf8(s, news,
10811 PL_bufend - (char*)s,
10813 sv_setpvn(PL_linestr, (const char*)news, newlen);
10815 SvUTF8_on(PL_linestr);
10816 s = (U8*)SvPVX(PL_linestr);
10817 PL_bufend = SvPVX(PL_linestr) + newlen;
10820 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
10825 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10826 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10827 s += 3; /* UTF-8 */
10833 if (s[2] == 0xFE && s[3] == 0xFF) {
10834 /* UTF-32 big-endian */
10835 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
10838 else if (s[2] == 0 && s[3] != 0) {
10841 * are a good indicator of UTF-16BE. */
10842 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10847 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10850 * are a good indicator of UTF-16LE. */
10851 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10860 * Restore a source filter.
10864 restore_rsfp(pTHX_ void *f)
10866 PerlIO *fp = (PerlIO*)f;
10868 if (PL_rsfp == PerlIO_stdin())
10869 PerlIO_clearerr(PL_rsfp);
10870 else if (PL_rsfp && (PL_rsfp != fp))
10871 PerlIO_close(PL_rsfp);
10875 #ifndef PERL_NO_UTF16_FILTER
10877 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10879 const STRLEN old = SvCUR(sv);
10880 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10881 DEBUG_P(PerlIO_printf(Perl_debug_log,
10882 "utf16_textfilter(%p): %d %d (%d)\n",
10883 utf16_textfilter, idx, maxlen, (int) count));
10887 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10888 Copy(SvPVX_const(sv), tmps, old, char);
10889 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
10890 SvCUR(sv) - old, &newlen);
10891 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10893 DEBUG_P({sv_dump(sv);});
10898 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10900 const STRLEN old = SvCUR(sv);
10901 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10902 DEBUG_P(PerlIO_printf(Perl_debug_log,
10903 "utf16rev_textfilter(%p): %d %d (%d)\n",
10904 utf16rev_textfilter, idx, maxlen, (int) count));
10908 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10909 Copy(SvPVX_const(sv), tmps, old, char);
10910 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
10911 SvCUR(sv) - old, &newlen);
10912 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10914 DEBUG_P({ sv_dump(sv); });
10920 Returns a pointer to the next character after the parsed
10921 vstring, as well as updating the passed in sv.
10923 Function must be called like
10926 s = scan_vstring(s,sv);
10928 The sv should already be large enough to store the vstring
10929 passed in, for performance reasons.
10934 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
10936 const char *pos = s;
10937 const char *start = s;
10938 if (*pos == 'v') pos++; /* get past 'v' */
10939 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10941 if ( *pos != '.') {
10942 /* this may not be a v-string if followed by => */
10943 const char *next = pos;
10944 while (next < PL_bufend && isSPACE(*next))
10946 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
10947 /* return string not v-string */
10948 sv_setpvn(sv,(char *)s,pos-s);
10949 return (char *)pos;
10953 if (!isALPHA(*pos)) {
10955 U8 tmpbuf[UTF8_MAXBYTES+1];
10958 if (*s == 'v') s++; /* get past 'v' */
10960 sv_setpvn(sv, "", 0);
10965 /* this is atoi() that tolerates underscores */
10966 const char *end = pos;
10968 while (--end >= s) {
10973 rev += (*end - '0') * mult;
10975 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
10976 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10977 "Integer overflow in decimal number");
10981 if (rev > 0x7FFFFFFF)
10982 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10984 /* Append native character for the rev point */
10985 tmpend = uvchr_to_utf8(tmpbuf, rev);
10986 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10987 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10989 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
10995 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10999 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11007 * c-indentation-style: bsd
11008 * c-basic-offset: 4
11009 * indent-tabs-mode: t
11012 * ex: set ts=8 sts=4 sw=4 noet: