3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] =
30 "Identifier too long";
31 static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33 static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
36 static void restore_rsfp(pTHX_ void *f);
37 #ifndef PERL_NO_UTF16_FILTER
38 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
42 #define XFAKEBRACK 128
45 #ifdef USE_UTF8_SCRIPTS
46 # define UTF (!IN_BYTES)
48 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
51 /* In variables named $^X, these are the legal values for X.
52 * 1999-02-27 mjd-perl-patch@plover.com */
53 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
55 /* On MacOS, respect nonbreaking spaces */
56 #ifdef MACOS_TRADITIONAL
57 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
59 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
62 /* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
64 * can get by with a single comparison (if the compiler is smart enough).
67 /* #define LEX_NOTPARSING 11 is done in perl.h. */
70 #define LEX_INTERPNORMAL 9
71 #define LEX_INTERPCASEMOD 8
72 #define LEX_INTERPPUSH 7
73 #define LEX_INTERPSTART 6
74 #define LEX_INTERPEND 5
75 #define LEX_INTERPENDMAYBE 4
76 #define LEX_INTERPCONCAT 3
77 #define LEX_INTERPCONST 2
78 #define LEX_FORMLINE 1
79 #define LEX_KNOWNEXT 0
82 static const char* const lex_state_names[] = {
101 #include "keywords.h"
103 /* CLINE is a macro that ensures PL_copline has a sane value */
108 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
111 * Convenience functions to return different tokens and prime the
112 * lexer for the next token. They all take an argument.
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
124 * FUN1 : not used, except for not, which isn't a UNIOP
125 * BOop : bitwise or or xor
127 * SHop : shift operator
128 * PWop : power operator
129 * PMop : pattern-matching operator
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
135 * Also see LOP and lop() below.
138 #ifdef DEBUGGING /* Serve -DT. */
139 # define REPORT(retval) tokereport(s,(int)retval)
141 # define REPORT(retval) (retval)
144 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
165 /* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
170 #define UNI2(f,x) return ( \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
177 (*s == '(' || (s = skipspace(s), *s == '(') \
178 ? (int)FUNC1 : (int)UNIOP)))
179 #define UNI(f) UNI2(f,XTERM)
180 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
182 #define UNIBRACK(f) return ( \
185 PL_last_uni = PL_oldbufptr, \
187 (*s == '(' || (s = skipspace(s), *s == '(') \
188 ? (int)FUNC1 : (int)UNIOP)))
190 /* grandfather return to old style */
191 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
195 /* how to interpret the yylval associated with the token */
199 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
205 static struct debug_tokens { const int token, type; const char *name; }
206 const debug_tokens[] =
208 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
209 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
210 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
211 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
212 { ARROW, TOKENTYPE_NONE, "ARROW" },
213 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
214 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
215 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
216 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
217 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
218 { DO, TOKENTYPE_NONE, "DO" },
219 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
220 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
221 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
222 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
223 { ELSE, TOKENTYPE_NONE, "ELSE" },
224 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
225 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
226 { FOR, TOKENTYPE_IVAL, "FOR" },
227 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
228 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
229 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
230 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
231 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
232 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
233 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
234 { IF, TOKENTYPE_IVAL, "IF" },
235 { LABEL, TOKENTYPE_PVAL, "LABEL" },
236 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
237 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
238 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
239 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
240 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
241 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
242 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
243 { MY, TOKENTYPE_IVAL, "MY" },
244 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
245 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
246 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
247 { OROP, TOKENTYPE_IVAL, "OROP" },
248 { OROR, TOKENTYPE_NONE, "OROR" },
249 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
250 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
251 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
252 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
253 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
254 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
255 { PREINC, TOKENTYPE_NONE, "PREINC" },
256 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
257 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
258 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
259 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
260 { SUB, TOKENTYPE_NONE, "SUB" },
261 { THING, TOKENTYPE_OPVAL, "THING" },
262 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
263 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
264 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
265 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
266 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
267 { USE, TOKENTYPE_IVAL, "USE" },
268 { WHILE, TOKENTYPE_IVAL, "WHILE" },
269 { WORD, TOKENTYPE_OPVAL, "WORD" },
270 { 0, TOKENTYPE_NONE, 0 }
273 /* dump the returned token in rv, plus any optional arg in yylval */
276 S_tokereport(pTHX_ const char* s, I32 rv)
279 const char *name = Nullch;
280 enum token_type type = TOKENTYPE_NONE;
281 const struct debug_tokens *p;
282 SV* report = newSVpvn("<== ", 4);
284 for (p = debug_tokens; p->token; p++) {
285 if (p->token == (int)rv) {
292 Perl_sv_catpv(aTHX_ report, name);
293 else if ((char)rv > ' ' && (char)rv < '~')
294 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
296 Perl_sv_catpv(aTHX_ report, "EOF");
298 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
301 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
304 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
306 case TOKENTYPE_OPNUM:
307 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
308 PL_op_name[yylval.ival]);
311 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
313 case TOKENTYPE_OPVAL:
315 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
316 PL_op_name[yylval.opval->op_type]);
318 Perl_sv_catpv(aTHX_ report, "(opval=null)");
321 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
322 if (s - PL_bufptr > 0)
323 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
325 if (PL_oldbufptr && *PL_oldbufptr)
326 sv_catpv(report, PL_tokenbuf);
328 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
338 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
339 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
343 S_ao(pTHX_ int toketype)
345 if (*PL_bufptr == '=') {
347 if (toketype == ANDAND)
348 yylval.ival = OP_ANDASSIGN;
349 else if (toketype == OROR)
350 yylval.ival = OP_ORASSIGN;
351 else if (toketype == DORDOR)
352 yylval.ival = OP_DORASSIGN;
360 * When Perl expects an operator and finds something else, no_op
361 * prints the warning. It always prints "<something> found where
362 * operator expected. It prints "Missing semicolon on previous line?"
363 * if the surprise occurs at the start of the line. "do you need to
364 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
365 * where the compiler doesn't know if foo is a method call or a function.
366 * It prints "Missing operator before end of line" if there's nothing
367 * after the missing operator, or "... before <...>" if there is something
368 * after the missing operator.
372 S_no_op(pTHX_ const char *what, char *s)
374 char *oldbp = PL_bufptr;
375 bool is_first = (PL_oldbufptr == PL_linestart);
381 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
382 if (ckWARN_d(WARN_SYNTAX)) {
384 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
385 "\t(Missing semicolon on previous line?)\n");
386 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
388 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
389 if (t < PL_bufptr && isSPACE(*t))
390 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
391 "\t(Do you need to predeclare %.*s?)\n",
392 t - PL_oldoldbufptr, PL_oldoldbufptr);
396 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
397 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
405 * Complain about missing quote/regexp/heredoc terminator.
406 * If it's called with (char *)NULL then it cauterizes the line buffer.
407 * If we're in a delimited string and the delimiter is a control
408 * character, it's reformatted into a two-char sequence like ^C.
413 S_missingterm(pTHX_ char *s)
418 char *nl = strrchr(s,'\n');
424 iscntrl(PL_multi_close)
426 PL_multi_close < 32 || PL_multi_close == 127
430 tmpbuf[1] = toCTRL(PL_multi_close);
435 *tmpbuf = (char)PL_multi_close;
439 q = strchr(s,'"') ? '\'' : '"';
440 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
448 Perl_deprecate(pTHX_ const char *s)
450 if (ckWARN(WARN_DEPRECATED))
451 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
455 Perl_deprecate_old(pTHX_ const char *s)
457 /* This function should NOT be called for any new deprecated warnings */
458 /* Use Perl_deprecate instead */
460 /* It is here to maintain backward compatibility with the pre-5.8 */
461 /* warnings category hierarchy. The "deprecated" category used to */
462 /* live under the "syntax" category. It is now a top-level category */
463 /* in its own right. */
465 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
466 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
467 "Use of %s is deprecated", s);
472 * Deprecate a comma-less variable list.
478 deprecate_old("comma-less variable list");
482 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
483 * utf16-to-utf8-reversed.
486 #ifdef PERL_CR_FILTER
490 register const char *s = SvPVX(sv);
491 register const char *e = s + SvCUR(sv);
492 /* outer loop optimized to do nothing if there are no CR-LFs */
494 if (*s++ == '\r' && *s == '\n') {
495 /* hit a CR-LF, need to copy the rest */
496 register char *d = s - 1;
499 if (*s == '\r' && s[1] == '\n')
510 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
512 const I32 count = FILTER_READ(idx+1, sv, maxlen);
513 if (count > 0 && !maxlen)
521 * Initialize variables. Uses the Perl save_stack to save its state (for
522 * recursive calls to the parser).
526 Perl_lex_start(pTHX_ SV *line)
531 SAVEI32(PL_lex_dojoin);
532 SAVEI32(PL_lex_brackets);
533 SAVEI32(PL_lex_casemods);
534 SAVEI32(PL_lex_starts);
535 SAVEI32(PL_lex_state);
536 SAVEVPTR(PL_lex_inpat);
537 SAVEI32(PL_lex_inwhat);
538 if (PL_lex_state == LEX_KNOWNEXT) {
539 I32 toke = PL_nexttoke;
540 while (--toke >= 0) {
541 SAVEI32(PL_nexttype[toke]);
542 SAVEVPTR(PL_nextval[toke]);
544 SAVEI32(PL_nexttoke);
546 SAVECOPLINE(PL_curcop);
549 SAVEPPTR(PL_oldbufptr);
550 SAVEPPTR(PL_oldoldbufptr);
551 SAVEPPTR(PL_last_lop);
552 SAVEPPTR(PL_last_uni);
553 SAVEPPTR(PL_linestart);
554 SAVESPTR(PL_linestr);
555 SAVEGENERICPV(PL_lex_brackstack);
556 SAVEGENERICPV(PL_lex_casestack);
557 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
558 SAVESPTR(PL_lex_stuff);
559 SAVEI32(PL_lex_defer);
560 SAVEI32(PL_sublex_info.sub_inwhat);
561 SAVESPTR(PL_lex_repl);
563 SAVEINT(PL_lex_expect);
565 PL_lex_state = LEX_NORMAL;
569 New(899, PL_lex_brackstack, 120, char);
570 New(899, PL_lex_casestack, 12, char);
572 *PL_lex_casestack = '\0';
575 PL_lex_stuff = Nullsv;
576 PL_lex_repl = Nullsv;
580 PL_sublex_info.sub_inwhat = 0;
582 if (SvREADONLY(PL_linestr))
583 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
584 s = SvPV(PL_linestr, len);
585 if (!len || s[len-1] != ';') {
586 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
588 sv_catpvn(PL_linestr, "\n;", 2);
590 SvTEMP_off(PL_linestr);
591 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
592 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
593 PL_last_lop = PL_last_uni = Nullch;
599 * Finalizer for lexing operations. Must be called when the parser is
600 * done with the lexer.
606 PL_doextract = FALSE;
611 * This subroutine has nothing to do with tilting, whether at windmills
612 * or pinball tables. Its name is short for "increment line". It
613 * increments the current line number in CopLINE(PL_curcop) and checks
614 * to see whether the line starts with a comment of the form
615 * # line 500 "foo.pm"
616 * If so, it sets the current line number and file to the values in the comment.
620 S_incline(pTHX_ char *s)
627 CopLINE_inc(PL_curcop);
630 while (SPACE_OR_TAB(*s)) s++;
631 if (strnEQ(s, "line", 4))
635 if (SPACE_OR_TAB(*s))
639 while (SPACE_OR_TAB(*s)) s++;
645 while (SPACE_OR_TAB(*s))
647 if (*s == '"' && (t = strchr(s+1, '"'))) {
652 for (t = s; !isSPACE(*t); t++) ;
655 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
657 if (*e != '\n' && *e != '\0')
658 return; /* false alarm */
663 CopFILE_free(PL_curcop);
664 CopFILE_set(PL_curcop, s);
667 CopLINE_set(PL_curcop, atoi(n)-1);
672 * Called to gobble the appropriate amount and type of whitespace.
673 * Skips comments as well.
677 S_skipspace(pTHX_ register char *s)
679 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
680 while (s < PL_bufend && SPACE_OR_TAB(*s))
686 SSize_t oldprevlen, oldoldprevlen;
687 SSize_t oldloplen = 0, oldunilen = 0;
688 while (s < PL_bufend && isSPACE(*s)) {
689 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
694 if (s < PL_bufend && *s == '#') {
695 while (s < PL_bufend && *s != '\n')
699 if (PL_in_eval && !PL_rsfp) {
706 /* only continue to recharge the buffer if we're at the end
707 * of the buffer, we're not reading from a source filter, and
708 * we're in normal lexing mode
710 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
711 PL_lex_state == LEX_FORMLINE)
714 /* try to recharge the buffer */
715 if ((s = filter_gets(PL_linestr, PL_rsfp,
716 (prevlen = SvCUR(PL_linestr)))) == Nullch)
718 /* end of file. Add on the -p or -n magic */
721 ";}continue{print or die qq(-p destination: $!\\n);}");
722 PL_minus_n = PL_minus_p = 0;
724 else if (PL_minus_n) {
725 sv_setpvn(PL_linestr, ";}", 2);
729 sv_setpvn(PL_linestr,";", 1);
731 /* reset variables for next time we lex */
732 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
734 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
735 PL_last_lop = PL_last_uni = Nullch;
737 /* Close the filehandle. Could be from -P preprocessor,
738 * STDIN, or a regular file. If we were reading code from
739 * STDIN (because the commandline held no -e or filename)
740 * then we don't close it, we reset it so the code can
741 * read from STDIN too.
744 if (PL_preprocess && !PL_in_eval)
745 (void)PerlProc_pclose(PL_rsfp);
746 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
747 PerlIO_clearerr(PL_rsfp);
749 (void)PerlIO_close(PL_rsfp);
754 /* not at end of file, so we only read another line */
755 /* make corresponding updates to old pointers, for yyerror() */
756 oldprevlen = PL_oldbufptr - PL_bufend;
757 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
759 oldunilen = PL_last_uni - PL_bufend;
761 oldloplen = PL_last_lop - PL_bufend;
762 PL_linestart = PL_bufptr = s + prevlen;
763 PL_bufend = s + SvCUR(PL_linestr);
765 PL_oldbufptr = s + oldprevlen;
766 PL_oldoldbufptr = s + oldoldprevlen;
768 PL_last_uni = s + oldunilen;
770 PL_last_lop = s + oldloplen;
773 /* debugger active and we're not compiling the debugger code,
774 * so store the line into the debugger's array of lines
776 if (PERLDB_LINE && PL_curstash != PL_debstash) {
777 SV *sv = NEWSV(85,0);
779 sv_upgrade(sv, SVt_PVMG);
780 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
783 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
790 * Check the unary operators to ensure there's no ambiguity in how they're
791 * used. An ambiguous piece of code would be:
793 * This doesn't mean rand() + 5. Because rand() is a unary operator,
794 * the +5 is its argument.
803 if (PL_oldoldbufptr != PL_last_uni)
805 while (isSPACE(*PL_last_uni))
807 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
808 if ((t = strchr(s, '(')) && t < PL_bufptr)
810 if (ckWARN_d(WARN_AMBIGUOUS)){
813 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
814 "Warning: Use of \"%s\" without parentheses is ambiguous",
821 * LOP : macro to build a list operator. Its behaviour has been replaced
822 * with a subroutine, S_lop() for which LOP is just another name.
825 #define LOP(f,x) return lop(f,x,s)
829 * Build a list operator (or something that might be one). The rules:
830 * - if we have a next token, then it's a list operator [why?]
831 * - if the next thing is an opening paren, then it's a function
832 * - else it's a list operator
836 S_lop(pTHX_ I32 f, int x, char *s)
842 PL_last_lop = PL_oldbufptr;
843 PL_last_lop_op = (OPCODE)f;
845 return REPORT(LSTOP);
852 return REPORT(LSTOP);
857 * When the lexer realizes it knows the next token (for instance,
858 * it is reordering tokens for the parser) then it can call S_force_next
859 * to know what token to return the next time the lexer is called. Caller
860 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
861 * handles the token correctly.
865 S_force_next(pTHX_ I32 type)
867 PL_nexttype[PL_nexttoke] = type;
869 if (PL_lex_state != LEX_KNOWNEXT) {
870 PL_lex_defer = PL_lex_state;
871 PL_lex_expect = PL_expect;
872 PL_lex_state = LEX_KNOWNEXT;
877 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
879 SV *sv = newSVpvn(start,len);
880 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
887 * When the lexer knows the next thing is a word (for instance, it has
888 * just seen -> and it knows that the next char is a word char, then
889 * it calls S_force_word to stick the next word into the PL_next lookahead.
892 * char *start : buffer position (must be within PL_linestr)
893 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
894 * int check_keyword : if true, Perl checks to make sure the word isn't
895 * a keyword (do this if the word is a label, e.g. goto FOO)
896 * int allow_pack : if true, : characters will also be allowed (require,
898 * int allow_initial_tick : used by the "sub" lexer only.
902 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
907 start = skipspace(start);
909 if (isIDFIRST_lazy_if(s,UTF) ||
910 (allow_pack && *s == ':') ||
911 (allow_initial_tick && *s == '\'') )
913 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
914 if (check_keyword && keyword(PL_tokenbuf, len))
916 if (token == METHOD) {
921 PL_expect = XOPERATOR;
924 PL_nextval[PL_nexttoke].opval
925 = (OP*)newSVOP(OP_CONST,0,
926 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
927 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
935 * Called when the lexer wants $foo *foo &foo etc, but the program
936 * text only contains the "foo" portion. The first argument is a pointer
937 * to the "foo", and the second argument is the type symbol to prefix.
938 * Forces the next token to be a "WORD".
939 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
943 S_force_ident(pTHX_ register const char *s, int kind)
946 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
947 PL_nextval[PL_nexttoke].opval = o;
950 o->op_private = OPpCONST_ENTERED;
951 /* XXX see note in pp_entereval() for why we forgo typo
952 warnings if the symbol must be introduced in an eval.
954 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
955 kind == '$' ? SVt_PV :
956 kind == '@' ? SVt_PVAV :
957 kind == '%' ? SVt_PVHV :
965 Perl_str_to_version(pTHX_ SV *sv)
970 const char *start = SvPVx(sv,len);
971 const char *end = start + len;
972 bool utf = SvUTF8(sv) ? TRUE : FALSE;
973 while (start < end) {
977 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
982 retval += ((NV)n)/nshift;
991 * Forces the next token to be a version number.
992 * If the next token appears to be an invalid version number, (e.g. "v2b"),
993 * and if "guessing" is TRUE, then no new token is created (and the caller
994 * must use an alternative parsing method).
998 S_force_version(pTHX_ char *s, int guessing)
1000 OP *version = Nullop;
1009 while (isDIGIT(*d) || *d == '_' || *d == '.')
1011 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1013 s = scan_num(s, &yylval);
1014 version = yylval.opval;
1015 ver = cSVOPx(version)->op_sv;
1016 if (SvPOK(ver) && !SvNIOK(ver)) {
1017 (void)SvUPGRADE(ver, SVt_PVNV);
1018 SvNV_set(ver, str_to_version(ver));
1019 SvNOK_on(ver); /* hint that it is a version */
1026 /* NOTE: The parser sees the package name and the VERSION swapped */
1027 PL_nextval[PL_nexttoke].opval = version;
1035 * Tokenize a quoted string passed in as an SV. It finds the next
1036 * chunk, up to end of string or a backslash. It may make a new
1037 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1042 S_tokeq(pTHX_ SV *sv)
1045 register char *send;
1053 s = SvPV_force(sv, len);
1054 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1057 while (s < send && *s != '\\')
1062 if ( PL_hints & HINT_NEW_STRING ) {
1063 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
1069 if (s + 1 < send && (s[1] == '\\'))
1070 s++; /* all that, just for this */
1075 SvCUR_set(sv, d - SvPVX(sv));
1077 if ( PL_hints & HINT_NEW_STRING )
1078 return new_constant(NULL, 0, "q", sv, pv, "q");
1083 * Now come three functions related to double-quote context,
1084 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1085 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1086 * interact with PL_lex_state, and create fake ( ... ) argument lists
1087 * to handle functions and concatenation.
1088 * They assume that whoever calls them will be setting up a fake
1089 * join call, because each subthing puts a ',' after it. This lets
1092 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1094 * (I'm not sure whether the spurious commas at the end of lcfirst's
1095 * arguments and join's arguments are created or not).
1100 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1102 * Pattern matching will set PL_lex_op to the pattern-matching op to
1103 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1105 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1107 * Everything else becomes a FUNC.
1109 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1110 * had an OP_CONST or OP_READLINE). This just sets us up for a
1111 * call to S_sublex_push().
1115 S_sublex_start(pTHX)
1117 const register I32 op_type = yylval.ival;
1119 if (op_type == OP_NULL) {
1120 yylval.opval = PL_lex_op;
1124 if (op_type == OP_CONST || op_type == OP_READLINE) {
1125 SV *sv = tokeq(PL_lex_stuff);
1127 if (SvTYPE(sv) == SVt_PVIV) {
1128 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1130 const char *p = SvPV(sv, len);
1131 SV * const nsv = newSVpvn(p, len);
1137 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1138 PL_lex_stuff = Nullsv;
1139 /* Allow <FH> // "foo" */
1140 if (op_type == OP_READLINE)
1141 PL_expect = XTERMORDORDOR;
1145 PL_sublex_info.super_state = PL_lex_state;
1146 PL_sublex_info.sub_inwhat = op_type;
1147 PL_sublex_info.sub_op = PL_lex_op;
1148 PL_lex_state = LEX_INTERPPUSH;
1152 yylval.opval = PL_lex_op;
1162 * Create a new scope to save the lexing state. The scope will be
1163 * ended in S_sublex_done. Returns a '(', starting the function arguments
1164 * to the uc, lc, etc. found before.
1165 * Sets PL_lex_state to LEX_INTERPCONCAT.
1174 PL_lex_state = PL_sublex_info.super_state;
1175 SAVEI32(PL_lex_dojoin);
1176 SAVEI32(PL_lex_brackets);
1177 SAVEI32(PL_lex_casemods);
1178 SAVEI32(PL_lex_starts);
1179 SAVEI32(PL_lex_state);
1180 SAVEVPTR(PL_lex_inpat);
1181 SAVEI32(PL_lex_inwhat);
1182 SAVECOPLINE(PL_curcop);
1183 SAVEPPTR(PL_bufptr);
1184 SAVEPPTR(PL_bufend);
1185 SAVEPPTR(PL_oldbufptr);
1186 SAVEPPTR(PL_oldoldbufptr);
1187 SAVEPPTR(PL_last_lop);
1188 SAVEPPTR(PL_last_uni);
1189 SAVEPPTR(PL_linestart);
1190 SAVESPTR(PL_linestr);
1191 SAVEGENERICPV(PL_lex_brackstack);
1192 SAVEGENERICPV(PL_lex_casestack);
1194 PL_linestr = PL_lex_stuff;
1195 PL_lex_stuff = Nullsv;
1197 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1198 = SvPVX(PL_linestr);
1199 PL_bufend += SvCUR(PL_linestr);
1200 PL_last_lop = PL_last_uni = Nullch;
1201 SAVEFREESV(PL_linestr);
1203 PL_lex_dojoin = FALSE;
1204 PL_lex_brackets = 0;
1205 New(899, PL_lex_brackstack, 120, char);
1206 New(899, PL_lex_casestack, 12, char);
1207 PL_lex_casemods = 0;
1208 *PL_lex_casestack = '\0';
1210 PL_lex_state = LEX_INTERPCONCAT;
1211 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1213 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1214 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1215 PL_lex_inpat = PL_sublex_info.sub_op;
1217 PL_lex_inpat = Nullop;
1224 * Restores lexer state after a S_sublex_push.
1231 if (!PL_lex_starts++) {
1232 SV *sv = newSVpvn("",0);
1233 if (SvUTF8(PL_linestr))
1235 PL_expect = XOPERATOR;
1236 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1240 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1241 PL_lex_state = LEX_INTERPCASEMOD;
1245 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1246 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1247 PL_linestr = PL_lex_repl;
1249 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1250 PL_bufend += SvCUR(PL_linestr);
1251 PL_last_lop = PL_last_uni = Nullch;
1252 SAVEFREESV(PL_linestr);
1253 PL_lex_dojoin = FALSE;
1254 PL_lex_brackets = 0;
1255 PL_lex_casemods = 0;
1256 *PL_lex_casestack = '\0';
1258 if (SvEVALED(PL_lex_repl)) {
1259 PL_lex_state = LEX_INTERPNORMAL;
1261 /* we don't clear PL_lex_repl here, so that we can check later
1262 whether this is an evalled subst; that means we rely on the
1263 logic to ensure sublex_done() is called again only via the
1264 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1267 PL_lex_state = LEX_INTERPCONCAT;
1268 PL_lex_repl = Nullsv;
1274 PL_bufend = SvPVX(PL_linestr);
1275 PL_bufend += SvCUR(PL_linestr);
1276 PL_expect = XOPERATOR;
1277 PL_sublex_info.sub_inwhat = 0;
1285 Extracts a pattern, double-quoted string, or transliteration. This
1288 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1289 processing a pattern (PL_lex_inpat is true), a transliteration
1290 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1292 Returns a pointer to the character scanned up to. Iff this is
1293 advanced from the start pointer supplied (ie if anything was
1294 successfully parsed), will leave an OP for the substring scanned
1295 in yylval. Caller must intuit reason for not parsing further
1296 by looking at the next characters herself.
1300 double-quoted style: \r and \n
1301 regexp special ones: \D \s
1303 backrefs: \1 (deprecated in substitution replacements)
1304 case and quoting: \U \Q \E
1305 stops on @ and $, but not for $ as tail anchor
1307 In transliterations:
1308 characters are VERY literal, except for - not at the start or end
1309 of the string, which indicates a range. scan_const expands the
1310 range to the full set of intermediate characters.
1312 In double-quoted strings:
1314 double-quoted style: \r and \n
1316 backrefs: \1 (deprecated)
1317 case and quoting: \U \Q \E
1320 scan_const does *not* construct ops to handle interpolated strings.
1321 It stops processing as soon as it finds an embedded $ or @ variable
1322 and leaves it to the caller to work out what's going on.
1324 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1326 $ in pattern could be $foo or could be tail anchor. Assumption:
1327 it's a tail anchor if $ is the last thing in the string, or if it's
1328 followed by one of ")| \n\t"
1330 \1 (backreferences) are turned into $1
1332 The structure of the code is
1333 while (there's a character to process) {
1334 handle transliteration ranges
1335 skip regexp comments
1336 skip # initiated comments in //x patterns
1337 check for embedded @foo
1338 check for embedded scalars
1340 leave intact backslashes from leave (below)
1341 deprecate \1 in strings and sub replacements
1342 handle string-changing backslashes \l \U \Q \E, etc.
1343 switch (what was escaped) {
1344 handle - in a transliteration (becomes a literal -)
1345 handle \132 octal characters
1346 handle 0x15 hex characters
1347 handle \cV (control V)
1348 handle printf backslashes (\f, \r, \n, etc)
1350 } (end if backslash)
1351 } (end while character to read)
1356 S_scan_const(pTHX_ char *start)
1358 register char *send = PL_bufend; /* end of the constant */
1359 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1360 register char *s = start; /* start of the constant */
1361 register char *d = SvPVX(sv); /* destination for copies */
1362 bool dorange = FALSE; /* are we in a translit range? */
1363 bool didrange = FALSE; /* did we just finish a range? */
1364 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1365 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1368 const char *leaveit = /* set of acceptably-backslashed characters */
1370 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1373 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1374 /* If we are doing a trans and we know we want UTF8 set expectation */
1375 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1376 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1380 while (s < send || dorange) {
1381 /* get transliterations out of the way (they're most literal) */
1382 if (PL_lex_inwhat == OP_TRANS) {
1383 /* expand a range A-Z to the full set of characters. AIE! */
1385 I32 i; /* current expanded character */
1386 I32 min; /* first character in range */
1387 I32 max; /* last character in range */
1390 char *c = (char*)utf8_hop((U8*)d, -1);
1394 *c = (char)UTF_TO_NATIVE(0xff);
1395 /* mark the range as done, and continue */
1401 i = d - SvPVX(sv); /* remember current offset */
1402 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1403 d = SvPVX(sv) + i; /* refresh d after realloc */
1404 d -= 2; /* eat the first char and the - */
1406 min = (U8)*d; /* first char in range */
1407 max = (U8)d[1]; /* last char in range */
1411 "Invalid range \"%c-%c\" in transliteration operator",
1412 (char)min, (char)max);
1416 if ((isLOWER(min) && isLOWER(max)) ||
1417 (isUPPER(min) && isUPPER(max))) {
1419 for (i = min; i <= max; i++)
1421 *d++ = NATIVE_TO_NEED(has_utf8,i);
1423 for (i = min; i <= max; i++)
1425 *d++ = NATIVE_TO_NEED(has_utf8,i);
1430 for (i = min; i <= max; i++)
1433 /* mark the range as done, and continue */
1439 /* range begins (ignore - as first or last char) */
1440 else if (*s == '-' && s+1 < send && s != start) {
1442 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1445 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1457 /* if we get here, we're not doing a transliteration */
1459 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1460 except for the last char, which will be done separately. */
1461 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1463 while (s+1 < send && *s != ')')
1464 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1466 else if (s[2] == '{' /* This should match regcomp.c */
1467 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1470 char *regparse = s + (s[2] == '{' ? 3 : 4);
1473 while (count && (c = *regparse)) {
1474 if (c == '\\' && regparse[1])
1482 if (*regparse != ')')
1483 regparse--; /* Leave one char for continuation. */
1484 while (s < regparse)
1485 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1489 /* likewise skip #-initiated comments in //x patterns */
1490 else if (*s == '#' && PL_lex_inpat &&
1491 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1492 while (s+1 < send && *s != '\n')
1493 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1496 /* check for embedded arrays
1497 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1499 else if (*s == '@' && s[1]
1500 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1503 /* check for embedded scalars. only stop if we're sure it's a
1506 else if (*s == '$') {
1507 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1509 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1510 break; /* in regexp, $ might be tail anchor */
1513 /* End of else if chain - OP_TRANS rejoin rest */
1516 if (*s == '\\' && s+1 < send) {
1519 /* some backslashes we leave behind */
1520 if (*leaveit && *s && strchr(leaveit, *s)) {
1521 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1522 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1526 /* deprecate \1 in strings and substitution replacements */
1527 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1528 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1530 if (ckWARN(WARN_SYNTAX))
1531 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1536 /* string-change backslash escapes */
1537 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1542 /* if we get here, it's either a quoted -, or a digit */
1545 /* quoted - in transliterations */
1547 if (PL_lex_inwhat == OP_TRANS) {
1554 if (ckWARN(WARN_MISC) &&
1557 Perl_warner(aTHX_ packWARN(WARN_MISC),
1558 "Unrecognized escape \\%c passed through",
1560 /* default action is to copy the quoted character */
1561 goto default_action;
1564 /* \132 indicates an octal constant */
1565 case '0': case '1': case '2': case '3':
1566 case '4': case '5': case '6': case '7':
1570 uv = grok_oct(s, &len, &flags, NULL);
1573 goto NUM_ESCAPE_INSERT;
1575 /* \x24 indicates a hex constant */
1579 char* e = strchr(s, '}');
1580 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1581 PERL_SCAN_DISALLOW_PREFIX;
1586 yyerror("Missing right brace on \\x{}");
1590 uv = grok_hex(s, &len, &flags, NULL);
1596 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1597 uv = grok_hex(s, &len, &flags, NULL);
1603 /* Insert oct or hex escaped character.
1604 * There will always enough room in sv since such
1605 * escapes will be longer than any UTF-8 sequence
1606 * they can end up as. */
1608 /* We need to map to chars to ASCII before doing the tests
1611 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1612 if (!has_utf8 && uv > 255) {
1613 /* Might need to recode whatever we have
1614 * accumulated so far if it contains any
1617 * (Can't we keep track of that and avoid
1618 * this rescan? --jhi)
1622 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1623 if (!NATIVE_IS_INVARIANT(*c)) {
1628 STRLEN offset = d - SvPVX(sv);
1630 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1634 while (src >= (U8 *)SvPVX(sv)) {
1635 if (!NATIVE_IS_INVARIANT(*src)) {
1636 U8 ch = NATIVE_TO_ASCII(*src);
1637 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1638 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1648 if (has_utf8 || uv > 255) {
1649 d = (char*)uvchr_to_utf8((U8*)d, uv);
1651 if (PL_lex_inwhat == OP_TRANS &&
1652 PL_sublex_info.sub_op) {
1653 PL_sublex_info.sub_op->op_private |=
1654 (PL_lex_repl ? OPpTRANS_FROM_UTF
1667 /* \N{LATIN SMALL LETTER A} is a named character */
1671 char* e = strchr(s, '}');
1677 yyerror("Missing right brace on \\N{}");
1681 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1683 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1684 PERL_SCAN_DISALLOW_PREFIX;
1687 uv = grok_hex(s, &len, &flags, NULL);
1689 goto NUM_ESCAPE_INSERT;
1691 res = newSVpvn(s + 1, e - s - 1);
1692 res = new_constant( Nullch, 0, "charnames",
1693 res, Nullsv, "\\N{...}" );
1695 sv_utf8_upgrade(res);
1696 str = SvPV(res,len);
1697 #ifdef EBCDIC_NEVER_MIND
1698 /* charnames uses pack U and that has been
1699 * recently changed to do the below uni->native
1700 * mapping, so this would be redundant (and wrong,
1701 * the code point would be doubly converted).
1702 * But leave this in just in case the pack U change
1703 * gets revoked, but the semantics is still
1704 * desireable for charnames. --jhi */
1706 UV uv = utf8_to_uvchr((U8*)str, 0);
1709 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1711 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1712 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1713 str = SvPV(res, len);
1717 if (!has_utf8 && SvUTF8(res)) {
1718 char *ostart = SvPVX(sv);
1719 SvCUR_set(sv, d - ostart);
1722 sv_utf8_upgrade(sv);
1723 /* this just broke our allocation above... */
1724 SvGROW(sv, (STRLEN)(send - start));
1725 d = SvPVX(sv) + SvCUR(sv);
1728 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1729 char *odest = SvPVX(sv);
1731 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1732 d = SvPVX(sv) + (d - odest);
1734 Copy(str, d, len, char);
1741 yyerror("Missing braces on \\N{}");
1744 /* \c is a control character */
1753 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1756 yyerror("Missing control char name in \\c");
1760 /* printf-style backslashes, formfeeds, newlines, etc */
1762 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1765 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1768 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1771 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1774 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1777 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1780 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1786 } /* end if (backslash) */
1789 /* If we started with encoded form, or already know we want it
1790 and then encode the next character */
1791 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1793 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1794 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1797 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1798 STRLEN off = d - SvPVX(sv);
1799 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1801 d = (char*)uvchr_to_utf8((U8*)d, uv);
1805 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1807 } /* while loop to process each character */
1809 /* terminate the string and set up the sv */
1811 SvCUR_set(sv, d - SvPVX(sv));
1812 if (SvCUR(sv) >= SvLEN(sv))
1813 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1816 if (PL_encoding && !has_utf8) {
1817 sv_recode_to_utf8(sv, PL_encoding);
1823 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1824 PL_sublex_info.sub_op->op_private |=
1825 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1829 /* shrink the sv if we allocated more than we used */
1830 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1831 SvPV_shrink_to_cur(sv);
1834 /* return the substring (via yylval) only if we parsed anything */
1835 if (s > PL_bufptr) {
1836 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1837 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1839 ( PL_lex_inwhat == OP_TRANS
1841 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1844 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1851 * Returns TRUE if there's more to the expression (e.g., a subscript),
1854 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1856 * ->[ and ->{ return TRUE
1857 * { and [ outside a pattern are always subscripts, so return TRUE
1858 * if we're outside a pattern and it's not { or [, then return FALSE
1859 * if we're in a pattern and the first char is a {
1860 * {4,5} (any digits around the comma) returns FALSE
1861 * if we're in a pattern and the first char is a [
1863 * [SOMETHING] has a funky algorithm to decide whether it's a
1864 * character class or not. It has to deal with things like
1865 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1866 * anything else returns TRUE
1869 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1872 S_intuit_more(pTHX_ register char *s)
1874 if (PL_lex_brackets)
1876 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1878 if (*s != '{' && *s != '[')
1883 /* In a pattern, so maybe we have {n,m}. */
1900 /* On the other hand, maybe we have a character class */
1903 if (*s == ']' || *s == '^')
1906 /* this is terrifying, and it works */
1907 int weight = 2; /* let's weigh the evidence */
1909 unsigned char un_char = 255, last_un_char;
1910 const char *send = strchr(s,']');
1911 char tmpbuf[sizeof PL_tokenbuf * 4];
1913 if (!send) /* has to be an expression */
1916 Zero(seen,256,char);
1919 else if (isDIGIT(*s)) {
1921 if (isDIGIT(s[1]) && s[2] == ']')
1927 for (; s < send; s++) {
1928 last_un_char = un_char;
1929 un_char = (unsigned char)*s;
1934 weight -= seen[un_char] * 10;
1935 if (isALNUM_lazy_if(s+1,UTF)) {
1936 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1937 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1942 else if (*s == '$' && s[1] &&
1943 strchr("[#!%*<>()-=",s[1])) {
1944 if (/*{*/ strchr("])} =",s[2]))
1953 if (strchr("wds]",s[1]))
1955 else if (seen['\''] || seen['"'])
1957 else if (strchr("rnftbxcav",s[1]))
1959 else if (isDIGIT(s[1])) {
1961 while (s[1] && isDIGIT(s[1]))
1971 if (strchr("aA01! ",last_un_char))
1973 if (strchr("zZ79~",s[1]))
1975 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1976 weight -= 5; /* cope with negative subscript */
1979 if (!isALNUM(last_un_char)
1980 && !(last_un_char == '$' || last_un_char == '@'
1981 || last_un_char == '&')
1982 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
1987 if (keyword(tmpbuf, d - tmpbuf))
1990 if (un_char == last_un_char + 1)
1992 weight -= seen[un_char];
1997 if (weight >= 0) /* probably a character class */
2007 * Does all the checking to disambiguate
2009 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2010 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2012 * First argument is the stuff after the first token, e.g. "bar".
2014 * Not a method if bar is a filehandle.
2015 * Not a method if foo is a subroutine prototyped to take a filehandle.
2016 * Not a method if it's really "Foo $bar"
2017 * Method if it's "foo $bar"
2018 * Not a method if it's really "print foo $bar"
2019 * Method if it's really "foo package::" (interpreted as package->foo)
2020 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2021 * Not a method if bar is a filehandle or package, but is quoted with
2026 S_intuit_method(pTHX_ char *start, GV *gv)
2028 char *s = start + (*start == '$');
2029 char tmpbuf[sizeof PL_tokenbuf];
2037 if ((cv = GvCVu(gv))) {
2038 const char *proto = SvPVX(cv);
2048 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2049 /* start is the beginning of the possible filehandle/object,
2050 * and s is the end of it
2051 * tmpbuf is a copy of it
2054 if (*start == '$') {
2055 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2060 return *s == '(' ? FUNCMETH : METHOD;
2062 if (!keyword(tmpbuf, len)) {
2063 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2068 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
2069 if (indirgv && GvCVu(indirgv))
2071 /* filehandle or package name makes it a method */
2072 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2074 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2075 return 0; /* no assumptions -- "=>" quotes bearword */
2077 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2078 newSVpvn(tmpbuf,len));
2079 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2083 return *s == '(' ? FUNCMETH : METHOD;
2091 * Return a string of Perl code to load the debugger. If PERL5DB
2092 * is set, it will return the contents of that, otherwise a
2093 * compile-time require of perl5db.pl.
2100 const char *pdb = PerlEnv_getenv("PERL5DB");
2104 SETERRNO(0,SS_NORMAL);
2105 return "BEGIN { require 'perl5db.pl' }";
2111 /* Encoded script support. filter_add() effectively inserts a
2112 * 'pre-processing' function into the current source input stream.
2113 * Note that the filter function only applies to the current source file
2114 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2116 * The datasv parameter (which may be NULL) can be used to pass
2117 * private data to this instance of the filter. The filter function
2118 * can recover the SV using the FILTER_DATA macro and use it to
2119 * store private buffers and state information.
2121 * The supplied datasv parameter is upgraded to a PVIO type
2122 * and the IoDIRP/IoANY field is used to store the function pointer,
2123 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2124 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2125 * private use must be set using malloc'd pointers.
2129 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2134 if (!PL_rsfp_filters)
2135 PL_rsfp_filters = newAV();
2137 datasv = NEWSV(255,0);
2138 if (!SvUPGRADE(datasv, SVt_PVIO))
2139 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
2140 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
2141 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2142 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2143 (void*)funcp, SvPV_nolen(datasv)));
2144 av_unshift(PL_rsfp_filters, 1);
2145 av_store(PL_rsfp_filters, 0, datasv) ;
2150 /* Delete most recently added instance of this filter function. */
2152 Perl_filter_del(pTHX_ filter_t funcp)
2155 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
2156 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2158 /* if filter is on top of stack (usual case) just pop it off */
2159 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2160 if (IoANY(datasv) == (void *)funcp) {
2161 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2162 IoANY(datasv) = (void *)NULL;
2163 sv_free(av_pop(PL_rsfp_filters));
2167 /* we need to search for the correct entry and clear it */
2168 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2172 /* Invoke the idxth filter function for the current rsfp. */
2173 /* maxlen 0 = read one text line */
2175 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2180 if (!PL_rsfp_filters)
2182 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2183 /* Provide a default input filter to make life easy. */
2184 /* Note that we append to the line. This is handy. */
2185 DEBUG_P(PerlIO_printf(Perl_debug_log,
2186 "filter_read %d: from rsfp\n", idx));
2190 const int old_len = SvCUR(buf_sv);
2192 /* ensure buf_sv is large enough */
2193 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2194 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2195 if (PerlIO_error(PL_rsfp))
2196 return -1; /* error */
2198 return 0 ; /* end of file */
2200 SvCUR_set(buf_sv, old_len + len) ;
2203 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2204 if (PerlIO_error(PL_rsfp))
2205 return -1; /* error */
2207 return 0 ; /* end of file */
2210 return SvCUR(buf_sv);
2212 /* Skip this filter slot if filter has been deleted */
2213 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2214 DEBUG_P(PerlIO_printf(Perl_debug_log,
2215 "filter_read %d: skipped (filter deleted)\n",
2217 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2219 /* Get function pointer hidden within datasv */
2220 funcp = (filter_t)IoANY(datasv);
2221 DEBUG_P(PerlIO_printf(Perl_debug_log,
2222 "filter_read %d: via function %p (%s)\n",
2223 idx, (void*)funcp, SvPV_nolen(datasv)));
2224 /* Call function. The function is expected to */
2225 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2226 /* Return: <0:error, =0:eof, >0:not eof */
2227 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2231 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2233 #ifdef PERL_CR_FILTER
2234 if (!PL_rsfp_filters) {
2235 filter_add(S_cr_textfilter,NULL);
2238 if (PL_rsfp_filters) {
2240 SvCUR_set(sv, 0); /* start with empty line */
2241 if (FILTER_READ(0, sv, 0) > 0)
2242 return ( SvPVX(sv) ) ;
2247 return (sv_gets(sv, fp, append));
2251 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2255 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2259 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2260 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2262 return GvHV(gv); /* Foo:: */
2265 /* use constant CLASS => 'MyClass' */
2266 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2268 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2269 pkgname = SvPV_nolen(sv);
2273 return gv_stashpv(pkgname, FALSE);
2277 static const char* const exp_name[] =
2278 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2279 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2286 Works out what to call the token just pulled out of the input
2287 stream. The yacc parser takes care of taking the ops we return and
2288 stitching them into a tree.
2294 if read an identifier
2295 if we're in a my declaration
2296 croak if they tried to say my($foo::bar)
2297 build the ops for a my() declaration
2298 if it's an access to a my() variable
2299 are we in a sort block?
2300 croak if my($a); $a <=> $b
2301 build ops for access to a my() variable
2302 if in a dq string, and they've said @foo and we can't find @foo
2304 build ops for a bareword
2305 if we already built the token before, use it.
2310 #pragma segment Perl_yylex
2315 register char *s = PL_bufptr;
2322 I32 orig_keyword = 0;
2325 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2326 lex_state_names[PL_lex_state]);
2328 /* check if there's an identifier for us to look at */
2329 if (PL_pending_ident)
2330 return REPORT(S_pending_ident(aTHX));
2332 /* no identifier pending identification */
2334 switch (PL_lex_state) {
2336 case LEX_NORMAL: /* Some compilers will produce faster */
2337 case LEX_INTERPNORMAL: /* code if we comment these out. */
2341 /* when we've already built the next token, just pull it out of the queue */
2344 yylval = PL_nextval[PL_nexttoke];
2346 PL_lex_state = PL_lex_defer;
2347 PL_expect = PL_lex_expect;
2348 PL_lex_defer = LEX_NORMAL;
2350 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2351 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2352 (IV)PL_nexttype[PL_nexttoke]); });
2354 return REPORT(PL_nexttype[PL_nexttoke]);
2356 /* interpolated case modifiers like \L \U, including \Q and \E.
2357 when we get here, PL_bufptr is at the \
2359 case LEX_INTERPCASEMOD:
2361 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2362 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2364 /* handle \E or end of string */
2365 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2367 if (PL_lex_casemods) {
2368 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2369 PL_lex_casestack[PL_lex_casemods] = '\0';
2371 if (PL_bufptr != PL_bufend
2372 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2374 PL_lex_state = LEX_INTERPCONCAT;
2378 if (PL_bufptr != PL_bufend)
2380 PL_lex_state = LEX_INTERPCONCAT;
2384 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2385 "### Saw case modifier at '%s'\n", PL_bufptr); });
2387 if (s[1] == '\\' && s[2] == 'E') {
2389 PL_lex_state = LEX_INTERPCONCAT;
2393 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2394 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2395 if ((*s == 'L' || *s == 'U') &&
2396 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2397 PL_lex_casestack[--PL_lex_casemods] = '\0';
2400 if (PL_lex_casemods > 10)
2401 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2402 PL_lex_casestack[PL_lex_casemods++] = *s;
2403 PL_lex_casestack[PL_lex_casemods] = '\0';
2404 PL_lex_state = LEX_INTERPCONCAT;
2405 PL_nextval[PL_nexttoke].ival = 0;
2408 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2410 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2412 PL_nextval[PL_nexttoke].ival = OP_LC;
2414 PL_nextval[PL_nexttoke].ival = OP_UC;
2416 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2418 Perl_croak(aTHX_ "panic: yylex");
2422 if (PL_lex_starts) {
2425 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2426 if (PL_lex_casemods == 1 && PL_lex_inpat)
2435 case LEX_INTERPPUSH:
2436 return REPORT(sublex_push());
2438 case LEX_INTERPSTART:
2439 if (PL_bufptr == PL_bufend)
2440 return REPORT(sublex_done());
2441 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2442 "### Interpolated variable at '%s'\n", PL_bufptr); });
2444 PL_lex_dojoin = (*PL_bufptr == '@');
2445 PL_lex_state = LEX_INTERPNORMAL;
2446 if (PL_lex_dojoin) {
2447 PL_nextval[PL_nexttoke].ival = 0;
2449 force_ident("\"", '$');
2450 PL_nextval[PL_nexttoke].ival = 0;
2452 PL_nextval[PL_nexttoke].ival = 0;
2454 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2457 if (PL_lex_starts++) {
2459 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2460 if (!PL_lex_casemods && PL_lex_inpat)
2467 case LEX_INTERPENDMAYBE:
2468 if (intuit_more(PL_bufptr)) {
2469 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2475 if (PL_lex_dojoin) {
2476 PL_lex_dojoin = FALSE;
2477 PL_lex_state = LEX_INTERPCONCAT;
2480 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2481 && SvEVALED(PL_lex_repl))
2483 if (PL_bufptr != PL_bufend)
2484 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2485 PL_lex_repl = Nullsv;
2488 case LEX_INTERPCONCAT:
2490 if (PL_lex_brackets)
2491 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2493 if (PL_bufptr == PL_bufend)
2494 return REPORT(sublex_done());
2496 if (SvIVX(PL_linestr) == '\'') {
2497 SV *sv = newSVsv(PL_linestr);
2500 else if ( PL_hints & HINT_NEW_RE )
2501 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2502 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2506 s = scan_const(PL_bufptr);
2508 PL_lex_state = LEX_INTERPCASEMOD;
2510 PL_lex_state = LEX_INTERPSTART;
2513 if (s != PL_bufptr) {
2514 PL_nextval[PL_nexttoke] = yylval;
2517 if (PL_lex_starts++) {
2518 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2519 if (!PL_lex_casemods && PL_lex_inpat)
2532 PL_lex_state = LEX_NORMAL;
2533 s = scan_formline(PL_bufptr);
2534 if (!PL_lex_formbrack)
2540 PL_oldoldbufptr = PL_oldbufptr;
2543 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
2544 exp_name[PL_expect], s);
2550 if (isIDFIRST_lazy_if(s,UTF))
2552 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2555 goto fake_eof; /* emulate EOF on ^D or ^Z */
2560 if (PL_lex_brackets) {
2561 if (PL_lex_formbrack)
2562 yyerror("Format not terminated");
2564 yyerror("Missing right curly or square bracket");
2566 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2567 "### Tokener got EOF\n");
2571 if (s++ < PL_bufend)
2572 goto retry; /* ignore stray nulls */
2575 if (!PL_in_eval && !PL_preambled) {
2576 PL_preambled = TRUE;
2577 sv_setpv(PL_linestr,incl_perldb());
2578 if (SvCUR(PL_linestr))
2579 sv_catpvn(PL_linestr,";", 1);
2581 while(AvFILLp(PL_preambleav) >= 0) {
2582 SV *tmpsv = av_shift(PL_preambleav);
2583 sv_catsv(PL_linestr, tmpsv);
2584 sv_catpvn(PL_linestr, ";", 1);
2587 sv_free((SV*)PL_preambleav);
2588 PL_preambleav = NULL;
2590 if (PL_minus_n || PL_minus_p) {
2591 sv_catpv(PL_linestr, "LINE: while (<>) {");
2593 sv_catpv(PL_linestr,"chomp;");
2596 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2597 || *PL_splitstr == '"')
2598 && strchr(PL_splitstr + 1, *PL_splitstr))
2599 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2601 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2602 bytes can be used as quoting characters. :-) */
2603 /* The count here deliberately includes the NUL
2604 that terminates the C string constant. This
2605 embeds the opening NUL into the string. */
2606 const char *splits = PL_splitstr;
2607 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2610 if (*splits == '\\')
2611 sv_catpvn(PL_linestr, splits, 1);
2612 sv_catpvn(PL_linestr, splits, 1);
2613 } while (*splits++);
2614 /* This loop will embed the trailing NUL of
2615 PL_linestr as the last thing it does before
2617 sv_catpvn(PL_linestr, ");", 2);
2621 sv_catpv(PL_linestr,"our @F=split(' ');");
2624 sv_catpvn(PL_linestr, "\n", 1);
2625 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2626 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2627 PL_last_lop = PL_last_uni = Nullch;
2628 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2629 SV *sv = NEWSV(85,0);
2631 sv_upgrade(sv, SVt_PVMG);
2632 sv_setsv(sv,PL_linestr);
2635 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2640 bof = PL_rsfp ? TRUE : FALSE;
2641 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2644 if (PL_preprocess && !PL_in_eval)
2645 (void)PerlProc_pclose(PL_rsfp);
2646 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2647 PerlIO_clearerr(PL_rsfp);
2649 (void)PerlIO_close(PL_rsfp);
2651 PL_doextract = FALSE;
2653 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2654 sv_setpv(PL_linestr,PL_minus_p
2655 ? ";}continue{print;}" : ";}");
2656 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2658 PL_last_lop = PL_last_uni = Nullch;
2659 PL_minus_n = PL_minus_p = 0;
2662 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2663 PL_last_lop = PL_last_uni = Nullch;
2664 sv_setpvn(PL_linestr,"",0);
2665 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2667 /* If it looks like the start of a BOM or raw UTF-16,
2668 * check if it in fact is. */
2674 #ifdef PERLIO_IS_STDIO
2675 # ifdef __GNU_LIBRARY__
2676 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2677 # define FTELL_FOR_PIPE_IS_BROKEN
2681 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2682 # define FTELL_FOR_PIPE_IS_BROKEN
2687 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2688 /* This loses the possibility to detect the bof
2689 * situation on perl -P when the libc5 is being used.
2690 * Workaround? Maybe attach some extra state to PL_rsfp?
2693 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2695 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2698 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2699 s = swallow_bom((U8*)s);
2703 /* Incest with pod. */
2704 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2705 sv_setpvn(PL_linestr, "", 0);
2706 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2707 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2708 PL_last_lop = PL_last_uni = Nullch;
2709 PL_doextract = FALSE;
2713 } while (PL_doextract);
2714 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2715 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2716 SV *sv = NEWSV(85,0);
2718 sv_upgrade(sv, SVt_PVMG);
2719 sv_setsv(sv,PL_linestr);
2722 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2724 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2725 PL_last_lop = PL_last_uni = Nullch;
2726 if (CopLINE(PL_curcop) == 1) {
2727 while (s < PL_bufend && isSPACE(*s))
2729 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2733 if (*s == '#' && *(s+1) == '!')
2735 #ifdef ALTERNATE_SHEBANG
2737 static char const as[] = ALTERNATE_SHEBANG;
2738 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2739 d = s + (sizeof(as) - 1);
2741 #endif /* ALTERNATE_SHEBANG */
2750 while (*d && !isSPACE(*d))
2754 #ifdef ARG_ZERO_IS_SCRIPT
2755 if (ipathend > ipath) {
2757 * HP-UX (at least) sets argv[0] to the script name,
2758 * which makes $^X incorrect. And Digital UNIX and Linux,
2759 * at least, set argv[0] to the basename of the Perl
2760 * interpreter. So, having found "#!", we'll set it right.
2762 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2763 assert(SvPOK(x) || SvGMAGICAL(x));
2764 if (sv_eq(x, CopFILESV(PL_curcop))) {
2765 sv_setpvn(x, ipath, ipathend - ipath);
2771 const char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2772 const char *lstart = SvPV(x,llen);
2774 bstart += blen - llen;
2775 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2776 sv_setpvn(x, ipath, ipathend - ipath);
2781 TAINT_NOT; /* $^X is always tainted, but that's OK */
2783 #endif /* ARG_ZERO_IS_SCRIPT */
2788 d = instr(s,"perl -");
2790 d = instr(s,"perl");
2792 /* avoid getting into infinite loops when shebang
2793 * line contains "Perl" rather than "perl" */
2795 for (d = ipathend-4; d >= ipath; --d) {
2796 if ((*d == 'p' || *d == 'P')
2797 && !ibcmp(d, "perl", 4))
2807 #ifdef ALTERNATE_SHEBANG
2809 * If the ALTERNATE_SHEBANG on this system starts with a
2810 * character that can be part of a Perl expression, then if
2811 * we see it but not "perl", we're probably looking at the
2812 * start of Perl code, not a request to hand off to some
2813 * other interpreter. Similarly, if "perl" is there, but
2814 * not in the first 'word' of the line, we assume the line
2815 * contains the start of the Perl program.
2817 if (d && *s != '#') {
2818 const char *c = ipath;
2819 while (*c && !strchr("; \t\r\n\f\v#", *c))
2822 d = Nullch; /* "perl" not in first word; ignore */
2824 *s = '#'; /* Don't try to parse shebang line */
2826 #endif /* ALTERNATE_SHEBANG */
2827 #ifndef MACOS_TRADITIONAL
2832 !instr(s,"indir") &&
2833 instr(PL_origargv[0],"perl"))
2840 while (s < PL_bufend && isSPACE(*s))
2842 if (s < PL_bufend) {
2843 Newz(899,newargv,PL_origargc+3,char*);
2845 while (s < PL_bufend && !isSPACE(*s))
2848 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2851 newargv = PL_origargv;
2854 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2856 Perl_croak(aTHX_ "Can't exec %s", ipath);
2860 const U32 oldpdb = PL_perldb;
2861 const bool oldn = PL_minus_n;
2862 const bool oldp = PL_minus_p;
2864 while (*d && !isSPACE(*d)) d++;
2865 while (SPACE_OR_TAB(*d)) d++;
2868 const bool switches_done = PL_doswitches;
2870 if (*d == 'M' || *d == 'm' || *d == 'C') {
2872 while (*d && !isSPACE(*d)) d++;
2873 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2876 d = moreswitches(d);
2878 if (PL_doswitches && !switches_done) {
2879 int argc = PL_origargc;
2880 char **argv = PL_origargv;
2883 } while (argc && argv[0][0] == '-' && argv[0][1]);
2884 init_argv_symbols(argc,argv);
2886 if ((PERLDB_LINE && !oldpdb) ||
2887 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2888 /* if we have already added "LINE: while (<>) {",
2889 we must not do it again */
2891 sv_setpvn(PL_linestr, "", 0);
2892 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2893 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2894 PL_last_lop = PL_last_uni = Nullch;
2895 PL_preambled = FALSE;
2897 (void)gv_fetchfile(PL_origfilename);
2900 if (PL_doswitches && !switches_done) {
2901 int argc = PL_origargc;
2902 char **argv = PL_origargv;
2905 } while (argc && argv[0][0] == '-' && argv[0][1]);
2906 init_argv_symbols(argc,argv);
2912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2914 PL_lex_state = LEX_FORMLINE;
2919 #ifdef PERL_STRICT_CR
2920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2924 case ' ': case '\t': case '\f': case 013:
2925 #ifdef MACOS_TRADITIONAL
2932 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2933 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2934 /* handle eval qq[#line 1 "foo"\n ...] */
2935 CopLINE_dec(PL_curcop);
2939 while (s < d && *s != '\n')
2943 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2944 Perl_croak(aTHX_ "panic: input overflow");
2946 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2948 PL_lex_state = LEX_FORMLINE;
2958 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2965 while (s < PL_bufend && SPACE_OR_TAB(*s))
2968 if (strnEQ(s,"=>",2)) {
2969 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2970 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2971 "### Saw unary minus before =>, forcing word '%s'\n", s);
2973 OPERATOR('-'); /* unary minus */
2975 PL_last_uni = PL_oldbufptr;
2977 case 'r': ftst = OP_FTEREAD; break;
2978 case 'w': ftst = OP_FTEWRITE; break;
2979 case 'x': ftst = OP_FTEEXEC; break;
2980 case 'o': ftst = OP_FTEOWNED; break;
2981 case 'R': ftst = OP_FTRREAD; break;
2982 case 'W': ftst = OP_FTRWRITE; break;
2983 case 'X': ftst = OP_FTREXEC; break;
2984 case 'O': ftst = OP_FTROWNED; break;
2985 case 'e': ftst = OP_FTIS; break;
2986 case 'z': ftst = OP_FTZERO; break;
2987 case 's': ftst = OP_FTSIZE; break;
2988 case 'f': ftst = OP_FTFILE; break;
2989 case 'd': ftst = OP_FTDIR; break;
2990 case 'l': ftst = OP_FTLINK; break;
2991 case 'p': ftst = OP_FTPIPE; break;
2992 case 'S': ftst = OP_FTSOCK; break;
2993 case 'u': ftst = OP_FTSUID; break;
2994 case 'g': ftst = OP_FTSGID; break;
2995 case 'k': ftst = OP_FTSVTX; break;
2996 case 'b': ftst = OP_FTBLK; break;
2997 case 'c': ftst = OP_FTCHR; break;
2998 case 't': ftst = OP_FTTTY; break;
2999 case 'T': ftst = OP_FTTEXT; break;
3000 case 'B': ftst = OP_FTBINARY; break;
3001 case 'M': case 'A': case 'C':
3002 gv_fetchpv("\024",TRUE, SVt_PV);
3004 case 'M': ftst = OP_FTMTIME; break;
3005 case 'A': ftst = OP_FTATIME; break;
3006 case 'C': ftst = OP_FTCTIME; break;
3014 PL_last_lop_op = (OPCODE)ftst;
3015 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3016 "### Saw file test %c\n", (int)ftst);
3021 /* Assume it was a minus followed by a one-letter named
3022 * subroutine call (or a -bareword), then. */
3023 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3024 "### '-%c' looked like a file test but was not\n",
3033 if (PL_expect == XOPERATOR)
3038 else if (*s == '>') {
3041 if (isIDFIRST_lazy_if(s,UTF)) {
3042 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3050 if (PL_expect == XOPERATOR)
3053 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3055 OPERATOR('-'); /* unary minus */
3062 if (PL_expect == XOPERATOR)
3067 if (PL_expect == XOPERATOR)
3070 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3076 if (PL_expect != XOPERATOR) {
3077 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3078 PL_expect = XOPERATOR;
3079 force_ident(PL_tokenbuf, '*');
3092 if (PL_expect == XOPERATOR) {
3096 PL_tokenbuf[0] = '%';
3097 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3098 if (!PL_tokenbuf[1]) {
3101 PL_pending_ident = '%';
3120 switch (PL_expect) {
3123 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3125 PL_bufptr = s; /* update in case we back off */
3131 PL_expect = XTERMBLOCK;
3135 while (isIDFIRST_lazy_if(s,UTF)) {
3136 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3137 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3138 if (tmp < 0) tmp = -tmp;
3154 d = scan_str(d,TRUE,TRUE);
3156 /* MUST advance bufptr here to avoid bogus
3157 "at end of line" context messages from yyerror().
3159 PL_bufptr = s + len;
3160 yyerror("Unterminated attribute parameter in attribute list");
3163 return REPORT(0); /* EOF indicator */
3167 SV *sv = newSVpvn(s, len);
3168 sv_catsv(sv, PL_lex_stuff);
3169 attrs = append_elem(OP_LIST, attrs,
3170 newSVOP(OP_CONST, 0, sv));
3171 SvREFCNT_dec(PL_lex_stuff);
3172 PL_lex_stuff = Nullsv;
3175 if (len == 6 && strnEQ(s, "unique", len)) {
3176 if (PL_in_my == KEY_our)
3178 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3180 ; /* skip to avoid loading attributes.pm */
3183 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3186 /* NOTE: any CV attrs applied here need to be part of
3187 the CVf_BUILTIN_ATTRS define in cv.h! */
3188 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3189 CvLVALUE_on(PL_compcv);
3190 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3191 CvLOCKED_on(PL_compcv);
3192 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3193 CvMETHOD_on(PL_compcv);
3194 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3195 CvASSERTION_on(PL_compcv);
3196 /* After we've set the flags, it could be argued that
3197 we don't need to do the attributes.pm-based setting
3198 process, and shouldn't bother appending recognized
3199 flags. To experiment with that, uncomment the
3200 following "else". (Note that's already been
3201 uncommented. That keeps the above-applied built-in
3202 attributes from being intercepted (and possibly
3203 rejected) by a package's attribute routines, but is
3204 justified by the performance win for the common case
3205 of applying only built-in attributes.) */
3207 attrs = append_elem(OP_LIST, attrs,
3208 newSVOP(OP_CONST, 0,
3212 if (*s == ':' && s[1] != ':')
3215 break; /* require real whitespace or :'s */
3217 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3218 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3219 const char q = ((*s == '\'') ? '"' : '\'');
3220 /* If here for an expression, and parsed no attrs, back off. */
3221 if (tmp == '=' && !attrs) {
3225 /* MUST advance bufptr here to avoid bogus "at end of line"
3226 context messages from yyerror().
3230 yyerror("Unterminated attribute list");
3232 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3240 PL_nextval[PL_nexttoke].opval = attrs;
3248 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3249 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3266 if (PL_lex_brackets <= 0)
3267 yyerror("Unmatched right square bracket");
3270 if (PL_lex_state == LEX_INTERPNORMAL) {
3271 if (PL_lex_brackets == 0) {
3272 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3273 PL_lex_state = LEX_INTERPEND;
3280 if (PL_lex_brackets > 100) {
3281 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3283 switch (PL_expect) {
3285 if (PL_lex_formbrack) {
3289 if (PL_oldoldbufptr == PL_last_lop)
3290 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3292 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3293 OPERATOR(HASHBRACK);
3295 while (s < PL_bufend && SPACE_OR_TAB(*s))
3298 PL_tokenbuf[0] = '\0';
3299 if (d < PL_bufend && *d == '-') {
3300 PL_tokenbuf[0] = '-';
3302 while (d < PL_bufend && SPACE_OR_TAB(*d))
3305 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3306 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
3311 const char minus = (PL_tokenbuf[0] == '-');
3312 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3320 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3325 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3330 if (PL_oldoldbufptr == PL_last_lop)
3331 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3333 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3336 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3338 /* This hack is to get the ${} in the message. */
3340 yyerror("syntax error");
3343 OPERATOR(HASHBRACK);
3345 /* This hack serves to disambiguate a pair of curlies
3346 * as being a block or an anon hash. Normally, expectation
3347 * determines that, but in cases where we're not in a
3348 * position to expect anything in particular (like inside
3349 * eval"") we have to resolve the ambiguity. This code
3350 * covers the case where the first term in the curlies is a
3351 * quoted string. Most other cases need to be explicitly
3352 * disambiguated by prepending a `+' before the opening
3353 * curly in order to force resolution as an anon hash.
3355 * XXX should probably propagate the outer expectation
3356 * into eval"" to rely less on this hack, but that could
3357 * potentially break current behavior of eval"".
3361 if (*s == '\'' || *s == '"' || *s == '`') {
3362 /* common case: get past first string, handling escapes */
3363 for (t++; t < PL_bufend && *t != *s;)
3364 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3368 else if (*s == 'q') {
3371 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3374 /* skip q//-like construct */
3376 char open, close, term;
3379 while (t < PL_bufend && isSPACE(*t))
3381 /* check for q => */
3382 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3383 OPERATOR(HASHBRACK);
3387 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3391 for (t++; t < PL_bufend; t++) {
3392 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3394 else if (*t == open)
3398 for (t++; t < PL_bufend; t++) {
3399 if (*t == '\\' && t+1 < PL_bufend)
3401 else if (*t == close && --brackets <= 0)
3403 else if (*t == open)
3410 /* skip plain q word */
3411 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3414 else if (isALNUM_lazy_if(t,UTF)) {
3416 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3419 while (t < PL_bufend && isSPACE(*t))
3421 /* if comma follows first term, call it an anon hash */
3422 /* XXX it could be a comma expression with loop modifiers */
3423 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3424 || (*t == '=' && t[1] == '>')))
3425 OPERATOR(HASHBRACK);
3426 if (PL_expect == XREF)
3429 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3435 yylval.ival = CopLINE(PL_curcop);
3436 if (isSPACE(*s) || *s == '#')
3437 PL_copline = NOLINE; /* invalidate current command line number */
3442 if (PL_lex_brackets <= 0)
3443 yyerror("Unmatched right curly bracket");
3445 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3446 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3447 PL_lex_formbrack = 0;
3448 if (PL_lex_state == LEX_INTERPNORMAL) {
3449 if (PL_lex_brackets == 0) {
3450 if (PL_expect & XFAKEBRACK) {
3451 PL_expect &= XENUMMASK;
3452 PL_lex_state = LEX_INTERPEND;
3454 return yylex(); /* ignore fake brackets */
3456 if (*s == '-' && s[1] == '>')
3457 PL_lex_state = LEX_INTERPENDMAYBE;
3458 else if (*s != '[' && *s != '{')
3459 PL_lex_state = LEX_INTERPEND;
3462 if (PL_expect & XFAKEBRACK) {
3463 PL_expect &= XENUMMASK;
3465 return yylex(); /* ignore fake brackets */
3475 if (PL_expect == XOPERATOR) {
3476 if (ckWARN(WARN_SEMICOLON)
3477 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3479 CopLINE_dec(PL_curcop);
3480 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3481 CopLINE_inc(PL_curcop);
3486 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3488 PL_expect = XOPERATOR;
3489 force_ident(PL_tokenbuf, '&');
3493 yylval.ival = (OPpENTERSUB_AMPER<<8);
3512 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3513 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3515 if (PL_expect == XSTATE && isALPHA(tmp) &&
3516 (s == PL_linestart+1 || s[-2] == '\n') )
3518 if (PL_in_eval && !PL_rsfp) {
3523 if (strnEQ(s,"=cut",4)) {
3537 PL_doextract = TRUE;
3540 if (PL_lex_brackets < PL_lex_formbrack) {
3542 #ifdef PERL_STRICT_CR
3543 for (t = s; SPACE_OR_TAB(*t); t++) ;
3545 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3547 if (*t == '\n' || *t == '#') {
3559 /* was this !=~ where !~ was meant?
3560 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3562 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3563 const char *t = s+1;
3565 while (t < PL_bufend && isSPACE(*t))
3568 if (*t == '/' || *t == '?' ||
3569 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3570 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3571 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3572 "!=~ should be !~");
3581 if (PL_expect != XOPERATOR) {
3582 if (s[1] != '<' && !strchr(s,'>'))
3585 s = scan_heredoc(s);
3587 s = scan_inputsymbol(s);
3588 TERM(sublex_start());
3593 SHop(OP_LEFT_SHIFT);
3607 SHop(OP_RIGHT_SHIFT);
3616 if (PL_expect == XOPERATOR) {
3617 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3620 return REPORT(','); /* grandfather non-comma-format format */
3624 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3625 PL_tokenbuf[0] = '@';
3626 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3627 sizeof PL_tokenbuf - 1, FALSE);
3628 if (PL_expect == XOPERATOR)
3629 no_op("Array length", s);
3630 if (!PL_tokenbuf[1])
3632 PL_expect = XOPERATOR;
3633 PL_pending_ident = '#';
3637 PL_tokenbuf[0] = '$';
3638 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3639 sizeof PL_tokenbuf - 1, FALSE);
3640 if (PL_expect == XOPERATOR)
3642 if (!PL_tokenbuf[1]) {
3644 yyerror("Final $ should be \\$ or $name");
3648 /* This kludge not intended to be bulletproof. */
3649 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3650 yylval.opval = newSVOP(OP_CONST, 0,
3651 newSViv(PL_compiling.cop_arybase));
3652 yylval.opval->op_private = OPpCONST_ARYBASE;
3658 if (PL_lex_state == LEX_NORMAL)
3661 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3664 PL_tokenbuf[0] = '@';
3665 if (ckWARN(WARN_SYNTAX)) {
3667 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3670 PL_bufptr = skipspace(PL_bufptr);
3671 while (t < PL_bufend && *t != ']')
3673 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3674 "Multidimensional syntax %.*s not supported",
3675 (t - PL_bufptr) + 1, PL_bufptr);
3679 else if (*s == '{') {
3680 PL_tokenbuf[0] = '%';
3681 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3682 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3684 char tmpbuf[sizeof PL_tokenbuf];
3685 for (t++; isSPACE(*t); t++) ;
3686 if (isIDFIRST_lazy_if(t,UTF)) {
3688 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3689 for (; isSPACE(*t); t++) ;
3690 if (*t == ';' && get_cv(tmpbuf, FALSE))
3691 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3692 "You need to quote \"%s\"", tmpbuf);
3698 PL_expect = XOPERATOR;
3699 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3700 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3701 if (!islop || PL_last_lop_op == OP_GREPSTART)
3702 PL_expect = XOPERATOR;
3703 else if (strchr("$@\"'`q", *s))
3704 PL_expect = XTERM; /* e.g. print $fh "foo" */
3705 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3706 PL_expect = XTERM; /* e.g. print $fh &sub */
3707 else if (isIDFIRST_lazy_if(s,UTF)) {
3708 char tmpbuf[sizeof PL_tokenbuf];
3709 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3710 if ((tmp = keyword(tmpbuf, len))) {
3711 /* binary operators exclude handle interpretations */
3723 PL_expect = XTERM; /* e.g. print $fh length() */
3728 PL_expect = XTERM; /* e.g. print $fh subr() */
3731 else if (isDIGIT(*s))
3732 PL_expect = XTERM; /* e.g. print $fh 3 */
3733 else if (*s == '.' && isDIGIT(s[1]))
3734 PL_expect = XTERM; /* e.g. print $fh .3 */
3735 else if ((*s == '?' || *s == '-' || *s == '+')
3736 && !isSPACE(s[1]) && s[1] != '=')
3737 PL_expect = XTERM; /* e.g. print $fh -1 */
3738 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3739 PL_expect = XTERM; /* e.g. print $fh /.../
3740 XXX except DORDOR operator */
3741 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3742 PL_expect = XTERM; /* print $fh <<"EOF" */
3744 PL_pending_ident = '$';
3748 if (PL_expect == XOPERATOR)
3750 PL_tokenbuf[0] = '@';
3751 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3752 if (!PL_tokenbuf[1]) {
3755 if (PL_lex_state == LEX_NORMAL)
3757 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3759 PL_tokenbuf[0] = '%';
3761 /* Warn about @ where they meant $. */
3762 if (ckWARN(WARN_SYNTAX)) {
3763 if (*s == '[' || *s == '{') {
3764 const char *t = s + 1;
3765 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3767 if (*t == '}' || *t == ']') {
3769 PL_bufptr = skipspace(PL_bufptr);
3770 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3771 "Scalar value %.*s better written as $%.*s",
3772 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3777 PL_pending_ident = '@';
3780 case '/': /* may be division, defined-or, or pattern */
3781 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3785 case '?': /* may either be conditional or pattern */
3786 if(PL_expect == XOPERATOR) {
3794 /* A // operator. */
3804 /* Disable warning on "study /blah/" */
3805 if (PL_oldoldbufptr == PL_last_uni
3806 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3807 || memNE(PL_last_uni, "study", 5)
3808 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3811 s = scan_pat(s,OP_MATCH);
3812 TERM(sublex_start());
3816 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3817 #ifdef PERL_STRICT_CR
3820 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3822 && (s == PL_linestart || s[-1] == '\n') )
3824 PL_lex_formbrack = 0;
3828 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3834 yylval.ival = OPf_SPECIAL;
3840 if (PL_expect != XOPERATOR)
3845 case '0': case '1': case '2': case '3': case '4':
3846 case '5': case '6': case '7': case '8': case '9':
3847 s = scan_num(s, &yylval);
3848 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3849 "### Saw number in '%s'\n", s);
3851 if (PL_expect == XOPERATOR)
3856 s = scan_str(s,FALSE,FALSE);
3857 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3858 "### Saw string before '%s'\n", s);
3860 if (PL_expect == XOPERATOR) {
3861 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3864 return REPORT(','); /* grandfather non-comma-format format */
3870 missingterm((char*)0);
3871 yylval.ival = OP_CONST;
3872 TERM(sublex_start());
3875 s = scan_str(s,FALSE,FALSE);
3876 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3877 "### Saw string before '%s'\n", s);
3879 if (PL_expect == XOPERATOR) {
3880 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3883 return REPORT(','); /* grandfather non-comma-format format */
3889 missingterm((char*)0);
3890 yylval.ival = OP_CONST;
3891 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3892 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3893 yylval.ival = OP_STRINGIFY;
3897 TERM(sublex_start());
3900 s = scan_str(s,FALSE,FALSE);
3901 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3902 "### Saw backtick string before '%s'\n", s);
3904 if (PL_expect == XOPERATOR)
3905 no_op("Backticks",s);
3907 missingterm((char*)0);
3908 yylval.ival = OP_BACKTICK;
3910 TERM(sublex_start());
3914 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3915 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3917 if (PL_expect == XOPERATOR)
3918 no_op("Backslash",s);
3922 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3923 char *start = s + 2;
3924 while (isDIGIT(*start) || *start == '_')
3926 if (*start == '.' && isDIGIT(start[1])) {
3927 s = scan_num(s, &yylval);
3930 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3931 else if (!isALPHA(*start) && (PL_expect == XTERM
3932 || PL_expect == XREF || PL_expect == XSTATE
3933 || PL_expect == XTERMORDORDOR)) {
3934 const char c = *start;
3937 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3940 s = scan_num(s, &yylval);
3947 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3987 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3989 /* Some keywords can be followed by any delimiter, including ':' */
3990 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3991 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3992 (PL_tokenbuf[0] == 'q' &&
3993 strchr("qwxr", PL_tokenbuf[1])))));
3995 /* x::* is just a word, unless x is "CORE" */
3996 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4000 while (d < PL_bufend && isSPACE(*d))
4001 d++; /* no comments skipped here, or s### is misparsed */
4003 /* Is this a label? */
4004 if (!tmp && PL_expect == XSTATE
4005 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4007 yylval.pval = savepv(PL_tokenbuf);
4012 /* Check for keywords */
4013 tmp = keyword(PL_tokenbuf, len);
4015 /* Is this a word before a => operator? */
4016 if (*d == '=' && d[1] == '>') {
4019 = (OP*)newSVOP(OP_CONST, 0,
4020 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4021 yylval.opval->op_private = OPpCONST_BARE;
4025 if (tmp < 0) { /* second-class keyword? */
4026 GV *ogv = Nullgv; /* override (winner) */
4027 GV *hgv = Nullgv; /* hidden (loser) */
4028 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4030 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
4033 if (GvIMPORTED_CV(gv))
4035 else if (! CvMETHOD(cv))
4039 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4040 (gv = *gvp) != (GV*)&PL_sv_undef &&
4041 GvCVu(gv) && GvIMPORTED_CV(gv))
4048 tmp = 0; /* overridden by import or by GLOBAL */
4051 && -tmp==KEY_lock /* XXX generalizable kludge */
4053 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4055 tmp = 0; /* any sub overrides "weak" keyword */
4060 && PL_expect != XOPERATOR
4061 && PL_expect != XTERMORDORDOR)
4063 /* any sub overrides the "err" keyword, except when really an
4064 * operator is expected */
4067 else { /* no override */
4069 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4070 Perl_warner(aTHX_ packWARN(WARN_MISC),
4071 "dump() better written as CORE::dump()");
4075 if (ckWARN(WARN_AMBIGUOUS) && hgv
4076 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
4077 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4078 "Ambiguous call resolved as CORE::%s(), %s",
4079 GvENAME(hgv), "qualify as such or use &");
4086 default: /* not a keyword */
4090 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4092 /* Get the rest if it looks like a package qualifier */
4094 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4096 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4099 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4100 *s == '\'' ? "'" : "::");
4105 if (PL_expect == XOPERATOR) {
4106 if (PL_bufptr == PL_linestart) {
4107 CopLINE_dec(PL_curcop);
4108 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4109 CopLINE_inc(PL_curcop);
4112 no_op("Bareword",s);
4115 /* Look for a subroutine with this name in current package,
4116 unless name is "Foo::", in which case Foo is a bearword
4117 (and a package name). */
4120 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4122 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4123 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4124 "Bareword \"%s\" refers to nonexistent package",
4127 PL_tokenbuf[len] = '\0';
4134 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
4137 /* if we saw a global override before, get the right name */
4140 sv = newSVpvn("CORE::GLOBAL::",14);
4141 sv_catpv(sv,PL_tokenbuf);
4144 /* If len is 0, newSVpv does strlen(), which is correct.
4145 If len is non-zero, then it will be the true length,
4146 and so the scalar will be created correctly. */
4147 sv = newSVpv(PL_tokenbuf,len);
4150 /* Presume this is going to be a bareword of some sort. */
4153 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4154 yylval.opval->op_private = OPpCONST_BARE;
4155 /* UTF-8 package name? */
4156 if (UTF && !IN_BYTES &&
4157 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
4160 /* And if "Foo::", then that's what it certainly is. */
4165 /* See if it's the indirect object for a list operator. */
4167 if (PL_oldoldbufptr &&
4168 PL_oldoldbufptr < PL_bufptr &&
4169 (PL_oldoldbufptr == PL_last_lop
4170 || PL_oldoldbufptr == PL_last_uni) &&
4171 /* NO SKIPSPACE BEFORE HERE! */
4172 (PL_expect == XREF ||
4173 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4175 bool immediate_paren = *s == '(';
4177 /* (Now we can afford to cross potential line boundary.) */
4180 /* Two barewords in a row may indicate method call. */
4182 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
4185 /* If not a declared subroutine, it's an indirect object. */
4186 /* (But it's an indir obj regardless for sort.) */
4188 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4189 ((!gv || !GvCVu(gv)) &&
4190 (PL_last_lop_op != OP_MAPSTART &&
4191 PL_last_lop_op != OP_GREPSTART))))
4193 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4198 PL_expect = XOPERATOR;
4201 /* Is this a word before a => operator? */
4202 if (*s == '=' && s[1] == '>' && !pkgname) {
4204 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4205 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4206 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4210 /* If followed by a paren, it's certainly a subroutine. */
4213 if (gv && GvCVu(gv)) {
4214 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4215 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4220 PL_nextval[PL_nexttoke].opval = yylval.opval;
4221 PL_expect = XOPERATOR;
4227 /* If followed by var or block, call it a method (unless sub) */
4229 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4230 PL_last_lop = PL_oldbufptr;
4231 PL_last_lop_op = OP_METHOD;
4235 /* If followed by a bareword, see if it looks like indir obj. */
4238 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4239 && (tmp = intuit_method(s,gv)))
4242 /* Not a method, so call it a subroutine (if defined) */
4244 if (gv && GvCVu(gv)) {
4246 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4247 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4248 "Ambiguous use of -%s resolved as -&%s()",
4249 PL_tokenbuf, PL_tokenbuf);
4250 /* Check for a constant sub */
4252 if ((sv = cv_const_sv(cv))) {
4254 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4255 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4256 yylval.opval->op_private = 0;
4260 /* Resolve to GV now. */
4261 op_free(yylval.opval);
4262 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4263 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4264 PL_last_lop = PL_oldbufptr;
4265 PL_last_lop_op = OP_ENTERSUB;
4266 /* Is there a prototype? */
4269 char *proto = SvPV((SV*)cv, len);
4272 if (*proto == '$' && proto[1] == '\0')
4274 while (*proto == ';')
4276 if (*proto == '&' && *s == '{') {
4277 sv_setpv(PL_subname, PL_curstash ?
4278 "__ANON__" : "__ANON__::__ANON__");
4282 PL_nextval[PL_nexttoke].opval = yylval.opval;
4288 /* Call it a bare word */
4290 if (PL_hints & HINT_STRICT_SUBS)
4291 yylval.opval->op_private |= OPpCONST_STRICT;
4294 if (ckWARN(WARN_RESERVED)) {
4295 if (lastchar != '-') {
4296 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4297 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4298 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4305 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4306 && ckWARN_d(WARN_AMBIGUOUS)) {
4307 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4308 "Operator or semicolon missing before %c%s",
4309 lastchar, PL_tokenbuf);
4310 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4311 "Ambiguous use of %c resolved as operator %c",
4312 lastchar, lastchar);
4318 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4319 newSVpv(CopFILE(PL_curcop),0));
4323 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4324 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4327 case KEY___PACKAGE__:
4328 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4330 ? newSVpv(HvNAME(PL_curstash), 0)
4339 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4340 const char *pname = "main";
4341 if (PL_tokenbuf[2] == 'D')
4342 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
4343 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4346 GvIOp(gv) = newIO();
4347 IoIFP(GvIOp(gv)) = PL_rsfp;
4348 #if defined(HAS_FCNTL) && defined(F_SETFD)
4350 const int fd = PerlIO_fileno(PL_rsfp);
4351 fcntl(fd,F_SETFD,fd >= 3);
4354 /* Mark this internal pseudo-handle as clean */
4355 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4357 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4358 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4359 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4361 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4362 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4363 /* if the script was opened in binmode, we need to revert
4364 * it to text mode for compatibility; but only iff it has CRs
4365 * XXX this is a questionable hack at best. */
4366 if (PL_bufend-PL_bufptr > 2
4367 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4370 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4371 loc = PerlIO_tell(PL_rsfp);
4372 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4375 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4377 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4378 #endif /* NETWARE */
4379 #ifdef PERLIO_IS_STDIO /* really? */
4380 # if defined(__BORLANDC__)
4381 /* XXX see note in do_binmode() */
4382 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4386 PerlIO_seek(PL_rsfp, loc, 0);
4390 #ifdef PERLIO_LAYERS
4393 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4394 else if (PL_encoding) {
4401 XPUSHs(PL_encoding);
4403 call_method("name", G_SCALAR);
4407 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4408 Perl_form(aTHX_ ":encoding(%"SVf")",
4426 if (PL_expect == XSTATE) {
4433 if (*s == ':' && s[1] == ':') {
4436 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4437 if (!(tmp = keyword(PL_tokenbuf, len)))
4438 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4452 LOP(OP_ACCEPT,XTERM);
4458 LOP(OP_ATAN2,XTERM);
4464 LOP(OP_BINMODE,XTERM);
4467 LOP(OP_BLESS,XTERM);
4476 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4493 if (!PL_cryptseen) {
4494 PL_cryptseen = TRUE;
4498 LOP(OP_CRYPT,XTERM);
4501 LOP(OP_CHMOD,XTERM);
4504 LOP(OP_CHOWN,XTERM);
4507 LOP(OP_CONNECT,XTERM);
4523 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4527 PL_hints |= HINT_BLOCK_SCOPE;
4537 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4538 LOP(OP_DBMOPEN,XTERM);
4544 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4551 yylval.ival = CopLINE(PL_curcop);
4565 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4566 UNIBRACK(OP_ENTEREVAL);
4584 case KEY_endhostent:
4590 case KEY_endservent:
4593 case KEY_endprotoent:
4604 yylval.ival = CopLINE(PL_curcop);
4606 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4608 if ((PL_bufend - p) >= 3 &&
4609 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4611 else if ((PL_bufend - p) >= 4 &&
4612 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4615 if (isIDFIRST_lazy_if(p,UTF)) {
4616 p = scan_ident(p, PL_bufend,
4617 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4621 Perl_croak(aTHX_ "Missing $ on loop variable");
4626 LOP(OP_FORMLINE,XTERM);
4632 LOP(OP_FCNTL,XTERM);
4638 LOP(OP_FLOCK,XTERM);
4647 LOP(OP_GREPSTART, XREF);
4650 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4665 case KEY_getpriority:
4666 LOP(OP_GETPRIORITY,XTERM);
4668 case KEY_getprotobyname:
4671 case KEY_getprotobynumber:
4672 LOP(OP_GPBYNUMBER,XTERM);
4674 case KEY_getprotoent:
4686 case KEY_getpeername:
4687 UNI(OP_GETPEERNAME);
4689 case KEY_gethostbyname:
4692 case KEY_gethostbyaddr:
4693 LOP(OP_GHBYADDR,XTERM);
4695 case KEY_gethostent:
4698 case KEY_getnetbyname:
4701 case KEY_getnetbyaddr:
4702 LOP(OP_GNBYADDR,XTERM);
4707 case KEY_getservbyname:
4708 LOP(OP_GSBYNAME,XTERM);
4710 case KEY_getservbyport:
4711 LOP(OP_GSBYPORT,XTERM);
4713 case KEY_getservent:
4716 case KEY_getsockname:
4717 UNI(OP_GETSOCKNAME);
4719 case KEY_getsockopt:
4720 LOP(OP_GSOCKOPT,XTERM);
4742 yylval.ival = CopLINE(PL_curcop);
4746 LOP(OP_INDEX,XTERM);
4752 LOP(OP_IOCTL,XTERM);
4764 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4796 LOP(OP_LISTEN,XTERM);
4805 s = scan_pat(s,OP_MATCH);
4806 TERM(sublex_start());
4809 LOP(OP_MAPSTART, XREF);
4812 LOP(OP_MKDIR,XTERM);
4815 LOP(OP_MSGCTL,XTERM);
4818 LOP(OP_MSGGET,XTERM);
4821 LOP(OP_MSGRCV,XTERM);
4824 LOP(OP_MSGSND,XTERM);
4830 if (isIDFIRST_lazy_if(s,UTF)) {
4831 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4832 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4834 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4835 if (!PL_in_my_stash) {
4838 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4846 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4853 if (PL_expect != XSTATE)
4854 yyerror("\"no\" not allowed in expression");
4855 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4856 s = force_version(s, FALSE);
4861 if (*s == '(' || (s = skipspace(s), *s == '('))
4868 if (isIDFIRST_lazy_if(s,UTF)) {
4870 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4871 for (t=d; *t && isSPACE(*t); t++) ;
4872 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4874 && !(t[0] == '=' && t[1] == '>')
4876 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4877 "Precedence problem: open %.*s should be open(%.*s)",
4878 d - s, s, d - s, s);
4884 yylval.ival = OP_OR;
4894 LOP(OP_OPEN_DIR,XTERM);
4897 checkcomma(s,PL_tokenbuf,"filehandle");
4901 checkcomma(s,PL_tokenbuf,"filehandle");
4920 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4924 LOP(OP_PIPE_OP,XTERM);
4927 s = scan_str(s,FALSE,FALSE);
4929 missingterm((char*)0);
4930 yylval.ival = OP_CONST;
4931 TERM(sublex_start());
4937 s = scan_str(s,FALSE,FALSE);
4939 missingterm((char*)0);
4941 if (SvCUR(PL_lex_stuff)) {
4944 d = SvPV_force(PL_lex_stuff, len);
4947 for (; isSPACE(*d) && len; --len, ++d) ;
4950 if (!warned && ckWARN(WARN_QW)) {
4951 for (; !isSPACE(*d) && len; --len, ++d) {
4953 Perl_warner(aTHX_ packWARN(WARN_QW),
4954 "Possible attempt to separate words with commas");
4957 else if (*d == '#') {
4958 Perl_warner(aTHX_ packWARN(WARN_QW),
4959 "Possible attempt to put comments in qw() list");
4965 for (; !isSPACE(*d) && len; --len, ++d) ;
4967 sv = newSVpvn(b, d-b);
4968 if (DO_UTF8(PL_lex_stuff))
4970 words = append_elem(OP_LIST, words,
4971 newSVOP(OP_CONST, 0, tokeq(sv)));
4975 PL_nextval[PL_nexttoke].opval = words;
4980 SvREFCNT_dec(PL_lex_stuff);
4981 PL_lex_stuff = Nullsv;
4987 s = scan_str(s,FALSE,FALSE);
4989 missingterm((char*)0);
4990 yylval.ival = OP_STRINGIFY;
4991 if (SvIVX(PL_lex_stuff) == '\'')
4992 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
4993 TERM(sublex_start());
4996 s = scan_pat(s,OP_QR);
4997 TERM(sublex_start());
5000 s = scan_str(s,FALSE,FALSE);
5002 missingterm((char*)0);
5003 yylval.ival = OP_BACKTICK;
5005 TERM(sublex_start());
5013 s = force_version(s, FALSE);
5015 else if (*s != 'v' || !isDIGIT(s[1])
5016 || (s = force_version(s, TRUE), *s == 'v'))
5018 *PL_tokenbuf = '\0';
5019 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5020 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5021 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5023 yyerror("<> should be quotes");
5031 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5035 LOP(OP_RENAME,XTERM);
5044 LOP(OP_RINDEX,XTERM);
5054 UNIDOR(OP_READLINE);
5067 LOP(OP_REVERSE,XTERM);
5070 UNIDOR(OP_READLINK);
5078 TERM(sublex_start());
5080 TOKEN(1); /* force error */
5089 LOP(OP_SELECT,XTERM);
5095 LOP(OP_SEMCTL,XTERM);
5098 LOP(OP_SEMGET,XTERM);
5101 LOP(OP_SEMOP,XTERM);
5107 LOP(OP_SETPGRP,XTERM);
5109 case KEY_setpriority:
5110 LOP(OP_SETPRIORITY,XTERM);
5112 case KEY_sethostent:
5118 case KEY_setservent:
5121 case KEY_setprotoent:
5131 LOP(OP_SEEKDIR,XTERM);
5133 case KEY_setsockopt:
5134 LOP(OP_SSOCKOPT,XTERM);
5140 LOP(OP_SHMCTL,XTERM);
5143 LOP(OP_SHMGET,XTERM);
5146 LOP(OP_SHMREAD,XTERM);
5149 LOP(OP_SHMWRITE,XTERM);
5152 LOP(OP_SHUTDOWN,XTERM);
5161 LOP(OP_SOCKET,XTERM);
5163 case KEY_socketpair:
5164 LOP(OP_SOCKPAIR,XTERM);
5167 checkcomma(s,PL_tokenbuf,"subroutine name");
5169 if (*s == ';' || *s == ')') /* probably a close */
5170 Perl_croak(aTHX_ "sort is now a reserved word");
5172 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5176 LOP(OP_SPLIT,XTERM);
5179 LOP(OP_SPRINTF,XTERM);
5182 LOP(OP_SPLICE,XTERM);
5197 LOP(OP_SUBSTR,XTERM);
5203 char tmpbuf[sizeof PL_tokenbuf];
5204 SSize_t tboffset = 0;
5205 expectation attrful;
5206 bool have_name, have_proto, bad_proto;
5207 const int key = tmp;
5211 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5212 (*s == ':' && s[1] == ':'))
5215 attrful = XATTRBLOCK;
5216 /* remember buffer pos'n for later force_word */
5217 tboffset = s - PL_oldbufptr;
5218 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5219 if (strchr(tmpbuf, ':'))
5220 sv_setpv(PL_subname, tmpbuf);
5222 sv_setsv(PL_subname,PL_curstname);
5223 sv_catpvn(PL_subname,"::",2);
5224 sv_catpvn(PL_subname,tmpbuf,len);
5231 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5232 PL_expect = XTERMBLOCK;
5233 attrful = XATTRTERM;
5234 sv_setpvn(PL_subname,"?",1);
5238 if (key == KEY_format) {
5240 PL_lex_formbrack = PL_lex_brackets + 1;
5242 (void) force_word(PL_oldbufptr + tboffset, WORD,
5247 /* Look for a prototype */
5251 s = scan_str(s,FALSE,FALSE);
5253 Perl_croak(aTHX_ "Prototype not terminated");
5254 /* strip spaces and check for bad characters */
5255 d = SvPVX(PL_lex_stuff);
5258 for (p = d; *p; ++p) {
5261 if (!strchr("$@%*;[]&\\", *p))
5266 if (bad_proto && ckWARN(WARN_SYNTAX))
5267 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5268 "Illegal character in prototype for %"SVf" : %s",
5270 SvCUR_set(PL_lex_stuff, tmp);
5278 if (*s == ':' && s[1] != ':')
5279 PL_expect = attrful;
5280 else if (*s != '{' && key == KEY_sub) {
5282 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5284 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5288 PL_nextval[PL_nexttoke].opval =
5289 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5290 PL_lex_stuff = Nullsv;
5294 sv_setpv(PL_subname,
5295 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5298 (void) force_word(PL_oldbufptr + tboffset, WORD,
5307 LOP(OP_SYSTEM,XREF);
5310 LOP(OP_SYMLINK,XTERM);
5313 LOP(OP_SYSCALL,XTERM);
5316 LOP(OP_SYSOPEN,XTERM);
5319 LOP(OP_SYSSEEK,XTERM);
5322 LOP(OP_SYSREAD,XTERM);
5325 LOP(OP_SYSWRITE,XTERM);
5329 TERM(sublex_start());
5350 LOP(OP_TRUNCATE,XTERM);
5362 yylval.ival = CopLINE(PL_curcop);
5366 yylval.ival = CopLINE(PL_curcop);
5370 LOP(OP_UNLINK,XTERM);
5376 LOP(OP_UNPACK,XTERM);
5379 LOP(OP_UTIME,XTERM);
5385 LOP(OP_UNSHIFT,XTERM);
5388 if (PL_expect != XSTATE)
5389 yyerror("\"use\" not allowed in expression");
5391 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5392 s = force_version(s, TRUE);
5393 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5394 PL_nextval[PL_nexttoke].opval = Nullop;
5397 else if (*s == 'v') {
5398 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5399 s = force_version(s, FALSE);
5403 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5404 s = force_version(s, FALSE);
5416 yylval.ival = CopLINE(PL_curcop);
5420 PL_hints |= HINT_BLOCK_SCOPE;
5427 LOP(OP_WAITPID,XTERM);
5436 ctl_l[0] = toCTRL('L');
5438 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5441 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5446 if (PL_expect == XOPERATOR)
5452 yylval.ival = OP_XOR;
5457 TERM(sublex_start());
5462 #pragma segment Main
5466 S_pending_ident(pTHX)
5469 register I32 tmp = 0;
5470 /* pit holds the identifier we read and pending_ident is reset */
5471 char pit = PL_pending_ident;
5472 PL_pending_ident = 0;
5474 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5475 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5477 /* if we're in a my(), we can't allow dynamics here.
5478 $foo'bar has already been turned into $foo::bar, so
5479 just check for colons.
5481 if it's a legal name, the OP is a PADANY.
5484 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5485 if (strchr(PL_tokenbuf,':'))
5486 yyerror(Perl_form(aTHX_ "No package name allowed for "
5487 "variable %s in \"our\"",
5489 tmp = allocmy(PL_tokenbuf);
5492 if (strchr(PL_tokenbuf,':'))
5493 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5495 yylval.opval = newOP(OP_PADANY, 0);
5496 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5502 build the ops for accesses to a my() variable.
5504 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5505 then used in a comparison. This catches most, but not
5506 all cases. For instance, it catches
5507 sort { my($a); $a <=> $b }
5509 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5510 (although why you'd do that is anyone's guess).
5513 if (!strchr(PL_tokenbuf,':')) {
5515 tmp = pad_findmy(PL_tokenbuf);
5516 if (tmp != NOT_IN_PAD) {
5517 /* might be an "our" variable" */
5518 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5519 /* build ops for a bareword */
5520 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
5521 sv_catpvn(sym, "::", 2);
5522 sv_catpv(sym, PL_tokenbuf+1);
5523 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5524 yylval.opval->op_private = OPpCONST_ENTERED;
5527 ? (GV_ADDMULTI | GV_ADDINEVAL)
5530 ((PL_tokenbuf[0] == '$') ? SVt_PV
5531 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5536 /* if it's a sort block and they're naming $a or $b */
5537 if (PL_last_lop_op == OP_SORT &&
5538 PL_tokenbuf[0] == '$' &&
5539 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5542 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5543 d < PL_bufend && *d != '\n';
5546 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5547 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5553 yylval.opval = newOP(OP_PADANY, 0);
5554 yylval.opval->op_targ = tmp;
5560 Whine if they've said @foo in a doublequoted string,
5561 and @foo isn't a variable we can find in the symbol
5564 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5565 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5566 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5567 && ckWARN(WARN_AMBIGUOUS))
5569 /* Downgraded from fatal to warning 20000522 mjd */
5570 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5571 "Possible unintended interpolation of %s in string",
5576 /* build ops for a bareword */
5577 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5578 yylval.opval->op_private = OPpCONST_ENTERED;
5579 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5580 ((PL_tokenbuf[0] == '$') ? SVt_PV
5581 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5587 * The following code was generated by perl_keyword.pl.
5591 Perl_keyword (pTHX_ const char *name, I32 len)
5595 case 1: /* 5 tokens of length 1 */
5627 case 2: /* 18 tokens of length 2 */
5773 case 3: /* 28 tokens of length 3 */
5777 if (name[1] == 'N' &&
5840 if (name[1] == 'i' &&
5880 if (name[1] == 'o' &&
5889 if (name[1] == 'e' &&
5898 if (name[1] == 'n' &&
5907 if (name[1] == 'o' &&
5916 if (name[1] == 'a' &&
5925 if (name[1] == 'o' &&
5987 if (name[1] == 'e' &&
6019 if (name[1] == 'i' &&
6028 if (name[1] == 's' &&
6037 if (name[1] == 'e' &&
6046 if (name[1] == 'o' &&
6058 case 4: /* 40 tokens of length 4 */
6062 if (name[1] == 'O' &&
6072 if (name[1] == 'N' &&
6082 if (name[1] == 'i' &&
6092 if (name[1] == 'h' &&
6102 if (name[1] == 'u' &&
6115 if (name[2] == 'c' &&
6124 if (name[2] == 's' &&
6133 if (name[2] == 'a' &&
6169 if (name[1] == 'o' &&
6182 if (name[2] == 't' &&
6191 if (name[2] == 'o' &&
6200 if (name[2] == 't' &&
6209 if (name[2] == 'e' &&
6222 if (name[1] == 'o' &&
6235 if (name[2] == 'y' &&
6244 if (name[2] == 'l' &&
6260 if (name[2] == 's' &&
6269 if (name[2] == 'n' &&
6278 if (name[2] == 'c' &&
6291 if (name[1] == 'e' &&
6301 if (name[1] == 'p' &&
6314 if (name[2] == 'c' &&
6323 if (name[2] == 'p' &&
6332 if (name[2] == 's' &&
6348 if (name[2] == 'n' &&
6418 if (name[2] == 'r' &&
6427 if (name[2] == 'r' &&
6436 if (name[2] == 'a' &&
6452 if (name[2] == 'l' &&
6519 case 5: /* 36 tokens of length 5 */
6523 if (name[1] == 'E' &&
6534 if (name[1] == 'H' &&
6548 if (name[2] == 'a' &&
6558 if (name[2] == 'a' &&
6572 if (name[1] == 'l' &&
6589 if (name[3] == 'i' &&
6598 if (name[3] == 'o' &&
6634 if (name[2] == 'o' &&
6644 if (name[2] == 'y' &&
6658 if (name[1] == 'l' &&
6672 if (name[2] == 'n' &&
6682 if (name[2] == 'o' &&
6699 if (name[2] == 'd' &&
6709 if (name[2] == 'c' &&
6726 if (name[2] == 'c' &&
6736 if (name[2] == 't' &&
6750 if (name[1] == 'k' &&
6761 if (name[1] == 'r' &&
6775 if (name[2] == 's' &&
6785 if (name[2] == 'd' &&
6802 if (name[2] == 'm' &&
6812 if (name[2] == 'i' &&
6822 if (name[2] == 'e' &&
6832 if (name[2] == 'l' &&
6842 if (name[2] == 'a' &&
6852 if (name[2] == 'u' &&
6866 if (name[1] == 'i' &&
6880 if (name[2] == 'a' &&
6893 if (name[3] == 'e' &&
6928 if (name[2] == 'i' &&
6945 if (name[2] == 'i' &&
6955 if (name[2] == 'i' &&
6972 case 6: /* 33 tokens of length 6 */
6976 if (name[1] == 'c' &&
6991 if (name[2] == 'l' &&
7002 if (name[2] == 'r' &&
7017 if (name[1] == 'e' &&
7032 if (name[2] == 's' &&
7037 if(ckWARN_d(WARN_SYNTAX))
7038 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7044 if (name[2] == 'i' &&
7062 if (name[2] == 'l' &&
7073 if (name[2] == 'r' &&
7088 if (name[1] == 'm' &&
7103 if (name[2] == 'n' &&
7114 if (name[2] == 's' &&
7129 if (name[1] == 's' &&
7135 if (name[4] == 't' &&
7144 if (name[4] == 'e' &&
7153 if (name[4] == 'c' &&
7162 if (name[4] == 'n' &&
7178 if (name[1] == 'r' &&
7196 if (name[3] == 'a' &&
7206 if (name[3] == 'u' &&
7220 if (name[2] == 'n' &&
7238 if (name[2] == 'a' &&
7252 if (name[3] == 'e' &&
7265 if (name[4] == 't' &&
7274 if (name[4] == 'e' &&
7296 if (name[4] == 't' &&
7305 if (name[4] == 'e' &&
7321 if (name[2] == 'c' &&
7332 if (name[2] == 'l' &&
7343 if (name[2] == 'b' &&
7354 if (name[2] == 's' &&
7377 if (name[4] == 's' &&
7386 if (name[4] == 'n' &&
7399 if (name[3] == 'a' &&
7416 if (name[1] == 'a' &&
7431 case 7: /* 28 tokens of length 7 */
7435 if (name[1] == 'E' &&
7448 if (name[1] == '_' &&
7461 if (name[1] == 'i' &&
7468 return -KEY_binmode;
7474 if (name[1] == 'o' &&
7481 return -KEY_connect;
7490 if (name[2] == 'm' &&
7496 return -KEY_dbmopen;
7502 if (name[2] == 'f' &&
7518 if (name[1] == 'o' &&
7531 if (name[1] == 'e' &&
7538 if (name[5] == 'r' &&
7541 return -KEY_getpgrp;
7547 if (name[5] == 'i' &&
7550 return -KEY_getppid;
7563 if (name[1] == 'c' &&
7570 return -KEY_lcfirst;
7576 if (name[1] == 'p' &&
7583 return -KEY_opendir;
7589 if (name[1] == 'a' &&
7607 if (name[3] == 'd' &&
7612 return -KEY_readdir;
7618 if (name[3] == 'u' &&
7629 if (name[3] == 'e' &&
7634 return -KEY_reverse;
7653 if (name[3] == 'k' &&
7658 return -KEY_seekdir;
7664 if (name[3] == 'p' &&
7669 return -KEY_setpgrp;
7679 if (name[2] == 'm' &&
7685 return -KEY_shmread;
7691 if (name[2] == 'r' &&
7697 return -KEY_sprintf;
7706 if (name[3] == 'l' &&
7711 return -KEY_symlink;
7720 if (name[4] == 'a' &&
7724 return -KEY_syscall;
7730 if (name[4] == 'p' &&
7734 return -KEY_sysopen;
7740 if (name[4] == 'e' &&
7744 return -KEY_sysread;
7750 if (name[4] == 'e' &&
7754 return -KEY_sysseek;
7772 if (name[1] == 'e' &&
7779 return -KEY_telldir;
7788 if (name[2] == 'f' &&
7794 return -KEY_ucfirst;
7800 if (name[2] == 's' &&
7806 return -KEY_unshift;
7816 if (name[1] == 'a' &&
7823 return -KEY_waitpid;
7832 case 8: /* 26 tokens of length 8 */
7836 if (name[1] == 'U' &&
7844 return KEY_AUTOLOAD;
7855 if (name[3] == 'A' &&
7861 return KEY___DATA__;
7867 if (name[3] == 'I' &&
7873 return -KEY___FILE__;
7879 if (name[3] == 'I' &&
7885 return -KEY___LINE__;
7901 if (name[2] == 'o' &&
7908 return -KEY_closedir;
7914 if (name[2] == 'n' &&
7921 return -KEY_continue;
7931 if (name[1] == 'b' &&
7939 return -KEY_dbmclose;
7945 if (name[1] == 'n' &&
7951 if (name[4] == 'r' &&
7956 return -KEY_endgrent;
7962 if (name[4] == 'w' &&
7967 return -KEY_endpwent;
7980 if (name[1] == 'o' &&
7988 return -KEY_formline;
7994 if (name[1] == 'e' &&
8005 if (name[6] == 'n' &&
8008 return -KEY_getgrent;
8014 if (name[6] == 'i' &&
8017 return -KEY_getgrgid;
8023 if (name[6] == 'a' &&
8026 return -KEY_getgrnam;
8039 if (name[4] == 'o' &&
8044 return -KEY_getlogin;
8055 if (name[6] == 'n' &&
8058 return -KEY_getpwent;
8064 if (name[6] == 'a' &&
8067 return -KEY_getpwnam;
8073 if (name[6] == 'i' &&
8076 return -KEY_getpwuid;
8096 if (name[1] == 'e' &&
8103 if (name[5] == 'i' &&
8110 return -KEY_readline;
8115 return -KEY_readlink;
8126 if (name[5] == 'i' &&
8130 return -KEY_readpipe;
8151 if (name[4] == 'r' &&
8156 return -KEY_setgrent;
8162 if (name[4] == 'w' &&
8167 return -KEY_setpwent;
8183 if (name[3] == 'w' &&
8189 return -KEY_shmwrite;
8195 if (name[3] == 't' &&
8201 return -KEY_shutdown;
8211 if (name[2] == 's' &&
8218 return -KEY_syswrite;
8228 if (name[1] == 'r' &&
8236 return -KEY_truncate;
8245 case 9: /* 8 tokens of length 9 */
8249 if (name[1] == 'n' &&
8258 return -KEY_endnetent;
8264 if (name[1] == 'e' &&
8273 return -KEY_getnetent;
8279 if (name[1] == 'o' &&
8288 return -KEY_localtime;
8294 if (name[1] == 'r' &&
8303 return KEY_prototype;
8309 if (name[1] == 'u' &&
8318 return -KEY_quotemeta;
8324 if (name[1] == 'e' &&
8333 return -KEY_rewinddir;
8339 if (name[1] == 'e' &&
8348 return -KEY_setnetent;
8354 if (name[1] == 'a' &&
8363 return -KEY_wantarray;
8372 case 10: /* 9 tokens of length 10 */
8376 if (name[1] == 'n' &&
8382 if (name[4] == 'o' &&
8389 return -KEY_endhostent;
8395 if (name[4] == 'e' &&
8402 return -KEY_endservent;
8415 if (name[1] == 'e' &&
8421 if (name[4] == 'o' &&
8428 return -KEY_gethostent;
8437 if (name[5] == 'r' &&
8443 return -KEY_getservent;
8449 if (name[5] == 'c' &&
8455 return -KEY_getsockopt;
8480 if (name[4] == 'o' &&
8487 return -KEY_sethostent;
8496 if (name[5] == 'r' &&
8502 return -KEY_setservent;
8508 if (name[5] == 'c' &&
8514 return -KEY_setsockopt;
8531 if (name[2] == 'c' &&
8540 return -KEY_socketpair;
8553 case 11: /* 8 tokens of length 11 */
8557 if (name[1] == '_' &&
8568 return -KEY___PACKAGE__;
8574 if (name[1] == 'n' &&
8585 return -KEY_endprotoent;
8591 if (name[1] == 'e' &&
8600 if (name[5] == 'e' &&
8607 return -KEY_getpeername;
8616 if (name[6] == 'o' &&
8622 return -KEY_getpriority;
8628 if (name[6] == 't' &&
8634 return -KEY_getprotoent;
8648 if (name[4] == 'o' &&
8656 return -KEY_getsockname;
8669 if (name[1] == 'e' &&
8677 if (name[6] == 'o' &&
8683 return -KEY_setpriority;
8689 if (name[6] == 't' &&
8695 return -KEY_setprotoent;
8711 case 12: /* 2 tokens of length 12 */
8712 if (name[0] == 'g' &&
8724 if (name[9] == 'd' &&
8727 { /* getnetbyaddr */
8728 return -KEY_getnetbyaddr;
8734 if (name[9] == 'a' &&
8737 { /* getnetbyname */
8738 return -KEY_getnetbyname;
8750 case 13: /* 4 tokens of length 13 */
8751 if (name[0] == 'g' &&
8758 if (name[4] == 'o' &&
8767 if (name[10] == 'd' &&
8770 { /* gethostbyaddr */
8771 return -KEY_gethostbyaddr;
8777 if (name[10] == 'a' &&
8780 { /* gethostbyname */
8781 return -KEY_gethostbyname;
8794 if (name[4] == 'e' &&
8803 if (name[10] == 'a' &&
8806 { /* getservbyname */
8807 return -KEY_getservbyname;
8813 if (name[10] == 'o' &&
8816 { /* getservbyport */
8817 return -KEY_getservbyport;
8836 case 14: /* 1 tokens of length 14 */
8837 if (name[0] == 'g' &&
8851 { /* getprotobyname */
8852 return -KEY_getprotobyname;
8857 case 16: /* 1 tokens of length 16 */
8858 if (name[0] == 'g' &&
8874 { /* getprotobynumber */
8875 return -KEY_getprotobynumber;
8889 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
8893 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8894 if (ckWARN(WARN_SYNTAX)) {
8896 for (w = s+2; *w && level; w++) {
8903 for (; *w && isSPACE(*w); w++) ;
8904 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
8905 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8906 "%s (...) interpreted as function",name);
8909 while (s < PL_bufend && isSPACE(*s))
8913 while (s < PL_bufend && isSPACE(*s))
8915 if (isIDFIRST_lazy_if(s,UTF)) {
8917 while (isALNUM_lazy_if(s,UTF))
8919 while (s < PL_bufend && isSPACE(*s))
8923 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
8924 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
8928 Perl_croak(aTHX_ "No comma allowed after %s", what);
8933 /* Either returns sv, or mortalizes sv and returns a new SV*.
8934 Best used as sv=new_constant(..., sv, ...).
8935 If s, pv are NULL, calls subroutine with one argument,
8936 and type is used with error messages only. */
8939 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
8943 HV *table = GvHV(PL_hintgv); /* ^H */
8947 const char *why1, *why2, *why3;
8949 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8952 why2 = strEQ(key,"charnames")
8953 ? "(possibly a missing \"use charnames ...\")"
8955 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8956 (type ? type: "undef"), why2);
8958 /* This is convoluted and evil ("goto considered harmful")
8959 * but I do not understand the intricacies of all the different
8960 * failure modes of %^H in here. The goal here is to make
8961 * the most probable error message user-friendly. --jhi */
8966 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8967 (type ? type: "undef"), why1, why2, why3);
8969 yyerror(SvPVX(msg));
8973 cvp = hv_fetch(table, key, strlen(key), FALSE);
8974 if (!cvp || !SvOK(*cvp)) {
8977 why3 = "} is not defined";
8980 sv_2mortal(sv); /* Parent created it permanently */
8983 pv = sv_2mortal(newSVpvn(s, len));
8985 typesv = sv_2mortal(newSVpv(type, 0));
8987 typesv = &PL_sv_undef;
8989 PUSHSTACKi(PERLSI_OVERLOAD);
9001 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9005 /* Check the eval first */
9006 if (!PL_in_eval && SvTRUE(ERRSV)) {
9008 sv_catpv(ERRSV, "Propagated");
9009 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
9011 res = SvREFCNT_inc(sv);
9015 (void)SvREFCNT_inc(res);
9024 why1 = "Call to &{$^H{";
9026 why3 = "}} did not return a defined value";
9034 /* Returns a NUL terminated string, with the length of the string written to
9038 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9040 register char *d = dest;
9041 register char *e = d + destlen - 3; /* two-character token, ending NUL */
9044 Perl_croak(aTHX_ ident_too_long);
9045 if (isALNUM(*s)) /* UTF handled below */
9047 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9052 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9056 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9057 char *t = s + UTF8SKIP(s);
9058 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9060 if (d + (t - s) > e)
9061 Perl_croak(aTHX_ ident_too_long);
9062 Copy(s, d, t - s, char);
9075 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9085 e = d + destlen - 3; /* two-character token, ending NUL */
9087 while (isDIGIT(*s)) {
9089 Perl_croak(aTHX_ ident_too_long);
9096 Perl_croak(aTHX_ ident_too_long);
9097 if (isALNUM(*s)) /* UTF handled below */
9099 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9104 else if (*s == ':' && s[1] == ':') {
9108 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9109 char *t = s + UTF8SKIP(s);
9110 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9112 if (d + (t - s) > e)
9113 Perl_croak(aTHX_ ident_too_long);
9114 Copy(s, d, t - s, char);
9125 if (PL_lex_state != LEX_NORMAL)
9126 PL_lex_state = LEX_INTERPENDMAYBE;
9129 if (*s == '$' && s[1] &&
9130 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9143 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9148 if (isSPACE(s[-1])) {
9150 const char ch = *s++;
9151 if (!SPACE_OR_TAB(ch)) {
9157 if (isIDFIRST_lazy_if(d,UTF)) {
9161 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9163 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9166 Copy(s, d, e - s, char);
9171 while ((isALNUM(*s) || *s == ':') && d < e)
9174 Perl_croak(aTHX_ ident_too_long);
9177 while (s < send && SPACE_OR_TAB(*s)) s++;
9178 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9179 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9180 const char *brack = *s == '[' ? "[...]" : "{...}";
9181 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9182 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9183 funny, dest, brack, funny, dest, brack);
9186 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9190 /* Handle extended ${^Foo} variables
9191 * 1999-02-27 mjd-perl-patch@plover.com */
9192 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9196 while (isALNUM(*s) && d < e) {
9200 Perl_croak(aTHX_ ident_too_long);
9205 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9206 PL_lex_state = LEX_INTERPEND;
9211 if (PL_lex_state == LEX_NORMAL) {
9212 if (ckWARN(WARN_AMBIGUOUS) &&
9213 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9215 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9216 "Ambiguous use of %c{%s} resolved to %c%s",
9217 funny, dest, funny, dest);
9222 s = bracket; /* let the parser handle it */
9226 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9227 PL_lex_state = LEX_INTERPEND;
9232 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9237 *pmfl |= PMf_GLOBAL;
9239 *pmfl |= PMf_CONTINUE;
9243 *pmfl |= PMf_MULTILINE;
9245 *pmfl |= PMf_SINGLELINE;
9247 *pmfl |= PMf_EXTENDED;
9251 S_scan_pat(pTHX_ char *start, I32 type)
9254 char *s = scan_str(start,FALSE,FALSE);
9257 Perl_croak(aTHX_ "Search pattern not terminated");
9259 pm = (PMOP*)newPMOP(type, 0);
9260 if (PL_multi_open == '?')
9261 pm->op_pmflags |= PMf_ONCE;
9263 while (*s && strchr("iomsx", *s))
9264 pmflag(&pm->op_pmflags,*s++);
9267 while (*s && strchr("iogcmsx", *s))
9268 pmflag(&pm->op_pmflags,*s++);
9270 /* issue a warning if /c is specified,but /g is not */
9271 if (ckWARN(WARN_REGEXP) &&
9272 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9274 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
9277 pm->op_pmpermflags = pm->op_pmflags;
9279 PL_lex_op = (OP*)pm;
9280 yylval.ival = OP_MATCH;
9285 S_scan_subst(pTHX_ char *start)
9293 yylval.ival = OP_NULL;
9295 s = scan_str(start,FALSE,FALSE);
9298 Perl_croak(aTHX_ "Substitution pattern not terminated");
9300 if (s[-1] == PL_multi_open)
9303 first_start = PL_multi_start;
9304 s = scan_str(s,FALSE,FALSE);
9307 SvREFCNT_dec(PL_lex_stuff);
9308 PL_lex_stuff = Nullsv;
9310 Perl_croak(aTHX_ "Substitution replacement not terminated");
9312 PL_multi_start = first_start; /* so whole substitution is taken together */
9314 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9320 else if (strchr("iogcmsx", *s))
9321 pmflag(&pm->op_pmflags,*s++);
9326 /* /c is not meaningful with s/// */
9327 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
9329 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
9334 PL_sublex_info.super_bufptr = s;
9335 PL_sublex_info.super_bufend = PL_bufend;
9337 pm->op_pmflags |= PMf_EVAL;
9338 repl = newSVpvn("",0);
9340 sv_catpv(repl, es ? "eval " : "do ");
9341 sv_catpvn(repl, "{ ", 2);
9342 sv_catsv(repl, PL_lex_repl);
9343 sv_catpvn(repl, " };", 2);
9345 SvREFCNT_dec(PL_lex_repl);
9349 pm->op_pmpermflags = pm->op_pmflags;
9350 PL_lex_op = (OP*)pm;
9351 yylval.ival = OP_SUBST;
9356 S_scan_trans(pTHX_ char *start)
9365 yylval.ival = OP_NULL;
9367 s = scan_str(start,FALSE,FALSE);
9369 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9370 if (s[-1] == PL_multi_open)
9373 s = scan_str(s,FALSE,FALSE);
9376 SvREFCNT_dec(PL_lex_stuff);
9377 PL_lex_stuff = Nullsv;
9379 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9382 complement = del = squash = 0;
9386 complement = OPpTRANS_COMPLEMENT;
9389 del = OPpTRANS_DELETE;
9392 squash = OPpTRANS_SQUASH;
9401 New(803, tbl, complement&&!del?258:256, short);
9402 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9403 o->op_private &= ~OPpTRANS_ALL;
9404 o->op_private |= del|squash|complement|
9405 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9406 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9409 yylval.ival = OP_TRANS;
9414 S_scan_heredoc(pTHX_ register char *s)
9417 I32 op_type = OP_SCALAR;
9421 const char newline[] = "\n";
9422 const char *found_newline;
9426 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9430 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9433 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9434 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9437 s = delimcpy(d, e, s, PL_bufend, term, &len);
9447 if (!isALNUM_lazy_if(s,UTF))
9448 deprecate_old("bare << to mean <<\"\"");
9449 for (; isALNUM_lazy_if(s,UTF); s++) {
9454 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9455 Perl_croak(aTHX_ "Delimiter for here document is too long");
9458 len = d - PL_tokenbuf;
9459 #ifndef PERL_STRICT_CR
9460 d = strchr(s, '\r');
9464 while (s < PL_bufend) {
9470 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9479 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
9483 if ( outer || !(found_newline = ninstr(s,PL_bufend,newline,newline+1)) ) {
9484 herewas = newSVpvn(s,PL_bufend-s);
9488 herewas = newSVpvn(s,found_newline-s);
9490 s += SvCUR(herewas);
9492 tmpstr = NEWSV(87,79);
9493 sv_upgrade(tmpstr, SVt_PVIV);
9496 SvIV_set(tmpstr, -1);
9498 else if (term == '`') {
9499 op_type = OP_BACKTICK;
9500 SvIV_set(tmpstr, '\\');
9504 PL_multi_start = CopLINE(PL_curcop);
9505 PL_multi_open = PL_multi_close = '<';
9506 term = *PL_tokenbuf;
9507 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9508 char *bufptr = PL_sublex_info.super_bufptr;
9509 char *bufend = PL_sublex_info.super_bufend;
9510 char *olds = s - SvCUR(herewas);
9511 s = strchr(bufptr, '\n');
9515 while (s < bufend &&
9516 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9518 CopLINE_inc(PL_curcop);
9521 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9522 missingterm(PL_tokenbuf);
9524 sv_setpvn(herewas,bufptr,d-bufptr+1);
9525 sv_setpvn(tmpstr,d+1,s-d);
9527 sv_catpvn(herewas,s,bufend-s);
9528 Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
9535 while (s < PL_bufend &&
9536 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9538 CopLINE_inc(PL_curcop);
9540 if (s >= PL_bufend) {
9541 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9542 missingterm(PL_tokenbuf);
9544 sv_setpvn(tmpstr,d+1,s-d);
9546 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9548 sv_catpvn(herewas,s,PL_bufend-s);
9549 sv_setsv(PL_linestr,herewas);
9550 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9551 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9552 PL_last_lop = PL_last_uni = Nullch;
9555 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9556 while (s >= PL_bufend) { /* multiple line string? */
9558 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9559 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9560 missingterm(PL_tokenbuf);
9562 CopLINE_inc(PL_curcop);
9563 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9564 PL_last_lop = PL_last_uni = Nullch;
9565 #ifndef PERL_STRICT_CR
9566 if (PL_bufend - PL_linestart >= 2) {
9567 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9568 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9570 PL_bufend[-2] = '\n';
9572 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
9574 else if (PL_bufend[-1] == '\r')
9575 PL_bufend[-1] = '\n';
9577 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9578 PL_bufend[-1] = '\n';
9580 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9581 SV *sv = NEWSV(88,0);
9583 sv_upgrade(sv, SVt_PVMG);
9584 sv_setsv(sv,PL_linestr);
9587 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9589 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9590 STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
9591 *(SvPVX(PL_linestr) + off ) = ' ';
9592 sv_catsv(PL_linestr,herewas);
9593 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9594 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9598 sv_catsv(tmpstr,PL_linestr);
9603 PL_multi_end = CopLINE(PL_curcop);
9604 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9605 SvPV_shrink_to_cur(tmpstr);
9607 SvREFCNT_dec(herewas);
9609 if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
9611 else if (PL_encoding)
9612 sv_recode_to_utf8(tmpstr, PL_encoding);
9614 PL_lex_stuff = tmpstr;
9615 yylval.ival = op_type;
9620 takes: current position in input buffer
9621 returns: new position in input buffer
9622 side-effects: yylval and lex_op are set.
9627 <FH> read from filehandle
9628 <pkg::FH> read from package qualified filehandle
9629 <pkg'FH> read from package qualified filehandle
9630 <$fh> read from filehandle in $fh
9636 S_scan_inputsymbol(pTHX_ char *start)
9638 register char *s = start; /* current position in buffer */
9644 d = PL_tokenbuf; /* start of temp holding space */
9645 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9646 end = strchr(s, '\n');
9649 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9651 /* die if we didn't have space for the contents of the <>,
9652 or if it didn't end, or if we see a newline
9655 if (len >= sizeof PL_tokenbuf)
9656 Perl_croak(aTHX_ "Excessively long <> operator");
9658 Perl_croak(aTHX_ "Unterminated <> operator");
9663 Remember, only scalar variables are interpreted as filehandles by
9664 this code. Anything more complex (e.g., <$fh{$num}>) will be
9665 treated as a glob() call.
9666 This code makes use of the fact that except for the $ at the front,
9667 a scalar variable and a filehandle look the same.
9669 if (*d == '$' && d[1]) d++;
9671 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9672 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9675 /* If we've tried to read what we allow filehandles to look like, and
9676 there's still text left, then it must be a glob() and not a getline.
9677 Use scan_str to pull out the stuff between the <> and treat it
9678 as nothing more than a string.
9681 if (d - PL_tokenbuf != len) {
9682 yylval.ival = OP_GLOB;
9684 s = scan_str(start,FALSE,FALSE);
9686 Perl_croak(aTHX_ "Glob not terminated");
9690 bool readline_overriden = FALSE;
9691 GV *gv_readline = Nullgv;
9693 /* we're in a filehandle read situation */
9696 /* turn <> into <ARGV> */
9698 Copy("ARGV",d,5,char);
9700 /* Check whether readline() is overriden */
9701 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
9702 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9704 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9705 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9706 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9707 readline_overriden = TRUE;
9709 /* if <$fh>, create the ops to turn the variable into a
9715 /* try to find it in the pad for this block, otherwise find
9716 add symbol table ops
9718 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
9719 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
9720 SV *sym = sv_2mortal(
9721 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
9722 sv_catpvn(sym, "::", 2);
9728 OP *o = newOP(OP_PADSV, 0);
9730 PL_lex_op = readline_overriden
9731 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9732 append_elem(OP_LIST, o,
9733 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9734 : (OP*)newUNOP(OP_READLINE, 0, o);
9743 ? (GV_ADDMULTI | GV_ADDINEVAL)
9746 PL_lex_op = readline_overriden
9747 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9748 append_elem(OP_LIST,
9749 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9750 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9751 : (OP*)newUNOP(OP_READLINE, 0,
9752 newUNOP(OP_RV2SV, 0,
9753 newGVOP(OP_GV, 0, gv)));
9755 if (!readline_overriden)
9756 PL_lex_op->op_flags |= OPf_SPECIAL;
9757 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
9758 yylval.ival = OP_NULL;
9761 /* If it's none of the above, it must be a literal filehandle
9762 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9764 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9765 PL_lex_op = readline_overriden
9766 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9767 append_elem(OP_LIST,
9768 newGVOP(OP_GV, 0, gv),
9769 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9770 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9771 yylval.ival = OP_NULL;
9780 takes: start position in buffer
9781 keep_quoted preserve \ on the embedded delimiter(s)
9782 keep_delims preserve the delimiters around the string
9783 returns: position to continue reading from buffer
9784 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9785 updates the read buffer.
9787 This subroutine pulls a string out of the input. It is called for:
9788 q single quotes q(literal text)
9789 ' single quotes 'literal text'
9790 qq double quotes qq(interpolate $here please)
9791 " double quotes "interpolate $here please"
9792 qx backticks qx(/bin/ls -l)
9793 ` backticks `/bin/ls -l`
9794 qw quote words @EXPORT_OK = qw( func() $spam )
9795 m// regexp match m/this/
9796 s/// regexp substitute s/this/that/
9797 tr/// string transliterate tr/this/that/
9798 y/// string transliterate y/this/that/
9799 ($*@) sub prototypes sub foo ($)
9800 (stuff) sub attr parameters sub foo : attr(stuff)
9801 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9803 In most of these cases (all but <>, patterns and transliterate)
9804 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9805 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9806 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9809 It skips whitespace before the string starts, and treats the first
9810 character as the delimiter. If the delimiter is one of ([{< then
9811 the corresponding "close" character )]}> is used as the closing
9812 delimiter. It allows quoting of delimiters, and if the string has
9813 balanced delimiters ([{<>}]) it allows nesting.
9815 On success, the SV with the resulting string is put into lex_stuff or,
9816 if that is already non-NULL, into lex_repl. The second case occurs only
9817 when parsing the RHS of the special constructs s/// and tr/// (y///).
9818 For convenience, the terminating delimiter character is stuffed into
9823 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9825 SV *sv; /* scalar value: string */
9826 char *tmps; /* temp string, used for delimiter matching */
9827 register char *s = start; /* current position in the buffer */
9828 register char term; /* terminating character */
9829 register char *to; /* current position in the sv's data */
9830 I32 brackets = 1; /* bracket nesting level */
9831 bool has_utf8 = FALSE; /* is there any utf8 content? */
9832 I32 termcode; /* terminating char. code */
9833 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9834 STRLEN termlen; /* length of terminating string */
9835 char *last = NULL; /* last position for nesting bracket */
9837 /* skip space before the delimiter */
9841 /* mark where we are, in case we need to report errors */
9844 /* after skipping whitespace, the next character is the terminator */
9847 termcode = termstr[0] = term;
9851 termcode = utf8_to_uvchr((U8*)s, &termlen);
9852 Copy(s, termstr, termlen, U8);
9853 if (!UTF8_IS_INVARIANT(term))
9857 /* mark where we are */
9858 PL_multi_start = CopLINE(PL_curcop);
9859 PL_multi_open = term;
9861 /* find corresponding closing delimiter */
9862 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9863 termcode = termstr[0] = term = tmps[5];
9865 PL_multi_close = term;
9867 /* create a new SV to hold the contents. 87 is leak category, I'm
9868 assuming. 79 is the SV's initial length. What a random number. */
9870 sv_upgrade(sv, SVt_PVIV);
9871 SvIV_set(sv, termcode);
9872 (void)SvPOK_only(sv); /* validate pointer */
9874 /* move past delimiter and try to read a complete string */
9876 sv_catpvn(sv, s, termlen);
9879 if (PL_encoding && !UTF) {
9883 int offset = s - SvPVX(PL_linestr);
9884 bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9885 &offset, (char*)termstr, termlen);
9886 char *ns = SvPVX(PL_linestr) + offset;
9887 char *svlast = SvEND(sv) - 1;
9889 for (; s < ns; s++) {
9890 if (*s == '\n' && !PL_rsfp)
9891 CopLINE_inc(PL_curcop);
9894 goto read_more_line;
9896 /* handle quoted delimiters */
9897 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9899 for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
9901 if ((svlast-1 - t) % 2) {
9905 SvCUR_set(sv, SvCUR(sv) - 1);
9910 if (PL_multi_open == PL_multi_close) {
9918 for (t = w = last; t < svlast; w++, t++) {
9919 /* At here, all closes are "was quoted" one,
9920 so we don't check PL_multi_close. */
9922 if (!keep_quoted && *(t+1) == PL_multi_open)
9927 else if (*t == PL_multi_open)
9935 SvCUR_set(sv, w - SvPVX(sv));
9938 if (--brackets <= 0)
9944 SvCUR_set(sv, SvCUR(sv) - 1);
9950 /* extend sv if need be */
9951 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9952 /* set 'to' to the next character in the sv's string */
9953 to = SvPVX(sv)+SvCUR(sv);
9955 /* if open delimiter is the close delimiter read unbridle */
9956 if (PL_multi_open == PL_multi_close) {
9957 for (; s < PL_bufend; s++,to++) {
9958 /* embedded newlines increment the current line number */
9959 if (*s == '\n' && !PL_rsfp)
9960 CopLINE_inc(PL_curcop);
9961 /* handle quoted delimiters */
9962 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9963 if (!keep_quoted && s[1] == term)
9965 /* any other quotes are simply copied straight through */
9969 /* terminate when run out of buffer (the for() condition), or
9970 have found the terminator */
9971 else if (*s == term) {
9974 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9977 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9983 /* if the terminator isn't the same as the start character (e.g.,
9984 matched brackets), we have to allow more in the quoting, and
9985 be prepared for nested brackets.
9988 /* read until we run out of string, or we find the terminator */
9989 for (; s < PL_bufend; s++,to++) {
9990 /* embedded newlines increment the line count */
9991 if (*s == '\n' && !PL_rsfp)
9992 CopLINE_inc(PL_curcop);
9993 /* backslashes can escape the open or closing characters */
9994 if (*s == '\\' && s+1 < PL_bufend) {
9996 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10001 /* allow nested opens and closes */
10002 else if (*s == PL_multi_close && --brackets <= 0)
10004 else if (*s == PL_multi_open)
10006 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10011 /* terminate the copied string and update the sv's end-of-string */
10013 SvCUR_set(sv, to - SvPVX(sv));
10016 * this next chunk reads more into the buffer if we're not done yet
10020 break; /* handle case where we are done yet :-) */
10022 #ifndef PERL_STRICT_CR
10023 if (to - SvPVX(sv) >= 2) {
10024 if ((to[-2] == '\r' && to[-1] == '\n') ||
10025 (to[-2] == '\n' && to[-1] == '\r'))
10029 SvCUR_set(sv, to - SvPVX(sv));
10031 else if (to[-1] == '\r')
10034 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
10039 /* if we're out of file, or a read fails, bail and reset the current
10040 line marker so we can report where the unterminated string began
10043 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10045 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10048 /* we read a line, so increment our line counter */
10049 CopLINE_inc(PL_curcop);
10051 /* update debugger info */
10052 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10053 SV *sv = NEWSV(88,0);
10055 sv_upgrade(sv, SVt_PVMG);
10056 sv_setsv(sv,PL_linestr);
10057 (void)SvIOK_on(sv);
10059 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10062 /* having changed the buffer, we must update PL_bufend */
10063 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10064 PL_last_lop = PL_last_uni = Nullch;
10067 /* at this point, we have successfully read the delimited string */
10069 if (!PL_encoding || UTF) {
10071 sv_catpvn(sv, s, termlen);
10074 if (has_utf8 || PL_encoding)
10077 PL_multi_end = CopLINE(PL_curcop);
10079 /* if we allocated too much space, give some back */
10080 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10081 SvLEN_set(sv, SvCUR(sv) + 1);
10082 SvPV_renew(sv, SvLEN(sv));
10085 /* decide whether this is the first or second quoted string we've read
10098 takes: pointer to position in buffer
10099 returns: pointer to new position in buffer
10100 side-effects: builds ops for the constant in yylval.op
10102 Read a number in any of the formats that Perl accepts:
10104 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10105 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10108 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10110 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10113 If it reads a number without a decimal point or an exponent, it will
10114 try converting the number to an integer and see if it can do so
10115 without loss of precision.
10119 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10121 register const char *s = start; /* current position in buffer */
10122 register char *d; /* destination in temp buffer */
10123 register char *e; /* end of temp buffer */
10124 NV nv; /* number read, as a double */
10125 SV *sv = Nullsv; /* place to put the converted number */
10126 bool floatit; /* boolean: int or float? */
10127 const char *lastub = 0; /* position of last underbar */
10128 static char const number_too_long[] = "Number too long";
10130 /* We use the first character to decide what type of number this is */
10134 Perl_croak(aTHX_ "panic: scan_num");
10136 /* if it starts with a 0, it could be an octal number, a decimal in
10137 0.13 disguise, or a hexadecimal number, or a binary number. */
10141 u holds the "number so far"
10142 shift the power of 2 of the base
10143 (hex == 4, octal == 3, binary == 1)
10144 overflowed was the number more than we can hold?
10146 Shift is used when we add a digit. It also serves as an "are
10147 we in octal/hex/binary?" indicator to disallow hex characters
10148 when in octal mode.
10153 bool overflowed = FALSE;
10154 bool just_zero = TRUE; /* just plain 0 or binary number? */
10155 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10156 static const char* const bases[5] =
10157 { "", "binary", "", "octal", "hexadecimal" };
10158 static const char* const Bases[5] =
10159 { "", "Binary", "", "Octal", "Hexadecimal" };
10160 static const char* const maxima[5] =
10162 "0b11111111111111111111111111111111",
10166 const char *base, *Base, *max;
10168 /* check for hex */
10173 } else if (s[1] == 'b') {
10178 /* check for a decimal in disguise */
10179 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10181 /* so it must be octal */
10188 if (ckWARN(WARN_SYNTAX))
10189 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10190 "Misplaced _ in number");
10194 base = bases[shift];
10195 Base = Bases[shift];
10196 max = maxima[shift];
10198 /* read the rest of the number */
10200 /* x is used in the overflow test,
10201 b is the digit we're adding on. */
10206 /* if we don't mention it, we're done */
10210 /* _ are ignored -- but warned about if consecutive */
10212 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10213 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10214 "Misplaced _ in number");
10218 /* 8 and 9 are not octal */
10219 case '8': case '9':
10221 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10225 case '2': case '3': case '4':
10226 case '5': case '6': case '7':
10228 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10231 case '0': case '1':
10232 b = *s++ & 15; /* ASCII digit -> value of digit */
10236 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10237 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10238 /* make sure they said 0x */
10241 b = (*s++ & 7) + 9;
10243 /* Prepare to put the digit we have onto the end
10244 of the number so far. We check for overflows.
10250 x = u << shift; /* make room for the digit */
10252 if ((x >> shift) != u
10253 && !(PL_hints & HINT_NEW_BINARY)) {
10256 if (ckWARN_d(WARN_OVERFLOW))
10257 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10258 "Integer overflow in %s number",
10261 u = x | b; /* add the digit to the end */
10264 n *= nvshift[shift];
10265 /* If an NV has not enough bits in its
10266 * mantissa to represent an UV this summing of
10267 * small low-order numbers is a waste of time
10268 * (because the NV cannot preserve the
10269 * low-order bits anyway): we could just
10270 * remember when did we overflow and in the
10271 * end just multiply n by the right
10279 /* if we get here, we had success: make a scalar value from
10284 /* final misplaced underbar check */
10285 if (s[-1] == '_') {
10286 if (ckWARN(WARN_SYNTAX))
10287 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10292 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
10293 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10294 "%s number > %s non-portable",
10300 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
10301 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10302 "%s number > %s non-portable",
10307 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10308 sv = new_constant(start, s - start, "integer",
10310 else if (PL_hints & HINT_NEW_BINARY)
10311 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10316 handle decimal numbers.
10317 we're also sent here when we read a 0 as the first digit
10319 case '1': case '2': case '3': case '4': case '5':
10320 case '6': case '7': case '8': case '9': case '.':
10323 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10326 /* read next group of digits and _ and copy into d */
10327 while (isDIGIT(*s) || *s == '_') {
10328 /* skip underscores, checking for misplaced ones
10332 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10333 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10334 "Misplaced _ in number");
10338 /* check for end of fixed-length buffer */
10340 Perl_croak(aTHX_ number_too_long);
10341 /* if we're ok, copy the character */
10346 /* final misplaced underbar check */
10347 if (lastub && s == lastub + 1) {
10348 if (ckWARN(WARN_SYNTAX))
10349 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10352 /* read a decimal portion if there is one. avoid
10353 3..5 being interpreted as the number 3. followed
10356 if (*s == '.' && s[1] != '.') {
10361 if (ckWARN(WARN_SYNTAX))
10362 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10363 "Misplaced _ in number");
10367 /* copy, ignoring underbars, until we run out of digits.
10369 for (; isDIGIT(*s) || *s == '_'; s++) {
10370 /* fixed length buffer check */
10372 Perl_croak(aTHX_ number_too_long);
10374 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10375 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10376 "Misplaced _ in number");
10382 /* fractional part ending in underbar? */
10383 if (s[-1] == '_') {
10384 if (ckWARN(WARN_SYNTAX))
10385 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10386 "Misplaced _ in number");
10388 if (*s == '.' && isDIGIT(s[1])) {
10389 /* oops, it's really a v-string, but without the "v" */
10395 /* read exponent part, if present */
10396 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10400 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10401 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10403 /* stray preinitial _ */
10405 if (ckWARN(WARN_SYNTAX))
10406 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10407 "Misplaced _ in number");
10411 /* allow positive or negative exponent */
10412 if (*s == '+' || *s == '-')
10415 /* stray initial _ */
10417 if (ckWARN(WARN_SYNTAX))
10418 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10419 "Misplaced _ in number");
10423 /* read digits of exponent */
10424 while (isDIGIT(*s) || *s == '_') {
10427 Perl_croak(aTHX_ number_too_long);
10431 if (ckWARN(WARN_SYNTAX) &&
10432 ((lastub && s == lastub + 1) ||
10433 (!isDIGIT(s[1]) && s[1] != '_')))
10434 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10435 "Misplaced _ in number");
10442 /* make an sv from the string */
10446 We try to do an integer conversion first if no characters
10447 indicating "float" have been found.
10452 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10454 if (flags == IS_NUMBER_IN_UV) {
10456 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10459 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10460 if (uv <= (UV) IV_MIN)
10461 sv_setiv(sv, -(IV)uv);
10468 /* terminate the string */
10470 nv = Atof(PL_tokenbuf);
10474 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10475 (PL_hints & HINT_NEW_INTEGER) )
10476 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10477 (floatit ? "float" : "integer"),
10481 /* if it starts with a v, it could be a v-string */
10484 sv = NEWSV(92,5); /* preallocate storage space */
10485 s = scan_vstring(s,sv);
10489 /* make the op for the constant and return */
10492 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10494 lvalp->opval = Nullop;
10500 S_scan_formline(pTHX_ register char *s)
10502 register char *eol;
10504 SV *stuff = newSVpvn("",0);
10505 bool needargs = FALSE;
10506 bool eofmt = FALSE;
10508 while (!needargs) {
10511 #ifdef PERL_STRICT_CR
10512 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10514 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10516 if (*t == '\n' || t == PL_bufend) {
10521 if (PL_in_eval && !PL_rsfp) {
10522 eol = (char *) memchr(s,'\n',PL_bufend-s);
10527 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10529 for (t = s; t < eol; t++) {
10530 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10532 goto enough; /* ~~ must be first line in formline */
10534 if (*t == '@' || *t == '^')
10538 sv_catpvn(stuff, s, eol-s);
10539 #ifndef PERL_STRICT_CR
10540 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10541 char *end = SvPVX(stuff) + SvCUR(stuff);
10544 SvCUR_set(stuff, SvCUR(stuff) - 1);
10553 s = filter_gets(PL_linestr, PL_rsfp, 0);
10554 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10555 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10556 PL_last_lop = PL_last_uni = Nullch;
10565 if (SvCUR(stuff)) {
10568 PL_lex_state = LEX_NORMAL;
10569 PL_nextval[PL_nexttoke].ival = 0;
10573 PL_lex_state = LEX_FORMLINE;
10575 if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff)))
10577 else if (PL_encoding)
10578 sv_recode_to_utf8(stuff, PL_encoding);
10580 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10582 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10586 SvREFCNT_dec(stuff);
10588 PL_lex_formbrack = 0;
10599 PL_cshlen = strlen(PL_cshname);
10604 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10606 I32 oldsavestack_ix = PL_savestack_ix;
10607 CV* outsidecv = PL_compcv;
10610 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10612 SAVEI32(PL_subline);
10613 save_item(PL_subname);
10614 SAVESPTR(PL_compcv);
10616 PL_compcv = (CV*)NEWSV(1104,0);
10617 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10618 CvFLAGS(PL_compcv) |= flags;
10620 PL_subline = CopLINE(PL_curcop);
10621 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10622 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10623 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10625 return oldsavestack_ix;
10629 #pragma segment Perl_yylex
10632 Perl_yywarn(pTHX_ const char *s)
10634 PL_in_eval |= EVAL_WARNONLY;
10636 PL_in_eval &= ~EVAL_WARNONLY;
10641 Perl_yyerror(pTHX_ const char *s)
10643 const char *where = NULL;
10644 const char *context = NULL;
10648 if (!yychar || (yychar == ';' && !PL_rsfp))
10650 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
10651 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
10654 The code below is removed for NetWare because it abends/crashes on NetWare
10655 when the script has error such as not having the closing quotes like:
10656 if ($var eq "value)
10657 Checking of white spaces is anyway done in NetWare code.
10660 while (isSPACE(*PL_oldoldbufptr))
10663 context = PL_oldoldbufptr;
10664 contlen = PL_bufptr - PL_oldoldbufptr;
10666 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
10667 PL_oldbufptr != PL_bufptr) {
10670 The code below is removed for NetWare because it abends/crashes on NetWare
10671 when the script has error such as not having the closing quotes like:
10672 if ($var eq "value)
10673 Checking of white spaces is anyway done in NetWare code.
10676 while (isSPACE(*PL_oldbufptr))
10679 context = PL_oldbufptr;
10680 contlen = PL_bufptr - PL_oldbufptr;
10682 else if (yychar > 255)
10683 where = "next token ???";
10684 else if (yychar == -2) { /* YYEMPTY */
10685 if (PL_lex_state == LEX_NORMAL ||
10686 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10687 where = "at end of line";
10688 else if (PL_lex_inpat)
10689 where = "within pattern";
10691 where = "within string";
10694 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
10696 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10697 else if (isPRINT_LC(yychar))
10698 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10700 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10701 where = SvPVX(where_sv);
10703 msg = sv_2mortal(newSVpv(s, 0));
10704 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10705 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10707 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10709 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10710 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10711 Perl_sv_catpvf(aTHX_ msg,
10712 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10713 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10716 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
10717 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
10720 if (PL_error_count >= 10) {
10721 if (PL_in_eval && SvCUR(ERRSV))
10722 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10723 ERRSV, OutCopFILE(PL_curcop));
10725 Perl_croak(aTHX_ "%s has too many errors.\n",
10726 OutCopFILE(PL_curcop));
10729 PL_in_my_stash = Nullhv;
10733 #pragma segment Main
10737 S_swallow_bom(pTHX_ U8 *s)
10739 const STRLEN slen = SvCUR(PL_linestr);
10742 if (s[1] == 0xFE) {
10743 /* UTF-16 little-endian? (or UTF32-LE?) */
10744 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10745 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
10746 #ifndef PERL_NO_UTF16_FILTER
10747 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
10750 if (PL_bufend > (char*)s) {
10754 filter_add(utf16rev_textfilter, NULL);
10755 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10756 utf16_to_utf8_reversed(s, news,
10757 PL_bufend - (char*)s - 1,
10759 sv_setpvn(PL_linestr, (const char*)news, newlen);
10761 SvUTF8_on(PL_linestr);
10762 s = (U8*)SvPVX(PL_linestr);
10763 PL_bufend = SvPVX(PL_linestr) + newlen;
10766 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
10771 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10772 #ifndef PERL_NO_UTF16_FILTER
10773 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10776 if (PL_bufend > (char *)s) {
10780 filter_add(utf16_textfilter, NULL);
10781 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10782 utf16_to_utf8(s, news,
10783 PL_bufend - (char*)s,
10785 sv_setpvn(PL_linestr, (const char*)news, newlen);
10787 SvUTF8_on(PL_linestr);
10788 s = (U8*)SvPVX(PL_linestr);
10789 PL_bufend = SvPVX(PL_linestr) + newlen;
10792 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
10797 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10798 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10799 s += 3; /* UTF-8 */
10805 if (s[2] == 0xFE && s[3] == 0xFF) {
10806 /* UTF-32 big-endian */
10807 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
10810 else if (s[2] == 0 && s[3] != 0) {
10813 * are a good indicator of UTF-16BE. */
10814 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10819 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10822 * are a good indicator of UTF-16LE. */
10823 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10832 * Restore a source filter.
10836 restore_rsfp(pTHX_ void *f)
10838 PerlIO *fp = (PerlIO*)f;
10840 if (PL_rsfp == PerlIO_stdin())
10841 PerlIO_clearerr(PL_rsfp);
10842 else if (PL_rsfp && (PL_rsfp != fp))
10843 PerlIO_close(PL_rsfp);
10847 #ifndef PERL_NO_UTF16_FILTER
10849 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10851 const STRLEN old = SvCUR(sv);
10852 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10853 DEBUG_P(PerlIO_printf(Perl_debug_log,
10854 "utf16_textfilter(%p): %d %d (%d)\n",
10855 utf16_textfilter, idx, maxlen, (int) count));
10859 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10860 Copy(SvPVX(sv), tmps, old, char);
10861 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
10862 SvCUR(sv) - old, &newlen);
10863 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10865 DEBUG_P({sv_dump(sv);});
10870 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10872 const STRLEN old = SvCUR(sv);
10873 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10874 DEBUG_P(PerlIO_printf(Perl_debug_log,
10875 "utf16rev_textfilter(%p): %d %d (%d)\n",
10876 utf16rev_textfilter, idx, maxlen, (int) count));
10880 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10881 Copy(SvPVX(sv), tmps, old, char);
10882 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
10883 SvCUR(sv) - old, &newlen);
10884 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10886 DEBUG_P({ sv_dump(sv); });
10892 Returns a pointer to the next character after the parsed
10893 vstring, as well as updating the passed in sv.
10895 Function must be called like
10898 s = scan_vstring(s,sv);
10900 The sv should already be large enough to store the vstring
10901 passed in, for performance reasons.
10906 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
10908 const char *pos = s;
10909 const char *start = s;
10910 if (*pos == 'v') pos++; /* get past 'v' */
10911 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10913 if ( *pos != '.') {
10914 /* this may not be a v-string if followed by => */
10915 const char *next = pos;
10916 while (next < PL_bufend && isSPACE(*next))
10918 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
10919 /* return string not v-string */
10920 sv_setpvn(sv,(char *)s,pos-s);
10921 return (char *)pos;
10925 if (!isALPHA(*pos)) {
10927 U8 tmpbuf[UTF8_MAXBYTES+1];
10930 if (*s == 'v') s++; /* get past 'v' */
10932 sv_setpvn(sv, "", 0);
10937 /* this is atoi() that tolerates underscores */
10938 const char *end = pos;
10940 while (--end >= s) {
10945 rev += (*end - '0') * mult;
10947 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
10948 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10949 "Integer overflow in decimal number");
10953 if (rev > 0x7FFFFFFF)
10954 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10956 /* Append native character for the rev point */
10957 tmpend = uvchr_to_utf8(tmpbuf, rev);
10958 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10959 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10961 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
10967 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10971 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10979 * c-indentation-style: bsd
10980 * c-basic-offset: 4
10981 * indent-tabs-mode: t
10984 * vim: shiftwidth=4: