3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] =
30 "Identifier too long";
31 static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33 static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
36 static void restore_rsfp(pTHX_ void *f);
37 #ifndef PERL_NO_UTF16_FILTER
38 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
42 #define XFAKEBRACK 128
45 #ifdef USE_UTF8_SCRIPTS
46 # define UTF (!IN_BYTES)
48 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
51 /* In variables named $^X, these are the legal values for X.
52 * 1999-02-27 mjd-perl-patch@plover.com */
53 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
55 /* On MacOS, respect nonbreaking spaces */
56 #ifdef MACOS_TRADITIONAL
57 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
59 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
62 /* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
64 * can get by with a single comparison (if the compiler is smart enough).
67 /* #define LEX_NOTPARSING 11 is done in perl.h. */
70 #define LEX_INTERPNORMAL 9
71 #define LEX_INTERPCASEMOD 8
72 #define LEX_INTERPPUSH 7
73 #define LEX_INTERPSTART 6
74 #define LEX_INTERPEND 5
75 #define LEX_INTERPENDMAYBE 4
76 #define LEX_INTERPCONCAT 3
77 #define LEX_INTERPCONST 2
78 #define LEX_FORMLINE 1
79 #define LEX_KNOWNEXT 0
82 static const char* const lex_state_names[] = {
101 #include "keywords.h"
103 /* CLINE is a macro that ensures PL_copline has a sane value */
108 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
111 * Convenience functions to return different tokens and prime the
112 * lexer for the next token. They all take an argument.
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
124 * FUN1 : not used, except for not, which isn't a UNIOP
125 * BOop : bitwise or or xor
127 * SHop : shift operator
128 * PWop : power operator
129 * PMop : pattern-matching operator
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
135 * Also see LOP and lop() below.
138 #ifdef DEBUGGING /* Serve -DT. */
139 # define REPORT(retval) tokereport(s,(int)retval)
141 # define REPORT(retval) (retval)
144 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
165 /* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
170 #define UNI2(f,x) { \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
177 return REPORT( (int)FUNC1 ); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 #define UNI(f) UNI2(f,XTERM)
182 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
184 #define UNIBRACK(f) { \
187 PL_last_uni = PL_oldbufptr; \
189 return REPORT( (int)FUNC1 ); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
194 /* grandfather return to old style */
195 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
199 /* how to interpret the yylval associated with the token */
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
209 static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
222 { DO, TOKENTYPE_NONE, "DO" },
223 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
224 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
225 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
226 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
227 { ELSE, TOKENTYPE_NONE, "ELSE" },
228 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
229 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
230 { FOR, TOKENTYPE_IVAL, "FOR" },
231 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
232 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
233 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
234 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
235 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
236 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
237 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
238 { IF, TOKENTYPE_IVAL, "IF" },
239 { LABEL, TOKENTYPE_PVAL, "LABEL" },
240 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
241 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
242 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
243 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
244 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
245 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
246 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
247 { MY, TOKENTYPE_IVAL, "MY" },
248 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
249 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
250 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
251 { OROP, TOKENTYPE_IVAL, "OROP" },
252 { OROR, TOKENTYPE_NONE, "OROR" },
253 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
254 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
255 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
256 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
257 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
258 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
259 { PREINC, TOKENTYPE_NONE, "PREINC" },
260 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
261 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
262 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
263 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
264 { SUB, TOKENTYPE_NONE, "SUB" },
265 { THING, TOKENTYPE_OPVAL, "THING" },
266 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
267 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
268 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
269 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
270 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
271 { USE, TOKENTYPE_IVAL, "USE" },
272 { WHILE, TOKENTYPE_IVAL, "WHILE" },
273 { WORD, TOKENTYPE_OPVAL, "WORD" },
274 { 0, TOKENTYPE_NONE, 0 }
277 /* dump the returned token in rv, plus any optional arg in yylval */
280 S_tokereport(pTHX_ const char* s, I32 rv)
283 const char *name = Nullch;
284 enum token_type type = TOKENTYPE_NONE;
285 const struct debug_tokens *p;
286 SV* report = newSVpvn("<== ", 4);
288 for (p = debug_tokens; p->token; p++) {
289 if (p->token == (int)rv) {
296 Perl_sv_catpv(aTHX_ report, name);
297 else if ((char)rv > ' ' && (char)rv < '~')
298 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
300 Perl_sv_catpv(aTHX_ report, "EOF");
302 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
305 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
308 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
310 case TOKENTYPE_OPNUM:
311 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
312 PL_op_name[yylval.ival]);
315 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
317 case TOKENTYPE_OPVAL:
319 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
320 PL_op_name[yylval.opval->op_type]);
322 Perl_sv_catpv(aTHX_ report, "(opval=null)");
325 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
326 if (s - PL_bufptr > 0)
327 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
329 if (PL_oldbufptr && *PL_oldbufptr)
330 sv_catpv(report, PL_tokenbuf);
332 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen_const(report));
342 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
343 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
347 S_ao(pTHX_ int toketype)
349 if (*PL_bufptr == '=') {
351 if (toketype == ANDAND)
352 yylval.ival = OP_ANDASSIGN;
353 else if (toketype == OROR)
354 yylval.ival = OP_ORASSIGN;
355 else if (toketype == DORDOR)
356 yylval.ival = OP_DORASSIGN;
364 * When Perl expects an operator and finds something else, no_op
365 * prints the warning. It always prints "<something> found where
366 * operator expected. It prints "Missing semicolon on previous line?"
367 * if the surprise occurs at the start of the line. "do you need to
368 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
369 * where the compiler doesn't know if foo is a method call or a function.
370 * It prints "Missing operator before end of line" if there's nothing
371 * after the missing operator, or "... before <...>" if there is something
372 * after the missing operator.
376 S_no_op(pTHX_ const char *what, char *s)
378 char *oldbp = PL_bufptr;
379 bool is_first = (PL_oldbufptr == PL_linestart);
385 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
386 if (ckWARN_d(WARN_SYNTAX)) {
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Missing semicolon on previous line?)\n");
390 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
392 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
393 if (t < PL_bufptr && isSPACE(*t))
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Do you need to predeclare %.*s?)\n",
396 t - PL_oldoldbufptr, PL_oldoldbufptr);
400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
401 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
409 * Complain about missing quote/regexp/heredoc terminator.
410 * If it's called with (char *)NULL then it cauterizes the line buffer.
411 * If we're in a delimited string and the delimiter is a control
412 * character, it's reformatted into a two-char sequence like ^C.
417 S_missingterm(pTHX_ char *s)
422 char *nl = strrchr(s,'\n');
428 iscntrl(PL_multi_close)
430 PL_multi_close < 32 || PL_multi_close == 127
434 tmpbuf[1] = toCTRL(PL_multi_close);
439 *tmpbuf = (char)PL_multi_close;
443 q = strchr(s,'"') ? '\'' : '"';
444 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
452 Perl_deprecate(pTHX_ const char *s)
454 if (ckWARN(WARN_DEPRECATED))
455 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
459 Perl_deprecate_old(pTHX_ const char *s)
461 /* This function should NOT be called for any new deprecated warnings */
462 /* Use Perl_deprecate instead */
464 /* It is here to maintain backward compatibility with the pre-5.8 */
465 /* warnings category hierarchy. The "deprecated" category used to */
466 /* live under the "syntax" category. It is now a top-level category */
467 /* in its own right. */
469 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
470 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
471 "Use of %s is deprecated", s);
476 * Deprecate a comma-less variable list.
482 deprecate_old("comma-less variable list");
486 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
487 * utf16-to-utf8-reversed.
490 #ifdef PERL_CR_FILTER
494 register const char *s = SvPVX_const(sv);
495 register const char *e = s + SvCUR(sv);
496 /* outer loop optimized to do nothing if there are no CR-LFs */
498 if (*s++ == '\r' && *s == '\n') {
499 /* hit a CR-LF, need to copy the rest */
500 register char *d = s - 1;
503 if (*s == '\r' && s[1] == '\n')
514 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
516 const I32 count = FILTER_READ(idx+1, sv, maxlen);
517 if (count > 0 && !maxlen)
525 * Initialize variables. Uses the Perl save_stack to save its state (for
526 * recursive calls to the parser).
530 Perl_lex_start(pTHX_ SV *line)
535 SAVEI32(PL_lex_dojoin);
536 SAVEI32(PL_lex_brackets);
537 SAVEI32(PL_lex_casemods);
538 SAVEI32(PL_lex_starts);
539 SAVEI32(PL_lex_state);
540 SAVEVPTR(PL_lex_inpat);
541 SAVEI32(PL_lex_inwhat);
542 if (PL_lex_state == LEX_KNOWNEXT) {
543 I32 toke = PL_nexttoke;
544 while (--toke >= 0) {
545 SAVEI32(PL_nexttype[toke]);
546 SAVEVPTR(PL_nextval[toke]);
548 SAVEI32(PL_nexttoke);
550 SAVECOPLINE(PL_curcop);
553 SAVEPPTR(PL_oldbufptr);
554 SAVEPPTR(PL_oldoldbufptr);
555 SAVEPPTR(PL_last_lop);
556 SAVEPPTR(PL_last_uni);
557 SAVEPPTR(PL_linestart);
558 SAVESPTR(PL_linestr);
559 SAVEGENERICPV(PL_lex_brackstack);
560 SAVEGENERICPV(PL_lex_casestack);
561 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
562 SAVESPTR(PL_lex_stuff);
563 SAVEI32(PL_lex_defer);
564 SAVEI32(PL_sublex_info.sub_inwhat);
565 SAVESPTR(PL_lex_repl);
567 SAVEINT(PL_lex_expect);
569 PL_lex_state = LEX_NORMAL;
573 New(899, PL_lex_brackstack, 120, char);
574 New(899, PL_lex_casestack, 12, char);
576 *PL_lex_casestack = '\0';
579 PL_lex_stuff = Nullsv;
580 PL_lex_repl = Nullsv;
584 PL_sublex_info.sub_inwhat = 0;
586 if (SvREADONLY(PL_linestr))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
588 s = SvPV_const(PL_linestr, len);
589 if (!len || s[len-1] != ';') {
590 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
591 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
592 sv_catpvn(PL_linestr, "\n;", 2);
594 SvTEMP_off(PL_linestr);
595 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
596 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
597 PL_last_lop = PL_last_uni = Nullch;
603 * Finalizer for lexing operations. Must be called when the parser is
604 * done with the lexer.
610 PL_doextract = FALSE;
615 * This subroutine has nothing to do with tilting, whether at windmills
616 * or pinball tables. Its name is short for "increment line". It
617 * increments the current line number in CopLINE(PL_curcop) and checks
618 * to see whether the line starts with a comment of the form
619 * # line 500 "foo.pm"
620 * If so, it sets the current line number and file to the values in the comment.
624 S_incline(pTHX_ char *s)
631 CopLINE_inc(PL_curcop);
634 while (SPACE_OR_TAB(*s)) s++;
635 if (strnEQ(s, "line", 4))
639 if (SPACE_OR_TAB(*s))
643 while (SPACE_OR_TAB(*s)) s++;
649 while (SPACE_OR_TAB(*s))
651 if (*s == '"' && (t = strchr(s+1, '"'))) {
656 for (t = s; !isSPACE(*t); t++) ;
659 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
661 if (*e != '\n' && *e != '\0')
662 return; /* false alarm */
667 CopFILE_free(PL_curcop);
668 CopFILE_set(PL_curcop, s);
671 CopLINE_set(PL_curcop, atoi(n)-1);
676 * Called to gobble the appropriate amount and type of whitespace.
677 * Skips comments as well.
681 S_skipspace(pTHX_ register char *s)
683 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
684 while (s < PL_bufend && SPACE_OR_TAB(*s))
690 SSize_t oldprevlen, oldoldprevlen;
691 SSize_t oldloplen = 0, oldunilen = 0;
692 while (s < PL_bufend && isSPACE(*s)) {
693 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
698 if (s < PL_bufend && *s == '#') {
699 while (s < PL_bufend && *s != '\n')
703 if (PL_in_eval && !PL_rsfp) {
710 /* only continue to recharge the buffer if we're at the end
711 * of the buffer, we're not reading from a source filter, and
712 * we're in normal lexing mode
714 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
715 PL_lex_state == LEX_FORMLINE)
718 /* try to recharge the buffer */
719 if ((s = filter_gets(PL_linestr, PL_rsfp,
720 (prevlen = SvCUR(PL_linestr)))) == Nullch)
722 /* end of file. Add on the -p or -n magic */
725 ";}continue{print or die qq(-p destination: $!\\n);}");
726 PL_minus_n = PL_minus_p = 0;
728 else if (PL_minus_n) {
729 sv_setpvn(PL_linestr, ";}", 2);
733 sv_setpvn(PL_linestr,";", 1);
735 /* reset variables for next time we lex */
736 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
738 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
739 PL_last_lop = PL_last_uni = Nullch;
741 /* Close the filehandle. Could be from -P preprocessor,
742 * STDIN, or a regular file. If we were reading code from
743 * STDIN (because the commandline held no -e or filename)
744 * then we don't close it, we reset it so the code can
745 * read from STDIN too.
748 if (PL_preprocess && !PL_in_eval)
749 (void)PerlProc_pclose(PL_rsfp);
750 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
751 PerlIO_clearerr(PL_rsfp);
753 (void)PerlIO_close(PL_rsfp);
758 /* not at end of file, so we only read another line */
759 /* make corresponding updates to old pointers, for yyerror() */
760 oldprevlen = PL_oldbufptr - PL_bufend;
761 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
763 oldunilen = PL_last_uni - PL_bufend;
765 oldloplen = PL_last_lop - PL_bufend;
766 PL_linestart = PL_bufptr = s + prevlen;
767 PL_bufend = s + SvCUR(PL_linestr);
769 PL_oldbufptr = s + oldprevlen;
770 PL_oldoldbufptr = s + oldoldprevlen;
772 PL_last_uni = s + oldunilen;
774 PL_last_lop = s + oldloplen;
777 /* debugger active and we're not compiling the debugger code,
778 * so store the line into the debugger's array of lines
780 if (PERLDB_LINE && PL_curstash != PL_debstash) {
781 SV *sv = NEWSV(85,0);
783 sv_upgrade(sv, SVt_PVMG);
784 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
787 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
794 * Check the unary operators to ensure there's no ambiguity in how they're
795 * used. An ambiguous piece of code would be:
797 * This doesn't mean rand() + 5. Because rand() is a unary operator,
798 * the +5 is its argument.
807 if (PL_oldoldbufptr != PL_last_uni)
809 while (isSPACE(*PL_last_uni))
811 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
812 if ((t = strchr(s, '(')) && t < PL_bufptr)
814 if (ckWARN_d(WARN_AMBIGUOUS)){
817 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
818 "Warning: Use of \"%s\" without parentheses is ambiguous",
825 * LOP : macro to build a list operator. Its behaviour has been replaced
826 * with a subroutine, S_lop() for which LOP is just another name.
829 #define LOP(f,x) return lop(f,x,s)
833 * Build a list operator (or something that might be one). The rules:
834 * - if we have a next token, then it's a list operator [why?]
835 * - if the next thing is an opening paren, then it's a function
836 * - else it's a list operator
840 S_lop(pTHX_ I32 f, int x, char *s)
846 PL_last_lop = PL_oldbufptr;
847 PL_last_lop_op = (OPCODE)f;
849 return REPORT(LSTOP);
856 return REPORT(LSTOP);
861 * When the lexer realizes it knows the next token (for instance,
862 * it is reordering tokens for the parser) then it can call S_force_next
863 * to know what token to return the next time the lexer is called. Caller
864 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
865 * handles the token correctly.
869 S_force_next(pTHX_ I32 type)
871 PL_nexttype[PL_nexttoke] = type;
873 if (PL_lex_state != LEX_KNOWNEXT) {
874 PL_lex_defer = PL_lex_state;
875 PL_lex_expect = PL_expect;
876 PL_lex_state = LEX_KNOWNEXT;
881 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
883 SV *sv = newSVpvn(start,len);
884 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
891 * When the lexer knows the next thing is a word (for instance, it has
892 * just seen -> and it knows that the next char is a word char, then
893 * it calls S_force_word to stick the next word into the PL_next lookahead.
896 * char *start : buffer position (must be within PL_linestr)
897 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
898 * int check_keyword : if true, Perl checks to make sure the word isn't
899 * a keyword (do this if the word is a label, e.g. goto FOO)
900 * int allow_pack : if true, : characters will also be allowed (require,
902 * int allow_initial_tick : used by the "sub" lexer only.
906 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
911 start = skipspace(start);
913 if (isIDFIRST_lazy_if(s,UTF) ||
914 (allow_pack && *s == ':') ||
915 (allow_initial_tick && *s == '\'') )
917 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
918 if (check_keyword && keyword(PL_tokenbuf, len))
920 if (token == METHOD) {
925 PL_expect = XOPERATOR;
928 PL_nextval[PL_nexttoke].opval
929 = (OP*)newSVOP(OP_CONST,0,
930 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
931 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
939 * Called when the lexer wants $foo *foo &foo etc, but the program
940 * text only contains the "foo" portion. The first argument is a pointer
941 * to the "foo", and the second argument is the type symbol to prefix.
942 * Forces the next token to be a "WORD".
943 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
947 S_force_ident(pTHX_ register const char *s, int kind)
950 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
951 PL_nextval[PL_nexttoke].opval = o;
954 o->op_private = OPpCONST_ENTERED;
955 /* XXX see note in pp_entereval() for why we forgo typo
956 warnings if the symbol must be introduced in an eval.
958 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
959 kind == '$' ? SVt_PV :
960 kind == '@' ? SVt_PVAV :
961 kind == '%' ? SVt_PVHV :
969 Perl_str_to_version(pTHX_ SV *sv)
974 const char *start = SvPV_const(sv,len);
975 const char *end = start + len;
976 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
977 while (start < end) {
981 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
986 retval += ((NV)n)/nshift;
995 * Forces the next token to be a version number.
996 * If the next token appears to be an invalid version number, (e.g. "v2b"),
997 * and if "guessing" is TRUE, then no new token is created (and the caller
998 * must use an alternative parsing method).
1002 S_force_version(pTHX_ char *s, int guessing)
1004 OP *version = Nullop;
1013 while (isDIGIT(*d) || *d == '_' || *d == '.')
1015 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1017 s = scan_num(s, &yylval);
1018 version = yylval.opval;
1019 ver = cSVOPx(version)->op_sv;
1020 if (SvPOK(ver) && !SvNIOK(ver)) {
1021 SvUPGRADE(ver, SVt_PVNV);
1022 SvNV_set(ver, str_to_version(ver));
1023 SvNOK_on(ver); /* hint that it is a version */
1030 /* NOTE: The parser sees the package name and the VERSION swapped */
1031 PL_nextval[PL_nexttoke].opval = version;
1039 * Tokenize a quoted string passed in as an SV. It finds the next
1040 * chunk, up to end of string or a backslash. It may make a new
1041 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1046 S_tokeq(pTHX_ SV *sv)
1049 register char *send;
1057 s = SvPV_force(sv, len);
1058 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1061 while (s < send && *s != '\\')
1066 if ( PL_hints & HINT_NEW_STRING ) {
1067 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1073 if (s + 1 < send && (s[1] == '\\'))
1074 s++; /* all that, just for this */
1079 SvCUR_set(sv, d - SvPVX_const(sv));
1081 if ( PL_hints & HINT_NEW_STRING )
1082 return new_constant(NULL, 0, "q", sv, pv, "q");
1087 * Now come three functions related to double-quote context,
1088 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1089 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1090 * interact with PL_lex_state, and create fake ( ... ) argument lists
1091 * to handle functions and concatenation.
1092 * They assume that whoever calls them will be setting up a fake
1093 * join call, because each subthing puts a ',' after it. This lets
1096 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1098 * (I'm not sure whether the spurious commas at the end of lcfirst's
1099 * arguments and join's arguments are created or not).
1104 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1106 * Pattern matching will set PL_lex_op to the pattern-matching op to
1107 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1109 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1111 * Everything else becomes a FUNC.
1113 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1114 * had an OP_CONST or OP_READLINE). This just sets us up for a
1115 * call to S_sublex_push().
1119 S_sublex_start(pTHX)
1121 const register I32 op_type = yylval.ival;
1123 if (op_type == OP_NULL) {
1124 yylval.opval = PL_lex_op;
1128 if (op_type == OP_CONST || op_type == OP_READLINE) {
1129 SV *sv = tokeq(PL_lex_stuff);
1131 if (SvTYPE(sv) == SVt_PVIV) {
1132 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1134 const char *p = SvPV_const(sv, len);
1135 SV * const nsv = newSVpvn(p, len);
1141 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1142 PL_lex_stuff = Nullsv;
1143 /* Allow <FH> // "foo" */
1144 if (op_type == OP_READLINE)
1145 PL_expect = XTERMORDORDOR;
1149 PL_sublex_info.super_state = PL_lex_state;
1150 PL_sublex_info.sub_inwhat = op_type;
1151 PL_sublex_info.sub_op = PL_lex_op;
1152 PL_lex_state = LEX_INTERPPUSH;
1156 yylval.opval = PL_lex_op;
1166 * Create a new scope to save the lexing state. The scope will be
1167 * ended in S_sublex_done. Returns a '(', starting the function arguments
1168 * to the uc, lc, etc. found before.
1169 * Sets PL_lex_state to LEX_INTERPCONCAT.
1178 PL_lex_state = PL_sublex_info.super_state;
1179 SAVEI32(PL_lex_dojoin);
1180 SAVEI32(PL_lex_brackets);
1181 SAVEI32(PL_lex_casemods);
1182 SAVEI32(PL_lex_starts);
1183 SAVEI32(PL_lex_state);
1184 SAVEVPTR(PL_lex_inpat);
1185 SAVEI32(PL_lex_inwhat);
1186 SAVECOPLINE(PL_curcop);
1187 SAVEPPTR(PL_bufptr);
1188 SAVEPPTR(PL_bufend);
1189 SAVEPPTR(PL_oldbufptr);
1190 SAVEPPTR(PL_oldoldbufptr);
1191 SAVEPPTR(PL_last_lop);
1192 SAVEPPTR(PL_last_uni);
1193 SAVEPPTR(PL_linestart);
1194 SAVESPTR(PL_linestr);
1195 SAVEGENERICPV(PL_lex_brackstack);
1196 SAVEGENERICPV(PL_lex_casestack);
1198 PL_linestr = PL_lex_stuff;
1199 PL_lex_stuff = Nullsv;
1201 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1202 = SvPVX(PL_linestr);
1203 PL_bufend += SvCUR(PL_linestr);
1204 PL_last_lop = PL_last_uni = Nullch;
1205 SAVEFREESV(PL_linestr);
1207 PL_lex_dojoin = FALSE;
1208 PL_lex_brackets = 0;
1209 New(899, PL_lex_brackstack, 120, char);
1210 New(899, PL_lex_casestack, 12, char);
1211 PL_lex_casemods = 0;
1212 *PL_lex_casestack = '\0';
1214 PL_lex_state = LEX_INTERPCONCAT;
1215 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1217 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1218 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1219 PL_lex_inpat = PL_sublex_info.sub_op;
1221 PL_lex_inpat = Nullop;
1228 * Restores lexer state after a S_sublex_push.
1235 if (!PL_lex_starts++) {
1236 SV *sv = newSVpvn("",0);
1237 if (SvUTF8(PL_linestr))
1239 PL_expect = XOPERATOR;
1240 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1244 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1245 PL_lex_state = LEX_INTERPCASEMOD;
1249 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1250 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1251 PL_linestr = PL_lex_repl;
1253 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1254 PL_bufend += SvCUR(PL_linestr);
1255 PL_last_lop = PL_last_uni = Nullch;
1256 SAVEFREESV(PL_linestr);
1257 PL_lex_dojoin = FALSE;
1258 PL_lex_brackets = 0;
1259 PL_lex_casemods = 0;
1260 *PL_lex_casestack = '\0';
1262 if (SvEVALED(PL_lex_repl)) {
1263 PL_lex_state = LEX_INTERPNORMAL;
1265 /* we don't clear PL_lex_repl here, so that we can check later
1266 whether this is an evalled subst; that means we rely on the
1267 logic to ensure sublex_done() is called again only via the
1268 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1271 PL_lex_state = LEX_INTERPCONCAT;
1272 PL_lex_repl = Nullsv;
1278 PL_bufend = SvPVX(PL_linestr);
1279 PL_bufend += SvCUR(PL_linestr);
1280 PL_expect = XOPERATOR;
1281 PL_sublex_info.sub_inwhat = 0;
1289 Extracts a pattern, double-quoted string, or transliteration. This
1292 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1293 processing a pattern (PL_lex_inpat is true), a transliteration
1294 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1296 Returns a pointer to the character scanned up to. Iff this is
1297 advanced from the start pointer supplied (ie if anything was
1298 successfully parsed), will leave an OP for the substring scanned
1299 in yylval. Caller must intuit reason for not parsing further
1300 by looking at the next characters herself.
1304 double-quoted style: \r and \n
1305 regexp special ones: \D \s
1307 backrefs: \1 (deprecated in substitution replacements)
1308 case and quoting: \U \Q \E
1309 stops on @ and $, but not for $ as tail anchor
1311 In transliterations:
1312 characters are VERY literal, except for - not at the start or end
1313 of the string, which indicates a range. scan_const expands the
1314 range to the full set of intermediate characters.
1316 In double-quoted strings:
1318 double-quoted style: \r and \n
1320 backrefs: \1 (deprecated)
1321 case and quoting: \U \Q \E
1324 scan_const does *not* construct ops to handle interpolated strings.
1325 It stops processing as soon as it finds an embedded $ or @ variable
1326 and leaves it to the caller to work out what's going on.
1328 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1330 $ in pattern could be $foo or could be tail anchor. Assumption:
1331 it's a tail anchor if $ is the last thing in the string, or if it's
1332 followed by one of ")| \n\t"
1334 \1 (backreferences) are turned into $1
1336 The structure of the code is
1337 while (there's a character to process) {
1338 handle transliteration ranges
1339 skip regexp comments
1340 skip # initiated comments in //x patterns
1341 check for embedded @foo
1342 check for embedded scalars
1344 leave intact backslashes from leave (below)
1345 deprecate \1 in strings and sub replacements
1346 handle string-changing backslashes \l \U \Q \E, etc.
1347 switch (what was escaped) {
1348 handle - in a transliteration (becomes a literal -)
1349 handle \132 octal characters
1350 handle 0x15 hex characters
1351 handle \cV (control V)
1352 handle printf backslashes (\f, \r, \n, etc)
1354 } (end if backslash)
1355 } (end while character to read)
1360 S_scan_const(pTHX_ char *start)
1362 register char *send = PL_bufend; /* end of the constant */
1363 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1364 register char *s = start; /* start of the constant */
1365 register char *d = SvPVX(sv); /* destination for copies */
1366 bool dorange = FALSE; /* are we in a translit range? */
1367 bool didrange = FALSE; /* did we just finish a range? */
1368 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1369 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1372 const char *leaveit = /* set of acceptably-backslashed characters */
1374 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1377 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1378 /* If we are doing a trans and we know we want UTF8 set expectation */
1379 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1380 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1384 while (s < send || dorange) {
1385 /* get transliterations out of the way (they're most literal) */
1386 if (PL_lex_inwhat == OP_TRANS) {
1387 /* expand a range A-Z to the full set of characters. AIE! */
1389 I32 i; /* current expanded character */
1390 I32 min; /* first character in range */
1391 I32 max; /* last character in range */
1394 char *c = (char*)utf8_hop((U8*)d, -1);
1398 *c = (char)UTF_TO_NATIVE(0xff);
1399 /* mark the range as done, and continue */
1405 i = d - SvPVX_const(sv); /* remember current offset */
1406 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1407 d = SvPVX(sv) + i; /* refresh d after realloc */
1408 d -= 2; /* eat the first char and the - */
1410 min = (U8)*d; /* first char in range */
1411 max = (U8)d[1]; /* last char in range */
1415 "Invalid range \"%c-%c\" in transliteration operator",
1416 (char)min, (char)max);
1420 if ((isLOWER(min) && isLOWER(max)) ||
1421 (isUPPER(min) && isUPPER(max))) {
1423 for (i = min; i <= max; i++)
1425 *d++ = NATIVE_TO_NEED(has_utf8,i);
1427 for (i = min; i <= max; i++)
1429 *d++ = NATIVE_TO_NEED(has_utf8,i);
1434 for (i = min; i <= max; i++)
1437 /* mark the range as done, and continue */
1443 /* range begins (ignore - as first or last char) */
1444 else if (*s == '-' && s+1 < send && s != start) {
1446 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1449 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1461 /* if we get here, we're not doing a transliteration */
1463 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1464 except for the last char, which will be done separately. */
1465 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1467 while (s+1 < send && *s != ')')
1468 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1470 else if (s[2] == '{' /* This should match regcomp.c */
1471 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1474 char *regparse = s + (s[2] == '{' ? 3 : 4);
1477 while (count && (c = *regparse)) {
1478 if (c == '\\' && regparse[1])
1486 if (*regparse != ')')
1487 regparse--; /* Leave one char for continuation. */
1488 while (s < regparse)
1489 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1493 /* likewise skip #-initiated comments in //x patterns */
1494 else if (*s == '#' && PL_lex_inpat &&
1495 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1496 while (s+1 < send && *s != '\n')
1497 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1500 /* check for embedded arrays
1501 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1503 else if (*s == '@' && s[1]
1504 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1507 /* check for embedded scalars. only stop if we're sure it's a
1510 else if (*s == '$') {
1511 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1513 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1514 break; /* in regexp, $ might be tail anchor */
1517 /* End of else if chain - OP_TRANS rejoin rest */
1520 if (*s == '\\' && s+1 < send) {
1523 /* some backslashes we leave behind */
1524 if (*leaveit && *s && strchr(leaveit, *s)) {
1525 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1526 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1530 /* deprecate \1 in strings and substitution replacements */
1531 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1532 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1534 if (ckWARN(WARN_SYNTAX))
1535 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1540 /* string-change backslash escapes */
1541 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1546 /* if we get here, it's either a quoted -, or a digit */
1549 /* quoted - in transliterations */
1551 if (PL_lex_inwhat == OP_TRANS) {
1558 if (ckWARN(WARN_MISC) &&
1561 Perl_warner(aTHX_ packWARN(WARN_MISC),
1562 "Unrecognized escape \\%c passed through",
1564 /* default action is to copy the quoted character */
1565 goto default_action;
1568 /* \132 indicates an octal constant */
1569 case '0': case '1': case '2': case '3':
1570 case '4': case '5': case '6': case '7':
1574 uv = grok_oct(s, &len, &flags, NULL);
1577 goto NUM_ESCAPE_INSERT;
1579 /* \x24 indicates a hex constant */
1583 char* e = strchr(s, '}');
1584 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1585 PERL_SCAN_DISALLOW_PREFIX;
1590 yyerror("Missing right brace on \\x{}");
1594 uv = grok_hex(s, &len, &flags, NULL);
1600 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1601 uv = grok_hex(s, &len, &flags, NULL);
1607 /* Insert oct or hex escaped character.
1608 * There will always enough room in sv since such
1609 * escapes will be longer than any UTF-8 sequence
1610 * they can end up as. */
1612 /* We need to map to chars to ASCII before doing the tests
1615 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1616 if (!has_utf8 && uv > 255) {
1617 /* Might need to recode whatever we have
1618 * accumulated so far if it contains any
1621 * (Can't we keep track of that and avoid
1622 * this rescan? --jhi)
1626 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1627 if (!NATIVE_IS_INVARIANT(*c)) {
1632 STRLEN offset = d - SvPVX_const(sv);
1634 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1638 while (src >= (const U8 *)SvPVX_const(sv)) {
1639 if (!NATIVE_IS_INVARIANT(*src)) {
1640 U8 ch = NATIVE_TO_ASCII(*src);
1641 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1642 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1652 if (has_utf8 || uv > 255) {
1653 d = (char*)uvchr_to_utf8((U8*)d, uv);
1655 if (PL_lex_inwhat == OP_TRANS &&
1656 PL_sublex_info.sub_op) {
1657 PL_sublex_info.sub_op->op_private |=
1658 (PL_lex_repl ? OPpTRANS_FROM_UTF
1671 /* \N{LATIN SMALL LETTER A} is a named character */
1675 char* e = strchr(s, '}');
1681 yyerror("Missing right brace on \\N{}");
1685 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1687 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1688 PERL_SCAN_DISALLOW_PREFIX;
1691 uv = grok_hex(s, &len, &flags, NULL);
1693 goto NUM_ESCAPE_INSERT;
1695 res = newSVpvn(s + 1, e - s - 1);
1696 res = new_constant( Nullch, 0, "charnames",
1697 res, Nullsv, "\\N{...}" );
1699 sv_utf8_upgrade(res);
1700 str = SvPV_const(res,len);
1701 #ifdef EBCDIC_NEVER_MIND
1702 /* charnames uses pack U and that has been
1703 * recently changed to do the below uni->native
1704 * mapping, so this would be redundant (and wrong,
1705 * the code point would be doubly converted).
1706 * But leave this in just in case the pack U change
1707 * gets revoked, but the semantics is still
1708 * desireable for charnames. --jhi */
1710 UV uv = utf8_to_uvchr((const U8*)str, 0);
1713 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1715 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1716 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1717 str = SvPV_const(res, len);
1721 if (!has_utf8 && SvUTF8(res)) {
1722 const char *ostart = SvPVX_const(sv);
1723 SvCUR_set(sv, d - ostart);
1726 sv_utf8_upgrade(sv);
1727 /* this just broke our allocation above... */
1728 SvGROW(sv, (STRLEN)(send - start));
1729 d = SvPVX(sv) + SvCUR(sv);
1732 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1733 const char *odest = SvPVX_const(sv);
1735 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1736 d = SvPVX(sv) + (d - odest);
1738 Copy(str, d, len, char);
1745 yyerror("Missing braces on \\N{}");
1748 /* \c is a control character */
1757 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1760 yyerror("Missing control char name in \\c");
1764 /* printf-style backslashes, formfeeds, newlines, etc */
1766 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1769 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1772 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1775 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1778 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1781 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1784 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1790 } /* end if (backslash) */
1793 /* If we started with encoded form, or already know we want it
1794 and then encode the next character */
1795 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1797 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1798 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1801 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1802 STRLEN off = d - SvPVX_const(sv);
1803 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1805 d = (char*)uvchr_to_utf8((U8*)d, uv);
1809 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1811 } /* while loop to process each character */
1813 /* terminate the string and set up the sv */
1815 SvCUR_set(sv, d - SvPVX_const(sv));
1816 if (SvCUR(sv) >= SvLEN(sv))
1817 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1820 if (PL_encoding && !has_utf8) {
1821 sv_recode_to_utf8(sv, PL_encoding);
1827 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1828 PL_sublex_info.sub_op->op_private |=
1829 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1833 /* shrink the sv if we allocated more than we used */
1834 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1835 SvPV_shrink_to_cur(sv);
1838 /* return the substring (via yylval) only if we parsed anything */
1839 if (s > PL_bufptr) {
1840 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1841 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1843 ( PL_lex_inwhat == OP_TRANS
1845 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1848 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1855 * Returns TRUE if there's more to the expression (e.g., a subscript),
1858 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1860 * ->[ and ->{ return TRUE
1861 * { and [ outside a pattern are always subscripts, so return TRUE
1862 * if we're outside a pattern and it's not { or [, then return FALSE
1863 * if we're in a pattern and the first char is a {
1864 * {4,5} (any digits around the comma) returns FALSE
1865 * if we're in a pattern and the first char is a [
1867 * [SOMETHING] has a funky algorithm to decide whether it's a
1868 * character class or not. It has to deal with things like
1869 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1870 * anything else returns TRUE
1873 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1876 S_intuit_more(pTHX_ register char *s)
1878 if (PL_lex_brackets)
1880 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1882 if (*s != '{' && *s != '[')
1887 /* In a pattern, so maybe we have {n,m}. */
1904 /* On the other hand, maybe we have a character class */
1907 if (*s == ']' || *s == '^')
1910 /* this is terrifying, and it works */
1911 int weight = 2; /* let's weigh the evidence */
1913 unsigned char un_char = 255, last_un_char;
1914 const char *send = strchr(s,']');
1915 char tmpbuf[sizeof PL_tokenbuf * 4];
1917 if (!send) /* has to be an expression */
1920 Zero(seen,256,char);
1923 else if (isDIGIT(*s)) {
1925 if (isDIGIT(s[1]) && s[2] == ']')
1931 for (; s < send; s++) {
1932 last_un_char = un_char;
1933 un_char = (unsigned char)*s;
1938 weight -= seen[un_char] * 10;
1939 if (isALNUM_lazy_if(s+1,UTF)) {
1940 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1941 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1946 else if (*s == '$' && s[1] &&
1947 strchr("[#!%*<>()-=",s[1])) {
1948 if (/*{*/ strchr("])} =",s[2]))
1957 if (strchr("wds]",s[1]))
1959 else if (seen['\''] || seen['"'])
1961 else if (strchr("rnftbxcav",s[1]))
1963 else if (isDIGIT(s[1])) {
1965 while (s[1] && isDIGIT(s[1]))
1975 if (strchr("aA01! ",last_un_char))
1977 if (strchr("zZ79~",s[1]))
1979 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1980 weight -= 5; /* cope with negative subscript */
1983 if (!isALNUM(last_un_char)
1984 && !(last_un_char == '$' || last_un_char == '@'
1985 || last_un_char == '&')
1986 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
1991 if (keyword(tmpbuf, d - tmpbuf))
1994 if (un_char == last_un_char + 1)
1996 weight -= seen[un_char];
2001 if (weight >= 0) /* probably a character class */
2011 * Does all the checking to disambiguate
2013 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2014 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2016 * First argument is the stuff after the first token, e.g. "bar".
2018 * Not a method if bar is a filehandle.
2019 * Not a method if foo is a subroutine prototyped to take a filehandle.
2020 * Not a method if it's really "Foo $bar"
2021 * Method if it's "foo $bar"
2022 * Not a method if it's really "print foo $bar"
2023 * Method if it's really "foo package::" (interpreted as package->foo)
2024 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2025 * Not a method if bar is a filehandle or package, but is quoted with
2030 S_intuit_method(pTHX_ char *start, GV *gv)
2032 char *s = start + (*start == '$');
2033 char tmpbuf[sizeof PL_tokenbuf];
2041 if ((cv = GvCVu(gv))) {
2042 const char *proto = SvPVX_const(cv);
2052 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2053 /* start is the beginning of the possible filehandle/object,
2054 * and s is the end of it
2055 * tmpbuf is a copy of it
2058 if (*start == '$') {
2059 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2064 return *s == '(' ? FUNCMETH : METHOD;
2066 if (!keyword(tmpbuf, len)) {
2067 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2072 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
2073 if (indirgv && GvCVu(indirgv))
2075 /* filehandle or package name makes it a method */
2076 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2078 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2079 return 0; /* no assumptions -- "=>" quotes bearword */
2081 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2082 newSVpvn(tmpbuf,len));
2083 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2087 return *s == '(' ? FUNCMETH : METHOD;
2095 * Return a string of Perl code to load the debugger. If PERL5DB
2096 * is set, it will return the contents of that, otherwise a
2097 * compile-time require of perl5db.pl.
2104 const char *pdb = PerlEnv_getenv("PERL5DB");
2108 SETERRNO(0,SS_NORMAL);
2109 return "BEGIN { require 'perl5db.pl' }";
2115 /* Encoded script support. filter_add() effectively inserts a
2116 * 'pre-processing' function into the current source input stream.
2117 * Note that the filter function only applies to the current source file
2118 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2120 * The datasv parameter (which may be NULL) can be used to pass
2121 * private data to this instance of the filter. The filter function
2122 * can recover the SV using the FILTER_DATA macro and use it to
2123 * store private buffers and state information.
2125 * The supplied datasv parameter is upgraded to a PVIO type
2126 * and the IoDIRP/IoANY field is used to store the function pointer,
2127 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2128 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2129 * private use must be set using malloc'd pointers.
2133 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2138 if (!PL_rsfp_filters)
2139 PL_rsfp_filters = newAV();
2141 datasv = NEWSV(255,0);
2142 SvUPGRADE(datasv, SVt_PVIO);
2143 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2144 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2145 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2146 IoANY(datasv), SvPV_nolen(datasv)));
2147 av_unshift(PL_rsfp_filters, 1);
2148 av_store(PL_rsfp_filters, 0, datasv) ;
2153 /* Delete most recently added instance of this filter function. */
2155 Perl_filter_del(pTHX_ filter_t funcp)
2160 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2162 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2164 /* if filter is on top of stack (usual case) just pop it off */
2165 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2166 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2167 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2168 IoANY(datasv) = (void *)NULL;
2169 sv_free(av_pop(PL_rsfp_filters));
2173 /* we need to search for the correct entry and clear it */
2174 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2178 /* Invoke the idxth filter function for the current rsfp. */
2179 /* maxlen 0 = read one text line */
2181 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2186 if (!PL_rsfp_filters)
2188 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2189 /* Provide a default input filter to make life easy. */
2190 /* Note that we append to the line. This is handy. */
2191 DEBUG_P(PerlIO_printf(Perl_debug_log,
2192 "filter_read %d: from rsfp\n", idx));
2196 const int old_len = SvCUR(buf_sv);
2198 /* ensure buf_sv is large enough */
2199 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2200 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2201 if (PerlIO_error(PL_rsfp))
2202 return -1; /* error */
2204 return 0 ; /* end of file */
2206 SvCUR_set(buf_sv, old_len + len) ;
2209 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2210 if (PerlIO_error(PL_rsfp))
2211 return -1; /* error */
2213 return 0 ; /* end of file */
2216 return SvCUR(buf_sv);
2218 /* Skip this filter slot if filter has been deleted */
2219 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2220 DEBUG_P(PerlIO_printf(Perl_debug_log,
2221 "filter_read %d: skipped (filter deleted)\n",
2223 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2225 /* Get function pointer hidden within datasv */
2226 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2227 DEBUG_P(PerlIO_printf(Perl_debug_log,
2228 "filter_read %d: via function %p (%s)\n",
2229 idx, datasv, SvPV_nolen_const(datasv)));
2230 /* Call function. The function is expected to */
2231 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2232 /* Return: <0:error, =0:eof, >0:not eof */
2233 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2237 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2239 #ifdef PERL_CR_FILTER
2240 if (!PL_rsfp_filters) {
2241 filter_add(S_cr_textfilter,NULL);
2244 if (PL_rsfp_filters) {
2246 SvCUR_set(sv, 0); /* start with empty line */
2247 if (FILTER_READ(0, sv, 0) > 0)
2248 return ( SvPVX(sv) ) ;
2253 return (sv_gets(sv, fp, append));
2257 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2261 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2265 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2266 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2268 return GvHV(gv); /* Foo:: */
2271 /* use constant CLASS => 'MyClass' */
2272 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2274 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2275 pkgname = SvPV_nolen_const(sv);
2279 return gv_stashpv(pkgname, FALSE);
2283 static const char* const exp_name[] =
2284 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2285 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2292 Works out what to call the token just pulled out of the input
2293 stream. The yacc parser takes care of taking the ops we return and
2294 stitching them into a tree.
2300 if read an identifier
2301 if we're in a my declaration
2302 croak if they tried to say my($foo::bar)
2303 build the ops for a my() declaration
2304 if it's an access to a my() variable
2305 are we in a sort block?
2306 croak if my($a); $a <=> $b
2307 build ops for access to a my() variable
2308 if in a dq string, and they've said @foo and we can't find @foo
2310 build ops for a bareword
2311 if we already built the token before, use it.
2316 #pragma segment Perl_yylex
2321 register char *s = PL_bufptr;
2328 I32 orig_keyword = 0;
2331 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2332 lex_state_names[PL_lex_state]);
2334 /* check if there's an identifier for us to look at */
2335 if (PL_pending_ident)
2336 return REPORT(S_pending_ident(aTHX));
2338 /* no identifier pending identification */
2340 switch (PL_lex_state) {
2342 case LEX_NORMAL: /* Some compilers will produce faster */
2343 case LEX_INTERPNORMAL: /* code if we comment these out. */
2347 /* when we've already built the next token, just pull it out of the queue */
2350 yylval = PL_nextval[PL_nexttoke];
2352 PL_lex_state = PL_lex_defer;
2353 PL_expect = PL_lex_expect;
2354 PL_lex_defer = LEX_NORMAL;
2356 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2357 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2358 (IV)PL_nexttype[PL_nexttoke]); });
2360 return REPORT(PL_nexttype[PL_nexttoke]);
2362 /* interpolated case modifiers like \L \U, including \Q and \E.
2363 when we get here, PL_bufptr is at the \
2365 case LEX_INTERPCASEMOD:
2367 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2368 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2370 /* handle \E or end of string */
2371 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2373 if (PL_lex_casemods) {
2374 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2375 PL_lex_casestack[PL_lex_casemods] = '\0';
2377 if (PL_bufptr != PL_bufend
2378 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2380 PL_lex_state = LEX_INTERPCONCAT;
2384 if (PL_bufptr != PL_bufend)
2386 PL_lex_state = LEX_INTERPCONCAT;
2390 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2391 "### Saw case modifier at '%s'\n", PL_bufptr); });
2393 if (s[1] == '\\' && s[2] == 'E') {
2395 PL_lex_state = LEX_INTERPCONCAT;
2399 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2400 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2401 if ((*s == 'L' || *s == 'U') &&
2402 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2403 PL_lex_casestack[--PL_lex_casemods] = '\0';
2406 if (PL_lex_casemods > 10)
2407 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2408 PL_lex_casestack[PL_lex_casemods++] = *s;
2409 PL_lex_casestack[PL_lex_casemods] = '\0';
2410 PL_lex_state = LEX_INTERPCONCAT;
2411 PL_nextval[PL_nexttoke].ival = 0;
2414 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2416 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2418 PL_nextval[PL_nexttoke].ival = OP_LC;
2420 PL_nextval[PL_nexttoke].ival = OP_UC;
2422 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2424 Perl_croak(aTHX_ "panic: yylex");
2428 if (PL_lex_starts) {
2431 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2432 if (PL_lex_casemods == 1 && PL_lex_inpat)
2441 case LEX_INTERPPUSH:
2442 return REPORT(sublex_push());
2444 case LEX_INTERPSTART:
2445 if (PL_bufptr == PL_bufend)
2446 return REPORT(sublex_done());
2447 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2448 "### Interpolated variable at '%s'\n", PL_bufptr); });
2450 PL_lex_dojoin = (*PL_bufptr == '@');
2451 PL_lex_state = LEX_INTERPNORMAL;
2452 if (PL_lex_dojoin) {
2453 PL_nextval[PL_nexttoke].ival = 0;
2455 force_ident("\"", '$');
2456 PL_nextval[PL_nexttoke].ival = 0;
2458 PL_nextval[PL_nexttoke].ival = 0;
2460 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2463 if (PL_lex_starts++) {
2465 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2466 if (!PL_lex_casemods && PL_lex_inpat)
2473 case LEX_INTERPENDMAYBE:
2474 if (intuit_more(PL_bufptr)) {
2475 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2481 if (PL_lex_dojoin) {
2482 PL_lex_dojoin = FALSE;
2483 PL_lex_state = LEX_INTERPCONCAT;
2486 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2487 && SvEVALED(PL_lex_repl))
2489 if (PL_bufptr != PL_bufend)
2490 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2491 PL_lex_repl = Nullsv;
2494 case LEX_INTERPCONCAT:
2496 if (PL_lex_brackets)
2497 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2499 if (PL_bufptr == PL_bufend)
2500 return REPORT(sublex_done());
2502 if (SvIVX(PL_linestr) == '\'') {
2503 SV *sv = newSVsv(PL_linestr);
2506 else if ( PL_hints & HINT_NEW_RE )
2507 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2508 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2512 s = scan_const(PL_bufptr);
2514 PL_lex_state = LEX_INTERPCASEMOD;
2516 PL_lex_state = LEX_INTERPSTART;
2519 if (s != PL_bufptr) {
2520 PL_nextval[PL_nexttoke] = yylval;
2523 if (PL_lex_starts++) {
2524 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2525 if (!PL_lex_casemods && PL_lex_inpat)
2538 PL_lex_state = LEX_NORMAL;
2539 s = scan_formline(PL_bufptr);
2540 if (!PL_lex_formbrack)
2546 PL_oldoldbufptr = PL_oldbufptr;
2549 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
2550 exp_name[PL_expect], s);
2556 if (isIDFIRST_lazy_if(s,UTF))
2558 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2561 goto fake_eof; /* emulate EOF on ^D or ^Z */
2566 if (PL_lex_brackets) {
2567 if (PL_lex_formbrack)
2568 yyerror("Format not terminated");
2570 yyerror("Missing right curly or square bracket");
2572 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2573 "### Tokener got EOF\n");
2577 if (s++ < PL_bufend)
2578 goto retry; /* ignore stray nulls */
2581 if (!PL_in_eval && !PL_preambled) {
2582 PL_preambled = TRUE;
2583 sv_setpv(PL_linestr,incl_perldb());
2584 if (SvCUR(PL_linestr))
2585 sv_catpvn(PL_linestr,";", 1);
2587 while(AvFILLp(PL_preambleav) >= 0) {
2588 SV *tmpsv = av_shift(PL_preambleav);
2589 sv_catsv(PL_linestr, tmpsv);
2590 sv_catpvn(PL_linestr, ";", 1);
2593 sv_free((SV*)PL_preambleav);
2594 PL_preambleav = NULL;
2596 if (PL_minus_n || PL_minus_p) {
2597 sv_catpv(PL_linestr, "LINE: while (<>) {");
2599 sv_catpv(PL_linestr,"chomp;");
2602 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2603 || *PL_splitstr == '"')
2604 && strchr(PL_splitstr + 1, *PL_splitstr))
2605 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2607 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2608 bytes can be used as quoting characters. :-) */
2609 /* The count here deliberately includes the NUL
2610 that terminates the C string constant. This
2611 embeds the opening NUL into the string. */
2612 const char *splits = PL_splitstr;
2613 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2616 if (*splits == '\\')
2617 sv_catpvn(PL_linestr, splits, 1);
2618 sv_catpvn(PL_linestr, splits, 1);
2619 } while (*splits++);
2620 /* This loop will embed the trailing NUL of
2621 PL_linestr as the last thing it does before
2623 sv_catpvn(PL_linestr, ");", 2);
2627 sv_catpv(PL_linestr,"our @F=split(' ');");
2630 sv_catpvn(PL_linestr, "\n", 1);
2631 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2632 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2633 PL_last_lop = PL_last_uni = Nullch;
2634 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2635 SV *sv = NEWSV(85,0);
2637 sv_upgrade(sv, SVt_PVMG);
2638 sv_setsv(sv,PL_linestr);
2641 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2646 bof = PL_rsfp ? TRUE : FALSE;
2647 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2650 if (PL_preprocess && !PL_in_eval)
2651 (void)PerlProc_pclose(PL_rsfp);
2652 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2653 PerlIO_clearerr(PL_rsfp);
2655 (void)PerlIO_close(PL_rsfp);
2657 PL_doextract = FALSE;
2659 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2660 sv_setpv(PL_linestr,PL_minus_p
2661 ? ";}continue{print;}" : ";}");
2662 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2663 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2664 PL_last_lop = PL_last_uni = Nullch;
2665 PL_minus_n = PL_minus_p = 0;
2668 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2669 PL_last_lop = PL_last_uni = Nullch;
2670 sv_setpvn(PL_linestr,"",0);
2671 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2673 /* If it looks like the start of a BOM or raw UTF-16,
2674 * check if it in fact is. */
2680 #ifdef PERLIO_IS_STDIO
2681 # ifdef __GNU_LIBRARY__
2682 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2683 # define FTELL_FOR_PIPE_IS_BROKEN
2687 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2688 # define FTELL_FOR_PIPE_IS_BROKEN
2693 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2694 /* This loses the possibility to detect the bof
2695 * situation on perl -P when the libc5 is being used.
2696 * Workaround? Maybe attach some extra state to PL_rsfp?
2699 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2701 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2704 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2705 s = swallow_bom((U8*)s);
2709 /* Incest with pod. */
2710 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2711 sv_setpvn(PL_linestr, "", 0);
2712 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2713 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2714 PL_last_lop = PL_last_uni = Nullch;
2715 PL_doextract = FALSE;
2719 } while (PL_doextract);
2720 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2721 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2722 SV *sv = NEWSV(85,0);
2724 sv_upgrade(sv, SVt_PVMG);
2725 sv_setsv(sv,PL_linestr);
2728 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2731 PL_last_lop = PL_last_uni = Nullch;
2732 if (CopLINE(PL_curcop) == 1) {
2733 while (s < PL_bufend && isSPACE(*s))
2735 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2739 if (*s == '#' && *(s+1) == '!')
2741 #ifdef ALTERNATE_SHEBANG
2743 static char const as[] = ALTERNATE_SHEBANG;
2744 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2745 d = s + (sizeof(as) - 1);
2747 #endif /* ALTERNATE_SHEBANG */
2756 while (*d && !isSPACE(*d))
2760 #ifdef ARG_ZERO_IS_SCRIPT
2761 if (ipathend > ipath) {
2763 * HP-UX (at least) sets argv[0] to the script name,
2764 * which makes $^X incorrect. And Digital UNIX and Linux,
2765 * at least, set argv[0] to the basename of the Perl
2766 * interpreter. So, having found "#!", we'll set it right.
2768 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2769 assert(SvPOK(x) || SvGMAGICAL(x));
2770 if (sv_eq(x, CopFILESV(PL_curcop))) {
2771 sv_setpvn(x, ipath, ipathend - ipath);
2777 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2778 const char *lstart = SvPV_const(x,llen);
2780 bstart += blen - llen;
2781 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2782 sv_setpvn(x, ipath, ipathend - ipath);
2787 TAINT_NOT; /* $^X is always tainted, but that's OK */
2789 #endif /* ARG_ZERO_IS_SCRIPT */
2794 d = instr(s,"perl -");
2796 d = instr(s,"perl");
2798 /* avoid getting into infinite loops when shebang
2799 * line contains "Perl" rather than "perl" */
2801 for (d = ipathend-4; d >= ipath; --d) {
2802 if ((*d == 'p' || *d == 'P')
2803 && !ibcmp(d, "perl", 4))
2813 #ifdef ALTERNATE_SHEBANG
2815 * If the ALTERNATE_SHEBANG on this system starts with a
2816 * character that can be part of a Perl expression, then if
2817 * we see it but not "perl", we're probably looking at the
2818 * start of Perl code, not a request to hand off to some
2819 * other interpreter. Similarly, if "perl" is there, but
2820 * not in the first 'word' of the line, we assume the line
2821 * contains the start of the Perl program.
2823 if (d && *s != '#') {
2824 const char *c = ipath;
2825 while (*c && !strchr("; \t\r\n\f\v#", *c))
2828 d = Nullch; /* "perl" not in first word; ignore */
2830 *s = '#'; /* Don't try to parse shebang line */
2832 #endif /* ALTERNATE_SHEBANG */
2833 #ifndef MACOS_TRADITIONAL
2838 !instr(s,"indir") &&
2839 instr(PL_origargv[0],"perl"))
2846 while (s < PL_bufend && isSPACE(*s))
2848 if (s < PL_bufend) {
2849 Newz(899,newargv,PL_origargc+3,char*);
2851 while (s < PL_bufend && !isSPACE(*s))
2854 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2857 newargv = PL_origargv;
2860 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2862 Perl_croak(aTHX_ "Can't exec %s", ipath);
2866 const U32 oldpdb = PL_perldb;
2867 const bool oldn = PL_minus_n;
2868 const bool oldp = PL_minus_p;
2870 while (*d && !isSPACE(*d)) d++;
2871 while (SPACE_OR_TAB(*d)) d++;
2874 const bool switches_done = PL_doswitches;
2876 if (*d == 'M' || *d == 'm' || *d == 'C') {
2878 while (*d && !isSPACE(*d)) d++;
2879 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2882 d = moreswitches(d);
2884 if (PL_doswitches && !switches_done) {
2885 int argc = PL_origargc;
2886 char **argv = PL_origargv;
2889 } while (argc && argv[0][0] == '-' && argv[0][1]);
2890 init_argv_symbols(argc,argv);
2892 if ((PERLDB_LINE && !oldpdb) ||
2893 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2894 /* if we have already added "LINE: while (<>) {",
2895 we must not do it again */
2897 sv_setpvn(PL_linestr, "", 0);
2898 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2899 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2900 PL_last_lop = PL_last_uni = Nullch;
2901 PL_preambled = FALSE;
2903 (void)gv_fetchfile(PL_origfilename);
2906 if (PL_doswitches && !switches_done) {
2907 int argc = PL_origargc;
2908 char **argv = PL_origargv;
2911 } while (argc && argv[0][0] == '-' && argv[0][1]);
2912 init_argv_symbols(argc,argv);
2918 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2920 PL_lex_state = LEX_FORMLINE;
2925 #ifdef PERL_STRICT_CR
2926 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2928 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2930 case ' ': case '\t': case '\f': case 013:
2931 #ifdef MACOS_TRADITIONAL
2938 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2939 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2940 /* handle eval qq[#line 1 "foo"\n ...] */
2941 CopLINE_dec(PL_curcop);
2945 while (s < d && *s != '\n')
2949 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2950 Perl_croak(aTHX_ "panic: input overflow");
2952 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2954 PL_lex_state = LEX_FORMLINE;
2964 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2971 while (s < PL_bufend && SPACE_OR_TAB(*s))
2974 if (strnEQ(s,"=>",2)) {
2975 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2976 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2977 "### Saw unary minus before =>, forcing word '%s'\n", s);
2979 OPERATOR('-'); /* unary minus */
2981 PL_last_uni = PL_oldbufptr;
2983 case 'r': ftst = OP_FTEREAD; break;
2984 case 'w': ftst = OP_FTEWRITE; break;
2985 case 'x': ftst = OP_FTEEXEC; break;
2986 case 'o': ftst = OP_FTEOWNED; break;
2987 case 'R': ftst = OP_FTRREAD; break;
2988 case 'W': ftst = OP_FTRWRITE; break;
2989 case 'X': ftst = OP_FTREXEC; break;
2990 case 'O': ftst = OP_FTROWNED; break;
2991 case 'e': ftst = OP_FTIS; break;
2992 case 'z': ftst = OP_FTZERO; break;
2993 case 's': ftst = OP_FTSIZE; break;
2994 case 'f': ftst = OP_FTFILE; break;
2995 case 'd': ftst = OP_FTDIR; break;
2996 case 'l': ftst = OP_FTLINK; break;
2997 case 'p': ftst = OP_FTPIPE; break;
2998 case 'S': ftst = OP_FTSOCK; break;
2999 case 'u': ftst = OP_FTSUID; break;
3000 case 'g': ftst = OP_FTSGID; break;
3001 case 'k': ftst = OP_FTSVTX; break;
3002 case 'b': ftst = OP_FTBLK; break;
3003 case 'c': ftst = OP_FTCHR; break;
3004 case 't': ftst = OP_FTTTY; break;
3005 case 'T': ftst = OP_FTTEXT; break;
3006 case 'B': ftst = OP_FTBINARY; break;
3007 case 'M': case 'A': case 'C':
3008 gv_fetchpv("\024",TRUE, SVt_PV);
3010 case 'M': ftst = OP_FTMTIME; break;
3011 case 'A': ftst = OP_FTATIME; break;
3012 case 'C': ftst = OP_FTCTIME; break;
3020 PL_last_lop_op = (OPCODE)ftst;
3021 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3022 "### Saw file test %c\n", (int)ftst);
3027 /* Assume it was a minus followed by a one-letter named
3028 * subroutine call (or a -bareword), then. */
3029 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3030 "### '-%c' looked like a file test but was not\n",
3039 if (PL_expect == XOPERATOR)
3044 else if (*s == '>') {
3047 if (isIDFIRST_lazy_if(s,UTF)) {
3048 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3056 if (PL_expect == XOPERATOR)
3059 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3061 OPERATOR('-'); /* unary minus */
3068 if (PL_expect == XOPERATOR)
3073 if (PL_expect == XOPERATOR)
3076 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3082 if (PL_expect != XOPERATOR) {
3083 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3084 PL_expect = XOPERATOR;
3085 force_ident(PL_tokenbuf, '*');
3098 if (PL_expect == XOPERATOR) {
3102 PL_tokenbuf[0] = '%';
3103 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3104 if (!PL_tokenbuf[1]) {
3107 PL_pending_ident = '%';
3126 switch (PL_expect) {
3129 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3131 PL_bufptr = s; /* update in case we back off */
3137 PL_expect = XTERMBLOCK;
3141 while (isIDFIRST_lazy_if(s,UTF)) {
3142 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3143 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3144 if (tmp < 0) tmp = -tmp;
3160 d = scan_str(d,TRUE,TRUE);
3162 /* MUST advance bufptr here to avoid bogus
3163 "at end of line" context messages from yyerror().
3165 PL_bufptr = s + len;
3166 yyerror("Unterminated attribute parameter in attribute list");
3169 return REPORT(0); /* EOF indicator */
3173 SV *sv = newSVpvn(s, len);
3174 sv_catsv(sv, PL_lex_stuff);
3175 attrs = append_elem(OP_LIST, attrs,
3176 newSVOP(OP_CONST, 0, sv));
3177 SvREFCNT_dec(PL_lex_stuff);
3178 PL_lex_stuff = Nullsv;
3181 if (len == 6 && strnEQ(s, "unique", len)) {
3182 if (PL_in_my == KEY_our)
3184 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3186 ; /* skip to avoid loading attributes.pm */
3189 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3192 /* NOTE: any CV attrs applied here need to be part of
3193 the CVf_BUILTIN_ATTRS define in cv.h! */
3194 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3195 CvLVALUE_on(PL_compcv);
3196 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3197 CvLOCKED_on(PL_compcv);
3198 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3199 CvMETHOD_on(PL_compcv);
3200 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3201 CvASSERTION_on(PL_compcv);
3202 /* After we've set the flags, it could be argued that
3203 we don't need to do the attributes.pm-based setting
3204 process, and shouldn't bother appending recognized
3205 flags. To experiment with that, uncomment the
3206 following "else". (Note that's already been
3207 uncommented. That keeps the above-applied built-in
3208 attributes from being intercepted (and possibly
3209 rejected) by a package's attribute routines, but is
3210 justified by the performance win for the common case
3211 of applying only built-in attributes.) */
3213 attrs = append_elem(OP_LIST, attrs,
3214 newSVOP(OP_CONST, 0,
3218 if (*s == ':' && s[1] != ':')
3221 break; /* require real whitespace or :'s */
3223 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3224 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3225 const char q = ((*s == '\'') ? '"' : '\'');
3226 /* If here for an expression, and parsed no attrs, back off. */
3227 if (tmp == '=' && !attrs) {
3231 /* MUST advance bufptr here to avoid bogus "at end of line"
3232 context messages from yyerror().
3236 yyerror("Unterminated attribute list");
3238 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3246 PL_nextval[PL_nexttoke].opval = attrs;
3254 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3255 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3272 if (PL_lex_brackets <= 0)
3273 yyerror("Unmatched right square bracket");
3276 if (PL_lex_state == LEX_INTERPNORMAL) {
3277 if (PL_lex_brackets == 0) {
3278 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3279 PL_lex_state = LEX_INTERPEND;
3286 if (PL_lex_brackets > 100) {
3287 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3289 switch (PL_expect) {
3291 if (PL_lex_formbrack) {
3295 if (PL_oldoldbufptr == PL_last_lop)
3296 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3298 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3299 OPERATOR(HASHBRACK);
3301 while (s < PL_bufend && SPACE_OR_TAB(*s))
3304 PL_tokenbuf[0] = '\0';
3305 if (d < PL_bufend && *d == '-') {
3306 PL_tokenbuf[0] = '-';
3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
3311 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3312 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3314 while (d < PL_bufend && SPACE_OR_TAB(*d))
3317 const char minus = (PL_tokenbuf[0] == '-');
3318 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3326 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3331 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3336 if (PL_oldoldbufptr == PL_last_lop)
3337 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3339 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3342 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3344 /* This hack is to get the ${} in the message. */
3346 yyerror("syntax error");
3349 OPERATOR(HASHBRACK);
3351 /* This hack serves to disambiguate a pair of curlies
3352 * as being a block or an anon hash. Normally, expectation
3353 * determines that, but in cases where we're not in a
3354 * position to expect anything in particular (like inside
3355 * eval"") we have to resolve the ambiguity. This code
3356 * covers the case where the first term in the curlies is a
3357 * quoted string. Most other cases need to be explicitly
3358 * disambiguated by prepending a "+" before the opening
3359 * curly in order to force resolution as an anon hash.
3361 * XXX should probably propagate the outer expectation
3362 * into eval"" to rely less on this hack, but that could
3363 * potentially break current behavior of eval"".
3367 if (*s == '\'' || *s == '"' || *s == '`') {
3368 /* common case: get past first string, handling escapes */
3369 for (t++; t < PL_bufend && *t != *s;)
3370 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3374 else if (*s == 'q') {
3377 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3380 /* skip q//-like construct */
3382 char open, close, term;
3385 while (t < PL_bufend && isSPACE(*t))
3387 /* check for q => */
3388 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3389 OPERATOR(HASHBRACK);
3393 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3397 for (t++; t < PL_bufend; t++) {
3398 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3400 else if (*t == open)
3404 for (t++; t < PL_bufend; t++) {
3405 if (*t == '\\' && t+1 < PL_bufend)
3407 else if (*t == close && --brackets <= 0)
3409 else if (*t == open)
3416 /* skip plain q word */
3417 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3420 else if (isALNUM_lazy_if(t,UTF)) {
3422 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3425 while (t < PL_bufend && isSPACE(*t))
3427 /* if comma follows first term, call it an anon hash */
3428 /* XXX it could be a comma expression with loop modifiers */
3429 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3430 || (*t == '=' && t[1] == '>')))
3431 OPERATOR(HASHBRACK);
3432 if (PL_expect == XREF)
3435 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3441 yylval.ival = CopLINE(PL_curcop);
3442 if (isSPACE(*s) || *s == '#')
3443 PL_copline = NOLINE; /* invalidate current command line number */
3448 if (PL_lex_brackets <= 0)
3449 yyerror("Unmatched right curly bracket");
3451 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3452 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3453 PL_lex_formbrack = 0;
3454 if (PL_lex_state == LEX_INTERPNORMAL) {
3455 if (PL_lex_brackets == 0) {
3456 if (PL_expect & XFAKEBRACK) {
3457 PL_expect &= XENUMMASK;
3458 PL_lex_state = LEX_INTERPEND;
3460 return yylex(); /* ignore fake brackets */
3462 if (*s == '-' && s[1] == '>')
3463 PL_lex_state = LEX_INTERPENDMAYBE;
3464 else if (*s != '[' && *s != '{')
3465 PL_lex_state = LEX_INTERPEND;
3468 if (PL_expect & XFAKEBRACK) {
3469 PL_expect &= XENUMMASK;
3471 return yylex(); /* ignore fake brackets */
3481 if (PL_expect == XOPERATOR) {
3482 if (ckWARN(WARN_SEMICOLON)
3483 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3485 CopLINE_dec(PL_curcop);
3486 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3487 CopLINE_inc(PL_curcop);
3492 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3494 PL_expect = XOPERATOR;
3495 force_ident(PL_tokenbuf, '&');
3499 yylval.ival = (OPpENTERSUB_AMPER<<8);
3518 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3519 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3521 if (PL_expect == XSTATE && isALPHA(tmp) &&
3522 (s == PL_linestart+1 || s[-2] == '\n') )
3524 if (PL_in_eval && !PL_rsfp) {
3529 if (strnEQ(s,"=cut",4)) {
3543 PL_doextract = TRUE;
3546 if (PL_lex_brackets < PL_lex_formbrack) {
3548 #ifdef PERL_STRICT_CR
3549 for (t = s; SPACE_OR_TAB(*t); t++) ;
3551 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3553 if (*t == '\n' || *t == '#') {
3565 /* was this !=~ where !~ was meant?
3566 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3568 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3569 const char *t = s+1;
3571 while (t < PL_bufend && isSPACE(*t))
3574 if (*t == '/' || *t == '?' ||
3575 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3576 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3577 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3578 "!=~ should be !~");
3587 if (PL_expect != XOPERATOR) {
3588 if (s[1] != '<' && !strchr(s,'>'))
3591 s = scan_heredoc(s);
3593 s = scan_inputsymbol(s);
3594 TERM(sublex_start());
3599 SHop(OP_LEFT_SHIFT);
3613 SHop(OP_RIGHT_SHIFT);
3622 if (PL_expect == XOPERATOR) {
3623 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3626 return REPORT(','); /* grandfather non-comma-format format */
3630 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3631 PL_tokenbuf[0] = '@';
3632 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3633 sizeof PL_tokenbuf - 1, FALSE);
3634 if (PL_expect == XOPERATOR)
3635 no_op("Array length", s);
3636 if (!PL_tokenbuf[1])
3638 PL_expect = XOPERATOR;
3639 PL_pending_ident = '#';
3643 PL_tokenbuf[0] = '$';
3644 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3645 sizeof PL_tokenbuf - 1, FALSE);
3646 if (PL_expect == XOPERATOR)
3648 if (!PL_tokenbuf[1]) {
3650 yyerror("Final $ should be \\$ or $name");
3654 /* This kludge not intended to be bulletproof. */
3655 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3656 yylval.opval = newSVOP(OP_CONST, 0,
3657 newSViv(PL_compiling.cop_arybase));
3658 yylval.opval->op_private = OPpCONST_ARYBASE;
3664 if (PL_lex_state == LEX_NORMAL)
3667 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3670 PL_tokenbuf[0] = '@';
3671 if (ckWARN(WARN_SYNTAX)) {
3673 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3676 PL_bufptr = skipspace(PL_bufptr);
3677 while (t < PL_bufend && *t != ']')
3679 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3680 "Multidimensional syntax %.*s not supported",
3681 (t - PL_bufptr) + 1, PL_bufptr);
3685 else if (*s == '{') {
3686 PL_tokenbuf[0] = '%';
3687 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3688 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3690 char tmpbuf[sizeof PL_tokenbuf];
3691 for (t++; isSPACE(*t); t++) ;
3692 if (isIDFIRST_lazy_if(t,UTF)) {
3694 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3695 for (; isSPACE(*t); t++) ;
3696 if (*t == ';' && get_cv(tmpbuf, FALSE))
3697 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3698 "You need to quote \"%s\"", tmpbuf);
3704 PL_expect = XOPERATOR;
3705 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3706 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3707 if (!islop || PL_last_lop_op == OP_GREPSTART)
3708 PL_expect = XOPERATOR;
3709 else if (strchr("$@\"'`q", *s))
3710 PL_expect = XTERM; /* e.g. print $fh "foo" */
3711 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3712 PL_expect = XTERM; /* e.g. print $fh &sub */
3713 else if (isIDFIRST_lazy_if(s,UTF)) {
3714 char tmpbuf[sizeof PL_tokenbuf];
3715 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3716 if ((tmp = keyword(tmpbuf, len))) {
3717 /* binary operators exclude handle interpretations */
3729 PL_expect = XTERM; /* e.g. print $fh length() */
3734 PL_expect = XTERM; /* e.g. print $fh subr() */
3737 else if (isDIGIT(*s))
3738 PL_expect = XTERM; /* e.g. print $fh 3 */
3739 else if (*s == '.' && isDIGIT(s[1]))
3740 PL_expect = XTERM; /* e.g. print $fh .3 */
3741 else if ((*s == '?' || *s == '-' || *s == '+')
3742 && !isSPACE(s[1]) && s[1] != '=')
3743 PL_expect = XTERM; /* e.g. print $fh -1 */
3744 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3745 PL_expect = XTERM; /* e.g. print $fh /.../
3746 XXX except DORDOR operator */
3747 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3748 PL_expect = XTERM; /* print $fh <<"EOF" */
3750 PL_pending_ident = '$';
3754 if (PL_expect == XOPERATOR)
3756 PL_tokenbuf[0] = '@';
3757 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3758 if (!PL_tokenbuf[1]) {
3761 if (PL_lex_state == LEX_NORMAL)
3763 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3765 PL_tokenbuf[0] = '%';
3767 /* Warn about @ where they meant $. */
3768 if (ckWARN(WARN_SYNTAX)) {
3769 if (*s == '[' || *s == '{') {
3770 const char *t = s + 1;
3771 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3773 if (*t == '}' || *t == ']') {
3775 PL_bufptr = skipspace(PL_bufptr);
3776 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3777 "Scalar value %.*s better written as $%.*s",
3778 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3783 PL_pending_ident = '@';
3786 case '/': /* may be division, defined-or, or pattern */
3787 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3791 case '?': /* may either be conditional or pattern */
3792 if(PL_expect == XOPERATOR) {
3800 /* A // operator. */
3810 /* Disable warning on "study /blah/" */
3811 if (PL_oldoldbufptr == PL_last_uni
3812 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3813 || memNE(PL_last_uni, "study", 5)
3814 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3817 s = scan_pat(s,OP_MATCH);
3818 TERM(sublex_start());
3822 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3823 #ifdef PERL_STRICT_CR
3826 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3828 && (s == PL_linestart || s[-1] == '\n') )
3830 PL_lex_formbrack = 0;
3834 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3840 yylval.ival = OPf_SPECIAL;
3846 if (PL_expect != XOPERATOR)
3851 case '0': case '1': case '2': case '3': case '4':
3852 case '5': case '6': case '7': case '8': case '9':
3853 s = scan_num(s, &yylval);
3854 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3855 "### Saw number in '%s'\n", s);
3857 if (PL_expect == XOPERATOR)
3862 s = scan_str(s,FALSE,FALSE);
3863 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3864 "### Saw string before '%s'\n", s);
3866 if (PL_expect == XOPERATOR) {
3867 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3870 return REPORT(','); /* grandfather non-comma-format format */
3876 missingterm((char*)0);
3877 yylval.ival = OP_CONST;
3878 TERM(sublex_start());
3881 s = scan_str(s,FALSE,FALSE);
3882 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3883 "### Saw string before '%s'\n", s);
3885 if (PL_expect == XOPERATOR) {
3886 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3889 return REPORT(','); /* grandfather non-comma-format format */
3895 missingterm((char*)0);
3896 yylval.ival = OP_CONST;
3897 /* FIXME. I think that this can be const if char *d is replaced by
3898 more localised variables. */
3899 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3900 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3901 yylval.ival = OP_STRINGIFY;
3905 TERM(sublex_start());
3908 s = scan_str(s,FALSE,FALSE);
3909 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3910 "### Saw backtick string before '%s'\n", s);
3912 if (PL_expect == XOPERATOR)
3913 no_op("Backticks",s);
3915 missingterm((char*)0);
3916 yylval.ival = OP_BACKTICK;
3918 TERM(sublex_start());
3922 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3923 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3925 if (PL_expect == XOPERATOR)
3926 no_op("Backslash",s);
3930 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3931 char *start = s + 2;
3932 while (isDIGIT(*start) || *start == '_')
3934 if (*start == '.' && isDIGIT(start[1])) {
3935 s = scan_num(s, &yylval);
3938 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3939 else if (!isALPHA(*start) && (PL_expect == XTERM
3940 || PL_expect == XREF || PL_expect == XSTATE
3941 || PL_expect == XTERMORDORDOR)) {
3942 const char c = *start;
3945 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3948 s = scan_num(s, &yylval);
3955 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3995 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3997 /* Some keywords can be followed by any delimiter, including ':' */
3998 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3999 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4000 (PL_tokenbuf[0] == 'q' &&
4001 strchr("qwxr", PL_tokenbuf[1])))));
4003 /* x::* is just a word, unless x is "CORE" */
4004 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4008 while (d < PL_bufend && isSPACE(*d))
4009 d++; /* no comments skipped here, or s### is misparsed */
4011 /* Is this a label? */
4012 if (!tmp && PL_expect == XSTATE
4013 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4015 yylval.pval = savepv(PL_tokenbuf);
4020 /* Check for keywords */
4021 tmp = keyword(PL_tokenbuf, len);
4023 /* Is this a word before a => operator? */
4024 if (*d == '=' && d[1] == '>') {
4027 = (OP*)newSVOP(OP_CONST, 0,
4028 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4029 yylval.opval->op_private = OPpCONST_BARE;
4033 if (tmp < 0) { /* second-class keyword? */
4034 GV *ogv = Nullgv; /* override (winner) */
4035 GV *hgv = Nullgv; /* hidden (loser) */
4036 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4038 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
4041 if (GvIMPORTED_CV(gv))
4043 else if (! CvMETHOD(cv))
4047 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4048 (gv = *gvp) != (GV*)&PL_sv_undef &&
4049 GvCVu(gv) && GvIMPORTED_CV(gv))
4056 tmp = 0; /* overridden by import or by GLOBAL */
4059 && -tmp==KEY_lock /* XXX generalizable kludge */
4061 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4063 tmp = 0; /* any sub overrides "weak" keyword */
4068 && PL_expect != XOPERATOR
4069 && PL_expect != XTERMORDORDOR)
4071 /* any sub overrides the "err" keyword, except when really an
4072 * operator is expected */
4075 else { /* no override */
4077 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4078 Perl_warner(aTHX_ packWARN(WARN_MISC),
4079 "dump() better written as CORE::dump()");
4083 if (ckWARN(WARN_AMBIGUOUS) && hgv
4084 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
4085 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4086 "Ambiguous call resolved as CORE::%s(), %s",
4087 GvENAME(hgv), "qualify as such or use &");
4094 default: /* not a keyword */
4098 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4100 /* Get the rest if it looks like a package qualifier */
4102 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4104 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4107 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4108 *s == '\'' ? "'" : "::");
4113 if (PL_expect == XOPERATOR) {
4114 if (PL_bufptr == PL_linestart) {
4115 CopLINE_dec(PL_curcop);
4116 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4117 CopLINE_inc(PL_curcop);
4120 no_op("Bareword",s);
4123 /* Look for a subroutine with this name in current package,
4124 unless name is "Foo::", in which case Foo is a bearword
4125 (and a package name). */
4128 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4130 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4131 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4132 "Bareword \"%s\" refers to nonexistent package",
4135 PL_tokenbuf[len] = '\0';
4142 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
4145 /* if we saw a global override before, get the right name */
4148 sv = newSVpvn("CORE::GLOBAL::",14);
4149 sv_catpv(sv,PL_tokenbuf);
4152 /* If len is 0, newSVpv does strlen(), which is correct.
4153 If len is non-zero, then it will be the true length,
4154 and so the scalar will be created correctly. */
4155 sv = newSVpv(PL_tokenbuf,len);
4158 /* Presume this is going to be a bareword of some sort. */
4161 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4162 yylval.opval->op_private = OPpCONST_BARE;
4163 /* UTF-8 package name? */
4164 if (UTF && !IN_BYTES &&
4165 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4168 /* And if "Foo::", then that's what it certainly is. */
4173 /* See if it's the indirect object for a list operator. */
4175 if (PL_oldoldbufptr &&
4176 PL_oldoldbufptr < PL_bufptr &&
4177 (PL_oldoldbufptr == PL_last_lop
4178 || PL_oldoldbufptr == PL_last_uni) &&
4179 /* NO SKIPSPACE BEFORE HERE! */
4180 (PL_expect == XREF ||
4181 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4183 bool immediate_paren = *s == '(';
4185 /* (Now we can afford to cross potential line boundary.) */
4188 /* Two barewords in a row may indicate method call. */
4190 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
4193 /* If not a declared subroutine, it's an indirect object. */
4194 /* (But it's an indir obj regardless for sort.) */
4196 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4197 ((!gv || !GvCVu(gv)) &&
4198 (PL_last_lop_op != OP_MAPSTART &&
4199 PL_last_lop_op != OP_GREPSTART))))
4201 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4206 PL_expect = XOPERATOR;
4209 /* Is this a word before a => operator? */
4210 if (*s == '=' && s[1] == '>' && !pkgname) {
4212 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4213 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4214 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4218 /* If followed by a paren, it's certainly a subroutine. */
4221 if (gv && GvCVu(gv)) {
4222 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4223 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4228 PL_nextval[PL_nexttoke].opval = yylval.opval;
4229 PL_expect = XOPERATOR;
4235 /* If followed by var or block, call it a method (unless sub) */
4237 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4238 PL_last_lop = PL_oldbufptr;
4239 PL_last_lop_op = OP_METHOD;
4243 /* If followed by a bareword, see if it looks like indir obj. */
4246 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4247 && (tmp = intuit_method(s,gv)))
4250 /* Not a method, so call it a subroutine (if defined) */
4252 if (gv && GvCVu(gv)) {
4254 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4255 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4256 "Ambiguous use of -%s resolved as -&%s()",
4257 PL_tokenbuf, PL_tokenbuf);
4258 /* Check for a constant sub */
4260 if ((sv = cv_const_sv(cv))) {
4262 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4263 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4264 yylval.opval->op_private = 0;
4268 /* Resolve to GV now. */
4269 op_free(yylval.opval);
4270 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4271 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4272 PL_last_lop = PL_oldbufptr;
4273 PL_last_lop_op = OP_ENTERSUB;
4274 /* Is there a prototype? */
4277 const char *proto = SvPV_const((SV*)cv, len);
4280 if (*proto == '$' && proto[1] == '\0')
4282 while (*proto == ';')
4284 if (*proto == '&' && *s == '{') {
4285 sv_setpv(PL_subname, PL_curstash ?
4286 "__ANON__" : "__ANON__::__ANON__");
4290 PL_nextval[PL_nexttoke].opval = yylval.opval;
4296 /* Call it a bare word */
4298 if (PL_hints & HINT_STRICT_SUBS)
4299 yylval.opval->op_private |= OPpCONST_STRICT;
4302 if (ckWARN(WARN_RESERVED)) {
4303 if (lastchar != '-') {
4304 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4305 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4306 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4313 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4314 && ckWARN_d(WARN_AMBIGUOUS)) {
4315 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4316 "Operator or semicolon missing before %c%s",
4317 lastchar, PL_tokenbuf);
4318 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4319 "Ambiguous use of %c resolved as operator %c",
4320 lastchar, lastchar);
4326 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4327 newSVpv(CopFILE(PL_curcop),0));
4331 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4332 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4335 case KEY___PACKAGE__:
4336 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4338 ? newSVhek(HvNAME_HEK(PL_curstash))
4347 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4348 const char *pname = "main";
4349 if (PL_tokenbuf[2] == 'D')
4350 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4351 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4354 GvIOp(gv) = newIO();
4355 IoIFP(GvIOp(gv)) = PL_rsfp;
4356 #if defined(HAS_FCNTL) && defined(F_SETFD)
4358 const int fd = PerlIO_fileno(PL_rsfp);
4359 fcntl(fd,F_SETFD,fd >= 3);
4362 /* Mark this internal pseudo-handle as clean */
4363 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4365 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4366 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4367 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4369 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4370 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4371 /* if the script was opened in binmode, we need to revert
4372 * it to text mode for compatibility; but only iff it has CRs
4373 * XXX this is a questionable hack at best. */
4374 if (PL_bufend-PL_bufptr > 2
4375 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4378 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4379 loc = PerlIO_tell(PL_rsfp);
4380 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4383 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4385 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4386 #endif /* NETWARE */
4387 #ifdef PERLIO_IS_STDIO /* really? */
4388 # if defined(__BORLANDC__)
4389 /* XXX see note in do_binmode() */
4390 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4394 PerlIO_seek(PL_rsfp, loc, 0);
4398 #ifdef PERLIO_LAYERS
4401 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4402 else if (PL_encoding) {
4409 XPUSHs(PL_encoding);
4411 call_method("name", G_SCALAR);
4415 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4416 Perl_form(aTHX_ ":encoding(%"SVf")",
4434 if (PL_expect == XSTATE) {
4441 if (*s == ':' && s[1] == ':') {
4444 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4445 if (!(tmp = keyword(PL_tokenbuf, len)))
4446 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4460 LOP(OP_ACCEPT,XTERM);
4466 LOP(OP_ATAN2,XTERM);
4472 LOP(OP_BINMODE,XTERM);
4475 LOP(OP_BLESS,XTERM);
4484 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4501 if (!PL_cryptseen) {
4502 PL_cryptseen = TRUE;
4506 LOP(OP_CRYPT,XTERM);
4509 LOP(OP_CHMOD,XTERM);
4512 LOP(OP_CHOWN,XTERM);
4515 LOP(OP_CONNECT,XTERM);
4531 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4535 PL_hints |= HINT_BLOCK_SCOPE;
4545 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4546 LOP(OP_DBMOPEN,XTERM);
4552 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4559 yylval.ival = CopLINE(PL_curcop);
4573 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4574 UNIBRACK(OP_ENTEREVAL);
4592 case KEY_endhostent:
4598 case KEY_endservent:
4601 case KEY_endprotoent:
4612 yylval.ival = CopLINE(PL_curcop);
4614 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4616 if ((PL_bufend - p) >= 3 &&
4617 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4619 else if ((PL_bufend - p) >= 4 &&
4620 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4623 if (isIDFIRST_lazy_if(p,UTF)) {
4624 p = scan_ident(p, PL_bufend,
4625 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4629 Perl_croak(aTHX_ "Missing $ on loop variable");
4634 LOP(OP_FORMLINE,XTERM);
4640 LOP(OP_FCNTL,XTERM);
4646 LOP(OP_FLOCK,XTERM);
4655 LOP(OP_GREPSTART, XREF);
4658 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4673 case KEY_getpriority:
4674 LOP(OP_GETPRIORITY,XTERM);
4676 case KEY_getprotobyname:
4679 case KEY_getprotobynumber:
4680 LOP(OP_GPBYNUMBER,XTERM);
4682 case KEY_getprotoent:
4694 case KEY_getpeername:
4695 UNI(OP_GETPEERNAME);
4697 case KEY_gethostbyname:
4700 case KEY_gethostbyaddr:
4701 LOP(OP_GHBYADDR,XTERM);
4703 case KEY_gethostent:
4706 case KEY_getnetbyname:
4709 case KEY_getnetbyaddr:
4710 LOP(OP_GNBYADDR,XTERM);
4715 case KEY_getservbyname:
4716 LOP(OP_GSBYNAME,XTERM);
4718 case KEY_getservbyport:
4719 LOP(OP_GSBYPORT,XTERM);
4721 case KEY_getservent:
4724 case KEY_getsockname:
4725 UNI(OP_GETSOCKNAME);
4727 case KEY_getsockopt:
4728 LOP(OP_GSOCKOPT,XTERM);
4750 yylval.ival = CopLINE(PL_curcop);
4754 LOP(OP_INDEX,XTERM);
4760 LOP(OP_IOCTL,XTERM);
4772 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4804 LOP(OP_LISTEN,XTERM);
4813 s = scan_pat(s,OP_MATCH);
4814 TERM(sublex_start());
4817 LOP(OP_MAPSTART, XREF);
4820 LOP(OP_MKDIR,XTERM);
4823 LOP(OP_MSGCTL,XTERM);
4826 LOP(OP_MSGGET,XTERM);
4829 LOP(OP_MSGRCV,XTERM);
4832 LOP(OP_MSGSND,XTERM);
4838 if (isIDFIRST_lazy_if(s,UTF)) {
4839 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4840 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4842 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4843 if (!PL_in_my_stash) {
4846 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4854 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4861 if (PL_expect != XSTATE)
4862 yyerror("\"no\" not allowed in expression");
4863 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4864 s = force_version(s, FALSE);
4869 if (*s == '(' || (s = skipspace(s), *s == '('))
4876 if (isIDFIRST_lazy_if(s,UTF)) {
4878 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4879 for (t=d; *t && isSPACE(*t); t++) ;
4880 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4882 && !(t[0] == '=' && t[1] == '>')
4884 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4885 "Precedence problem: open %.*s should be open(%.*s)",
4886 d - s, s, d - s, s);
4892 yylval.ival = OP_OR;
4902 LOP(OP_OPEN_DIR,XTERM);
4905 checkcomma(s,PL_tokenbuf,"filehandle");
4909 checkcomma(s,PL_tokenbuf,"filehandle");
4928 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4932 LOP(OP_PIPE_OP,XTERM);
4935 s = scan_str(s,FALSE,FALSE);
4937 missingterm((char*)0);
4938 yylval.ival = OP_CONST;
4939 TERM(sublex_start());
4945 s = scan_str(s,FALSE,FALSE);
4947 missingterm((char*)0);
4948 PL_expect = XOPERATOR;
4950 if (SvCUR(PL_lex_stuff)) {
4953 d = SvPV_force(PL_lex_stuff, len);
4956 for (; isSPACE(*d) && len; --len, ++d) ;
4959 if (!warned && ckWARN(WARN_QW)) {
4960 for (; !isSPACE(*d) && len; --len, ++d) {
4962 Perl_warner(aTHX_ packWARN(WARN_QW),
4963 "Possible attempt to separate words with commas");
4966 else if (*d == '#') {
4967 Perl_warner(aTHX_ packWARN(WARN_QW),
4968 "Possible attempt to put comments in qw() list");
4974 for (; !isSPACE(*d) && len; --len, ++d) ;
4976 sv = newSVpvn(b, d-b);
4977 if (DO_UTF8(PL_lex_stuff))
4979 words = append_elem(OP_LIST, words,
4980 newSVOP(OP_CONST, 0, tokeq(sv)));
4984 PL_nextval[PL_nexttoke].opval = words;
4989 SvREFCNT_dec(PL_lex_stuff);
4990 PL_lex_stuff = Nullsv;
4996 s = scan_str(s,FALSE,FALSE);
4998 missingterm((char*)0);
4999 yylval.ival = OP_STRINGIFY;
5000 if (SvIVX(PL_lex_stuff) == '\'')
5001 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5002 TERM(sublex_start());
5005 s = scan_pat(s,OP_QR);
5006 TERM(sublex_start());
5009 s = scan_str(s,FALSE,FALSE);
5011 missingterm((char*)0);
5012 yylval.ival = OP_BACKTICK;
5014 TERM(sublex_start());
5022 s = force_version(s, FALSE);
5024 else if (*s != 'v' || !isDIGIT(s[1])
5025 || (s = force_version(s, TRUE), *s == 'v'))
5027 *PL_tokenbuf = '\0';
5028 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5029 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5030 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5032 yyerror("<> should be quotes");
5040 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5044 LOP(OP_RENAME,XTERM);
5053 LOP(OP_RINDEX,XTERM);
5063 UNIDOR(OP_READLINE);
5076 LOP(OP_REVERSE,XTERM);
5079 UNIDOR(OP_READLINK);
5087 TERM(sublex_start());
5089 TOKEN(1); /* force error */
5098 LOP(OP_SELECT,XTERM);
5104 LOP(OP_SEMCTL,XTERM);
5107 LOP(OP_SEMGET,XTERM);
5110 LOP(OP_SEMOP,XTERM);
5116 LOP(OP_SETPGRP,XTERM);
5118 case KEY_setpriority:
5119 LOP(OP_SETPRIORITY,XTERM);
5121 case KEY_sethostent:
5127 case KEY_setservent:
5130 case KEY_setprotoent:
5140 LOP(OP_SEEKDIR,XTERM);
5142 case KEY_setsockopt:
5143 LOP(OP_SSOCKOPT,XTERM);
5149 LOP(OP_SHMCTL,XTERM);
5152 LOP(OP_SHMGET,XTERM);
5155 LOP(OP_SHMREAD,XTERM);
5158 LOP(OP_SHMWRITE,XTERM);
5161 LOP(OP_SHUTDOWN,XTERM);
5170 LOP(OP_SOCKET,XTERM);
5172 case KEY_socketpair:
5173 LOP(OP_SOCKPAIR,XTERM);
5176 checkcomma(s,PL_tokenbuf,"subroutine name");
5178 if (*s == ';' || *s == ')') /* probably a close */
5179 Perl_croak(aTHX_ "sort is now a reserved word");
5181 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5185 LOP(OP_SPLIT,XTERM);
5188 LOP(OP_SPRINTF,XTERM);
5191 LOP(OP_SPLICE,XTERM);
5206 LOP(OP_SUBSTR,XTERM);
5212 char tmpbuf[sizeof PL_tokenbuf];
5213 SSize_t tboffset = 0;
5214 expectation attrful;
5215 bool have_name, have_proto, bad_proto;
5216 const int key = tmp;
5220 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5221 (*s == ':' && s[1] == ':'))
5224 attrful = XATTRBLOCK;
5225 /* remember buffer pos'n for later force_word */
5226 tboffset = s - PL_oldbufptr;
5227 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5228 if (strchr(tmpbuf, ':'))
5229 sv_setpv(PL_subname, tmpbuf);
5231 sv_setsv(PL_subname,PL_curstname);
5232 sv_catpvn(PL_subname,"::",2);
5233 sv_catpvn(PL_subname,tmpbuf,len);
5240 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5241 PL_expect = XTERMBLOCK;
5242 attrful = XATTRTERM;
5243 sv_setpvn(PL_subname,"?",1);
5247 if (key == KEY_format) {
5249 PL_lex_formbrack = PL_lex_brackets + 1;
5251 (void) force_word(PL_oldbufptr + tboffset, WORD,
5256 /* Look for a prototype */
5260 s = scan_str(s,FALSE,FALSE);
5262 Perl_croak(aTHX_ "Prototype not terminated");
5263 /* strip spaces and check for bad characters */
5264 d = SvPVX(PL_lex_stuff);
5267 for (p = d; *p; ++p) {
5270 if (!strchr("$@%*;[]&\\", *p))
5275 if (bad_proto && ckWARN(WARN_SYNTAX))
5276 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5277 "Illegal character in prototype for %"SVf" : %s",
5279 SvCUR_set(PL_lex_stuff, tmp);
5287 if (*s == ':' && s[1] != ':')
5288 PL_expect = attrful;
5289 else if (*s != '{' && key == KEY_sub) {
5291 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5293 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5297 PL_nextval[PL_nexttoke].opval =
5298 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5299 PL_lex_stuff = Nullsv;
5303 sv_setpv(PL_subname,
5304 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5307 (void) force_word(PL_oldbufptr + tboffset, WORD,
5316 LOP(OP_SYSTEM,XREF);
5319 LOP(OP_SYMLINK,XTERM);
5322 LOP(OP_SYSCALL,XTERM);
5325 LOP(OP_SYSOPEN,XTERM);
5328 LOP(OP_SYSSEEK,XTERM);
5331 LOP(OP_SYSREAD,XTERM);
5334 LOP(OP_SYSWRITE,XTERM);
5338 TERM(sublex_start());
5359 LOP(OP_TRUNCATE,XTERM);
5371 yylval.ival = CopLINE(PL_curcop);
5375 yylval.ival = CopLINE(PL_curcop);
5379 LOP(OP_UNLINK,XTERM);
5385 LOP(OP_UNPACK,XTERM);
5388 LOP(OP_UTIME,XTERM);
5394 LOP(OP_UNSHIFT,XTERM);
5397 if (PL_expect != XSTATE)
5398 yyerror("\"use\" not allowed in expression");
5400 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5401 s = force_version(s, TRUE);
5402 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5403 PL_nextval[PL_nexttoke].opval = Nullop;
5406 else if (*s == 'v') {
5407 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5408 s = force_version(s, FALSE);
5412 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5413 s = force_version(s, FALSE);
5425 yylval.ival = CopLINE(PL_curcop);
5429 PL_hints |= HINT_BLOCK_SCOPE;
5436 LOP(OP_WAITPID,XTERM);
5445 ctl_l[0] = toCTRL('L');
5447 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5450 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5455 if (PL_expect == XOPERATOR)
5461 yylval.ival = OP_XOR;
5466 TERM(sublex_start());
5471 #pragma segment Main
5475 S_pending_ident(pTHX)
5478 register I32 tmp = 0;
5479 /* pit holds the identifier we read and pending_ident is reset */
5480 char pit = PL_pending_ident;
5481 PL_pending_ident = 0;
5483 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5484 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5486 /* if we're in a my(), we can't allow dynamics here.
5487 $foo'bar has already been turned into $foo::bar, so
5488 just check for colons.
5490 if it's a legal name, the OP is a PADANY.
5493 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5494 if (strchr(PL_tokenbuf,':'))
5495 yyerror(Perl_form(aTHX_ "No package name allowed for "
5496 "variable %s in \"our\"",
5498 tmp = allocmy(PL_tokenbuf);
5501 if (strchr(PL_tokenbuf,':'))
5502 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5504 yylval.opval = newOP(OP_PADANY, 0);
5505 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5511 build the ops for accesses to a my() variable.
5513 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5514 then used in a comparison. This catches most, but not
5515 all cases. For instance, it catches
5516 sort { my($a); $a <=> $b }
5518 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5519 (although why you'd do that is anyone's guess).
5522 if (!strchr(PL_tokenbuf,':')) {
5524 tmp = pad_findmy(PL_tokenbuf);
5525 if (tmp != NOT_IN_PAD) {
5526 /* might be an "our" variable" */
5527 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5528 /* build ops for a bareword */
5529 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
5530 HEK *stashname = HvNAME_HEK(stash);
5531 SV *sym = newSVhek(stashname);
5532 sv_catpvn(sym, "::", 2);
5533 sv_catpv(sym, PL_tokenbuf+1);
5534 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5535 yylval.opval->op_private = OPpCONST_ENTERED;
5538 ? (GV_ADDMULTI | GV_ADDINEVAL)
5541 ((PL_tokenbuf[0] == '$') ? SVt_PV
5542 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5547 /* if it's a sort block and they're naming $a or $b */
5548 if (PL_last_lop_op == OP_SORT &&
5549 PL_tokenbuf[0] == '$' &&
5550 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5553 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5554 d < PL_bufend && *d != '\n';
5557 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5558 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5564 yylval.opval = newOP(OP_PADANY, 0);
5565 yylval.opval->op_targ = tmp;
5571 Whine if they've said @foo in a doublequoted string,
5572 and @foo isn't a variable we can find in the symbol
5575 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5576 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5577 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5578 && ckWARN(WARN_AMBIGUOUS))
5580 /* Downgraded from fatal to warning 20000522 mjd */
5581 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5582 "Possible unintended interpolation of %s in string",
5587 /* build ops for a bareword */
5588 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5589 yylval.opval->op_private = OPpCONST_ENTERED;
5590 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5591 ((PL_tokenbuf[0] == '$') ? SVt_PV
5592 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5598 * The following code was generated by perl_keyword.pl.
5602 Perl_keyword (pTHX_ const char *name, I32 len)
5606 case 1: /* 5 tokens of length 1 */
5638 case 2: /* 18 tokens of length 2 */
5784 case 3: /* 28 tokens of length 3 */
5788 if (name[1] == 'N' &&
5851 if (name[1] == 'i' &&
5891 if (name[1] == 'o' &&
5900 if (name[1] == 'e' &&
5909 if (name[1] == 'n' &&
5918 if (name[1] == 'o' &&
5927 if (name[1] == 'a' &&
5936 if (name[1] == 'o' &&
5998 if (name[1] == 'e' &&
6030 if (name[1] == 'i' &&
6039 if (name[1] == 's' &&
6048 if (name[1] == 'e' &&
6057 if (name[1] == 'o' &&
6069 case 4: /* 40 tokens of length 4 */
6073 if (name[1] == 'O' &&
6083 if (name[1] == 'N' &&
6093 if (name[1] == 'i' &&
6103 if (name[1] == 'h' &&
6113 if (name[1] == 'u' &&
6126 if (name[2] == 'c' &&
6135 if (name[2] == 's' &&
6144 if (name[2] == 'a' &&
6180 if (name[1] == 'o' &&
6193 if (name[2] == 't' &&
6202 if (name[2] == 'o' &&
6211 if (name[2] == 't' &&
6220 if (name[2] == 'e' &&
6233 if (name[1] == 'o' &&
6246 if (name[2] == 'y' &&
6255 if (name[2] == 'l' &&
6271 if (name[2] == 's' &&
6280 if (name[2] == 'n' &&
6289 if (name[2] == 'c' &&
6302 if (name[1] == 'e' &&
6312 if (name[1] == 'p' &&
6325 if (name[2] == 'c' &&
6334 if (name[2] == 'p' &&
6343 if (name[2] == 's' &&
6359 if (name[2] == 'n' &&
6429 if (name[2] == 'r' &&
6438 if (name[2] == 'r' &&
6447 if (name[2] == 'a' &&
6463 if (name[2] == 'l' &&
6530 case 5: /* 36 tokens of length 5 */
6534 if (name[1] == 'E' &&
6545 if (name[1] == 'H' &&
6559 if (name[2] == 'a' &&
6569 if (name[2] == 'a' &&
6583 if (name[1] == 'l' &&
6600 if (name[3] == 'i' &&
6609 if (name[3] == 'o' &&
6645 if (name[2] == 'o' &&
6655 if (name[2] == 'y' &&
6669 if (name[1] == 'l' &&
6683 if (name[2] == 'n' &&
6693 if (name[2] == 'o' &&
6710 if (name[2] == 'd' &&
6720 if (name[2] == 'c' &&
6737 if (name[2] == 'c' &&
6747 if (name[2] == 't' &&
6761 if (name[1] == 'k' &&
6772 if (name[1] == 'r' &&
6786 if (name[2] == 's' &&
6796 if (name[2] == 'd' &&
6813 if (name[2] == 'm' &&
6823 if (name[2] == 'i' &&
6833 if (name[2] == 'e' &&
6843 if (name[2] == 'l' &&
6853 if (name[2] == 'a' &&
6863 if (name[2] == 'u' &&
6877 if (name[1] == 'i' &&
6891 if (name[2] == 'a' &&
6904 if (name[3] == 'e' &&
6939 if (name[2] == 'i' &&
6956 if (name[2] == 'i' &&
6966 if (name[2] == 'i' &&
6983 case 6: /* 33 tokens of length 6 */
6987 if (name[1] == 'c' &&
7002 if (name[2] == 'l' &&
7013 if (name[2] == 'r' &&
7028 if (name[1] == 'e' &&
7043 if (name[2] == 's' &&
7048 if(ckWARN_d(WARN_SYNTAX))
7049 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7055 if (name[2] == 'i' &&
7073 if (name[2] == 'l' &&
7084 if (name[2] == 'r' &&
7099 if (name[1] == 'm' &&
7114 if (name[2] == 'n' &&
7125 if (name[2] == 's' &&
7140 if (name[1] == 's' &&
7146 if (name[4] == 't' &&
7155 if (name[4] == 'e' &&
7164 if (name[4] == 'c' &&
7173 if (name[4] == 'n' &&
7189 if (name[1] == 'r' &&
7207 if (name[3] == 'a' &&
7217 if (name[3] == 'u' &&
7231 if (name[2] == 'n' &&
7249 if (name[2] == 'a' &&
7263 if (name[3] == 'e' &&
7276 if (name[4] == 't' &&
7285 if (name[4] == 'e' &&
7307 if (name[4] == 't' &&
7316 if (name[4] == 'e' &&
7332 if (name[2] == 'c' &&
7343 if (name[2] == 'l' &&
7354 if (name[2] == 'b' &&
7365 if (name[2] == 's' &&
7388 if (name[4] == 's' &&
7397 if (name[4] == 'n' &&
7410 if (name[3] == 'a' &&
7427 if (name[1] == 'a' &&
7442 case 7: /* 28 tokens of length 7 */
7446 if (name[1] == 'E' &&
7459 if (name[1] == '_' &&
7472 if (name[1] == 'i' &&
7479 return -KEY_binmode;
7485 if (name[1] == 'o' &&
7492 return -KEY_connect;
7501 if (name[2] == 'm' &&
7507 return -KEY_dbmopen;
7513 if (name[2] == 'f' &&
7529 if (name[1] == 'o' &&
7542 if (name[1] == 'e' &&
7549 if (name[5] == 'r' &&
7552 return -KEY_getpgrp;
7558 if (name[5] == 'i' &&
7561 return -KEY_getppid;
7574 if (name[1] == 'c' &&
7581 return -KEY_lcfirst;
7587 if (name[1] == 'p' &&
7594 return -KEY_opendir;
7600 if (name[1] == 'a' &&
7618 if (name[3] == 'd' &&
7623 return -KEY_readdir;
7629 if (name[3] == 'u' &&
7640 if (name[3] == 'e' &&
7645 return -KEY_reverse;
7664 if (name[3] == 'k' &&
7669 return -KEY_seekdir;
7675 if (name[3] == 'p' &&
7680 return -KEY_setpgrp;
7690 if (name[2] == 'm' &&
7696 return -KEY_shmread;
7702 if (name[2] == 'r' &&
7708 return -KEY_sprintf;
7717 if (name[3] == 'l' &&
7722 return -KEY_symlink;
7731 if (name[4] == 'a' &&
7735 return -KEY_syscall;
7741 if (name[4] == 'p' &&
7745 return -KEY_sysopen;
7751 if (name[4] == 'e' &&
7755 return -KEY_sysread;
7761 if (name[4] == 'e' &&
7765 return -KEY_sysseek;
7783 if (name[1] == 'e' &&
7790 return -KEY_telldir;
7799 if (name[2] == 'f' &&
7805 return -KEY_ucfirst;
7811 if (name[2] == 's' &&
7817 return -KEY_unshift;
7827 if (name[1] == 'a' &&
7834 return -KEY_waitpid;
7843 case 8: /* 26 tokens of length 8 */
7847 if (name[1] == 'U' &&
7855 return KEY_AUTOLOAD;
7866 if (name[3] == 'A' &&
7872 return KEY___DATA__;
7878 if (name[3] == 'I' &&
7884 return -KEY___FILE__;
7890 if (name[3] == 'I' &&
7896 return -KEY___LINE__;
7912 if (name[2] == 'o' &&
7919 return -KEY_closedir;
7925 if (name[2] == 'n' &&
7932 return -KEY_continue;
7942 if (name[1] == 'b' &&
7950 return -KEY_dbmclose;
7956 if (name[1] == 'n' &&
7962 if (name[4] == 'r' &&
7967 return -KEY_endgrent;
7973 if (name[4] == 'w' &&
7978 return -KEY_endpwent;
7991 if (name[1] == 'o' &&
7999 return -KEY_formline;
8005 if (name[1] == 'e' &&
8016 if (name[6] == 'n' &&
8019 return -KEY_getgrent;
8025 if (name[6] == 'i' &&
8028 return -KEY_getgrgid;
8034 if (name[6] == 'a' &&
8037 return -KEY_getgrnam;
8050 if (name[4] == 'o' &&
8055 return -KEY_getlogin;
8066 if (name[6] == 'n' &&
8069 return -KEY_getpwent;
8075 if (name[6] == 'a' &&
8078 return -KEY_getpwnam;
8084 if (name[6] == 'i' &&
8087 return -KEY_getpwuid;
8107 if (name[1] == 'e' &&
8114 if (name[5] == 'i' &&
8121 return -KEY_readline;
8126 return -KEY_readlink;
8137 if (name[5] == 'i' &&
8141 return -KEY_readpipe;
8162 if (name[4] == 'r' &&
8167 return -KEY_setgrent;
8173 if (name[4] == 'w' &&
8178 return -KEY_setpwent;
8194 if (name[3] == 'w' &&
8200 return -KEY_shmwrite;
8206 if (name[3] == 't' &&
8212 return -KEY_shutdown;
8222 if (name[2] == 's' &&
8229 return -KEY_syswrite;
8239 if (name[1] == 'r' &&
8247 return -KEY_truncate;
8256 case 9: /* 8 tokens of length 9 */
8260 if (name[1] == 'n' &&
8269 return -KEY_endnetent;
8275 if (name[1] == 'e' &&
8284 return -KEY_getnetent;
8290 if (name[1] == 'o' &&
8299 return -KEY_localtime;
8305 if (name[1] == 'r' &&
8314 return KEY_prototype;
8320 if (name[1] == 'u' &&
8329 return -KEY_quotemeta;
8335 if (name[1] == 'e' &&
8344 return -KEY_rewinddir;
8350 if (name[1] == 'e' &&
8359 return -KEY_setnetent;
8365 if (name[1] == 'a' &&
8374 return -KEY_wantarray;
8383 case 10: /* 9 tokens of length 10 */
8387 if (name[1] == 'n' &&
8393 if (name[4] == 'o' &&
8400 return -KEY_endhostent;
8406 if (name[4] == 'e' &&
8413 return -KEY_endservent;
8426 if (name[1] == 'e' &&
8432 if (name[4] == 'o' &&
8439 return -KEY_gethostent;
8448 if (name[5] == 'r' &&
8454 return -KEY_getservent;
8460 if (name[5] == 'c' &&
8466 return -KEY_getsockopt;
8491 if (name[4] == 'o' &&
8498 return -KEY_sethostent;
8507 if (name[5] == 'r' &&
8513 return -KEY_setservent;
8519 if (name[5] == 'c' &&
8525 return -KEY_setsockopt;
8542 if (name[2] == 'c' &&
8551 return -KEY_socketpair;
8564 case 11: /* 8 tokens of length 11 */
8568 if (name[1] == '_' &&
8579 return -KEY___PACKAGE__;
8585 if (name[1] == 'n' &&
8596 return -KEY_endprotoent;
8602 if (name[1] == 'e' &&
8611 if (name[5] == 'e' &&
8618 return -KEY_getpeername;
8627 if (name[6] == 'o' &&
8633 return -KEY_getpriority;
8639 if (name[6] == 't' &&
8645 return -KEY_getprotoent;
8659 if (name[4] == 'o' &&
8667 return -KEY_getsockname;
8680 if (name[1] == 'e' &&
8688 if (name[6] == 'o' &&
8694 return -KEY_setpriority;
8700 if (name[6] == 't' &&
8706 return -KEY_setprotoent;
8722 case 12: /* 2 tokens of length 12 */
8723 if (name[0] == 'g' &&
8735 if (name[9] == 'd' &&
8738 { /* getnetbyaddr */
8739 return -KEY_getnetbyaddr;
8745 if (name[9] == 'a' &&
8748 { /* getnetbyname */
8749 return -KEY_getnetbyname;
8761 case 13: /* 4 tokens of length 13 */
8762 if (name[0] == 'g' &&
8769 if (name[4] == 'o' &&
8778 if (name[10] == 'd' &&
8781 { /* gethostbyaddr */
8782 return -KEY_gethostbyaddr;
8788 if (name[10] == 'a' &&
8791 { /* gethostbyname */
8792 return -KEY_gethostbyname;
8805 if (name[4] == 'e' &&
8814 if (name[10] == 'a' &&
8817 { /* getservbyname */
8818 return -KEY_getservbyname;
8824 if (name[10] == 'o' &&
8827 { /* getservbyport */
8828 return -KEY_getservbyport;
8847 case 14: /* 1 tokens of length 14 */
8848 if (name[0] == 'g' &&
8862 { /* getprotobyname */
8863 return -KEY_getprotobyname;
8868 case 16: /* 1 tokens of length 16 */
8869 if (name[0] == 'g' &&
8885 { /* getprotobynumber */
8886 return -KEY_getprotobynumber;
8900 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
8904 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8905 if (ckWARN(WARN_SYNTAX)) {
8907 for (w = s+2; *w && level; w++) {
8914 for (; *w && isSPACE(*w); w++) ;
8915 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
8916 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8917 "%s (...) interpreted as function",name);
8920 while (s < PL_bufend && isSPACE(*s))
8924 while (s < PL_bufend && isSPACE(*s))
8926 if (isIDFIRST_lazy_if(s,UTF)) {
8928 while (isALNUM_lazy_if(s,UTF))
8930 while (s < PL_bufend && isSPACE(*s))
8934 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
8935 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
8939 Perl_croak(aTHX_ "No comma allowed after %s", what);
8944 /* Either returns sv, or mortalizes sv and returns a new SV*.
8945 Best used as sv=new_constant(..., sv, ...).
8946 If s, pv are NULL, calls subroutine with one argument,
8947 and type is used with error messages only. */
8950 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
8954 HV *table = GvHV(PL_hintgv); /* ^H */
8958 const char *why1, *why2, *why3;
8960 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8963 why2 = strEQ(key,"charnames")
8964 ? "(possibly a missing \"use charnames ...\")"
8966 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8967 (type ? type: "undef"), why2);
8969 /* This is convoluted and evil ("goto considered harmful")
8970 * but I do not understand the intricacies of all the different
8971 * failure modes of %^H in here. The goal here is to make
8972 * the most probable error message user-friendly. --jhi */
8977 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8978 (type ? type: "undef"), why1, why2, why3);
8980 yyerror(SvPVX_const(msg));
8984 cvp = hv_fetch(table, key, strlen(key), FALSE);
8985 if (!cvp || !SvOK(*cvp)) {
8988 why3 = "} is not defined";
8991 sv_2mortal(sv); /* Parent created it permanently */
8994 pv = sv_2mortal(newSVpvn(s, len));
8996 typesv = sv_2mortal(newSVpv(type, 0));
8998 typesv = &PL_sv_undef;
9000 PUSHSTACKi(PERLSI_OVERLOAD);
9012 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9016 /* Check the eval first */
9017 if (!PL_in_eval && SvTRUE(ERRSV)) {
9018 sv_catpv(ERRSV, "Propagated");
9019 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9021 res = SvREFCNT_inc(sv);
9025 (void)SvREFCNT_inc(res);
9034 why1 = "Call to &{$^H{";
9036 why3 = "}} did not return a defined value";
9044 /* Returns a NUL terminated string, with the length of the string written to
9048 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9050 register char *d = dest;
9051 register char *e = d + destlen - 3; /* two-character token, ending NUL */
9054 Perl_croak(aTHX_ ident_too_long);
9055 if (isALNUM(*s)) /* UTF handled below */
9057 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9062 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9066 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9067 char *t = s + UTF8SKIP(s);
9068 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9070 if (d + (t - s) > e)
9071 Perl_croak(aTHX_ ident_too_long);
9072 Copy(s, d, t - s, char);
9085 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9095 e = d + destlen - 3; /* two-character token, ending NUL */
9097 while (isDIGIT(*s)) {
9099 Perl_croak(aTHX_ ident_too_long);
9106 Perl_croak(aTHX_ ident_too_long);
9107 if (isALNUM(*s)) /* UTF handled below */
9109 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9114 else if (*s == ':' && s[1] == ':') {
9118 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9119 char *t = s + UTF8SKIP(s);
9120 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9122 if (d + (t - s) > e)
9123 Perl_croak(aTHX_ ident_too_long);
9124 Copy(s, d, t - s, char);
9135 if (PL_lex_state != LEX_NORMAL)
9136 PL_lex_state = LEX_INTERPENDMAYBE;
9139 if (*s == '$' && s[1] &&
9140 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9153 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9158 if (isSPACE(s[-1])) {
9160 const char ch = *s++;
9161 if (!SPACE_OR_TAB(ch)) {
9167 if (isIDFIRST_lazy_if(d,UTF)) {
9171 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9173 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9176 Copy(s, d, e - s, char);
9181 while ((isALNUM(*s) || *s == ':') && d < e)
9184 Perl_croak(aTHX_ ident_too_long);
9187 while (s < send && SPACE_OR_TAB(*s)) s++;
9188 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9189 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9190 const char *brack = *s == '[' ? "[...]" : "{...}";
9191 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9192 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9193 funny, dest, brack, funny, dest, brack);
9196 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9200 /* Handle extended ${^Foo} variables
9201 * 1999-02-27 mjd-perl-patch@plover.com */
9202 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9206 while (isALNUM(*s) && d < e) {
9210 Perl_croak(aTHX_ ident_too_long);
9215 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9216 PL_lex_state = LEX_INTERPEND;
9221 if (PL_lex_state == LEX_NORMAL) {
9222 if (ckWARN(WARN_AMBIGUOUS) &&
9223 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9225 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9226 "Ambiguous use of %c{%s} resolved to %c%s",
9227 funny, dest, funny, dest);
9232 s = bracket; /* let the parser handle it */
9236 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9237 PL_lex_state = LEX_INTERPEND;
9242 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9247 *pmfl |= PMf_GLOBAL;
9249 *pmfl |= PMf_CONTINUE;
9253 *pmfl |= PMf_MULTILINE;
9255 *pmfl |= PMf_SINGLELINE;
9257 *pmfl |= PMf_EXTENDED;
9261 S_scan_pat(pTHX_ char *start, I32 type)
9264 char *s = scan_str(start,FALSE,FALSE);
9267 Perl_croak(aTHX_ "Search pattern not terminated");
9269 pm = (PMOP*)newPMOP(type, 0);
9270 if (PL_multi_open == '?')
9271 pm->op_pmflags |= PMf_ONCE;
9273 while (*s && strchr("iomsx", *s))
9274 pmflag(&pm->op_pmflags,*s++);
9277 while (*s && strchr("iogcmsx", *s))
9278 pmflag(&pm->op_pmflags,*s++);
9280 /* issue a warning if /c is specified,but /g is not */
9281 if (ckWARN(WARN_REGEXP) &&
9282 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9284 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
9287 pm->op_pmpermflags = pm->op_pmflags;
9289 PL_lex_op = (OP*)pm;
9290 yylval.ival = OP_MATCH;
9295 S_scan_subst(pTHX_ char *start)
9303 yylval.ival = OP_NULL;
9305 s = scan_str(start,FALSE,FALSE);
9308 Perl_croak(aTHX_ "Substitution pattern not terminated");
9310 if (s[-1] == PL_multi_open)
9313 first_start = PL_multi_start;
9314 s = scan_str(s,FALSE,FALSE);
9317 SvREFCNT_dec(PL_lex_stuff);
9318 PL_lex_stuff = Nullsv;
9320 Perl_croak(aTHX_ "Substitution replacement not terminated");
9322 PL_multi_start = first_start; /* so whole substitution is taken together */
9324 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9330 else if (strchr("iogcmsx", *s))
9331 pmflag(&pm->op_pmflags,*s++);
9336 /* /c is not meaningful with s/// */
9337 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
9339 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
9344 PL_sublex_info.super_bufptr = s;
9345 PL_sublex_info.super_bufend = PL_bufend;
9347 pm->op_pmflags |= PMf_EVAL;
9348 repl = newSVpvn("",0);
9350 sv_catpv(repl, es ? "eval " : "do ");
9351 sv_catpvn(repl, "{ ", 2);
9352 sv_catsv(repl, PL_lex_repl);
9353 sv_catpvn(repl, " };", 2);
9355 SvREFCNT_dec(PL_lex_repl);
9359 pm->op_pmpermflags = pm->op_pmflags;
9360 PL_lex_op = (OP*)pm;
9361 yylval.ival = OP_SUBST;
9366 S_scan_trans(pTHX_ char *start)
9375 yylval.ival = OP_NULL;
9377 s = scan_str(start,FALSE,FALSE);
9379 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9380 if (s[-1] == PL_multi_open)
9383 s = scan_str(s,FALSE,FALSE);
9386 SvREFCNT_dec(PL_lex_stuff);
9387 PL_lex_stuff = Nullsv;
9389 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9392 complement = del = squash = 0;
9396 complement = OPpTRANS_COMPLEMENT;
9399 del = OPpTRANS_DELETE;
9402 squash = OPpTRANS_SQUASH;
9411 New(803, tbl, complement&&!del?258:256, short);
9412 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9413 o->op_private &= ~OPpTRANS_ALL;
9414 o->op_private |= del|squash|complement|
9415 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9416 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9419 yylval.ival = OP_TRANS;
9424 S_scan_heredoc(pTHX_ register char *s)
9427 I32 op_type = OP_SCALAR;
9431 const char newline[] = "\n";
9432 const char *found_newline;
9436 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9440 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9443 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9444 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9447 s = delimcpy(d, e, s, PL_bufend, term, &len);
9457 if (!isALNUM_lazy_if(s,UTF))
9458 deprecate_old("bare << to mean <<\"\"");
9459 for (; isALNUM_lazy_if(s,UTF); s++) {
9464 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9465 Perl_croak(aTHX_ "Delimiter for here document is too long");
9468 len = d - PL_tokenbuf;
9469 #ifndef PERL_STRICT_CR
9470 d = strchr(s, '\r');
9472 char * const olds = s;
9474 while (s < PL_bufend) {
9480 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9489 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9493 if ( outer || !(found_newline = ninstr(s,PL_bufend,newline,newline+1)) ) {
9494 herewas = newSVpvn(s,PL_bufend-s);
9498 herewas = newSVpvn(s,found_newline-s);
9500 s += SvCUR(herewas);
9502 tmpstr = NEWSV(87,79);
9503 sv_upgrade(tmpstr, SVt_PVIV);
9506 SvIV_set(tmpstr, -1);
9508 else if (term == '`') {
9509 op_type = OP_BACKTICK;
9510 SvIV_set(tmpstr, '\\');
9514 PL_multi_start = CopLINE(PL_curcop);
9515 PL_multi_open = PL_multi_close = '<';
9516 term = *PL_tokenbuf;
9517 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9518 char *bufptr = PL_sublex_info.super_bufptr;
9519 char *bufend = PL_sublex_info.super_bufend;
9520 char * const olds = s - SvCUR(herewas);
9521 s = strchr(bufptr, '\n');
9525 while (s < bufend &&
9526 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9528 CopLINE_inc(PL_curcop);
9531 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9532 missingterm(PL_tokenbuf);
9534 sv_setpvn(herewas,bufptr,d-bufptr+1);
9535 sv_setpvn(tmpstr,d+1,s-d);
9537 sv_catpvn(herewas,s,bufend-s);
9538 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9545 while (s < PL_bufend &&
9546 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9548 CopLINE_inc(PL_curcop);
9550 if (s >= PL_bufend) {
9551 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9552 missingterm(PL_tokenbuf);
9554 sv_setpvn(tmpstr,d+1,s-d);
9556 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9558 sv_catpvn(herewas,s,PL_bufend-s);
9559 sv_setsv(PL_linestr,herewas);
9560 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9561 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9562 PL_last_lop = PL_last_uni = Nullch;
9565 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9566 while (s >= PL_bufend) { /* multiple line string? */
9568 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9569 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9570 missingterm(PL_tokenbuf);
9572 CopLINE_inc(PL_curcop);
9573 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9574 PL_last_lop = PL_last_uni = Nullch;
9575 #ifndef PERL_STRICT_CR
9576 if (PL_bufend - PL_linestart >= 2) {
9577 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9578 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9580 PL_bufend[-2] = '\n';
9582 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9584 else if (PL_bufend[-1] == '\r')
9585 PL_bufend[-1] = '\n';
9587 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9588 PL_bufend[-1] = '\n';
9590 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9591 SV *sv = NEWSV(88,0);
9593 sv_upgrade(sv, SVt_PVMG);
9594 sv_setsv(sv,PL_linestr);
9597 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9599 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9600 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9601 *(SvPVX(PL_linestr) + off ) = ' ';
9602 sv_catsv(PL_linestr,herewas);
9603 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9604 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9608 sv_catsv(tmpstr,PL_linestr);
9613 PL_multi_end = CopLINE(PL_curcop);
9614 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9615 SvPV_shrink_to_cur(tmpstr);
9617 SvREFCNT_dec(herewas);
9619 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9621 else if (PL_encoding)
9622 sv_recode_to_utf8(tmpstr, PL_encoding);
9624 PL_lex_stuff = tmpstr;
9625 yylval.ival = op_type;
9630 takes: current position in input buffer
9631 returns: new position in input buffer
9632 side-effects: yylval and lex_op are set.
9637 <FH> read from filehandle
9638 <pkg::FH> read from package qualified filehandle
9639 <pkg'FH> read from package qualified filehandle
9640 <$fh> read from filehandle in $fh
9646 S_scan_inputsymbol(pTHX_ char *start)
9648 register char *s = start; /* current position in buffer */
9654 d = PL_tokenbuf; /* start of temp holding space */
9655 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9656 end = strchr(s, '\n');
9659 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9661 /* die if we didn't have space for the contents of the <>,
9662 or if it didn't end, or if we see a newline
9665 if (len >= sizeof PL_tokenbuf)
9666 Perl_croak(aTHX_ "Excessively long <> operator");
9668 Perl_croak(aTHX_ "Unterminated <> operator");
9673 Remember, only scalar variables are interpreted as filehandles by
9674 this code. Anything more complex (e.g., <$fh{$num}>) will be
9675 treated as a glob() call.
9676 This code makes use of the fact that except for the $ at the front,
9677 a scalar variable and a filehandle look the same.
9679 if (*d == '$' && d[1]) d++;
9681 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9682 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9685 /* If we've tried to read what we allow filehandles to look like, and
9686 there's still text left, then it must be a glob() and not a getline.
9687 Use scan_str to pull out the stuff between the <> and treat it
9688 as nothing more than a string.
9691 if (d - PL_tokenbuf != len) {
9692 yylval.ival = OP_GLOB;
9694 s = scan_str(start,FALSE,FALSE);
9696 Perl_croak(aTHX_ "Glob not terminated");
9700 bool readline_overriden = FALSE;
9701 GV *gv_readline = Nullgv;
9703 /* we're in a filehandle read situation */
9706 /* turn <> into <ARGV> */
9708 Copy("ARGV",d,5,char);
9710 /* Check whether readline() is overriden */
9711 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
9712 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9714 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9715 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9716 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9717 readline_overriden = TRUE;
9719 /* if <$fh>, create the ops to turn the variable into a
9725 /* try to find it in the pad for this block, otherwise find
9726 add symbol table ops
9728 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
9729 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
9730 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
9731 HEK *stashname = HvNAME_HEK(stash);
9732 SV *sym = sv_2mortal(newSVhek(stashname));
9733 sv_catpvn(sym, "::", 2);
9739 OP *o = newOP(OP_PADSV, 0);
9741 PL_lex_op = readline_overriden
9742 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9743 append_elem(OP_LIST, o,
9744 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9745 : (OP*)newUNOP(OP_READLINE, 0, o);
9754 ? (GV_ADDMULTI | GV_ADDINEVAL)
9757 PL_lex_op = readline_overriden
9758 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9759 append_elem(OP_LIST,
9760 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9761 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9762 : (OP*)newUNOP(OP_READLINE, 0,
9763 newUNOP(OP_RV2SV, 0,
9764 newGVOP(OP_GV, 0, gv)));
9766 if (!readline_overriden)
9767 PL_lex_op->op_flags |= OPf_SPECIAL;
9768 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
9769 yylval.ival = OP_NULL;
9772 /* If it's none of the above, it must be a literal filehandle
9773 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9775 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9776 PL_lex_op = readline_overriden
9777 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9778 append_elem(OP_LIST,
9779 newGVOP(OP_GV, 0, gv),
9780 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9781 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9782 yylval.ival = OP_NULL;
9791 takes: start position in buffer
9792 keep_quoted preserve \ on the embedded delimiter(s)
9793 keep_delims preserve the delimiters around the string
9794 returns: position to continue reading from buffer
9795 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9796 updates the read buffer.
9798 This subroutine pulls a string out of the input. It is called for:
9799 q single quotes q(literal text)
9800 ' single quotes 'literal text'
9801 qq double quotes qq(interpolate $here please)
9802 " double quotes "interpolate $here please"
9803 qx backticks qx(/bin/ls -l)
9804 ` backticks `/bin/ls -l`
9805 qw quote words @EXPORT_OK = qw( func() $spam )
9806 m// regexp match m/this/
9807 s/// regexp substitute s/this/that/
9808 tr/// string transliterate tr/this/that/
9809 y/// string transliterate y/this/that/
9810 ($*@) sub prototypes sub foo ($)
9811 (stuff) sub attr parameters sub foo : attr(stuff)
9812 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9814 In most of these cases (all but <>, patterns and transliterate)
9815 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9816 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9817 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9820 It skips whitespace before the string starts, and treats the first
9821 character as the delimiter. If the delimiter is one of ([{< then
9822 the corresponding "close" character )]}> is used as the closing
9823 delimiter. It allows quoting of delimiters, and if the string has
9824 balanced delimiters ([{<>}]) it allows nesting.
9826 On success, the SV with the resulting string is put into lex_stuff or,
9827 if that is already non-NULL, into lex_repl. The second case occurs only
9828 when parsing the RHS of the special constructs s/// and tr/// (y///).
9829 For convenience, the terminating delimiter character is stuffed into
9834 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9836 SV *sv; /* scalar value: string */
9837 char *tmps; /* temp string, used for delimiter matching */
9838 register char *s = start; /* current position in the buffer */
9839 register char term; /* terminating character */
9840 register char *to; /* current position in the sv's data */
9841 I32 brackets = 1; /* bracket nesting level */
9842 bool has_utf8 = FALSE; /* is there any utf8 content? */
9843 I32 termcode; /* terminating char. code */
9844 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9845 STRLEN termlen; /* length of terminating string */
9846 char *last = NULL; /* last position for nesting bracket */
9848 /* skip space before the delimiter */
9852 /* mark where we are, in case we need to report errors */
9855 /* after skipping whitespace, the next character is the terminator */
9858 termcode = termstr[0] = term;
9862 termcode = utf8_to_uvchr((U8*)s, &termlen);
9863 Copy(s, termstr, termlen, U8);
9864 if (!UTF8_IS_INVARIANT(term))
9868 /* mark where we are */
9869 PL_multi_start = CopLINE(PL_curcop);
9870 PL_multi_open = term;
9872 /* find corresponding closing delimiter */
9873 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9874 termcode = termstr[0] = term = tmps[5];
9876 PL_multi_close = term;
9878 /* create a new SV to hold the contents. 87 is leak category, I'm
9879 assuming. 79 is the SV's initial length. What a random number. */
9881 sv_upgrade(sv, SVt_PVIV);
9882 SvIV_set(sv, termcode);
9883 (void)SvPOK_only(sv); /* validate pointer */
9885 /* move past delimiter and try to read a complete string */
9887 sv_catpvn(sv, s, termlen);
9890 if (PL_encoding && !UTF) {
9894 int offset = s - SvPVX_const(PL_linestr);
9895 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9896 &offset, (char*)termstr, termlen);
9897 const char *ns = SvPVX_const(PL_linestr) + offset;
9898 char *svlast = SvEND(sv) - 1;
9900 for (; s < ns; s++) {
9901 if (*s == '\n' && !PL_rsfp)
9902 CopLINE_inc(PL_curcop);
9905 goto read_more_line;
9907 /* handle quoted delimiters */
9908 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9910 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9912 if ((svlast-1 - t) % 2) {
9916 SvCUR_set(sv, SvCUR(sv) - 1);
9921 if (PL_multi_open == PL_multi_close) {
9929 for (t = w = last; t < svlast; w++, t++) {
9930 /* At here, all closes are "was quoted" one,
9931 so we don't check PL_multi_close. */
9933 if (!keep_quoted && *(t+1) == PL_multi_open)
9938 else if (*t == PL_multi_open)
9946 SvCUR_set(sv, w - SvPVX_const(sv));
9949 if (--brackets <= 0)
9955 SvCUR_set(sv, SvCUR(sv) - 1);
9961 /* extend sv if need be */
9962 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9963 /* set 'to' to the next character in the sv's string */
9964 to = SvPVX(sv)+SvCUR(sv);
9966 /* if open delimiter is the close delimiter read unbridle */
9967 if (PL_multi_open == PL_multi_close) {
9968 for (; s < PL_bufend; s++,to++) {
9969 /* embedded newlines increment the current line number */
9970 if (*s == '\n' && !PL_rsfp)
9971 CopLINE_inc(PL_curcop);
9972 /* handle quoted delimiters */
9973 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9974 if (!keep_quoted && s[1] == term)
9976 /* any other quotes are simply copied straight through */
9980 /* terminate when run out of buffer (the for() condition), or
9981 have found the terminator */
9982 else if (*s == term) {
9985 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9988 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9994 /* if the terminator isn't the same as the start character (e.g.,
9995 matched brackets), we have to allow more in the quoting, and
9996 be prepared for nested brackets.
9999 /* read until we run out of string, or we find the terminator */
10000 for (; s < PL_bufend; s++,to++) {
10001 /* embedded newlines increment the line count */
10002 if (*s == '\n' && !PL_rsfp)
10003 CopLINE_inc(PL_curcop);
10004 /* backslashes can escape the open or closing characters */
10005 if (*s == '\\' && s+1 < PL_bufend) {
10006 if (!keep_quoted &&
10007 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10012 /* allow nested opens and closes */
10013 else if (*s == PL_multi_close && --brackets <= 0)
10015 else if (*s == PL_multi_open)
10017 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10022 /* terminate the copied string and update the sv's end-of-string */
10024 SvCUR_set(sv, to - SvPVX_const(sv));
10027 * this next chunk reads more into the buffer if we're not done yet
10031 break; /* handle case where we are done yet :-) */
10033 #ifndef PERL_STRICT_CR
10034 if (to - SvPVX_const(sv) >= 2) {
10035 if ((to[-2] == '\r' && to[-1] == '\n') ||
10036 (to[-2] == '\n' && to[-1] == '\r'))
10040 SvCUR_set(sv, to - SvPVX_const(sv));
10042 else if (to[-1] == '\r')
10045 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10050 /* if we're out of file, or a read fails, bail and reset the current
10051 line marker so we can report where the unterminated string began
10054 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10056 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10059 /* we read a line, so increment our line counter */
10060 CopLINE_inc(PL_curcop);
10062 /* update debugger info */
10063 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10064 SV *sv = NEWSV(88,0);
10066 sv_upgrade(sv, SVt_PVMG);
10067 sv_setsv(sv,PL_linestr);
10068 (void)SvIOK_on(sv);
10070 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10073 /* having changed the buffer, we must update PL_bufend */
10074 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10075 PL_last_lop = PL_last_uni = Nullch;
10078 /* at this point, we have successfully read the delimited string */
10080 if (!PL_encoding || UTF) {
10082 sv_catpvn(sv, s, termlen);
10085 if (has_utf8 || PL_encoding)
10088 PL_multi_end = CopLINE(PL_curcop);
10090 /* if we allocated too much space, give some back */
10091 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10092 SvLEN_set(sv, SvCUR(sv) + 1);
10093 SvPV_renew(sv, SvLEN(sv));
10096 /* decide whether this is the first or second quoted string we've read
10109 takes: pointer to position in buffer
10110 returns: pointer to new position in buffer
10111 side-effects: builds ops for the constant in yylval.op
10113 Read a number in any of the formats that Perl accepts:
10115 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10116 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10119 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10121 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10124 If it reads a number without a decimal point or an exponent, it will
10125 try converting the number to an integer and see if it can do so
10126 without loss of precision.
10130 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10132 register const char *s = start; /* current position in buffer */
10133 register char *d; /* destination in temp buffer */
10134 register char *e; /* end of temp buffer */
10135 NV nv; /* number read, as a double */
10136 SV *sv = Nullsv; /* place to put the converted number */
10137 bool floatit; /* boolean: int or float? */
10138 const char *lastub = 0; /* position of last underbar */
10139 static char const number_too_long[] = "Number too long";
10141 /* We use the first character to decide what type of number this is */
10145 Perl_croak(aTHX_ "panic: scan_num");
10147 /* if it starts with a 0, it could be an octal number, a decimal in
10148 0.13 disguise, or a hexadecimal number, or a binary number. */
10152 u holds the "number so far"
10153 shift the power of 2 of the base
10154 (hex == 4, octal == 3, binary == 1)
10155 overflowed was the number more than we can hold?
10157 Shift is used when we add a digit. It also serves as an "are
10158 we in octal/hex/binary?" indicator to disallow hex characters
10159 when in octal mode.
10164 bool overflowed = FALSE;
10165 bool just_zero = TRUE; /* just plain 0 or binary number? */
10166 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10167 static const char* const bases[5] =
10168 { "", "binary", "", "octal", "hexadecimal" };
10169 static const char* const Bases[5] =
10170 { "", "Binary", "", "Octal", "Hexadecimal" };
10171 static const char* const maxima[5] =
10173 "0b11111111111111111111111111111111",
10177 const char *base, *Base, *max;
10179 /* check for hex */
10184 } else if (s[1] == 'b') {
10189 /* check for a decimal in disguise */
10190 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10192 /* so it must be octal */
10199 if (ckWARN(WARN_SYNTAX))
10200 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10201 "Misplaced _ in number");
10205 base = bases[shift];
10206 Base = Bases[shift];
10207 max = maxima[shift];
10209 /* read the rest of the number */
10211 /* x is used in the overflow test,
10212 b is the digit we're adding on. */
10217 /* if we don't mention it, we're done */
10221 /* _ are ignored -- but warned about if consecutive */
10223 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10224 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10225 "Misplaced _ in number");
10229 /* 8 and 9 are not octal */
10230 case '8': case '9':
10232 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10236 case '2': case '3': case '4':
10237 case '5': case '6': case '7':
10239 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10242 case '0': case '1':
10243 b = *s++ & 15; /* ASCII digit -> value of digit */
10247 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10248 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10249 /* make sure they said 0x */
10252 b = (*s++ & 7) + 9;
10254 /* Prepare to put the digit we have onto the end
10255 of the number so far. We check for overflows.
10261 x = u << shift; /* make room for the digit */
10263 if ((x >> shift) != u
10264 && !(PL_hints & HINT_NEW_BINARY)) {
10267 if (ckWARN_d(WARN_OVERFLOW))
10268 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10269 "Integer overflow in %s number",
10272 u = x | b; /* add the digit to the end */
10275 n *= nvshift[shift];
10276 /* If an NV has not enough bits in its
10277 * mantissa to represent an UV this summing of
10278 * small low-order numbers is a waste of time
10279 * (because the NV cannot preserve the
10280 * low-order bits anyway): we could just
10281 * remember when did we overflow and in the
10282 * end just multiply n by the right
10290 /* if we get here, we had success: make a scalar value from
10295 /* final misplaced underbar check */
10296 if (s[-1] == '_') {
10297 if (ckWARN(WARN_SYNTAX))
10298 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10303 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
10304 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10305 "%s number > %s non-portable",
10311 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
10312 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10313 "%s number > %s non-portable",
10318 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10319 sv = new_constant(start, s - start, "integer",
10321 else if (PL_hints & HINT_NEW_BINARY)
10322 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10327 handle decimal numbers.
10328 we're also sent here when we read a 0 as the first digit
10330 case '1': case '2': case '3': case '4': case '5':
10331 case '6': case '7': case '8': case '9': case '.':
10334 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10337 /* read next group of digits and _ and copy into d */
10338 while (isDIGIT(*s) || *s == '_') {
10339 /* skip underscores, checking for misplaced ones
10343 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10344 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10345 "Misplaced _ in number");
10349 /* check for end of fixed-length buffer */
10351 Perl_croak(aTHX_ number_too_long);
10352 /* if we're ok, copy the character */
10357 /* final misplaced underbar check */
10358 if (lastub && s == lastub + 1) {
10359 if (ckWARN(WARN_SYNTAX))
10360 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10363 /* read a decimal portion if there is one. avoid
10364 3..5 being interpreted as the number 3. followed
10367 if (*s == '.' && s[1] != '.') {
10372 if (ckWARN(WARN_SYNTAX))
10373 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10374 "Misplaced _ in number");
10378 /* copy, ignoring underbars, until we run out of digits.
10380 for (; isDIGIT(*s) || *s == '_'; s++) {
10381 /* fixed length buffer check */
10383 Perl_croak(aTHX_ number_too_long);
10385 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10386 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10387 "Misplaced _ in number");
10393 /* fractional part ending in underbar? */
10394 if (s[-1] == '_') {
10395 if (ckWARN(WARN_SYNTAX))
10396 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10397 "Misplaced _ in number");
10399 if (*s == '.' && isDIGIT(s[1])) {
10400 /* oops, it's really a v-string, but without the "v" */
10406 /* read exponent part, if present */
10407 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10411 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10412 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10414 /* stray preinitial _ */
10416 if (ckWARN(WARN_SYNTAX))
10417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10418 "Misplaced _ in number");
10422 /* allow positive or negative exponent */
10423 if (*s == '+' || *s == '-')
10426 /* stray initial _ */
10428 if (ckWARN(WARN_SYNTAX))
10429 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10430 "Misplaced _ in number");
10434 /* read digits of exponent */
10435 while (isDIGIT(*s) || *s == '_') {
10438 Perl_croak(aTHX_ number_too_long);
10442 if (ckWARN(WARN_SYNTAX) &&
10443 ((lastub && s == lastub + 1) ||
10444 (!isDIGIT(s[1]) && s[1] != '_')))
10445 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10446 "Misplaced _ in number");
10453 /* make an sv from the string */
10457 We try to do an integer conversion first if no characters
10458 indicating "float" have been found.
10463 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10465 if (flags == IS_NUMBER_IN_UV) {
10467 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10470 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10471 if (uv <= (UV) IV_MIN)
10472 sv_setiv(sv, -(IV)uv);
10479 /* terminate the string */
10481 nv = Atof(PL_tokenbuf);
10485 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10486 (PL_hints & HINT_NEW_INTEGER) )
10487 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10488 (floatit ? "float" : "integer"),
10492 /* if it starts with a v, it could be a v-string */
10495 sv = NEWSV(92,5); /* preallocate storage space */
10496 s = scan_vstring(s,sv);
10500 /* make the op for the constant and return */
10503 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10505 lvalp->opval = Nullop;
10511 S_scan_formline(pTHX_ register char *s)
10513 register char *eol;
10515 SV *stuff = newSVpvn("",0);
10516 bool needargs = FALSE;
10517 bool eofmt = FALSE;
10519 while (!needargs) {
10522 #ifdef PERL_STRICT_CR
10523 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10525 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10527 if (*t == '\n' || t == PL_bufend) {
10532 if (PL_in_eval && !PL_rsfp) {
10533 eol = (char *) memchr(s,'\n',PL_bufend-s);
10538 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10540 for (t = s; t < eol; t++) {
10541 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10543 goto enough; /* ~~ must be first line in formline */
10545 if (*t == '@' || *t == '^')
10549 sv_catpvn(stuff, s, eol-s);
10550 #ifndef PERL_STRICT_CR
10551 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10552 char *end = SvPVX(stuff) + SvCUR(stuff);
10555 SvCUR_set(stuff, SvCUR(stuff) - 1);
10564 s = filter_gets(PL_linestr, PL_rsfp, 0);
10565 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10566 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10567 PL_last_lop = PL_last_uni = Nullch;
10576 if (SvCUR(stuff)) {
10579 PL_lex_state = LEX_NORMAL;
10580 PL_nextval[PL_nexttoke].ival = 0;
10584 PL_lex_state = LEX_FORMLINE;
10586 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10588 else if (PL_encoding)
10589 sv_recode_to_utf8(stuff, PL_encoding);
10591 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10593 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10597 SvREFCNT_dec(stuff);
10599 PL_lex_formbrack = 0;
10610 PL_cshlen = strlen(PL_cshname);
10615 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10617 const I32 oldsavestack_ix = PL_savestack_ix;
10618 CV* outsidecv = PL_compcv;
10621 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10623 SAVEI32(PL_subline);
10624 save_item(PL_subname);
10625 SAVESPTR(PL_compcv);
10627 PL_compcv = (CV*)NEWSV(1104,0);
10628 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10629 CvFLAGS(PL_compcv) |= flags;
10631 PL_subline = CopLINE(PL_curcop);
10632 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10633 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10634 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10636 return oldsavestack_ix;
10640 #pragma segment Perl_yylex
10643 Perl_yywarn(pTHX_ const char *s)
10645 PL_in_eval |= EVAL_WARNONLY;
10647 PL_in_eval &= ~EVAL_WARNONLY;
10652 Perl_yyerror(pTHX_ const char *s)
10654 const char *where = NULL;
10655 const char *context = NULL;
10659 if (!yychar || (yychar == ';' && !PL_rsfp))
10661 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10662 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10663 PL_oldbufptr != PL_bufptr) {
10666 The code below is removed for NetWare because it abends/crashes on NetWare
10667 when the script has error such as not having the closing quotes like:
10668 if ($var eq "value)
10669 Checking of white spaces is anyway done in NetWare code.
10672 while (isSPACE(*PL_oldoldbufptr))
10675 context = PL_oldoldbufptr;
10676 contlen = PL_bufptr - PL_oldoldbufptr;
10678 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10679 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10682 The code below is removed for NetWare because it abends/crashes on NetWare
10683 when the script has error such as not having the closing quotes like:
10684 if ($var eq "value)
10685 Checking of white spaces is anyway done in NetWare code.
10688 while (isSPACE(*PL_oldbufptr))
10691 context = PL_oldbufptr;
10692 contlen = PL_bufptr - PL_oldbufptr;
10694 else if (yychar > 255)
10695 where = "next token ???";
10696 else if (yychar == -2) { /* YYEMPTY */
10697 if (PL_lex_state == LEX_NORMAL ||
10698 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10699 where = "at end of line";
10700 else if (PL_lex_inpat)
10701 where = "within pattern";
10703 where = "within string";
10706 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
10708 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10709 else if (isPRINT_LC(yychar))
10710 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10712 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10713 where = SvPVX_const(where_sv);
10715 msg = sv_2mortal(newSVpv(s, 0));
10716 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10717 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10719 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10721 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10722 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10723 Perl_sv_catpvf(aTHX_ msg,
10724 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10725 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10728 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
10729 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
10732 if (PL_error_count >= 10) {
10733 if (PL_in_eval && SvCUR(ERRSV))
10734 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10735 ERRSV, OutCopFILE(PL_curcop));
10737 Perl_croak(aTHX_ "%s has too many errors.\n",
10738 OutCopFILE(PL_curcop));
10741 PL_in_my_stash = Nullhv;
10745 #pragma segment Main
10749 S_swallow_bom(pTHX_ U8 *s)
10751 const STRLEN slen = SvCUR(PL_linestr);
10754 if (s[1] == 0xFE) {
10755 /* UTF-16 little-endian? (or UTF32-LE?) */
10756 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10757 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
10758 #ifndef PERL_NO_UTF16_FILTER
10759 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
10762 if (PL_bufend > (char*)s) {
10766 filter_add(utf16rev_textfilter, NULL);
10767 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10768 utf16_to_utf8_reversed(s, news,
10769 PL_bufend - (char*)s - 1,
10771 sv_setpvn(PL_linestr, (const char*)news, newlen);
10773 SvUTF8_on(PL_linestr);
10774 s = (U8*)SvPVX(PL_linestr);
10775 PL_bufend = SvPVX(PL_linestr) + newlen;
10778 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
10783 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10784 #ifndef PERL_NO_UTF16_FILTER
10785 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10788 if (PL_bufend > (char *)s) {
10792 filter_add(utf16_textfilter, NULL);
10793 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10794 utf16_to_utf8(s, news,
10795 PL_bufend - (char*)s,
10797 sv_setpvn(PL_linestr, (const char*)news, newlen);
10799 SvUTF8_on(PL_linestr);
10800 s = (U8*)SvPVX(PL_linestr);
10801 PL_bufend = SvPVX(PL_linestr) + newlen;
10804 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
10809 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10810 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10811 s += 3; /* UTF-8 */
10817 if (s[2] == 0xFE && s[3] == 0xFF) {
10818 /* UTF-32 big-endian */
10819 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
10822 else if (s[2] == 0 && s[3] != 0) {
10825 * are a good indicator of UTF-16BE. */
10826 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10831 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10834 * are a good indicator of UTF-16LE. */
10835 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10844 * Restore a source filter.
10848 restore_rsfp(pTHX_ void *f)
10850 PerlIO *fp = (PerlIO*)f;
10852 if (PL_rsfp == PerlIO_stdin())
10853 PerlIO_clearerr(PL_rsfp);
10854 else if (PL_rsfp && (PL_rsfp != fp))
10855 PerlIO_close(PL_rsfp);
10859 #ifndef PERL_NO_UTF16_FILTER
10861 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10863 const STRLEN old = SvCUR(sv);
10864 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10865 DEBUG_P(PerlIO_printf(Perl_debug_log,
10866 "utf16_textfilter(%p): %d %d (%d)\n",
10867 utf16_textfilter, idx, maxlen, (int) count));
10871 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10872 Copy(SvPVX_const(sv), tmps, old, char);
10873 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
10874 SvCUR(sv) - old, &newlen);
10875 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10877 DEBUG_P({sv_dump(sv);});
10882 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10884 const STRLEN old = SvCUR(sv);
10885 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10886 DEBUG_P(PerlIO_printf(Perl_debug_log,
10887 "utf16rev_textfilter(%p): %d %d (%d)\n",
10888 utf16rev_textfilter, idx, maxlen, (int) count));
10892 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10893 Copy(SvPVX_const(sv), tmps, old, char);
10894 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
10895 SvCUR(sv) - old, &newlen);
10896 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10898 DEBUG_P({ sv_dump(sv); });
10904 Returns a pointer to the next character after the parsed
10905 vstring, as well as updating the passed in sv.
10907 Function must be called like
10910 s = scan_vstring(s,sv);
10912 The sv should already be large enough to store the vstring
10913 passed in, for performance reasons.
10918 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
10920 const char *pos = s;
10921 const char *start = s;
10922 if (*pos == 'v') pos++; /* get past 'v' */
10923 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10925 if ( *pos != '.') {
10926 /* this may not be a v-string if followed by => */
10927 const char *next = pos;
10928 while (next < PL_bufend && isSPACE(*next))
10930 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
10931 /* return string not v-string */
10932 sv_setpvn(sv,(char *)s,pos-s);
10933 return (char *)pos;
10937 if (!isALPHA(*pos)) {
10939 U8 tmpbuf[UTF8_MAXBYTES+1];
10942 if (*s == 'v') s++; /* get past 'v' */
10944 sv_setpvn(sv, "", 0);
10949 /* this is atoi() that tolerates underscores */
10950 const char *end = pos;
10952 while (--end >= s) {
10957 rev += (*end - '0') * mult;
10959 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
10960 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10961 "Integer overflow in decimal number");
10965 if (rev > 0x7FFFFFFF)
10966 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10968 /* Append native character for the rev point */
10969 tmpend = uvchr_to_utf8(tmpbuf, rev);
10970 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10971 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10973 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
10979 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10983 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10991 * c-indentation-style: bsd
10992 * c-basic-offset: 4
10993 * indent-tabs-mode: t
10996 * ex: set ts=8 sts=4 sw=4 noet: