3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] =
30 "Identifier too long";
31 static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33 static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
36 static void restore_rsfp(pTHX_ void *f);
37 #ifndef PERL_NO_UTF16_FILTER
38 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
42 #define XFAKEBRACK 128
45 #ifdef USE_UTF8_SCRIPTS
46 # define UTF (!IN_BYTES)
48 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
51 /* In variables named $^X, these are the legal values for X.
52 * 1999-02-27 mjd-perl-patch@plover.com */
53 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
55 /* On MacOS, respect nonbreaking spaces */
56 #ifdef MACOS_TRADITIONAL
57 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
59 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
62 /* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
64 * can get by with a single comparison (if the compiler is smart enough).
67 /* #define LEX_NOTPARSING 11 is done in perl.h. */
70 #define LEX_INTERPNORMAL 9
71 #define LEX_INTERPCASEMOD 8
72 #define LEX_INTERPPUSH 7
73 #define LEX_INTERPSTART 6
74 #define LEX_INTERPEND 5
75 #define LEX_INTERPENDMAYBE 4
76 #define LEX_INTERPCONCAT 3
77 #define LEX_INTERPCONST 2
78 #define LEX_FORMLINE 1
79 #define LEX_KNOWNEXT 0
82 static const char* const lex_state_names[] = {
101 #include "keywords.h"
103 /* CLINE is a macro that ensures PL_copline has a sane value */
108 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
111 * Convenience functions to return different tokens and prime the
112 * lexer for the next token. They all take an argument.
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
124 * FUN1 : not used, except for not, which isn't a UNIOP
125 * BOop : bitwise or or xor
127 * SHop : shift operator
128 * PWop : power operator
129 * PMop : pattern-matching operator
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
135 * Also see LOP and lop() below.
138 #ifdef DEBUGGING /* Serve -DT. */
139 # define REPORT(retval) tokereport(s,(int)retval)
141 # define REPORT(retval) (retval)
144 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
165 /* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
170 #define UNI2(f,x) { \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
177 return REPORT( (int)FUNC1 ); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 #define UNI(f) UNI2(f,XTERM)
182 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
184 #define UNIBRACK(f) { \
187 PL_last_uni = PL_oldbufptr; \
189 return REPORT( (int)FUNC1 ); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
194 /* grandfather return to old style */
195 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
199 /* how to interpret the yylval associated with the token */
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
209 static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
222 { DO, TOKENTYPE_NONE, "DO" },
223 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
224 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
225 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
226 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
227 { ELSE, TOKENTYPE_NONE, "ELSE" },
228 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
229 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
230 { FOR, TOKENTYPE_IVAL, "FOR" },
231 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
232 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
233 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
234 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
235 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
236 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
237 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
238 { IF, TOKENTYPE_IVAL, "IF" },
239 { LABEL, TOKENTYPE_PVAL, "LABEL" },
240 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
241 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
242 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
243 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
244 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
245 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
246 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
247 { MY, TOKENTYPE_IVAL, "MY" },
248 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
249 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
250 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
251 { OROP, TOKENTYPE_IVAL, "OROP" },
252 { OROR, TOKENTYPE_NONE, "OROR" },
253 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
254 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
255 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
256 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
257 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
258 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
259 { PREINC, TOKENTYPE_NONE, "PREINC" },
260 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
261 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
262 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
263 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
264 { SUB, TOKENTYPE_NONE, "SUB" },
265 { THING, TOKENTYPE_OPVAL, "THING" },
266 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
267 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
268 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
269 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
270 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
271 { USE, TOKENTYPE_IVAL, "USE" },
272 { WHILE, TOKENTYPE_IVAL, "WHILE" },
273 { WORD, TOKENTYPE_OPVAL, "WORD" },
274 { 0, TOKENTYPE_NONE, 0 }
277 /* dump the returned token in rv, plus any optional arg in yylval */
280 S_tokereport(pTHX_ const char* s, I32 rv)
283 const char *name = Nullch;
284 enum token_type type = TOKENTYPE_NONE;
285 const struct debug_tokens *p;
286 SV* report = newSVpvn("<== ", 4);
288 for (p = debug_tokens; p->token; p++) {
289 if (p->token == (int)rv) {
296 Perl_sv_catpv(aTHX_ report, name);
297 else if ((char)rv > ' ' && (char)rv < '~')
298 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
300 Perl_sv_catpv(aTHX_ report, "EOF");
302 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
305 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
308 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
310 case TOKENTYPE_OPNUM:
311 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
312 PL_op_name[yylval.ival]);
315 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
317 case TOKENTYPE_OPVAL:
319 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
320 PL_op_name[yylval.opval->op_type]);
322 Perl_sv_catpv(aTHX_ report, "(opval=null)");
325 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
326 if (s - PL_bufptr > 0)
327 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
329 if (PL_oldbufptr && *PL_oldbufptr)
330 sv_catpv(report, PL_tokenbuf);
332 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen_const(report));
342 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
343 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
347 S_ao(pTHX_ int toketype)
349 if (*PL_bufptr == '=') {
351 if (toketype == ANDAND)
352 yylval.ival = OP_ANDASSIGN;
353 else if (toketype == OROR)
354 yylval.ival = OP_ORASSIGN;
355 else if (toketype == DORDOR)
356 yylval.ival = OP_DORASSIGN;
364 * When Perl expects an operator and finds something else, no_op
365 * prints the warning. It always prints "<something> found where
366 * operator expected. It prints "Missing semicolon on previous line?"
367 * if the surprise occurs at the start of the line. "do you need to
368 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
369 * where the compiler doesn't know if foo is a method call or a function.
370 * It prints "Missing operator before end of line" if there's nothing
371 * after the missing operator, or "... before <...>" if there is something
372 * after the missing operator.
376 S_no_op(pTHX_ const char *what, char *s)
378 char *oldbp = PL_bufptr;
379 bool is_first = (PL_oldbufptr == PL_linestart);
385 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
386 if (ckWARN_d(WARN_SYNTAX)) {
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Missing semicolon on previous line?)\n");
390 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
392 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
393 if (t < PL_bufptr && isSPACE(*t))
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Do you need to predeclare %.*s?)\n",
396 t - PL_oldoldbufptr, PL_oldoldbufptr);
400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
401 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
409 * Complain about missing quote/regexp/heredoc terminator.
410 * If it's called with (char *)NULL then it cauterizes the line buffer.
411 * If we're in a delimited string and the delimiter is a control
412 * character, it's reformatted into a two-char sequence like ^C.
417 S_missingterm(pTHX_ char *s)
422 char *nl = strrchr(s,'\n');
428 iscntrl(PL_multi_close)
430 PL_multi_close < 32 || PL_multi_close == 127
434 tmpbuf[1] = toCTRL(PL_multi_close);
439 *tmpbuf = (char)PL_multi_close;
443 q = strchr(s,'"') ? '\'' : '"';
444 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
452 Perl_deprecate(pTHX_ const char *s)
454 if (ckWARN(WARN_DEPRECATED))
455 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
459 Perl_deprecate_old(pTHX_ const char *s)
461 /* This function should NOT be called for any new deprecated warnings */
462 /* Use Perl_deprecate instead */
464 /* It is here to maintain backward compatibility with the pre-5.8 */
465 /* warnings category hierarchy. The "deprecated" category used to */
466 /* live under the "syntax" category. It is now a top-level category */
467 /* in its own right. */
469 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
470 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
471 "Use of %s is deprecated", s);
476 * Deprecate a comma-less variable list.
482 deprecate_old("comma-less variable list");
486 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
487 * utf16-to-utf8-reversed.
490 #ifdef PERL_CR_FILTER
494 register const char *s = SvPVX_const(sv);
495 register const char *e = s + SvCUR(sv);
496 /* outer loop optimized to do nothing if there are no CR-LFs */
498 if (*s++ == '\r' && *s == '\n') {
499 /* hit a CR-LF, need to copy the rest */
500 register char *d = s - 1;
503 if (*s == '\r' && s[1] == '\n')
514 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
516 const I32 count = FILTER_READ(idx+1, sv, maxlen);
517 if (count > 0 && !maxlen)
525 * Initialize variables. Uses the Perl save_stack to save its state (for
526 * recursive calls to the parser).
530 Perl_lex_start(pTHX_ SV *line)
535 SAVEI32(PL_lex_dojoin);
536 SAVEI32(PL_lex_brackets);
537 SAVEI32(PL_lex_casemods);
538 SAVEI32(PL_lex_starts);
539 SAVEI32(PL_lex_state);
540 SAVEVPTR(PL_lex_inpat);
541 SAVEI32(PL_lex_inwhat);
542 if (PL_lex_state == LEX_KNOWNEXT) {
543 I32 toke = PL_nexttoke;
544 while (--toke >= 0) {
545 SAVEI32(PL_nexttype[toke]);
546 SAVEVPTR(PL_nextval[toke]);
548 SAVEI32(PL_nexttoke);
550 SAVECOPLINE(PL_curcop);
553 SAVEPPTR(PL_oldbufptr);
554 SAVEPPTR(PL_oldoldbufptr);
555 SAVEPPTR(PL_last_lop);
556 SAVEPPTR(PL_last_uni);
557 SAVEPPTR(PL_linestart);
558 SAVESPTR(PL_linestr);
559 SAVEGENERICPV(PL_lex_brackstack);
560 SAVEGENERICPV(PL_lex_casestack);
561 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
562 SAVESPTR(PL_lex_stuff);
563 SAVEI32(PL_lex_defer);
564 SAVEI32(PL_sublex_info.sub_inwhat);
565 SAVESPTR(PL_lex_repl);
567 SAVEINT(PL_lex_expect);
569 PL_lex_state = LEX_NORMAL;
573 New(899, PL_lex_brackstack, 120, char);
574 New(899, PL_lex_casestack, 12, char);
576 *PL_lex_casestack = '\0';
579 PL_lex_stuff = Nullsv;
580 PL_lex_repl = Nullsv;
584 PL_sublex_info.sub_inwhat = 0;
586 if (SvREADONLY(PL_linestr))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
588 s = SvPV_const(PL_linestr, len);
589 if (!len || s[len-1] != ';') {
590 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
591 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
592 sv_catpvn(PL_linestr, "\n;", 2);
594 SvTEMP_off(PL_linestr);
595 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
596 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
597 PL_last_lop = PL_last_uni = Nullch;
603 * Finalizer for lexing operations. Must be called when the parser is
604 * done with the lexer.
610 PL_doextract = FALSE;
615 * This subroutine has nothing to do with tilting, whether at windmills
616 * or pinball tables. Its name is short for "increment line". It
617 * increments the current line number in CopLINE(PL_curcop) and checks
618 * to see whether the line starts with a comment of the form
619 * # line 500 "foo.pm"
620 * If so, it sets the current line number and file to the values in the comment.
624 S_incline(pTHX_ char *s)
631 CopLINE_inc(PL_curcop);
634 while (SPACE_OR_TAB(*s)) s++;
635 if (strnEQ(s, "line", 4))
639 if (SPACE_OR_TAB(*s))
643 while (SPACE_OR_TAB(*s)) s++;
649 while (SPACE_OR_TAB(*s))
651 if (*s == '"' && (t = strchr(s+1, '"'))) {
656 for (t = s; !isSPACE(*t); t++) ;
659 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
661 if (*e != '\n' && *e != '\0')
662 return; /* false alarm */
667 CopFILE_free(PL_curcop);
668 CopFILE_set(PL_curcop, s);
671 CopLINE_set(PL_curcop, atoi(n)-1);
676 * Called to gobble the appropriate amount and type of whitespace.
677 * Skips comments as well.
681 S_skipspace(pTHX_ register char *s)
683 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
684 while (s < PL_bufend && SPACE_OR_TAB(*s))
690 SSize_t oldprevlen, oldoldprevlen;
691 SSize_t oldloplen = 0, oldunilen = 0;
692 while (s < PL_bufend && isSPACE(*s)) {
693 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
698 if (s < PL_bufend && *s == '#') {
699 while (s < PL_bufend && *s != '\n')
703 if (PL_in_eval && !PL_rsfp) {
710 /* only continue to recharge the buffer if we're at the end
711 * of the buffer, we're not reading from a source filter, and
712 * we're in normal lexing mode
714 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
715 PL_lex_state == LEX_FORMLINE)
718 /* try to recharge the buffer */
719 if ((s = filter_gets(PL_linestr, PL_rsfp,
720 (prevlen = SvCUR(PL_linestr)))) == Nullch)
722 /* end of file. Add on the -p or -n magic */
725 ";}continue{print or die qq(-p destination: $!\\n);}");
726 PL_minus_n = PL_minus_p = 0;
728 else if (PL_minus_n) {
729 sv_setpvn(PL_linestr, ";}", 2);
733 sv_setpvn(PL_linestr,";", 1);
735 /* reset variables for next time we lex */
736 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
738 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
739 PL_last_lop = PL_last_uni = Nullch;
741 /* Close the filehandle. Could be from -P preprocessor,
742 * STDIN, or a regular file. If we were reading code from
743 * STDIN (because the commandline held no -e or filename)
744 * then we don't close it, we reset it so the code can
745 * read from STDIN too.
748 if (PL_preprocess && !PL_in_eval)
749 (void)PerlProc_pclose(PL_rsfp);
750 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
751 PerlIO_clearerr(PL_rsfp);
753 (void)PerlIO_close(PL_rsfp);
758 /* not at end of file, so we only read another line */
759 /* make corresponding updates to old pointers, for yyerror() */
760 oldprevlen = PL_oldbufptr - PL_bufend;
761 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
763 oldunilen = PL_last_uni - PL_bufend;
765 oldloplen = PL_last_lop - PL_bufend;
766 PL_linestart = PL_bufptr = s + prevlen;
767 PL_bufend = s + SvCUR(PL_linestr);
769 PL_oldbufptr = s + oldprevlen;
770 PL_oldoldbufptr = s + oldoldprevlen;
772 PL_last_uni = s + oldunilen;
774 PL_last_lop = s + oldloplen;
777 /* debugger active and we're not compiling the debugger code,
778 * so store the line into the debugger's array of lines
780 if (PERLDB_LINE && PL_curstash != PL_debstash) {
781 SV *sv = NEWSV(85,0);
783 sv_upgrade(sv, SVt_PVMG);
784 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
787 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
794 * Check the unary operators to ensure there's no ambiguity in how they're
795 * used. An ambiguous piece of code would be:
797 * This doesn't mean rand() + 5. Because rand() is a unary operator,
798 * the +5 is its argument.
807 if (PL_oldoldbufptr != PL_last_uni)
809 while (isSPACE(*PL_last_uni))
811 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
812 if ((t = strchr(s, '(')) && t < PL_bufptr)
814 if (ckWARN_d(WARN_AMBIGUOUS)){
817 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
818 "Warning: Use of \"%s\" without parentheses is ambiguous",
825 * LOP : macro to build a list operator. Its behaviour has been replaced
826 * with a subroutine, S_lop() for which LOP is just another name.
829 #define LOP(f,x) return lop(f,x,s)
833 * Build a list operator (or something that might be one). The rules:
834 * - if we have a next token, then it's a list operator [why?]
835 * - if the next thing is an opening paren, then it's a function
836 * - else it's a list operator
840 S_lop(pTHX_ I32 f, int x, char *s)
846 PL_last_lop = PL_oldbufptr;
847 PL_last_lop_op = (OPCODE)f;
849 return REPORT(LSTOP);
856 return REPORT(LSTOP);
861 * When the lexer realizes it knows the next token (for instance,
862 * it is reordering tokens for the parser) then it can call S_force_next
863 * to know what token to return the next time the lexer is called. Caller
864 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
865 * handles the token correctly.
869 S_force_next(pTHX_ I32 type)
871 PL_nexttype[PL_nexttoke] = type;
873 if (PL_lex_state != LEX_KNOWNEXT) {
874 PL_lex_defer = PL_lex_state;
875 PL_lex_expect = PL_expect;
876 PL_lex_state = LEX_KNOWNEXT;
881 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
883 SV *sv = newSVpvn(start,len);
884 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
891 * When the lexer knows the next thing is a word (for instance, it has
892 * just seen -> and it knows that the next char is a word char, then
893 * it calls S_force_word to stick the next word into the PL_next lookahead.
896 * char *start : buffer position (must be within PL_linestr)
897 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
898 * int check_keyword : if true, Perl checks to make sure the word isn't
899 * a keyword (do this if the word is a label, e.g. goto FOO)
900 * int allow_pack : if true, : characters will also be allowed (require,
902 * int allow_initial_tick : used by the "sub" lexer only.
906 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
911 start = skipspace(start);
913 if (isIDFIRST_lazy_if(s,UTF) ||
914 (allow_pack && *s == ':') ||
915 (allow_initial_tick && *s == '\'') )
917 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
918 if (check_keyword && keyword(PL_tokenbuf, len))
920 if (token == METHOD) {
925 PL_expect = XOPERATOR;
928 PL_nextval[PL_nexttoke].opval
929 = (OP*)newSVOP(OP_CONST,0,
930 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
931 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
939 * Called when the lexer wants $foo *foo &foo etc, but the program
940 * text only contains the "foo" portion. The first argument is a pointer
941 * to the "foo", and the second argument is the type symbol to prefix.
942 * Forces the next token to be a "WORD".
943 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
947 S_force_ident(pTHX_ register const char *s, int kind)
950 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
951 PL_nextval[PL_nexttoke].opval = o;
954 o->op_private = OPpCONST_ENTERED;
955 /* XXX see note in pp_entereval() for why we forgo typo
956 warnings if the symbol must be introduced in an eval.
958 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
959 kind == '$' ? SVt_PV :
960 kind == '@' ? SVt_PVAV :
961 kind == '%' ? SVt_PVHV :
969 Perl_str_to_version(pTHX_ SV *sv)
974 const char *start = SvPV_const(sv,len);
975 const char *end = start + len;
976 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
977 while (start < end) {
981 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
986 retval += ((NV)n)/nshift;
995 * Forces the next token to be a version number.
996 * If the next token appears to be an invalid version number, (e.g. "v2b"),
997 * and if "guessing" is TRUE, then no new token is created (and the caller
998 * must use an alternative parsing method).
1002 S_force_version(pTHX_ char *s, int guessing)
1004 OP *version = Nullop;
1013 while (isDIGIT(*d) || *d == '_' || *d == '.')
1015 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1017 s = scan_num(s, &yylval);
1018 version = yylval.opval;
1019 ver = cSVOPx(version)->op_sv;
1020 if (SvPOK(ver) && !SvNIOK(ver)) {
1021 SvUPGRADE(ver, SVt_PVNV);
1022 SvNV_set(ver, str_to_version(ver));
1023 SvNOK_on(ver); /* hint that it is a version */
1030 /* NOTE: The parser sees the package name and the VERSION swapped */
1031 PL_nextval[PL_nexttoke].opval = version;
1039 * Tokenize a quoted string passed in as an SV. It finds the next
1040 * chunk, up to end of string or a backslash. It may make a new
1041 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1046 S_tokeq(pTHX_ SV *sv)
1049 register char *send;
1057 s = SvPV_force(sv, len);
1058 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1061 while (s < send && *s != '\\')
1066 if ( PL_hints & HINT_NEW_STRING ) {
1067 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1073 if (s + 1 < send && (s[1] == '\\'))
1074 s++; /* all that, just for this */
1079 SvCUR_set(sv, d - SvPVX_const(sv));
1081 if ( PL_hints & HINT_NEW_STRING )
1082 return new_constant(NULL, 0, "q", sv, pv, "q");
1087 * Now come three functions related to double-quote context,
1088 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1089 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1090 * interact with PL_lex_state, and create fake ( ... ) argument lists
1091 * to handle functions and concatenation.
1092 * They assume that whoever calls them will be setting up a fake
1093 * join call, because each subthing puts a ',' after it. This lets
1096 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1098 * (I'm not sure whether the spurious commas at the end of lcfirst's
1099 * arguments and join's arguments are created or not).
1104 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1106 * Pattern matching will set PL_lex_op to the pattern-matching op to
1107 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1109 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1111 * Everything else becomes a FUNC.
1113 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1114 * had an OP_CONST or OP_READLINE). This just sets us up for a
1115 * call to S_sublex_push().
1119 S_sublex_start(pTHX)
1121 const register I32 op_type = yylval.ival;
1123 if (op_type == OP_NULL) {
1124 yylval.opval = PL_lex_op;
1128 if (op_type == OP_CONST || op_type == OP_READLINE) {
1129 SV *sv = tokeq(PL_lex_stuff);
1131 if (SvTYPE(sv) == SVt_PVIV) {
1132 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1134 const char *p = SvPV_const(sv, len);
1135 SV * const nsv = newSVpvn(p, len);
1141 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1142 PL_lex_stuff = Nullsv;
1143 /* Allow <FH> // "foo" */
1144 if (op_type == OP_READLINE)
1145 PL_expect = XTERMORDORDOR;
1149 PL_sublex_info.super_state = PL_lex_state;
1150 PL_sublex_info.sub_inwhat = op_type;
1151 PL_sublex_info.sub_op = PL_lex_op;
1152 PL_lex_state = LEX_INTERPPUSH;
1156 yylval.opval = PL_lex_op;
1166 * Create a new scope to save the lexing state. The scope will be
1167 * ended in S_sublex_done. Returns a '(', starting the function arguments
1168 * to the uc, lc, etc. found before.
1169 * Sets PL_lex_state to LEX_INTERPCONCAT.
1178 PL_lex_state = PL_sublex_info.super_state;
1179 SAVEI32(PL_lex_dojoin);
1180 SAVEI32(PL_lex_brackets);
1181 SAVEI32(PL_lex_casemods);
1182 SAVEI32(PL_lex_starts);
1183 SAVEI32(PL_lex_state);
1184 SAVEVPTR(PL_lex_inpat);
1185 SAVEI32(PL_lex_inwhat);
1186 SAVECOPLINE(PL_curcop);
1187 SAVEPPTR(PL_bufptr);
1188 SAVEPPTR(PL_bufend);
1189 SAVEPPTR(PL_oldbufptr);
1190 SAVEPPTR(PL_oldoldbufptr);
1191 SAVEPPTR(PL_last_lop);
1192 SAVEPPTR(PL_last_uni);
1193 SAVEPPTR(PL_linestart);
1194 SAVESPTR(PL_linestr);
1195 SAVEGENERICPV(PL_lex_brackstack);
1196 SAVEGENERICPV(PL_lex_casestack);
1198 PL_linestr = PL_lex_stuff;
1199 PL_lex_stuff = Nullsv;
1201 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1202 = SvPVX(PL_linestr);
1203 PL_bufend += SvCUR(PL_linestr);
1204 PL_last_lop = PL_last_uni = Nullch;
1205 SAVEFREESV(PL_linestr);
1207 PL_lex_dojoin = FALSE;
1208 PL_lex_brackets = 0;
1209 New(899, PL_lex_brackstack, 120, char);
1210 New(899, PL_lex_casestack, 12, char);
1211 PL_lex_casemods = 0;
1212 *PL_lex_casestack = '\0';
1214 PL_lex_state = LEX_INTERPCONCAT;
1215 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1217 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1218 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1219 PL_lex_inpat = PL_sublex_info.sub_op;
1221 PL_lex_inpat = Nullop;
1228 * Restores lexer state after a S_sublex_push.
1235 if (!PL_lex_starts++) {
1236 SV *sv = newSVpvn("",0);
1237 if (SvUTF8(PL_linestr))
1239 PL_expect = XOPERATOR;
1240 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1244 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1245 PL_lex_state = LEX_INTERPCASEMOD;
1249 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1250 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1251 PL_linestr = PL_lex_repl;
1253 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1254 PL_bufend += SvCUR(PL_linestr);
1255 PL_last_lop = PL_last_uni = Nullch;
1256 SAVEFREESV(PL_linestr);
1257 PL_lex_dojoin = FALSE;
1258 PL_lex_brackets = 0;
1259 PL_lex_casemods = 0;
1260 *PL_lex_casestack = '\0';
1262 if (SvEVALED(PL_lex_repl)) {
1263 PL_lex_state = LEX_INTERPNORMAL;
1265 /* we don't clear PL_lex_repl here, so that we can check later
1266 whether this is an evalled subst; that means we rely on the
1267 logic to ensure sublex_done() is called again only via the
1268 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1271 PL_lex_state = LEX_INTERPCONCAT;
1272 PL_lex_repl = Nullsv;
1278 PL_bufend = SvPVX(PL_linestr);
1279 PL_bufend += SvCUR(PL_linestr);
1280 PL_expect = XOPERATOR;
1281 PL_sublex_info.sub_inwhat = 0;
1289 Extracts a pattern, double-quoted string, or transliteration. This
1292 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1293 processing a pattern (PL_lex_inpat is true), a transliteration
1294 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1296 Returns a pointer to the character scanned up to. Iff this is
1297 advanced from the start pointer supplied (ie if anything was
1298 successfully parsed), will leave an OP for the substring scanned
1299 in yylval. Caller must intuit reason for not parsing further
1300 by looking at the next characters herself.
1304 double-quoted style: \r and \n
1305 regexp special ones: \D \s
1307 backrefs: \1 (deprecated in substitution replacements)
1308 case and quoting: \U \Q \E
1309 stops on @ and $, but not for $ as tail anchor
1311 In transliterations:
1312 characters are VERY literal, except for - not at the start or end
1313 of the string, which indicates a range. scan_const expands the
1314 range to the full set of intermediate characters.
1316 In double-quoted strings:
1318 double-quoted style: \r and \n
1320 backrefs: \1 (deprecated)
1321 case and quoting: \U \Q \E
1324 scan_const does *not* construct ops to handle interpolated strings.
1325 It stops processing as soon as it finds an embedded $ or @ variable
1326 and leaves it to the caller to work out what's going on.
1328 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1330 $ in pattern could be $foo or could be tail anchor. Assumption:
1331 it's a tail anchor if $ is the last thing in the string, or if it's
1332 followed by one of ")| \n\t"
1334 \1 (backreferences) are turned into $1
1336 The structure of the code is
1337 while (there's a character to process) {
1338 handle transliteration ranges
1339 skip regexp comments
1340 skip # initiated comments in //x patterns
1341 check for embedded @foo
1342 check for embedded scalars
1344 leave intact backslashes from leave (below)
1345 deprecate \1 in strings and sub replacements
1346 handle string-changing backslashes \l \U \Q \E, etc.
1347 switch (what was escaped) {
1348 handle - in a transliteration (becomes a literal -)
1349 handle \132 octal characters
1350 handle 0x15 hex characters
1351 handle \cV (control V)
1352 handle printf backslashes (\f, \r, \n, etc)
1354 } (end if backslash)
1355 } (end while character to read)
1360 S_scan_const(pTHX_ char *start)
1362 register char *send = PL_bufend; /* end of the constant */
1363 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1364 register char *s = start; /* start of the constant */
1365 register char *d = SvPVX(sv); /* destination for copies */
1366 bool dorange = FALSE; /* are we in a translit range? */
1367 bool didrange = FALSE; /* did we just finish a range? */
1368 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1369 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1372 const char *leaveit = /* set of acceptably-backslashed characters */
1374 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1377 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1378 /* If we are doing a trans and we know we want UTF8 set expectation */
1379 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1380 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1384 while (s < send || dorange) {
1385 /* get transliterations out of the way (they're most literal) */
1386 if (PL_lex_inwhat == OP_TRANS) {
1387 /* expand a range A-Z to the full set of characters. AIE! */
1389 I32 i; /* current expanded character */
1390 I32 min; /* first character in range */
1391 I32 max; /* last character in range */
1394 char *c = (char*)utf8_hop((U8*)d, -1);
1398 *c = (char)UTF_TO_NATIVE(0xff);
1399 /* mark the range as done, and continue */
1405 i = d - SvPVX_const(sv); /* remember current offset */
1406 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1407 d = SvPVX(sv) + i; /* refresh d after realloc */
1408 d -= 2; /* eat the first char and the - */
1410 min = (U8)*d; /* first char in range */
1411 max = (U8)d[1]; /* last char in range */
1415 "Invalid range \"%c-%c\" in transliteration operator",
1416 (char)min, (char)max);
1420 if ((isLOWER(min) && isLOWER(max)) ||
1421 (isUPPER(min) && isUPPER(max))) {
1423 for (i = min; i <= max; i++)
1425 *d++ = NATIVE_TO_NEED(has_utf8,i);
1427 for (i = min; i <= max; i++)
1429 *d++ = NATIVE_TO_NEED(has_utf8,i);
1434 for (i = min; i <= max; i++)
1437 /* mark the range as done, and continue */
1443 /* range begins (ignore - as first or last char) */
1444 else if (*s == '-' && s+1 < send && s != start) {
1446 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1449 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1461 /* if we get here, we're not doing a transliteration */
1463 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1464 except for the last char, which will be done separately. */
1465 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1467 while (s+1 < send && *s != ')')
1468 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1470 else if (s[2] == '{' /* This should match regcomp.c */
1471 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1474 char *regparse = s + (s[2] == '{' ? 3 : 4);
1477 while (count && (c = *regparse)) {
1478 if (c == '\\' && regparse[1])
1486 if (*regparse != ')')
1487 regparse--; /* Leave one char for continuation. */
1488 while (s < regparse)
1489 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1493 /* likewise skip #-initiated comments in //x patterns */
1494 else if (*s == '#' && PL_lex_inpat &&
1495 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1496 while (s+1 < send && *s != '\n')
1497 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1500 /* check for embedded arrays
1501 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1503 else if (*s == '@' && s[1]
1504 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1507 /* check for embedded scalars. only stop if we're sure it's a
1510 else if (*s == '$') {
1511 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1513 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1514 break; /* in regexp, $ might be tail anchor */
1517 /* End of else if chain - OP_TRANS rejoin rest */
1520 if (*s == '\\' && s+1 < send) {
1523 /* some backslashes we leave behind */
1524 if (*leaveit && *s && strchr(leaveit, *s)) {
1525 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1526 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1530 /* deprecate \1 in strings and substitution replacements */
1531 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1532 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1534 if (ckWARN(WARN_SYNTAX))
1535 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1540 /* string-change backslash escapes */
1541 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1546 /* if we get here, it's either a quoted -, or a digit */
1549 /* quoted - in transliterations */
1551 if (PL_lex_inwhat == OP_TRANS) {
1558 if (ckWARN(WARN_MISC) &&
1561 Perl_warner(aTHX_ packWARN(WARN_MISC),
1562 "Unrecognized escape \\%c passed through",
1564 /* default action is to copy the quoted character */
1565 goto default_action;
1568 /* \132 indicates an octal constant */
1569 case '0': case '1': case '2': case '3':
1570 case '4': case '5': case '6': case '7':
1574 uv = grok_oct(s, &len, &flags, NULL);
1577 goto NUM_ESCAPE_INSERT;
1579 /* \x24 indicates a hex constant */
1583 char* e = strchr(s, '}');
1584 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1585 PERL_SCAN_DISALLOW_PREFIX;
1590 yyerror("Missing right brace on \\x{}");
1594 uv = grok_hex(s, &len, &flags, NULL);
1600 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1601 uv = grok_hex(s, &len, &flags, NULL);
1607 /* Insert oct or hex escaped character.
1608 * There will always enough room in sv since such
1609 * escapes will be longer than any UTF-8 sequence
1610 * they can end up as. */
1612 /* We need to map to chars to ASCII before doing the tests
1615 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1616 if (!has_utf8 && uv > 255) {
1617 /* Might need to recode whatever we have
1618 * accumulated so far if it contains any
1621 * (Can't we keep track of that and avoid
1622 * this rescan? --jhi)
1626 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1627 if (!NATIVE_IS_INVARIANT(*c)) {
1632 STRLEN offset = d - SvPVX_const(sv);
1634 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1638 while (src >= (const U8 *)SvPVX_const(sv)) {
1639 if (!NATIVE_IS_INVARIANT(*src)) {
1640 U8 ch = NATIVE_TO_ASCII(*src);
1641 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1642 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1652 if (has_utf8 || uv > 255) {
1653 d = (char*)uvchr_to_utf8((U8*)d, uv);
1655 if (PL_lex_inwhat == OP_TRANS &&
1656 PL_sublex_info.sub_op) {
1657 PL_sublex_info.sub_op->op_private |=
1658 (PL_lex_repl ? OPpTRANS_FROM_UTF
1671 /* \N{LATIN SMALL LETTER A} is a named character */
1675 char* e = strchr(s, '}');
1681 yyerror("Missing right brace on \\N{}");
1685 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1687 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1688 PERL_SCAN_DISALLOW_PREFIX;
1691 uv = grok_hex(s, &len, &flags, NULL);
1693 goto NUM_ESCAPE_INSERT;
1695 res = newSVpvn(s + 1, e - s - 1);
1696 res = new_constant( Nullch, 0, "charnames",
1697 res, Nullsv, "\\N{...}" );
1699 sv_utf8_upgrade(res);
1700 str = SvPV_const(res,len);
1701 #ifdef EBCDIC_NEVER_MIND
1702 /* charnames uses pack U and that has been
1703 * recently changed to do the below uni->native
1704 * mapping, so this would be redundant (and wrong,
1705 * the code point would be doubly converted).
1706 * But leave this in just in case the pack U change
1707 * gets revoked, but the semantics is still
1708 * desireable for charnames. --jhi */
1710 UV uv = utf8_to_uvchr((const U8*)str, 0);
1713 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1715 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1716 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1717 str = SvPV_const(res, len);
1721 if (!has_utf8 && SvUTF8(res)) {
1722 const char *ostart = SvPVX_const(sv);
1723 SvCUR_set(sv, d - ostart);
1726 sv_utf8_upgrade(sv);
1727 /* this just broke our allocation above... */
1728 SvGROW(sv, (STRLEN)(send - start));
1729 d = SvPVX(sv) + SvCUR(sv);
1732 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1733 const char *odest = SvPVX_const(sv);
1735 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1736 d = SvPVX(sv) + (d - odest);
1738 Copy(str, d, len, char);
1745 yyerror("Missing braces on \\N{}");
1748 /* \c is a control character */
1757 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1760 yyerror("Missing control char name in \\c");
1764 /* printf-style backslashes, formfeeds, newlines, etc */
1766 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1769 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1772 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1775 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1778 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1781 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1784 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1790 } /* end if (backslash) */
1793 /* If we started with encoded form, or already know we want it
1794 and then encode the next character */
1795 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1797 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1798 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1801 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1802 STRLEN off = d - SvPVX_const(sv);
1803 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1805 d = (char*)uvchr_to_utf8((U8*)d, uv);
1809 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1811 } /* while loop to process each character */
1813 /* terminate the string and set up the sv */
1815 SvCUR_set(sv, d - SvPVX_const(sv));
1816 if (SvCUR(sv) >= SvLEN(sv))
1817 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1820 if (PL_encoding && !has_utf8) {
1821 sv_recode_to_utf8(sv, PL_encoding);
1827 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1828 PL_sublex_info.sub_op->op_private |=
1829 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1833 /* shrink the sv if we allocated more than we used */
1834 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1835 SvPV_shrink_to_cur(sv);
1838 /* return the substring (via yylval) only if we parsed anything */
1839 if (s > PL_bufptr) {
1840 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1841 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1843 ( PL_lex_inwhat == OP_TRANS
1845 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1848 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1855 * Returns TRUE if there's more to the expression (e.g., a subscript),
1858 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1860 * ->[ and ->{ return TRUE
1861 * { and [ outside a pattern are always subscripts, so return TRUE
1862 * if we're outside a pattern and it's not { or [, then return FALSE
1863 * if we're in a pattern and the first char is a {
1864 * {4,5} (any digits around the comma) returns FALSE
1865 * if we're in a pattern and the first char is a [
1867 * [SOMETHING] has a funky algorithm to decide whether it's a
1868 * character class or not. It has to deal with things like
1869 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1870 * anything else returns TRUE
1873 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1876 S_intuit_more(pTHX_ register char *s)
1878 if (PL_lex_brackets)
1880 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1882 if (*s != '{' && *s != '[')
1887 /* In a pattern, so maybe we have {n,m}. */
1904 /* On the other hand, maybe we have a character class */
1907 if (*s == ']' || *s == '^')
1910 /* this is terrifying, and it works */
1911 int weight = 2; /* let's weigh the evidence */
1913 unsigned char un_char = 255, last_un_char;
1914 const char *send = strchr(s,']');
1915 char tmpbuf[sizeof PL_tokenbuf * 4];
1917 if (!send) /* has to be an expression */
1920 Zero(seen,256,char);
1923 else if (isDIGIT(*s)) {
1925 if (isDIGIT(s[1]) && s[2] == ']')
1931 for (; s < send; s++) {
1932 last_un_char = un_char;
1933 un_char = (unsigned char)*s;
1938 weight -= seen[un_char] * 10;
1939 if (isALNUM_lazy_if(s+1,UTF)) {
1940 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1941 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1946 else if (*s == '$' && s[1] &&
1947 strchr("[#!%*<>()-=",s[1])) {
1948 if (/*{*/ strchr("])} =",s[2]))
1957 if (strchr("wds]",s[1]))
1959 else if (seen['\''] || seen['"'])
1961 else if (strchr("rnftbxcav",s[1]))
1963 else if (isDIGIT(s[1])) {
1965 while (s[1] && isDIGIT(s[1]))
1975 if (strchr("aA01! ",last_un_char))
1977 if (strchr("zZ79~",s[1]))
1979 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1980 weight -= 5; /* cope with negative subscript */
1983 if (!isALNUM(last_un_char)
1984 && !(last_un_char == '$' || last_un_char == '@'
1985 || last_un_char == '&')
1986 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
1991 if (keyword(tmpbuf, d - tmpbuf))
1994 if (un_char == last_un_char + 1)
1996 weight -= seen[un_char];
2001 if (weight >= 0) /* probably a character class */
2011 * Does all the checking to disambiguate
2013 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2014 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2016 * First argument is the stuff after the first token, e.g. "bar".
2018 * Not a method if bar is a filehandle.
2019 * Not a method if foo is a subroutine prototyped to take a filehandle.
2020 * Not a method if it's really "Foo $bar"
2021 * Method if it's "foo $bar"
2022 * Not a method if it's really "print foo $bar"
2023 * Method if it's really "foo package::" (interpreted as package->foo)
2024 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2025 * Not a method if bar is a filehandle or package, but is quoted with
2030 S_intuit_method(pTHX_ char *start, GV *gv)
2032 char *s = start + (*start == '$');
2033 char tmpbuf[sizeof PL_tokenbuf];
2041 if ((cv = GvCVu(gv))) {
2042 const char *proto = SvPVX_const(cv);
2052 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2053 /* start is the beginning of the possible filehandle/object,
2054 * and s is the end of it
2055 * tmpbuf is a copy of it
2058 if (*start == '$') {
2059 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2064 return *s == '(' ? FUNCMETH : METHOD;
2066 if (!keyword(tmpbuf, len)) {
2067 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2072 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
2073 if (indirgv && GvCVu(indirgv))
2075 /* filehandle or package name makes it a method */
2076 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2078 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2079 return 0; /* no assumptions -- "=>" quotes bearword */
2081 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2082 newSVpvn(tmpbuf,len));
2083 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2087 return *s == '(' ? FUNCMETH : METHOD;
2095 * Return a string of Perl code to load the debugger. If PERL5DB
2096 * is set, it will return the contents of that, otherwise a
2097 * compile-time require of perl5db.pl.
2104 const char *pdb = PerlEnv_getenv("PERL5DB");
2108 SETERRNO(0,SS_NORMAL);
2109 return "BEGIN { require 'perl5db.pl' }";
2115 /* Encoded script support. filter_add() effectively inserts a
2116 * 'pre-processing' function into the current source input stream.
2117 * Note that the filter function only applies to the current source file
2118 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2120 * The datasv parameter (which may be NULL) can be used to pass
2121 * private data to this instance of the filter. The filter function
2122 * can recover the SV using the FILTER_DATA macro and use it to
2123 * store private buffers and state information.
2125 * The supplied datasv parameter is upgraded to a PVIO type
2126 * and the IoDIRP/IoANY field is used to store the function pointer,
2127 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2128 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2129 * private use must be set using malloc'd pointers.
2133 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2138 if (!PL_rsfp_filters)
2139 PL_rsfp_filters = newAV();
2141 datasv = NEWSV(255,0);
2142 SvUPGRADE(datasv, SVt_PVIO);
2143 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2144 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2145 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2146 IoANY(datasv), SvPV_nolen(datasv)));
2147 av_unshift(PL_rsfp_filters, 1);
2148 av_store(PL_rsfp_filters, 0, datasv) ;
2153 /* Delete most recently added instance of this filter function. */
2155 Perl_filter_del(pTHX_ filter_t funcp)
2160 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2162 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2164 /* if filter is on top of stack (usual case) just pop it off */
2165 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2166 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2167 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2168 IoANY(datasv) = (void *)NULL;
2169 sv_free(av_pop(PL_rsfp_filters));
2173 /* we need to search for the correct entry and clear it */
2174 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2178 /* Invoke the idxth filter function for the current rsfp. */
2179 /* maxlen 0 = read one text line */
2181 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2186 if (!PL_rsfp_filters)
2188 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2189 /* Provide a default input filter to make life easy. */
2190 /* Note that we append to the line. This is handy. */
2191 DEBUG_P(PerlIO_printf(Perl_debug_log,
2192 "filter_read %d: from rsfp\n", idx));
2196 const int old_len = SvCUR(buf_sv);
2198 /* ensure buf_sv is large enough */
2199 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2200 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2201 if (PerlIO_error(PL_rsfp))
2202 return -1; /* error */
2204 return 0 ; /* end of file */
2206 SvCUR_set(buf_sv, old_len + len) ;
2209 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2210 if (PerlIO_error(PL_rsfp))
2211 return -1; /* error */
2213 return 0 ; /* end of file */
2216 return SvCUR(buf_sv);
2218 /* Skip this filter slot if filter has been deleted */
2219 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2220 DEBUG_P(PerlIO_printf(Perl_debug_log,
2221 "filter_read %d: skipped (filter deleted)\n",
2223 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2225 /* Get function pointer hidden within datasv */
2226 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2227 DEBUG_P(PerlIO_printf(Perl_debug_log,
2228 "filter_read %d: via function %p (%s)\n",
2229 idx, datasv, SvPV_nolen_const(datasv)));
2230 /* Call function. The function is expected to */
2231 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2232 /* Return: <0:error, =0:eof, >0:not eof */
2233 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2237 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2239 #ifdef PERL_CR_FILTER
2240 if (!PL_rsfp_filters) {
2241 filter_add(S_cr_textfilter,NULL);
2244 if (PL_rsfp_filters) {
2246 SvCUR_set(sv, 0); /* start with empty line */
2247 if (FILTER_READ(0, sv, 0) > 0)
2248 return ( SvPVX(sv) ) ;
2253 return (sv_gets(sv, fp, append));
2257 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2261 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2265 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2266 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2268 return GvHV(gv); /* Foo:: */
2271 /* use constant CLASS => 'MyClass' */
2272 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2274 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2275 pkgname = SvPV_nolen_const(sv);
2279 return gv_stashpv(pkgname, FALSE);
2283 static const char* const exp_name[] =
2284 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2285 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2292 Works out what to call the token just pulled out of the input
2293 stream. The yacc parser takes care of taking the ops we return and
2294 stitching them into a tree.
2300 if read an identifier
2301 if we're in a my declaration
2302 croak if they tried to say my($foo::bar)
2303 build the ops for a my() declaration
2304 if it's an access to a my() variable
2305 are we in a sort block?
2306 croak if my($a); $a <=> $b
2307 build ops for access to a my() variable
2308 if in a dq string, and they've said @foo and we can't find @foo
2310 build ops for a bareword
2311 if we already built the token before, use it.
2316 #pragma segment Perl_yylex
2321 register char *s = PL_bufptr;
2328 I32 orig_keyword = 0;
2331 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2332 lex_state_names[PL_lex_state]);
2334 /* check if there's an identifier for us to look at */
2335 if (PL_pending_ident)
2336 return REPORT(S_pending_ident(aTHX));
2338 /* no identifier pending identification */
2340 switch (PL_lex_state) {
2342 case LEX_NORMAL: /* Some compilers will produce faster */
2343 case LEX_INTERPNORMAL: /* code if we comment these out. */
2347 /* when we've already built the next token, just pull it out of the queue */
2350 yylval = PL_nextval[PL_nexttoke];
2352 PL_lex_state = PL_lex_defer;
2353 PL_expect = PL_lex_expect;
2354 PL_lex_defer = LEX_NORMAL;
2356 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2357 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2358 (IV)PL_nexttype[PL_nexttoke]); });
2360 return REPORT(PL_nexttype[PL_nexttoke]);
2362 /* interpolated case modifiers like \L \U, including \Q and \E.
2363 when we get here, PL_bufptr is at the \
2365 case LEX_INTERPCASEMOD:
2367 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2368 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2370 /* handle \E or end of string */
2371 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2373 if (PL_lex_casemods) {
2374 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2375 PL_lex_casestack[PL_lex_casemods] = '\0';
2377 if (PL_bufptr != PL_bufend
2378 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2380 PL_lex_state = LEX_INTERPCONCAT;
2384 if (PL_bufptr != PL_bufend)
2386 PL_lex_state = LEX_INTERPCONCAT;
2390 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2391 "### Saw case modifier at '%s'\n", PL_bufptr); });
2393 if (s[1] == '\\' && s[2] == 'E') {
2395 PL_lex_state = LEX_INTERPCONCAT;
2399 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2400 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2401 if ((*s == 'L' || *s == 'U') &&
2402 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2403 PL_lex_casestack[--PL_lex_casemods] = '\0';
2406 if (PL_lex_casemods > 10)
2407 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2408 PL_lex_casestack[PL_lex_casemods++] = *s;
2409 PL_lex_casestack[PL_lex_casemods] = '\0';
2410 PL_lex_state = LEX_INTERPCONCAT;
2411 PL_nextval[PL_nexttoke].ival = 0;
2414 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2416 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2418 PL_nextval[PL_nexttoke].ival = OP_LC;
2420 PL_nextval[PL_nexttoke].ival = OP_UC;
2422 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2424 Perl_croak(aTHX_ "panic: yylex");
2428 if (PL_lex_starts) {
2431 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2432 if (PL_lex_casemods == 1 && PL_lex_inpat)
2441 case LEX_INTERPPUSH:
2442 return REPORT(sublex_push());
2444 case LEX_INTERPSTART:
2445 if (PL_bufptr == PL_bufend)
2446 return REPORT(sublex_done());
2447 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2448 "### Interpolated variable at '%s'\n", PL_bufptr); });
2450 PL_lex_dojoin = (*PL_bufptr == '@');
2451 PL_lex_state = LEX_INTERPNORMAL;
2452 if (PL_lex_dojoin) {
2453 PL_nextval[PL_nexttoke].ival = 0;
2455 force_ident("\"", '$');
2456 PL_nextval[PL_nexttoke].ival = 0;
2458 PL_nextval[PL_nexttoke].ival = 0;
2460 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2463 if (PL_lex_starts++) {
2465 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2466 if (!PL_lex_casemods && PL_lex_inpat)
2473 case LEX_INTERPENDMAYBE:
2474 if (intuit_more(PL_bufptr)) {
2475 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2481 if (PL_lex_dojoin) {
2482 PL_lex_dojoin = FALSE;
2483 PL_lex_state = LEX_INTERPCONCAT;
2486 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2487 && SvEVALED(PL_lex_repl))
2489 if (PL_bufptr != PL_bufend)
2490 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2491 PL_lex_repl = Nullsv;
2494 case LEX_INTERPCONCAT:
2496 if (PL_lex_brackets)
2497 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2499 if (PL_bufptr == PL_bufend)
2500 return REPORT(sublex_done());
2502 if (SvIVX(PL_linestr) == '\'') {
2503 SV *sv = newSVsv(PL_linestr);
2506 else if ( PL_hints & HINT_NEW_RE )
2507 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2508 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2512 s = scan_const(PL_bufptr);
2514 PL_lex_state = LEX_INTERPCASEMOD;
2516 PL_lex_state = LEX_INTERPSTART;
2519 if (s != PL_bufptr) {
2520 PL_nextval[PL_nexttoke] = yylval;
2523 if (PL_lex_starts++) {
2524 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2525 if (!PL_lex_casemods && PL_lex_inpat)
2538 PL_lex_state = LEX_NORMAL;
2539 s = scan_formline(PL_bufptr);
2540 if (!PL_lex_formbrack)
2546 PL_oldoldbufptr = PL_oldbufptr;
2549 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
2550 exp_name[PL_expect], s);
2556 if (isIDFIRST_lazy_if(s,UTF))
2558 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2561 goto fake_eof; /* emulate EOF on ^D or ^Z */
2566 if (PL_lex_brackets) {
2567 if (PL_lex_formbrack)
2568 yyerror("Format not terminated");
2570 yyerror("Missing right curly or square bracket");
2572 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2573 "### Tokener got EOF\n");
2577 if (s++ < PL_bufend)
2578 goto retry; /* ignore stray nulls */
2581 if (!PL_in_eval && !PL_preambled) {
2582 PL_preambled = TRUE;
2583 sv_setpv(PL_linestr,incl_perldb());
2584 if (SvCUR(PL_linestr))
2585 sv_catpvn(PL_linestr,";", 1);
2587 while(AvFILLp(PL_preambleav) >= 0) {
2588 SV *tmpsv = av_shift(PL_preambleav);
2589 sv_catsv(PL_linestr, tmpsv);
2590 sv_catpvn(PL_linestr, ";", 1);
2593 sv_free((SV*)PL_preambleav);
2594 PL_preambleav = NULL;
2596 if (PL_minus_n || PL_minus_p) {
2597 sv_catpv(PL_linestr, "LINE: while (<>) {");
2599 sv_catpv(PL_linestr,"chomp;");
2602 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2603 || *PL_splitstr == '"')
2604 && strchr(PL_splitstr + 1, *PL_splitstr))
2605 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2607 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2608 bytes can be used as quoting characters. :-) */
2609 /* The count here deliberately includes the NUL
2610 that terminates the C string constant. This
2611 embeds the opening NUL into the string. */
2612 const char *splits = PL_splitstr;
2613 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2616 if (*splits == '\\')
2617 sv_catpvn(PL_linestr, splits, 1);
2618 sv_catpvn(PL_linestr, splits, 1);
2619 } while (*splits++);
2620 /* This loop will embed the trailing NUL of
2621 PL_linestr as the last thing it does before
2623 sv_catpvn(PL_linestr, ");", 2);
2627 sv_catpv(PL_linestr,"our @F=split(' ');");
2630 sv_catpvn(PL_linestr, "\n", 1);
2631 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2632 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2633 PL_last_lop = PL_last_uni = Nullch;
2634 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2635 SV *sv = NEWSV(85,0);
2637 sv_upgrade(sv, SVt_PVMG);
2638 sv_setsv(sv,PL_linestr);
2641 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2646 bof = PL_rsfp ? TRUE : FALSE;
2647 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2650 if (PL_preprocess && !PL_in_eval)
2651 (void)PerlProc_pclose(PL_rsfp);
2652 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2653 PerlIO_clearerr(PL_rsfp);
2655 (void)PerlIO_close(PL_rsfp);
2657 PL_doextract = FALSE;
2659 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2660 sv_setpv(PL_linestr,PL_minus_p
2661 ? ";}continue{print;}" : ";}");
2662 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2663 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2664 PL_last_lop = PL_last_uni = Nullch;
2665 PL_minus_n = PL_minus_p = 0;
2668 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2669 PL_last_lop = PL_last_uni = Nullch;
2670 sv_setpvn(PL_linestr,"",0);
2671 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2673 /* If it looks like the start of a BOM or raw UTF-16,
2674 * check if it in fact is. */
2680 #ifdef PERLIO_IS_STDIO
2681 # ifdef __GNU_LIBRARY__
2682 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2683 # define FTELL_FOR_PIPE_IS_BROKEN
2687 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2688 # define FTELL_FOR_PIPE_IS_BROKEN
2693 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2694 /* This loses the possibility to detect the bof
2695 * situation on perl -P when the libc5 is being used.
2696 * Workaround? Maybe attach some extra state to PL_rsfp?
2699 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2701 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2704 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2705 s = swallow_bom((U8*)s);
2709 /* Incest with pod. */
2710 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2711 sv_setpvn(PL_linestr, "", 0);
2712 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2713 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2714 PL_last_lop = PL_last_uni = Nullch;
2715 PL_doextract = FALSE;
2719 } while (PL_doextract);
2720 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2721 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2722 SV *sv = NEWSV(85,0);
2724 sv_upgrade(sv, SVt_PVMG);
2725 sv_setsv(sv,PL_linestr);
2728 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2731 PL_last_lop = PL_last_uni = Nullch;
2732 if (CopLINE(PL_curcop) == 1) {
2733 while (s < PL_bufend && isSPACE(*s))
2735 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2739 if (*s == '#' && *(s+1) == '!')
2741 #ifdef ALTERNATE_SHEBANG
2743 static char const as[] = ALTERNATE_SHEBANG;
2744 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2745 d = s + (sizeof(as) - 1);
2747 #endif /* ALTERNATE_SHEBANG */
2756 while (*d && !isSPACE(*d))
2760 #ifdef ARG_ZERO_IS_SCRIPT
2761 if (ipathend > ipath) {
2763 * HP-UX (at least) sets argv[0] to the script name,
2764 * which makes $^X incorrect. And Digital UNIX and Linux,
2765 * at least, set argv[0] to the basename of the Perl
2766 * interpreter. So, having found "#!", we'll set it right.
2768 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2769 assert(SvPOK(x) || SvGMAGICAL(x));
2770 if (sv_eq(x, CopFILESV(PL_curcop))) {
2771 sv_setpvn(x, ipath, ipathend - ipath);
2777 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2778 const char *lstart = SvPV_const(x,llen);
2780 bstart += blen - llen;
2781 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2782 sv_setpvn(x, ipath, ipathend - ipath);
2787 TAINT_NOT; /* $^X is always tainted, but that's OK */
2789 #endif /* ARG_ZERO_IS_SCRIPT */
2794 d = instr(s,"perl -");
2796 d = instr(s,"perl");
2798 /* avoid getting into infinite loops when shebang
2799 * line contains "Perl" rather than "perl" */
2801 for (d = ipathend-4; d >= ipath; --d) {
2802 if ((*d == 'p' || *d == 'P')
2803 && !ibcmp(d, "perl", 4))
2813 #ifdef ALTERNATE_SHEBANG
2815 * If the ALTERNATE_SHEBANG on this system starts with a
2816 * character that can be part of a Perl expression, then if
2817 * we see it but not "perl", we're probably looking at the
2818 * start of Perl code, not a request to hand off to some
2819 * other interpreter. Similarly, if "perl" is there, but
2820 * not in the first 'word' of the line, we assume the line
2821 * contains the start of the Perl program.
2823 if (d && *s != '#') {
2824 const char *c = ipath;
2825 while (*c && !strchr("; \t\r\n\f\v#", *c))
2828 d = Nullch; /* "perl" not in first word; ignore */
2830 *s = '#'; /* Don't try to parse shebang line */
2832 #endif /* ALTERNATE_SHEBANG */
2833 #ifndef MACOS_TRADITIONAL
2838 !instr(s,"indir") &&
2839 instr(PL_origargv[0],"perl"))
2846 while (s < PL_bufend && isSPACE(*s))
2848 if (s < PL_bufend) {
2849 Newz(899,newargv,PL_origargc+3,char*);
2851 while (s < PL_bufend && !isSPACE(*s))
2854 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2857 newargv = PL_origargv;
2860 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2862 Perl_croak(aTHX_ "Can't exec %s", ipath);
2866 const U32 oldpdb = PL_perldb;
2867 const bool oldn = PL_minus_n;
2868 const bool oldp = PL_minus_p;
2870 while (*d && !isSPACE(*d)) d++;
2871 while (SPACE_OR_TAB(*d)) d++;
2874 const bool switches_done = PL_doswitches;
2876 if (*d == 'M' || *d == 'm' || *d == 'C') {
2878 while (*d && !isSPACE(*d)) d++;
2879 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2882 d = moreswitches(d);
2884 if (PL_doswitches && !switches_done) {
2885 int argc = PL_origargc;
2886 char **argv = PL_origargv;
2889 } while (argc && argv[0][0] == '-' && argv[0][1]);
2890 init_argv_symbols(argc,argv);
2892 if ((PERLDB_LINE && !oldpdb) ||
2893 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2894 /* if we have already added "LINE: while (<>) {",
2895 we must not do it again */
2897 sv_setpvn(PL_linestr, "", 0);
2898 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2899 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2900 PL_last_lop = PL_last_uni = Nullch;
2901 PL_preambled = FALSE;
2903 (void)gv_fetchfile(PL_origfilename);
2906 if (PL_doswitches && !switches_done) {
2907 int argc = PL_origargc;
2908 char **argv = PL_origargv;
2911 } while (argc && argv[0][0] == '-' && argv[0][1]);
2912 init_argv_symbols(argc,argv);
2918 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2920 PL_lex_state = LEX_FORMLINE;
2925 #ifdef PERL_STRICT_CR
2926 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2928 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2930 case ' ': case '\t': case '\f': case 013:
2931 #ifdef MACOS_TRADITIONAL
2938 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2939 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2940 /* handle eval qq[#line 1 "foo"\n ...] */
2941 CopLINE_dec(PL_curcop);
2945 while (s < d && *s != '\n')
2949 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2950 Perl_croak(aTHX_ "panic: input overflow");
2952 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2954 PL_lex_state = LEX_FORMLINE;
2964 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2971 while (s < PL_bufend && SPACE_OR_TAB(*s))
2974 if (strnEQ(s,"=>",2)) {
2975 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2976 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2977 "### Saw unary minus before =>, forcing word '%s'\n", s);
2979 OPERATOR('-'); /* unary minus */
2981 PL_last_uni = PL_oldbufptr;
2983 case 'r': ftst = OP_FTEREAD; break;
2984 case 'w': ftst = OP_FTEWRITE; break;
2985 case 'x': ftst = OP_FTEEXEC; break;
2986 case 'o': ftst = OP_FTEOWNED; break;
2987 case 'R': ftst = OP_FTRREAD; break;
2988 case 'W': ftst = OP_FTRWRITE; break;
2989 case 'X': ftst = OP_FTREXEC; break;
2990 case 'O': ftst = OP_FTROWNED; break;
2991 case 'e': ftst = OP_FTIS; break;
2992 case 'z': ftst = OP_FTZERO; break;
2993 case 's': ftst = OP_FTSIZE; break;
2994 case 'f': ftst = OP_FTFILE; break;
2995 case 'd': ftst = OP_FTDIR; break;
2996 case 'l': ftst = OP_FTLINK; break;
2997 case 'p': ftst = OP_FTPIPE; break;
2998 case 'S': ftst = OP_FTSOCK; break;
2999 case 'u': ftst = OP_FTSUID; break;
3000 case 'g': ftst = OP_FTSGID; break;
3001 case 'k': ftst = OP_FTSVTX; break;
3002 case 'b': ftst = OP_FTBLK; break;
3003 case 'c': ftst = OP_FTCHR; break;
3004 case 't': ftst = OP_FTTTY; break;
3005 case 'T': ftst = OP_FTTEXT; break;
3006 case 'B': ftst = OP_FTBINARY; break;
3007 case 'M': case 'A': case 'C':
3008 gv_fetchpv("\024",TRUE, SVt_PV);
3010 case 'M': ftst = OP_FTMTIME; break;
3011 case 'A': ftst = OP_FTATIME; break;
3012 case 'C': ftst = OP_FTCTIME; break;
3020 PL_last_lop_op = (OPCODE)ftst;
3021 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3022 "### Saw file test %c\n", (int)ftst);
3027 /* Assume it was a minus followed by a one-letter named
3028 * subroutine call (or a -bareword), then. */
3029 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3030 "### '-%c' looked like a file test but was not\n",
3039 if (PL_expect == XOPERATOR)
3044 else if (*s == '>') {
3047 if (isIDFIRST_lazy_if(s,UTF)) {
3048 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3056 if (PL_expect == XOPERATOR)
3059 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3061 OPERATOR('-'); /* unary minus */
3068 if (PL_expect == XOPERATOR)
3073 if (PL_expect == XOPERATOR)
3076 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3082 if (PL_expect != XOPERATOR) {
3083 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3084 PL_expect = XOPERATOR;
3085 force_ident(PL_tokenbuf, '*');
3098 if (PL_expect == XOPERATOR) {
3102 PL_tokenbuf[0] = '%';
3103 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3104 if (!PL_tokenbuf[1]) {
3107 PL_pending_ident = '%';
3126 switch (PL_expect) {
3129 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3131 PL_bufptr = s; /* update in case we back off */
3137 PL_expect = XTERMBLOCK;
3141 while (isIDFIRST_lazy_if(s,UTF)) {
3142 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3143 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3144 if (tmp < 0) tmp = -tmp;
3160 d = scan_str(d,TRUE,TRUE);
3162 /* MUST advance bufptr here to avoid bogus
3163 "at end of line" context messages from yyerror().
3165 PL_bufptr = s + len;
3166 yyerror("Unterminated attribute parameter in attribute list");
3169 return REPORT(0); /* EOF indicator */
3173 SV *sv = newSVpvn(s, len);
3174 sv_catsv(sv, PL_lex_stuff);
3175 attrs = append_elem(OP_LIST, attrs,
3176 newSVOP(OP_CONST, 0, sv));
3177 SvREFCNT_dec(PL_lex_stuff);
3178 PL_lex_stuff = Nullsv;
3181 if (len == 6 && strnEQ(s, "unique", len)) {
3182 if (PL_in_my == KEY_our)
3184 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3186 ; /* skip to avoid loading attributes.pm */
3189 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3192 /* NOTE: any CV attrs applied here need to be part of
3193 the CVf_BUILTIN_ATTRS define in cv.h! */
3194 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3195 CvLVALUE_on(PL_compcv);
3196 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3197 CvLOCKED_on(PL_compcv);
3198 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3199 CvMETHOD_on(PL_compcv);
3200 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3201 CvASSERTION_on(PL_compcv);
3202 /* After we've set the flags, it could be argued that
3203 we don't need to do the attributes.pm-based setting
3204 process, and shouldn't bother appending recognized
3205 flags. To experiment with that, uncomment the
3206 following "else". (Note that's already been
3207 uncommented. That keeps the above-applied built-in
3208 attributes from being intercepted (and possibly
3209 rejected) by a package's attribute routines, but is
3210 justified by the performance win for the common case
3211 of applying only built-in attributes.) */
3213 attrs = append_elem(OP_LIST, attrs,
3214 newSVOP(OP_CONST, 0,
3218 if (*s == ':' && s[1] != ':')
3221 break; /* require real whitespace or :'s */
3223 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3224 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3225 const char q = ((*s == '\'') ? '"' : '\'');
3226 /* If here for an expression, and parsed no attrs, back off. */
3227 if (tmp == '=' && !attrs) {
3231 /* MUST advance bufptr here to avoid bogus "at end of line"
3232 context messages from yyerror().
3236 yyerror("Unterminated attribute list");
3238 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3246 PL_nextval[PL_nexttoke].opval = attrs;
3254 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3255 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3272 if (PL_lex_brackets <= 0)
3273 yyerror("Unmatched right square bracket");
3276 if (PL_lex_state == LEX_INTERPNORMAL) {
3277 if (PL_lex_brackets == 0) {
3278 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3279 PL_lex_state = LEX_INTERPEND;
3286 if (PL_lex_brackets > 100) {
3287 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3289 switch (PL_expect) {
3291 if (PL_lex_formbrack) {
3295 if (PL_oldoldbufptr == PL_last_lop)
3296 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3298 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3299 OPERATOR(HASHBRACK);
3301 while (s < PL_bufend && SPACE_OR_TAB(*s))
3304 PL_tokenbuf[0] = '\0';
3305 if (d < PL_bufend && *d == '-') {
3306 PL_tokenbuf[0] = '-';
3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
3311 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3312 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3314 while (d < PL_bufend && SPACE_OR_TAB(*d))
3317 const char minus = (PL_tokenbuf[0] == '-');
3318 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3326 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3331 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3336 if (PL_oldoldbufptr == PL_last_lop)
3337 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3339 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3342 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3344 /* This hack is to get the ${} in the message. */
3346 yyerror("syntax error");
3349 OPERATOR(HASHBRACK);
3351 /* This hack serves to disambiguate a pair of curlies
3352 * as being a block or an anon hash. Normally, expectation
3353 * determines that, but in cases where we're not in a
3354 * position to expect anything in particular (like inside
3355 * eval"") we have to resolve the ambiguity. This code
3356 * covers the case where the first term in the curlies is a
3357 * quoted string. Most other cases need to be explicitly
3358 * disambiguated by prepending a "+" before the opening
3359 * curly in order to force resolution as an anon hash.
3361 * XXX should probably propagate the outer expectation
3362 * into eval"" to rely less on this hack, but that could
3363 * potentially break current behavior of eval"".
3367 if (*s == '\'' || *s == '"' || *s == '`') {
3368 /* common case: get past first string, handling escapes */
3369 for (t++; t < PL_bufend && *t != *s;)
3370 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3374 else if (*s == 'q') {
3377 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3380 /* skip q//-like construct */
3382 char open, close, term;
3385 while (t < PL_bufend && isSPACE(*t))
3387 /* check for q => */
3388 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3389 OPERATOR(HASHBRACK);
3393 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3397 for (t++; t < PL_bufend; t++) {
3398 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3400 else if (*t == open)
3404 for (t++; t < PL_bufend; t++) {
3405 if (*t == '\\' && t+1 < PL_bufend)
3407 else if (*t == close && --brackets <= 0)
3409 else if (*t == open)
3416 /* skip plain q word */
3417 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3420 else if (isALNUM_lazy_if(t,UTF)) {
3422 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3425 while (t < PL_bufend && isSPACE(*t))
3427 /* if comma follows first term, call it an anon hash */
3428 /* XXX it could be a comma expression with loop modifiers */
3429 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3430 || (*t == '=' && t[1] == '>')))
3431 OPERATOR(HASHBRACK);
3432 if (PL_expect == XREF)
3435 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3441 yylval.ival = CopLINE(PL_curcop);
3442 if (isSPACE(*s) || *s == '#')
3443 PL_copline = NOLINE; /* invalidate current command line number */
3448 if (PL_lex_brackets <= 0)
3449 yyerror("Unmatched right curly bracket");
3451 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3452 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3453 PL_lex_formbrack = 0;
3454 if (PL_lex_state == LEX_INTERPNORMAL) {
3455 if (PL_lex_brackets == 0) {
3456 if (PL_expect & XFAKEBRACK) {
3457 PL_expect &= XENUMMASK;
3458 PL_lex_state = LEX_INTERPEND;
3460 return yylex(); /* ignore fake brackets */
3462 if (*s == '-' && s[1] == '>')
3463 PL_lex_state = LEX_INTERPENDMAYBE;
3464 else if (*s != '[' && *s != '{')
3465 PL_lex_state = LEX_INTERPEND;
3468 if (PL_expect & XFAKEBRACK) {
3469 PL_expect &= XENUMMASK;
3471 return yylex(); /* ignore fake brackets */
3481 if (PL_expect == XOPERATOR) {
3482 if (ckWARN(WARN_SEMICOLON)
3483 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3485 CopLINE_dec(PL_curcop);
3486 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3487 CopLINE_inc(PL_curcop);
3492 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3494 PL_expect = XOPERATOR;
3495 force_ident(PL_tokenbuf, '&');
3499 yylval.ival = (OPpENTERSUB_AMPER<<8);
3518 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3519 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3521 if (PL_expect == XSTATE && isALPHA(tmp) &&
3522 (s == PL_linestart+1 || s[-2] == '\n') )
3524 if (PL_in_eval && !PL_rsfp) {
3529 if (strnEQ(s,"=cut",4)) {
3543 PL_doextract = TRUE;
3546 if (PL_lex_brackets < PL_lex_formbrack) {
3548 #ifdef PERL_STRICT_CR
3549 for (t = s; SPACE_OR_TAB(*t); t++) ;
3551 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3553 if (*t == '\n' || *t == '#') {
3565 /* was this !=~ where !~ was meant?
3566 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3568 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3569 const char *t = s+1;
3571 while (t < PL_bufend && isSPACE(*t))
3574 if (*t == '/' || *t == '?' ||
3575 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3576 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3577 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3578 "!=~ should be !~");
3587 if (PL_expect != XOPERATOR) {
3588 if (s[1] != '<' && !strchr(s,'>'))
3591 s = scan_heredoc(s);
3593 s = scan_inputsymbol(s);
3594 TERM(sublex_start());
3599 SHop(OP_LEFT_SHIFT);
3613 SHop(OP_RIGHT_SHIFT);
3622 if (PL_expect == XOPERATOR) {
3623 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3626 return REPORT(','); /* grandfather non-comma-format format */
3630 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3631 PL_tokenbuf[0] = '@';
3632 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3633 sizeof PL_tokenbuf - 1, FALSE);
3634 if (PL_expect == XOPERATOR)
3635 no_op("Array length", s);
3636 if (!PL_tokenbuf[1])
3638 PL_expect = XOPERATOR;
3639 PL_pending_ident = '#';
3643 PL_tokenbuf[0] = '$';
3644 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3645 sizeof PL_tokenbuf - 1, FALSE);
3646 if (PL_expect == XOPERATOR)
3648 if (!PL_tokenbuf[1]) {
3650 yyerror("Final $ should be \\$ or $name");
3654 /* This kludge not intended to be bulletproof. */
3655 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3656 yylval.opval = newSVOP(OP_CONST, 0,
3657 newSViv(PL_compiling.cop_arybase));
3658 yylval.opval->op_private = OPpCONST_ARYBASE;
3664 if (PL_lex_state == LEX_NORMAL)
3667 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3670 PL_tokenbuf[0] = '@';
3671 if (ckWARN(WARN_SYNTAX)) {
3673 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3676 PL_bufptr = skipspace(PL_bufptr);
3677 while (t < PL_bufend && *t != ']')
3679 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3680 "Multidimensional syntax %.*s not supported",
3681 (t - PL_bufptr) + 1, PL_bufptr);
3685 else if (*s == '{') {
3686 PL_tokenbuf[0] = '%';
3687 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3688 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3690 char tmpbuf[sizeof PL_tokenbuf];
3691 for (t++; isSPACE(*t); t++) ;
3692 if (isIDFIRST_lazy_if(t,UTF)) {
3694 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3695 for (; isSPACE(*t); t++) ;
3696 if (*t == ';' && get_cv(tmpbuf, FALSE))
3697 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3698 "You need to quote \"%s\"", tmpbuf);
3704 PL_expect = XOPERATOR;
3705 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3706 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3707 if (!islop || PL_last_lop_op == OP_GREPSTART)
3708 PL_expect = XOPERATOR;
3709 else if (strchr("$@\"'`q", *s))
3710 PL_expect = XTERM; /* e.g. print $fh "foo" */
3711 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3712 PL_expect = XTERM; /* e.g. print $fh &sub */
3713 else if (isIDFIRST_lazy_if(s,UTF)) {
3714 char tmpbuf[sizeof PL_tokenbuf];
3715 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3716 if ((tmp = keyword(tmpbuf, len))) {
3717 /* binary operators exclude handle interpretations */
3729 PL_expect = XTERM; /* e.g. print $fh length() */
3734 PL_expect = XTERM; /* e.g. print $fh subr() */
3737 else if (isDIGIT(*s))
3738 PL_expect = XTERM; /* e.g. print $fh 3 */
3739 else if (*s == '.' && isDIGIT(s[1]))
3740 PL_expect = XTERM; /* e.g. print $fh .3 */
3741 else if ((*s == '?' || *s == '-' || *s == '+')
3742 && !isSPACE(s[1]) && s[1] != '=')
3743 PL_expect = XTERM; /* e.g. print $fh -1 */
3744 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3745 PL_expect = XTERM; /* e.g. print $fh /.../
3746 XXX except DORDOR operator */
3747 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3748 PL_expect = XTERM; /* print $fh <<"EOF" */
3750 PL_pending_ident = '$';
3754 if (PL_expect == XOPERATOR)
3756 PL_tokenbuf[0] = '@';
3757 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3758 if (!PL_tokenbuf[1]) {
3761 if (PL_lex_state == LEX_NORMAL)
3763 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3765 PL_tokenbuf[0] = '%';
3767 /* Warn about @ where they meant $. */
3768 if (ckWARN(WARN_SYNTAX)) {
3769 if (*s == '[' || *s == '{') {
3770 const char *t = s + 1;
3771 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3773 if (*t == '}' || *t == ']') {
3775 PL_bufptr = skipspace(PL_bufptr);
3776 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3777 "Scalar value %.*s better written as $%.*s",
3778 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3783 PL_pending_ident = '@';
3786 case '/': /* may be division, defined-or, or pattern */
3787 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3791 case '?': /* may either be conditional or pattern */
3792 if(PL_expect == XOPERATOR) {
3800 /* A // operator. */
3810 /* Disable warning on "study /blah/" */
3811 if (PL_oldoldbufptr == PL_last_uni
3812 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3813 || memNE(PL_last_uni, "study", 5)
3814 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3817 s = scan_pat(s,OP_MATCH);
3818 TERM(sublex_start());
3822 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3823 #ifdef PERL_STRICT_CR
3826 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3828 && (s == PL_linestart || s[-1] == '\n') )
3830 PL_lex_formbrack = 0;
3834 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3840 yylval.ival = OPf_SPECIAL;
3846 if (PL_expect != XOPERATOR)
3851 case '0': case '1': case '2': case '3': case '4':
3852 case '5': case '6': case '7': case '8': case '9':
3853 s = scan_num(s, &yylval);
3854 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3855 "### Saw number in '%s'\n", s);
3857 if (PL_expect == XOPERATOR)
3862 s = scan_str(s,FALSE,FALSE);
3863 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3864 "### Saw string before '%s'\n", s);
3866 if (PL_expect == XOPERATOR) {
3867 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3870 return REPORT(','); /* grandfather non-comma-format format */
3876 missingterm((char*)0);
3877 yylval.ival = OP_CONST;
3878 TERM(sublex_start());
3881 s = scan_str(s,FALSE,FALSE);
3882 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3883 "### Saw string before '%s'\n", s);
3885 if (PL_expect == XOPERATOR) {
3886 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3889 return REPORT(','); /* grandfather non-comma-format format */
3895 missingterm((char*)0);
3896 yylval.ival = OP_CONST;
3897 /* FIXME. I think that this can be const if char *d is replaced by
3898 more localised variables. */
3899 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3900 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3901 yylval.ival = OP_STRINGIFY;
3905 TERM(sublex_start());
3908 s = scan_str(s,FALSE,FALSE);
3909 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3910 "### Saw backtick string before '%s'\n", s);
3912 if (PL_expect == XOPERATOR)
3913 no_op("Backticks",s);
3915 missingterm((char*)0);
3916 yylval.ival = OP_BACKTICK;
3918 TERM(sublex_start());
3922 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3923 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3925 if (PL_expect == XOPERATOR)
3926 no_op("Backslash",s);
3930 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3931 char *start = s + 2;
3932 while (isDIGIT(*start) || *start == '_')
3934 if (*start == '.' && isDIGIT(start[1])) {
3935 s = scan_num(s, &yylval);
3938 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3939 else if (!isALPHA(*start) && (PL_expect == XTERM
3940 || PL_expect == XREF || PL_expect == XSTATE
3941 || PL_expect == XTERMORDORDOR)) {
3942 const char c = *start;
3945 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3948 s = scan_num(s, &yylval);
3955 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3995 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3997 /* Some keywords can be followed by any delimiter, including ':' */
3998 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3999 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4000 (PL_tokenbuf[0] == 'q' &&
4001 strchr("qwxr", PL_tokenbuf[1])))));
4003 /* x::* is just a word, unless x is "CORE" */
4004 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4008 while (d < PL_bufend && isSPACE(*d))
4009 d++; /* no comments skipped here, or s### is misparsed */
4011 /* Is this a label? */
4012 if (!tmp && PL_expect == XSTATE
4013 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4015 yylval.pval = savepv(PL_tokenbuf);
4020 /* Check for keywords */
4021 tmp = keyword(PL_tokenbuf, len);
4023 /* Is this a word before a => operator? */
4024 if (*d == '=' && d[1] == '>') {
4027 = (OP*)newSVOP(OP_CONST, 0,
4028 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4029 yylval.opval->op_private = OPpCONST_BARE;
4033 if (tmp < 0) { /* second-class keyword? */
4034 GV *ogv = Nullgv; /* override (winner) */
4035 GV *hgv = Nullgv; /* hidden (loser) */
4036 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4038 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
4041 if (GvIMPORTED_CV(gv))
4043 else if (! CvMETHOD(cv))
4047 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4048 (gv = *gvp) != (GV*)&PL_sv_undef &&
4049 GvCVu(gv) && GvIMPORTED_CV(gv))
4056 tmp = 0; /* overridden by import or by GLOBAL */
4059 && -tmp==KEY_lock /* XXX generalizable kludge */
4061 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4063 tmp = 0; /* any sub overrides "weak" keyword */
4068 && PL_expect != XOPERATOR
4069 && PL_expect != XTERMORDORDOR)
4071 /* any sub overrides the "err" keyword, except when really an
4072 * operator is expected */
4075 else { /* no override */
4077 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4078 Perl_warner(aTHX_ packWARN(WARN_MISC),
4079 "dump() better written as CORE::dump()");
4083 if (ckWARN(WARN_AMBIGUOUS) && hgv
4084 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
4085 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4086 "Ambiguous call resolved as CORE::%s(), %s",
4087 GvENAME(hgv), "qualify as such or use &");
4094 default: /* not a keyword */
4098 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4100 /* Get the rest if it looks like a package qualifier */
4102 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4104 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4107 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4108 *s == '\'' ? "'" : "::");
4113 if (PL_expect == XOPERATOR) {
4114 if (PL_bufptr == PL_linestart) {
4115 CopLINE_dec(PL_curcop);
4116 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4117 CopLINE_inc(PL_curcop);
4120 no_op("Bareword",s);
4123 /* Look for a subroutine with this name in current package,
4124 unless name is "Foo::", in which case Foo is a bearword
4125 (and a package name). */
4128 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4130 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4131 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4132 "Bareword \"%s\" refers to nonexistent package",
4135 PL_tokenbuf[len] = '\0';
4142 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
4145 /* if we saw a global override before, get the right name */
4148 sv = newSVpvn("CORE::GLOBAL::",14);
4149 sv_catpv(sv,PL_tokenbuf);
4152 /* If len is 0, newSVpv does strlen(), which is correct.
4153 If len is non-zero, then it will be the true length,
4154 and so the scalar will be created correctly. */
4155 sv = newSVpv(PL_tokenbuf,len);
4158 /* Presume this is going to be a bareword of some sort. */
4161 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4162 yylval.opval->op_private = OPpCONST_BARE;
4163 /* UTF-8 package name? */
4164 if (UTF && !IN_BYTES &&
4165 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4168 /* And if "Foo::", then that's what it certainly is. */
4173 /* See if it's the indirect object for a list operator. */
4175 if (PL_oldoldbufptr &&
4176 PL_oldoldbufptr < PL_bufptr &&
4177 (PL_oldoldbufptr == PL_last_lop
4178 || PL_oldoldbufptr == PL_last_uni) &&
4179 /* NO SKIPSPACE BEFORE HERE! */
4180 (PL_expect == XREF ||
4181 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4183 bool immediate_paren = *s == '(';
4185 /* (Now we can afford to cross potential line boundary.) */
4188 /* Two barewords in a row may indicate method call. */
4190 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
4193 /* If not a declared subroutine, it's an indirect object. */
4194 /* (But it's an indir obj regardless for sort.) */
4196 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4197 ((!gv || !GvCVu(gv)) &&
4198 (PL_last_lop_op != OP_MAPSTART &&
4199 PL_last_lop_op != OP_GREPSTART))))
4201 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4206 PL_expect = XOPERATOR;
4209 /* Is this a word before a => operator? */
4210 if (*s == '=' && s[1] == '>' && !pkgname) {
4212 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4213 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4214 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4218 /* If followed by a paren, it's certainly a subroutine. */
4221 if (gv && GvCVu(gv)) {
4222 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4223 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4228 PL_nextval[PL_nexttoke].opval = yylval.opval;
4229 PL_expect = XOPERATOR;
4235 /* If followed by var or block, call it a method (unless sub) */
4237 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4238 PL_last_lop = PL_oldbufptr;
4239 PL_last_lop_op = OP_METHOD;
4243 /* If followed by a bareword, see if it looks like indir obj. */
4246 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4247 && (tmp = intuit_method(s,gv)))
4250 /* Not a method, so call it a subroutine (if defined) */
4252 if (gv && GvCVu(gv)) {
4254 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4255 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4256 "Ambiguous use of -%s resolved as -&%s()",
4257 PL_tokenbuf, PL_tokenbuf);
4258 /* Check for a constant sub */
4260 if ((sv = cv_const_sv(cv))) {
4262 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4263 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4264 yylval.opval->op_private = 0;
4268 /* Resolve to GV now. */
4269 op_free(yylval.opval);
4270 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4271 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4272 PL_last_lop = PL_oldbufptr;
4273 PL_last_lop_op = OP_ENTERSUB;
4274 /* Is there a prototype? */
4277 const char *proto = SvPV_const((SV*)cv, len);
4280 if (*proto == '$' && proto[1] == '\0')
4282 while (*proto == ';')
4284 if (*proto == '&' && *s == '{') {
4285 sv_setpv(PL_subname, PL_curstash ?
4286 "__ANON__" : "__ANON__::__ANON__");
4290 PL_nextval[PL_nexttoke].opval = yylval.opval;
4296 /* Call it a bare word */
4298 if (PL_hints & HINT_STRICT_SUBS)
4299 yylval.opval->op_private |= OPpCONST_STRICT;
4302 if (ckWARN(WARN_RESERVED)) {
4303 if (lastchar != '-') {
4304 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4305 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4306 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4313 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4314 && ckWARN_d(WARN_AMBIGUOUS)) {
4315 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4316 "Operator or semicolon missing before %c%s",
4317 lastchar, PL_tokenbuf);
4318 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4319 "Ambiguous use of %c resolved as operator %c",
4320 lastchar, lastchar);
4326 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4327 newSVpv(CopFILE(PL_curcop),0));
4331 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4332 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4335 case KEY___PACKAGE__:
4336 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4338 ? newSVhek(HvNAME_HEK(PL_curstash))
4345 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4346 const char *pname = "main";
4347 if (PL_tokenbuf[2] == 'D')
4348 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4349 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4352 GvIOp(gv) = newIO();
4353 IoIFP(GvIOp(gv)) = PL_rsfp;
4354 #if defined(HAS_FCNTL) && defined(F_SETFD)
4356 const int fd = PerlIO_fileno(PL_rsfp);
4357 fcntl(fd,F_SETFD,fd >= 3);
4360 /* Mark this internal pseudo-handle as clean */
4361 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4363 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4364 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4365 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4367 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4368 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4369 /* if the script was opened in binmode, we need to revert
4370 * it to text mode for compatibility; but only iff it has CRs
4371 * XXX this is a questionable hack at best. */
4372 if (PL_bufend-PL_bufptr > 2
4373 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4376 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4377 loc = PerlIO_tell(PL_rsfp);
4378 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4381 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4383 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4384 #endif /* NETWARE */
4385 #ifdef PERLIO_IS_STDIO /* really? */
4386 # if defined(__BORLANDC__)
4387 /* XXX see note in do_binmode() */
4388 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4392 PerlIO_seek(PL_rsfp, loc, 0);
4396 #ifdef PERLIO_LAYERS
4399 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4400 else if (PL_encoding) {
4407 XPUSHs(PL_encoding);
4409 call_method("name", G_SCALAR);
4413 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4414 Perl_form(aTHX_ ":encoding(%"SVf")",
4432 if (PL_expect == XSTATE) {
4439 if (*s == ':' && s[1] == ':') {
4442 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4443 if (!(tmp = keyword(PL_tokenbuf, len)))
4444 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4458 LOP(OP_ACCEPT,XTERM);
4464 LOP(OP_ATAN2,XTERM);
4470 LOP(OP_BINMODE,XTERM);
4473 LOP(OP_BLESS,XTERM);
4482 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4499 if (!PL_cryptseen) {
4500 PL_cryptseen = TRUE;
4504 LOP(OP_CRYPT,XTERM);
4507 LOP(OP_CHMOD,XTERM);
4510 LOP(OP_CHOWN,XTERM);
4513 LOP(OP_CONNECT,XTERM);
4529 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4533 PL_hints |= HINT_BLOCK_SCOPE;
4543 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4544 LOP(OP_DBMOPEN,XTERM);
4550 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4557 yylval.ival = CopLINE(PL_curcop);
4571 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4572 UNIBRACK(OP_ENTEREVAL);
4590 case KEY_endhostent:
4596 case KEY_endservent:
4599 case KEY_endprotoent:
4610 yylval.ival = CopLINE(PL_curcop);
4612 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4614 if ((PL_bufend - p) >= 3 &&
4615 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4617 else if ((PL_bufend - p) >= 4 &&
4618 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4621 if (isIDFIRST_lazy_if(p,UTF)) {
4622 p = scan_ident(p, PL_bufend,
4623 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4627 Perl_croak(aTHX_ "Missing $ on loop variable");
4632 LOP(OP_FORMLINE,XTERM);
4638 LOP(OP_FCNTL,XTERM);
4644 LOP(OP_FLOCK,XTERM);
4653 LOP(OP_GREPSTART, XREF);
4656 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4671 case KEY_getpriority:
4672 LOP(OP_GETPRIORITY,XTERM);
4674 case KEY_getprotobyname:
4677 case KEY_getprotobynumber:
4678 LOP(OP_GPBYNUMBER,XTERM);
4680 case KEY_getprotoent:
4692 case KEY_getpeername:
4693 UNI(OP_GETPEERNAME);
4695 case KEY_gethostbyname:
4698 case KEY_gethostbyaddr:
4699 LOP(OP_GHBYADDR,XTERM);
4701 case KEY_gethostent:
4704 case KEY_getnetbyname:
4707 case KEY_getnetbyaddr:
4708 LOP(OP_GNBYADDR,XTERM);
4713 case KEY_getservbyname:
4714 LOP(OP_GSBYNAME,XTERM);
4716 case KEY_getservbyport:
4717 LOP(OP_GSBYPORT,XTERM);
4719 case KEY_getservent:
4722 case KEY_getsockname:
4723 UNI(OP_GETSOCKNAME);
4725 case KEY_getsockopt:
4726 LOP(OP_GSOCKOPT,XTERM);
4748 yylval.ival = CopLINE(PL_curcop);
4752 LOP(OP_INDEX,XTERM);
4758 LOP(OP_IOCTL,XTERM);
4770 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4802 LOP(OP_LISTEN,XTERM);
4811 s = scan_pat(s,OP_MATCH);
4812 TERM(sublex_start());
4815 LOP(OP_MAPSTART, XREF);
4818 LOP(OP_MKDIR,XTERM);
4821 LOP(OP_MSGCTL,XTERM);
4824 LOP(OP_MSGGET,XTERM);
4827 LOP(OP_MSGRCV,XTERM);
4830 LOP(OP_MSGSND,XTERM);
4836 if (isIDFIRST_lazy_if(s,UTF)) {
4837 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4838 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4840 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4841 if (!PL_in_my_stash) {
4844 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4852 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4859 if (PL_expect != XSTATE)
4860 yyerror("\"no\" not allowed in expression");
4861 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4862 s = force_version(s, FALSE);
4867 if (*s == '(' || (s = skipspace(s), *s == '('))
4874 if (isIDFIRST_lazy_if(s,UTF)) {
4876 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4877 for (t=d; *t && isSPACE(*t); t++) ;
4878 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4880 && !(t[0] == '=' && t[1] == '>')
4882 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4883 "Precedence problem: open %.*s should be open(%.*s)",
4884 d - s, s, d - s, s);
4890 yylval.ival = OP_OR;
4900 LOP(OP_OPEN_DIR,XTERM);
4903 checkcomma(s,PL_tokenbuf,"filehandle");
4907 checkcomma(s,PL_tokenbuf,"filehandle");
4926 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4930 LOP(OP_PIPE_OP,XTERM);
4933 s = scan_str(s,FALSE,FALSE);
4935 missingterm((char*)0);
4936 yylval.ival = OP_CONST;
4937 TERM(sublex_start());
4943 s = scan_str(s,FALSE,FALSE);
4945 missingterm((char*)0);
4946 PL_expect = XOPERATOR;
4948 if (SvCUR(PL_lex_stuff)) {
4951 d = SvPV_force(PL_lex_stuff, len);
4954 for (; isSPACE(*d) && len; --len, ++d) ;
4957 if (!warned && ckWARN(WARN_QW)) {
4958 for (; !isSPACE(*d) && len; --len, ++d) {
4960 Perl_warner(aTHX_ packWARN(WARN_QW),
4961 "Possible attempt to separate words with commas");
4964 else if (*d == '#') {
4965 Perl_warner(aTHX_ packWARN(WARN_QW),
4966 "Possible attempt to put comments in qw() list");
4972 for (; !isSPACE(*d) && len; --len, ++d) ;
4974 sv = newSVpvn(b, d-b);
4975 if (DO_UTF8(PL_lex_stuff))
4977 words = append_elem(OP_LIST, words,
4978 newSVOP(OP_CONST, 0, tokeq(sv)));
4982 PL_nextval[PL_nexttoke].opval = words;
4987 SvREFCNT_dec(PL_lex_stuff);
4988 PL_lex_stuff = Nullsv;
4994 s = scan_str(s,FALSE,FALSE);
4996 missingterm((char*)0);
4997 yylval.ival = OP_STRINGIFY;
4998 if (SvIVX(PL_lex_stuff) == '\'')
4999 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5000 TERM(sublex_start());
5003 s = scan_pat(s,OP_QR);
5004 TERM(sublex_start());
5007 s = scan_str(s,FALSE,FALSE);
5009 missingterm((char*)0);
5010 yylval.ival = OP_BACKTICK;
5012 TERM(sublex_start());
5020 s = force_version(s, FALSE);
5022 else if (*s != 'v' || !isDIGIT(s[1])
5023 || (s = force_version(s, TRUE), *s == 'v'))
5025 *PL_tokenbuf = '\0';
5026 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5027 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5028 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5030 yyerror("<> should be quotes");
5038 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5042 LOP(OP_RENAME,XTERM);
5051 LOP(OP_RINDEX,XTERM);
5061 UNIDOR(OP_READLINE);
5074 LOP(OP_REVERSE,XTERM);
5077 UNIDOR(OP_READLINK);
5085 TERM(sublex_start());
5087 TOKEN(1); /* force error */
5096 LOP(OP_SELECT,XTERM);
5102 LOP(OP_SEMCTL,XTERM);
5105 LOP(OP_SEMGET,XTERM);
5108 LOP(OP_SEMOP,XTERM);
5114 LOP(OP_SETPGRP,XTERM);
5116 case KEY_setpriority:
5117 LOP(OP_SETPRIORITY,XTERM);
5119 case KEY_sethostent:
5125 case KEY_setservent:
5128 case KEY_setprotoent:
5138 LOP(OP_SEEKDIR,XTERM);
5140 case KEY_setsockopt:
5141 LOP(OP_SSOCKOPT,XTERM);
5147 LOP(OP_SHMCTL,XTERM);
5150 LOP(OP_SHMGET,XTERM);
5153 LOP(OP_SHMREAD,XTERM);
5156 LOP(OP_SHMWRITE,XTERM);
5159 LOP(OP_SHUTDOWN,XTERM);
5168 LOP(OP_SOCKET,XTERM);
5170 case KEY_socketpair:
5171 LOP(OP_SOCKPAIR,XTERM);
5174 checkcomma(s,PL_tokenbuf,"subroutine name");
5176 if (*s == ';' || *s == ')') /* probably a close */
5177 Perl_croak(aTHX_ "sort is now a reserved word");
5179 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5183 LOP(OP_SPLIT,XTERM);
5186 LOP(OP_SPRINTF,XTERM);
5189 LOP(OP_SPLICE,XTERM);
5204 LOP(OP_SUBSTR,XTERM);
5210 char tmpbuf[sizeof PL_tokenbuf];
5211 SSize_t tboffset = 0;
5212 expectation attrful;
5213 bool have_name, have_proto, bad_proto;
5214 const int key = tmp;
5218 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5219 (*s == ':' && s[1] == ':'))
5222 attrful = XATTRBLOCK;
5223 /* remember buffer pos'n for later force_word */
5224 tboffset = s - PL_oldbufptr;
5225 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5226 if (strchr(tmpbuf, ':'))
5227 sv_setpv(PL_subname, tmpbuf);
5229 sv_setsv(PL_subname,PL_curstname);
5230 sv_catpvn(PL_subname,"::",2);
5231 sv_catpvn(PL_subname,tmpbuf,len);
5238 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5239 PL_expect = XTERMBLOCK;
5240 attrful = XATTRTERM;
5241 sv_setpvn(PL_subname,"?",1);
5245 if (key == KEY_format) {
5247 PL_lex_formbrack = PL_lex_brackets + 1;
5249 (void) force_word(PL_oldbufptr + tboffset, WORD,
5254 /* Look for a prototype */
5258 s = scan_str(s,FALSE,FALSE);
5260 Perl_croak(aTHX_ "Prototype not terminated");
5261 /* strip spaces and check for bad characters */
5262 d = SvPVX(PL_lex_stuff);
5265 for (p = d; *p; ++p) {
5268 if (!strchr("$@%*;[]&\\", *p))
5273 if (bad_proto && ckWARN(WARN_SYNTAX))
5274 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5275 "Illegal character in prototype for %"SVf" : %s",
5277 SvCUR_set(PL_lex_stuff, tmp);
5285 if (*s == ':' && s[1] != ':')
5286 PL_expect = attrful;
5287 else if (*s != '{' && key == KEY_sub) {
5289 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5291 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5295 PL_nextval[PL_nexttoke].opval =
5296 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5297 PL_lex_stuff = Nullsv;
5301 sv_setpv(PL_subname,
5302 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5305 (void) force_word(PL_oldbufptr + tboffset, WORD,
5314 LOP(OP_SYSTEM,XREF);
5317 LOP(OP_SYMLINK,XTERM);
5320 LOP(OP_SYSCALL,XTERM);
5323 LOP(OP_SYSOPEN,XTERM);
5326 LOP(OP_SYSSEEK,XTERM);
5329 LOP(OP_SYSREAD,XTERM);
5332 LOP(OP_SYSWRITE,XTERM);
5336 TERM(sublex_start());
5357 LOP(OP_TRUNCATE,XTERM);
5369 yylval.ival = CopLINE(PL_curcop);
5373 yylval.ival = CopLINE(PL_curcop);
5377 LOP(OP_UNLINK,XTERM);
5383 LOP(OP_UNPACK,XTERM);
5386 LOP(OP_UTIME,XTERM);
5392 LOP(OP_UNSHIFT,XTERM);
5395 if (PL_expect != XSTATE)
5396 yyerror("\"use\" not allowed in expression");
5398 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5399 s = force_version(s, TRUE);
5400 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5401 PL_nextval[PL_nexttoke].opval = Nullop;
5404 else if (*s == 'v') {
5405 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5406 s = force_version(s, FALSE);
5410 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5411 s = force_version(s, FALSE);
5423 yylval.ival = CopLINE(PL_curcop);
5427 PL_hints |= HINT_BLOCK_SCOPE;
5434 LOP(OP_WAITPID,XTERM);
5443 ctl_l[0] = toCTRL('L');
5445 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5448 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5453 if (PL_expect == XOPERATOR)
5459 yylval.ival = OP_XOR;
5464 TERM(sublex_start());
5469 #pragma segment Main
5473 S_pending_ident(pTHX)
5476 register I32 tmp = 0;
5477 /* pit holds the identifier we read and pending_ident is reset */
5478 char pit = PL_pending_ident;
5479 PL_pending_ident = 0;
5481 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5482 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5484 /* if we're in a my(), we can't allow dynamics here.
5485 $foo'bar has already been turned into $foo::bar, so
5486 just check for colons.
5488 if it's a legal name, the OP is a PADANY.
5491 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5492 if (strchr(PL_tokenbuf,':'))
5493 yyerror(Perl_form(aTHX_ "No package name allowed for "
5494 "variable %s in \"our\"",
5496 tmp = allocmy(PL_tokenbuf);
5499 if (strchr(PL_tokenbuf,':'))
5500 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5502 yylval.opval = newOP(OP_PADANY, 0);
5503 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5509 build the ops for accesses to a my() variable.
5511 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5512 then used in a comparison. This catches most, but not
5513 all cases. For instance, it catches
5514 sort { my($a); $a <=> $b }
5516 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5517 (although why you'd do that is anyone's guess).
5520 if (!strchr(PL_tokenbuf,':')) {
5522 tmp = pad_findmy(PL_tokenbuf);
5523 if (tmp != NOT_IN_PAD) {
5524 /* might be an "our" variable" */
5525 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5526 /* build ops for a bareword */
5527 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
5528 HEK *stashname = HvNAME_HEK(stash);
5529 SV *sym = newSVhek(stashname);
5530 sv_catpvn(sym, "::", 2);
5531 sv_catpv(sym, PL_tokenbuf+1);
5532 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5533 yylval.opval->op_private = OPpCONST_ENTERED;
5536 ? (GV_ADDMULTI | GV_ADDINEVAL)
5539 ((PL_tokenbuf[0] == '$') ? SVt_PV
5540 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5545 /* if it's a sort block and they're naming $a or $b */
5546 if (PL_last_lop_op == OP_SORT &&
5547 PL_tokenbuf[0] == '$' &&
5548 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5551 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5552 d < PL_bufend && *d != '\n';
5555 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5556 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5562 yylval.opval = newOP(OP_PADANY, 0);
5563 yylval.opval->op_targ = tmp;
5569 Whine if they've said @foo in a doublequoted string,
5570 and @foo isn't a variable we can find in the symbol
5573 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5574 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5575 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5576 && ckWARN(WARN_AMBIGUOUS))
5578 /* Downgraded from fatal to warning 20000522 mjd */
5579 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5580 "Possible unintended interpolation of %s in string",
5585 /* build ops for a bareword */
5586 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5587 yylval.opval->op_private = OPpCONST_ENTERED;
5588 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5589 ((PL_tokenbuf[0] == '$') ? SVt_PV
5590 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5596 * The following code was generated by perl_keyword.pl.
5600 Perl_keyword (pTHX_ const char *name, I32 len)
5604 case 1: /* 5 tokens of length 1 */
5636 case 2: /* 18 tokens of length 2 */
5782 case 3: /* 28 tokens of length 3 */
5786 if (name[1] == 'N' &&
5849 if (name[1] == 'i' &&
5889 if (name[1] == 'o' &&
5898 if (name[1] == 'e' &&
5907 if (name[1] == 'n' &&
5916 if (name[1] == 'o' &&
5925 if (name[1] == 'a' &&
5934 if (name[1] == 'o' &&
5996 if (name[1] == 'e' &&
6028 if (name[1] == 'i' &&
6037 if (name[1] == 's' &&
6046 if (name[1] == 'e' &&
6055 if (name[1] == 'o' &&
6067 case 4: /* 40 tokens of length 4 */
6071 if (name[1] == 'O' &&
6081 if (name[1] == 'N' &&
6091 if (name[1] == 'i' &&
6101 if (name[1] == 'h' &&
6111 if (name[1] == 'u' &&
6124 if (name[2] == 'c' &&
6133 if (name[2] == 's' &&
6142 if (name[2] == 'a' &&
6178 if (name[1] == 'o' &&
6191 if (name[2] == 't' &&
6200 if (name[2] == 'o' &&
6209 if (name[2] == 't' &&
6218 if (name[2] == 'e' &&
6231 if (name[1] == 'o' &&
6244 if (name[2] == 'y' &&
6253 if (name[2] == 'l' &&
6269 if (name[2] == 's' &&
6278 if (name[2] == 'n' &&
6287 if (name[2] == 'c' &&
6300 if (name[1] == 'e' &&
6310 if (name[1] == 'p' &&
6323 if (name[2] == 'c' &&
6332 if (name[2] == 'p' &&
6341 if (name[2] == 's' &&
6357 if (name[2] == 'n' &&
6427 if (name[2] == 'r' &&
6436 if (name[2] == 'r' &&
6445 if (name[2] == 'a' &&
6461 if (name[2] == 'l' &&
6528 case 5: /* 36 tokens of length 5 */
6532 if (name[1] == 'E' &&
6543 if (name[1] == 'H' &&
6557 if (name[2] == 'a' &&
6567 if (name[2] == 'a' &&
6581 if (name[1] == 'l' &&
6598 if (name[3] == 'i' &&
6607 if (name[3] == 'o' &&
6643 if (name[2] == 'o' &&
6653 if (name[2] == 'y' &&
6667 if (name[1] == 'l' &&
6681 if (name[2] == 'n' &&
6691 if (name[2] == 'o' &&
6708 if (name[2] == 'd' &&
6718 if (name[2] == 'c' &&
6735 if (name[2] == 'c' &&
6745 if (name[2] == 't' &&
6759 if (name[1] == 'k' &&
6770 if (name[1] == 'r' &&
6784 if (name[2] == 's' &&
6794 if (name[2] == 'd' &&
6811 if (name[2] == 'm' &&
6821 if (name[2] == 'i' &&
6831 if (name[2] == 'e' &&
6841 if (name[2] == 'l' &&
6851 if (name[2] == 'a' &&
6861 if (name[2] == 'u' &&
6875 if (name[1] == 'i' &&
6889 if (name[2] == 'a' &&
6902 if (name[3] == 'e' &&
6937 if (name[2] == 'i' &&
6954 if (name[2] == 'i' &&
6964 if (name[2] == 'i' &&
6981 case 6: /* 33 tokens of length 6 */
6985 if (name[1] == 'c' &&
7000 if (name[2] == 'l' &&
7011 if (name[2] == 'r' &&
7026 if (name[1] == 'e' &&
7041 if (name[2] == 's' &&
7046 if(ckWARN_d(WARN_SYNTAX))
7047 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7053 if (name[2] == 'i' &&
7071 if (name[2] == 'l' &&
7082 if (name[2] == 'r' &&
7097 if (name[1] == 'm' &&
7112 if (name[2] == 'n' &&
7123 if (name[2] == 's' &&
7138 if (name[1] == 's' &&
7144 if (name[4] == 't' &&
7153 if (name[4] == 'e' &&
7162 if (name[4] == 'c' &&
7171 if (name[4] == 'n' &&
7187 if (name[1] == 'r' &&
7205 if (name[3] == 'a' &&
7215 if (name[3] == 'u' &&
7229 if (name[2] == 'n' &&
7247 if (name[2] == 'a' &&
7261 if (name[3] == 'e' &&
7274 if (name[4] == 't' &&
7283 if (name[4] == 'e' &&
7305 if (name[4] == 't' &&
7314 if (name[4] == 'e' &&
7330 if (name[2] == 'c' &&
7341 if (name[2] == 'l' &&
7352 if (name[2] == 'b' &&
7363 if (name[2] == 's' &&
7386 if (name[4] == 's' &&
7395 if (name[4] == 'n' &&
7408 if (name[3] == 'a' &&
7425 if (name[1] == 'a' &&
7440 case 7: /* 28 tokens of length 7 */
7444 if (name[1] == 'E' &&
7457 if (name[1] == '_' &&
7470 if (name[1] == 'i' &&
7477 return -KEY_binmode;
7483 if (name[1] == 'o' &&
7490 return -KEY_connect;
7499 if (name[2] == 'm' &&
7505 return -KEY_dbmopen;
7511 if (name[2] == 'f' &&
7527 if (name[1] == 'o' &&
7540 if (name[1] == 'e' &&
7547 if (name[5] == 'r' &&
7550 return -KEY_getpgrp;
7556 if (name[5] == 'i' &&
7559 return -KEY_getppid;
7572 if (name[1] == 'c' &&
7579 return -KEY_lcfirst;
7585 if (name[1] == 'p' &&
7592 return -KEY_opendir;
7598 if (name[1] == 'a' &&
7616 if (name[3] == 'd' &&
7621 return -KEY_readdir;
7627 if (name[3] == 'u' &&
7638 if (name[3] == 'e' &&
7643 return -KEY_reverse;
7662 if (name[3] == 'k' &&
7667 return -KEY_seekdir;
7673 if (name[3] == 'p' &&
7678 return -KEY_setpgrp;
7688 if (name[2] == 'm' &&
7694 return -KEY_shmread;
7700 if (name[2] == 'r' &&
7706 return -KEY_sprintf;
7715 if (name[3] == 'l' &&
7720 return -KEY_symlink;
7729 if (name[4] == 'a' &&
7733 return -KEY_syscall;
7739 if (name[4] == 'p' &&
7743 return -KEY_sysopen;
7749 if (name[4] == 'e' &&
7753 return -KEY_sysread;
7759 if (name[4] == 'e' &&
7763 return -KEY_sysseek;
7781 if (name[1] == 'e' &&
7788 return -KEY_telldir;
7797 if (name[2] == 'f' &&
7803 return -KEY_ucfirst;
7809 if (name[2] == 's' &&
7815 return -KEY_unshift;
7825 if (name[1] == 'a' &&
7832 return -KEY_waitpid;
7841 case 8: /* 26 tokens of length 8 */
7845 if (name[1] == 'U' &&
7853 return KEY_AUTOLOAD;
7864 if (name[3] == 'A' &&
7870 return KEY___DATA__;
7876 if (name[3] == 'I' &&
7882 return -KEY___FILE__;
7888 if (name[3] == 'I' &&
7894 return -KEY___LINE__;
7910 if (name[2] == 'o' &&
7917 return -KEY_closedir;
7923 if (name[2] == 'n' &&
7930 return -KEY_continue;
7940 if (name[1] == 'b' &&
7948 return -KEY_dbmclose;
7954 if (name[1] == 'n' &&
7960 if (name[4] == 'r' &&
7965 return -KEY_endgrent;
7971 if (name[4] == 'w' &&
7976 return -KEY_endpwent;
7989 if (name[1] == 'o' &&
7997 return -KEY_formline;
8003 if (name[1] == 'e' &&
8014 if (name[6] == 'n' &&
8017 return -KEY_getgrent;
8023 if (name[6] == 'i' &&
8026 return -KEY_getgrgid;
8032 if (name[6] == 'a' &&
8035 return -KEY_getgrnam;
8048 if (name[4] == 'o' &&
8053 return -KEY_getlogin;
8064 if (name[6] == 'n' &&
8067 return -KEY_getpwent;
8073 if (name[6] == 'a' &&
8076 return -KEY_getpwnam;
8082 if (name[6] == 'i' &&
8085 return -KEY_getpwuid;
8105 if (name[1] == 'e' &&
8112 if (name[5] == 'i' &&
8119 return -KEY_readline;
8124 return -KEY_readlink;
8135 if (name[5] == 'i' &&
8139 return -KEY_readpipe;
8160 if (name[4] == 'r' &&
8165 return -KEY_setgrent;
8171 if (name[4] == 'w' &&
8176 return -KEY_setpwent;
8192 if (name[3] == 'w' &&
8198 return -KEY_shmwrite;
8204 if (name[3] == 't' &&
8210 return -KEY_shutdown;
8220 if (name[2] == 's' &&
8227 return -KEY_syswrite;
8237 if (name[1] == 'r' &&
8245 return -KEY_truncate;
8254 case 9: /* 8 tokens of length 9 */
8258 if (name[1] == 'n' &&
8267 return -KEY_endnetent;
8273 if (name[1] == 'e' &&
8282 return -KEY_getnetent;
8288 if (name[1] == 'o' &&
8297 return -KEY_localtime;
8303 if (name[1] == 'r' &&
8312 return KEY_prototype;
8318 if (name[1] == 'u' &&
8327 return -KEY_quotemeta;
8333 if (name[1] == 'e' &&
8342 return -KEY_rewinddir;
8348 if (name[1] == 'e' &&
8357 return -KEY_setnetent;
8363 if (name[1] == 'a' &&
8372 return -KEY_wantarray;
8381 case 10: /* 9 tokens of length 10 */
8385 if (name[1] == 'n' &&
8391 if (name[4] == 'o' &&
8398 return -KEY_endhostent;
8404 if (name[4] == 'e' &&
8411 return -KEY_endservent;
8424 if (name[1] == 'e' &&
8430 if (name[4] == 'o' &&
8437 return -KEY_gethostent;
8446 if (name[5] == 'r' &&
8452 return -KEY_getservent;
8458 if (name[5] == 'c' &&
8464 return -KEY_getsockopt;
8489 if (name[4] == 'o' &&
8496 return -KEY_sethostent;
8505 if (name[5] == 'r' &&
8511 return -KEY_setservent;
8517 if (name[5] == 'c' &&
8523 return -KEY_setsockopt;
8540 if (name[2] == 'c' &&
8549 return -KEY_socketpair;
8562 case 11: /* 8 tokens of length 11 */
8566 if (name[1] == '_' &&
8577 return -KEY___PACKAGE__;
8583 if (name[1] == 'n' &&
8594 return -KEY_endprotoent;
8600 if (name[1] == 'e' &&
8609 if (name[5] == 'e' &&
8616 return -KEY_getpeername;
8625 if (name[6] == 'o' &&
8631 return -KEY_getpriority;
8637 if (name[6] == 't' &&
8643 return -KEY_getprotoent;
8657 if (name[4] == 'o' &&
8665 return -KEY_getsockname;
8678 if (name[1] == 'e' &&
8686 if (name[6] == 'o' &&
8692 return -KEY_setpriority;
8698 if (name[6] == 't' &&
8704 return -KEY_setprotoent;
8720 case 12: /* 2 tokens of length 12 */
8721 if (name[0] == 'g' &&
8733 if (name[9] == 'd' &&
8736 { /* getnetbyaddr */
8737 return -KEY_getnetbyaddr;
8743 if (name[9] == 'a' &&
8746 { /* getnetbyname */
8747 return -KEY_getnetbyname;
8759 case 13: /* 4 tokens of length 13 */
8760 if (name[0] == 'g' &&
8767 if (name[4] == 'o' &&
8776 if (name[10] == 'd' &&
8779 { /* gethostbyaddr */
8780 return -KEY_gethostbyaddr;
8786 if (name[10] == 'a' &&
8789 { /* gethostbyname */
8790 return -KEY_gethostbyname;
8803 if (name[4] == 'e' &&
8812 if (name[10] == 'a' &&
8815 { /* getservbyname */
8816 return -KEY_getservbyname;
8822 if (name[10] == 'o' &&
8825 { /* getservbyport */
8826 return -KEY_getservbyport;
8845 case 14: /* 1 tokens of length 14 */
8846 if (name[0] == 'g' &&
8860 { /* getprotobyname */
8861 return -KEY_getprotobyname;
8866 case 16: /* 1 tokens of length 16 */
8867 if (name[0] == 'g' &&
8883 { /* getprotobynumber */
8884 return -KEY_getprotobynumber;
8898 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
8902 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8903 if (ckWARN(WARN_SYNTAX)) {
8905 for (w = s+2; *w && level; w++) {
8912 for (; *w && isSPACE(*w); w++) ;
8913 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
8914 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8915 "%s (...) interpreted as function",name);
8918 while (s < PL_bufend && isSPACE(*s))
8922 while (s < PL_bufend && isSPACE(*s))
8924 if (isIDFIRST_lazy_if(s,UTF)) {
8926 while (isALNUM_lazy_if(s,UTF))
8928 while (s < PL_bufend && isSPACE(*s))
8932 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
8933 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
8937 Perl_croak(aTHX_ "No comma allowed after %s", what);
8942 /* Either returns sv, or mortalizes sv and returns a new SV*.
8943 Best used as sv=new_constant(..., sv, ...).
8944 If s, pv are NULL, calls subroutine with one argument,
8945 and type is used with error messages only. */
8948 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
8952 HV *table = GvHV(PL_hintgv); /* ^H */
8956 const char *why1, *why2, *why3;
8958 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8961 why2 = strEQ(key,"charnames")
8962 ? "(possibly a missing \"use charnames ...\")"
8964 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8965 (type ? type: "undef"), why2);
8967 /* This is convoluted and evil ("goto considered harmful")
8968 * but I do not understand the intricacies of all the different
8969 * failure modes of %^H in here. The goal here is to make
8970 * the most probable error message user-friendly. --jhi */
8975 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8976 (type ? type: "undef"), why1, why2, why3);
8978 yyerror(SvPVX_const(msg));
8982 cvp = hv_fetch(table, key, strlen(key), FALSE);
8983 if (!cvp || !SvOK(*cvp)) {
8986 why3 = "} is not defined";
8989 sv_2mortal(sv); /* Parent created it permanently */
8992 pv = sv_2mortal(newSVpvn(s, len));
8994 typesv = sv_2mortal(newSVpv(type, 0));
8996 typesv = &PL_sv_undef;
8998 PUSHSTACKi(PERLSI_OVERLOAD);
9010 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9014 /* Check the eval first */
9015 if (!PL_in_eval && SvTRUE(ERRSV)) {
9016 sv_catpv(ERRSV, "Propagated");
9017 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9019 res = SvREFCNT_inc(sv);
9023 (void)SvREFCNT_inc(res);
9032 why1 = "Call to &{$^H{";
9034 why3 = "}} did not return a defined value";
9042 /* Returns a NUL terminated string, with the length of the string written to
9046 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9048 register char *d = dest;
9049 register char *e = d + destlen - 3; /* two-character token, ending NUL */
9052 Perl_croak(aTHX_ ident_too_long);
9053 if (isALNUM(*s)) /* UTF handled below */
9055 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9060 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9064 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9065 char *t = s + UTF8SKIP(s);
9066 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9068 if (d + (t - s) > e)
9069 Perl_croak(aTHX_ ident_too_long);
9070 Copy(s, d, t - s, char);
9083 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9093 e = d + destlen - 3; /* two-character token, ending NUL */
9095 while (isDIGIT(*s)) {
9097 Perl_croak(aTHX_ ident_too_long);
9104 Perl_croak(aTHX_ ident_too_long);
9105 if (isALNUM(*s)) /* UTF handled below */
9107 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9112 else if (*s == ':' && s[1] == ':') {
9116 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9117 char *t = s + UTF8SKIP(s);
9118 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9120 if (d + (t - s) > e)
9121 Perl_croak(aTHX_ ident_too_long);
9122 Copy(s, d, t - s, char);
9133 if (PL_lex_state != LEX_NORMAL)
9134 PL_lex_state = LEX_INTERPENDMAYBE;
9137 if (*s == '$' && s[1] &&
9138 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9151 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9156 if (isSPACE(s[-1])) {
9158 const char ch = *s++;
9159 if (!SPACE_OR_TAB(ch)) {
9165 if (isIDFIRST_lazy_if(d,UTF)) {
9169 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9171 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9174 Copy(s, d, e - s, char);
9179 while ((isALNUM(*s) || *s == ':') && d < e)
9182 Perl_croak(aTHX_ ident_too_long);
9185 while (s < send && SPACE_OR_TAB(*s)) s++;
9186 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9187 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9188 const char *brack = *s == '[' ? "[...]" : "{...}";
9189 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9190 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9191 funny, dest, brack, funny, dest, brack);
9194 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9198 /* Handle extended ${^Foo} variables
9199 * 1999-02-27 mjd-perl-patch@plover.com */
9200 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9204 while (isALNUM(*s) && d < e) {
9208 Perl_croak(aTHX_ ident_too_long);
9213 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9214 PL_lex_state = LEX_INTERPEND;
9219 if (PL_lex_state == LEX_NORMAL) {
9220 if (ckWARN(WARN_AMBIGUOUS) &&
9221 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9223 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9224 "Ambiguous use of %c{%s} resolved to %c%s",
9225 funny, dest, funny, dest);
9230 s = bracket; /* let the parser handle it */
9234 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9235 PL_lex_state = LEX_INTERPEND;
9240 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9245 *pmfl |= PMf_GLOBAL;
9247 *pmfl |= PMf_CONTINUE;
9251 *pmfl |= PMf_MULTILINE;
9253 *pmfl |= PMf_SINGLELINE;
9255 *pmfl |= PMf_EXTENDED;
9259 S_scan_pat(pTHX_ char *start, I32 type)
9262 char *s = scan_str(start,FALSE,FALSE);
9265 Perl_croak(aTHX_ "Search pattern not terminated");
9267 pm = (PMOP*)newPMOP(type, 0);
9268 if (PL_multi_open == '?')
9269 pm->op_pmflags |= PMf_ONCE;
9271 while (*s && strchr("iomsx", *s))
9272 pmflag(&pm->op_pmflags,*s++);
9275 while (*s && strchr("iogcmsx", *s))
9276 pmflag(&pm->op_pmflags,*s++);
9278 /* issue a warning if /c is specified,but /g is not */
9279 if (ckWARN(WARN_REGEXP) &&
9280 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9282 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
9285 pm->op_pmpermflags = pm->op_pmflags;
9287 PL_lex_op = (OP*)pm;
9288 yylval.ival = OP_MATCH;
9293 S_scan_subst(pTHX_ char *start)
9301 yylval.ival = OP_NULL;
9303 s = scan_str(start,FALSE,FALSE);
9306 Perl_croak(aTHX_ "Substitution pattern not terminated");
9308 if (s[-1] == PL_multi_open)
9311 first_start = PL_multi_start;
9312 s = scan_str(s,FALSE,FALSE);
9315 SvREFCNT_dec(PL_lex_stuff);
9316 PL_lex_stuff = Nullsv;
9318 Perl_croak(aTHX_ "Substitution replacement not terminated");
9320 PL_multi_start = first_start; /* so whole substitution is taken together */
9322 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9328 else if (strchr("iogcmsx", *s))
9329 pmflag(&pm->op_pmflags,*s++);
9334 /* /c is not meaningful with s/// */
9335 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
9337 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
9342 PL_sublex_info.super_bufptr = s;
9343 PL_sublex_info.super_bufend = PL_bufend;
9345 pm->op_pmflags |= PMf_EVAL;
9346 repl = newSVpvn("",0);
9348 sv_catpv(repl, es ? "eval " : "do ");
9349 sv_catpvn(repl, "{ ", 2);
9350 sv_catsv(repl, PL_lex_repl);
9351 sv_catpvn(repl, " };", 2);
9353 SvREFCNT_dec(PL_lex_repl);
9357 pm->op_pmpermflags = pm->op_pmflags;
9358 PL_lex_op = (OP*)pm;
9359 yylval.ival = OP_SUBST;
9364 S_scan_trans(pTHX_ char *start)
9373 yylval.ival = OP_NULL;
9375 s = scan_str(start,FALSE,FALSE);
9377 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9378 if (s[-1] == PL_multi_open)
9381 s = scan_str(s,FALSE,FALSE);
9384 SvREFCNT_dec(PL_lex_stuff);
9385 PL_lex_stuff = Nullsv;
9387 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9390 complement = del = squash = 0;
9394 complement = OPpTRANS_COMPLEMENT;
9397 del = OPpTRANS_DELETE;
9400 squash = OPpTRANS_SQUASH;
9409 New(803, tbl, complement&&!del?258:256, short);
9410 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9411 o->op_private &= ~OPpTRANS_ALL;
9412 o->op_private |= del|squash|complement|
9413 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9414 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9417 yylval.ival = OP_TRANS;
9422 S_scan_heredoc(pTHX_ register char *s)
9425 I32 op_type = OP_SCALAR;
9429 const char newline[] = "\n";
9430 const char *found_newline;
9434 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9438 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9441 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9442 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9445 s = delimcpy(d, e, s, PL_bufend, term, &len);
9455 if (!isALNUM_lazy_if(s,UTF))
9456 deprecate_old("bare << to mean <<\"\"");
9457 for (; isALNUM_lazy_if(s,UTF); s++) {
9462 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9463 Perl_croak(aTHX_ "Delimiter for here document is too long");
9466 len = d - PL_tokenbuf;
9467 #ifndef PERL_STRICT_CR
9468 d = strchr(s, '\r');
9470 char * const olds = s;
9472 while (s < PL_bufend) {
9478 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9487 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9491 if ( outer || !(found_newline = ninstr(s,PL_bufend,newline,newline+1)) ) {
9492 herewas = newSVpvn(s,PL_bufend-s);
9496 herewas = newSVpvn(s,found_newline-s);
9498 s += SvCUR(herewas);
9500 tmpstr = NEWSV(87,79);
9501 sv_upgrade(tmpstr, SVt_PVIV);
9504 SvIV_set(tmpstr, -1);
9506 else if (term == '`') {
9507 op_type = OP_BACKTICK;
9508 SvIV_set(tmpstr, '\\');
9512 PL_multi_start = CopLINE(PL_curcop);
9513 PL_multi_open = PL_multi_close = '<';
9514 term = *PL_tokenbuf;
9515 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9516 char *bufptr = PL_sublex_info.super_bufptr;
9517 char *bufend = PL_sublex_info.super_bufend;
9518 char * const olds = s - SvCUR(herewas);
9519 s = strchr(bufptr, '\n');
9523 while (s < bufend &&
9524 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9526 CopLINE_inc(PL_curcop);
9529 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9530 missingterm(PL_tokenbuf);
9532 sv_setpvn(herewas,bufptr,d-bufptr+1);
9533 sv_setpvn(tmpstr,d+1,s-d);
9535 sv_catpvn(herewas,s,bufend-s);
9536 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9543 while (s < PL_bufend &&
9544 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9546 CopLINE_inc(PL_curcop);
9548 if (s >= PL_bufend) {
9549 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9550 missingterm(PL_tokenbuf);
9552 sv_setpvn(tmpstr,d+1,s-d);
9554 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9556 sv_catpvn(herewas,s,PL_bufend-s);
9557 sv_setsv(PL_linestr,herewas);
9558 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9559 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9560 PL_last_lop = PL_last_uni = Nullch;
9563 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9564 while (s >= PL_bufend) { /* multiple line string? */
9566 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9567 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9568 missingterm(PL_tokenbuf);
9570 CopLINE_inc(PL_curcop);
9571 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9572 PL_last_lop = PL_last_uni = Nullch;
9573 #ifndef PERL_STRICT_CR
9574 if (PL_bufend - PL_linestart >= 2) {
9575 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9576 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9578 PL_bufend[-2] = '\n';
9580 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9582 else if (PL_bufend[-1] == '\r')
9583 PL_bufend[-1] = '\n';
9585 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9586 PL_bufend[-1] = '\n';
9588 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9589 SV *sv = NEWSV(88,0);
9591 sv_upgrade(sv, SVt_PVMG);
9592 sv_setsv(sv,PL_linestr);
9595 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9597 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9598 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9599 *(SvPVX(PL_linestr) + off ) = ' ';
9600 sv_catsv(PL_linestr,herewas);
9601 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9602 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9606 sv_catsv(tmpstr,PL_linestr);
9611 PL_multi_end = CopLINE(PL_curcop);
9612 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9613 SvPV_shrink_to_cur(tmpstr);
9615 SvREFCNT_dec(herewas);
9617 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9619 else if (PL_encoding)
9620 sv_recode_to_utf8(tmpstr, PL_encoding);
9622 PL_lex_stuff = tmpstr;
9623 yylval.ival = op_type;
9628 takes: current position in input buffer
9629 returns: new position in input buffer
9630 side-effects: yylval and lex_op are set.
9635 <FH> read from filehandle
9636 <pkg::FH> read from package qualified filehandle
9637 <pkg'FH> read from package qualified filehandle
9638 <$fh> read from filehandle in $fh
9644 S_scan_inputsymbol(pTHX_ char *start)
9646 register char *s = start; /* current position in buffer */
9652 d = PL_tokenbuf; /* start of temp holding space */
9653 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9654 end = strchr(s, '\n');
9657 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9659 /* die if we didn't have space for the contents of the <>,
9660 or if it didn't end, or if we see a newline
9663 if (len >= sizeof PL_tokenbuf)
9664 Perl_croak(aTHX_ "Excessively long <> operator");
9666 Perl_croak(aTHX_ "Unterminated <> operator");
9671 Remember, only scalar variables are interpreted as filehandles by
9672 this code. Anything more complex (e.g., <$fh{$num}>) will be
9673 treated as a glob() call.
9674 This code makes use of the fact that except for the $ at the front,
9675 a scalar variable and a filehandle look the same.
9677 if (*d == '$' && d[1]) d++;
9679 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9680 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9683 /* If we've tried to read what we allow filehandles to look like, and
9684 there's still text left, then it must be a glob() and not a getline.
9685 Use scan_str to pull out the stuff between the <> and treat it
9686 as nothing more than a string.
9689 if (d - PL_tokenbuf != len) {
9690 yylval.ival = OP_GLOB;
9692 s = scan_str(start,FALSE,FALSE);
9694 Perl_croak(aTHX_ "Glob not terminated");
9698 bool readline_overriden = FALSE;
9699 GV *gv_readline = Nullgv;
9701 /* we're in a filehandle read situation */
9704 /* turn <> into <ARGV> */
9706 Copy("ARGV",d,5,char);
9708 /* Check whether readline() is overriden */
9709 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
9710 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9712 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9713 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9714 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9715 readline_overriden = TRUE;
9717 /* if <$fh>, create the ops to turn the variable into a
9723 /* try to find it in the pad for this block, otherwise find
9724 add symbol table ops
9726 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
9727 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
9728 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
9729 HEK *stashname = HvNAME_HEK(stash);
9730 SV *sym = sv_2mortal(newSVhek(stashname));
9731 sv_catpvn(sym, "::", 2);
9737 OP *o = newOP(OP_PADSV, 0);
9739 PL_lex_op = readline_overriden
9740 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9741 append_elem(OP_LIST, o,
9742 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9743 : (OP*)newUNOP(OP_READLINE, 0, o);
9752 ? (GV_ADDMULTI | GV_ADDINEVAL)
9755 PL_lex_op = readline_overriden
9756 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9757 append_elem(OP_LIST,
9758 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9759 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9760 : (OP*)newUNOP(OP_READLINE, 0,
9761 newUNOP(OP_RV2SV, 0,
9762 newGVOP(OP_GV, 0, gv)));
9764 if (!readline_overriden)
9765 PL_lex_op->op_flags |= OPf_SPECIAL;
9766 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
9767 yylval.ival = OP_NULL;
9770 /* If it's none of the above, it must be a literal filehandle
9771 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9773 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9774 PL_lex_op = readline_overriden
9775 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9776 append_elem(OP_LIST,
9777 newGVOP(OP_GV, 0, gv),
9778 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9779 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9780 yylval.ival = OP_NULL;
9789 takes: start position in buffer
9790 keep_quoted preserve \ on the embedded delimiter(s)
9791 keep_delims preserve the delimiters around the string
9792 returns: position to continue reading from buffer
9793 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9794 updates the read buffer.
9796 This subroutine pulls a string out of the input. It is called for:
9797 q single quotes q(literal text)
9798 ' single quotes 'literal text'
9799 qq double quotes qq(interpolate $here please)
9800 " double quotes "interpolate $here please"
9801 qx backticks qx(/bin/ls -l)
9802 ` backticks `/bin/ls -l`
9803 qw quote words @EXPORT_OK = qw( func() $spam )
9804 m// regexp match m/this/
9805 s/// regexp substitute s/this/that/
9806 tr/// string transliterate tr/this/that/
9807 y/// string transliterate y/this/that/
9808 ($*@) sub prototypes sub foo ($)
9809 (stuff) sub attr parameters sub foo : attr(stuff)
9810 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9812 In most of these cases (all but <>, patterns and transliterate)
9813 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9814 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9815 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9818 It skips whitespace before the string starts, and treats the first
9819 character as the delimiter. If the delimiter is one of ([{< then
9820 the corresponding "close" character )]}> is used as the closing
9821 delimiter. It allows quoting of delimiters, and if the string has
9822 balanced delimiters ([{<>}]) it allows nesting.
9824 On success, the SV with the resulting string is put into lex_stuff or,
9825 if that is already non-NULL, into lex_repl. The second case occurs only
9826 when parsing the RHS of the special constructs s/// and tr/// (y///).
9827 For convenience, the terminating delimiter character is stuffed into
9832 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9834 SV *sv; /* scalar value: string */
9835 char *tmps; /* temp string, used for delimiter matching */
9836 register char *s = start; /* current position in the buffer */
9837 register char term; /* terminating character */
9838 register char *to; /* current position in the sv's data */
9839 I32 brackets = 1; /* bracket nesting level */
9840 bool has_utf8 = FALSE; /* is there any utf8 content? */
9841 I32 termcode; /* terminating char. code */
9842 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9843 STRLEN termlen; /* length of terminating string */
9844 char *last = NULL; /* last position for nesting bracket */
9846 /* skip space before the delimiter */
9850 /* mark where we are, in case we need to report errors */
9853 /* after skipping whitespace, the next character is the terminator */
9856 termcode = termstr[0] = term;
9860 termcode = utf8_to_uvchr((U8*)s, &termlen);
9861 Copy(s, termstr, termlen, U8);
9862 if (!UTF8_IS_INVARIANT(term))
9866 /* mark where we are */
9867 PL_multi_start = CopLINE(PL_curcop);
9868 PL_multi_open = term;
9870 /* find corresponding closing delimiter */
9871 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9872 termcode = termstr[0] = term = tmps[5];
9874 PL_multi_close = term;
9876 /* create a new SV to hold the contents. 87 is leak category, I'm
9877 assuming. 79 is the SV's initial length. What a random number. */
9879 sv_upgrade(sv, SVt_PVIV);
9880 SvIV_set(sv, termcode);
9881 (void)SvPOK_only(sv); /* validate pointer */
9883 /* move past delimiter and try to read a complete string */
9885 sv_catpvn(sv, s, termlen);
9888 if (PL_encoding && !UTF) {
9892 int offset = s - SvPVX_const(PL_linestr);
9893 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9894 &offset, (char*)termstr, termlen);
9895 const char *ns = SvPVX_const(PL_linestr) + offset;
9896 char *svlast = SvEND(sv) - 1;
9898 for (; s < ns; s++) {
9899 if (*s == '\n' && !PL_rsfp)
9900 CopLINE_inc(PL_curcop);
9903 goto read_more_line;
9905 /* handle quoted delimiters */
9906 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9908 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9910 if ((svlast-1 - t) % 2) {
9914 SvCUR_set(sv, SvCUR(sv) - 1);
9919 if (PL_multi_open == PL_multi_close) {
9927 for (t = w = last; t < svlast; w++, t++) {
9928 /* At here, all closes are "was quoted" one,
9929 so we don't check PL_multi_close. */
9931 if (!keep_quoted && *(t+1) == PL_multi_open)
9936 else if (*t == PL_multi_open)
9944 SvCUR_set(sv, w - SvPVX_const(sv));
9947 if (--brackets <= 0)
9953 SvCUR_set(sv, SvCUR(sv) - 1);
9959 /* extend sv if need be */
9960 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9961 /* set 'to' to the next character in the sv's string */
9962 to = SvPVX(sv)+SvCUR(sv);
9964 /* if open delimiter is the close delimiter read unbridle */
9965 if (PL_multi_open == PL_multi_close) {
9966 for (; s < PL_bufend; s++,to++) {
9967 /* embedded newlines increment the current line number */
9968 if (*s == '\n' && !PL_rsfp)
9969 CopLINE_inc(PL_curcop);
9970 /* handle quoted delimiters */
9971 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9972 if (!keep_quoted && s[1] == term)
9974 /* any other quotes are simply copied straight through */
9978 /* terminate when run out of buffer (the for() condition), or
9979 have found the terminator */
9980 else if (*s == term) {
9983 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9986 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9992 /* if the terminator isn't the same as the start character (e.g.,
9993 matched brackets), we have to allow more in the quoting, and
9994 be prepared for nested brackets.
9997 /* read until we run out of string, or we find the terminator */
9998 for (; s < PL_bufend; s++,to++) {
9999 /* embedded newlines increment the line count */
10000 if (*s == '\n' && !PL_rsfp)
10001 CopLINE_inc(PL_curcop);
10002 /* backslashes can escape the open or closing characters */
10003 if (*s == '\\' && s+1 < PL_bufend) {
10004 if (!keep_quoted &&
10005 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10010 /* allow nested opens and closes */
10011 else if (*s == PL_multi_close && --brackets <= 0)
10013 else if (*s == PL_multi_open)
10015 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10020 /* terminate the copied string and update the sv's end-of-string */
10022 SvCUR_set(sv, to - SvPVX_const(sv));
10025 * this next chunk reads more into the buffer if we're not done yet
10029 break; /* handle case where we are done yet :-) */
10031 #ifndef PERL_STRICT_CR
10032 if (to - SvPVX_const(sv) >= 2) {
10033 if ((to[-2] == '\r' && to[-1] == '\n') ||
10034 (to[-2] == '\n' && to[-1] == '\r'))
10038 SvCUR_set(sv, to - SvPVX_const(sv));
10040 else if (to[-1] == '\r')
10043 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10048 /* if we're out of file, or a read fails, bail and reset the current
10049 line marker so we can report where the unterminated string began
10052 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10054 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10057 /* we read a line, so increment our line counter */
10058 CopLINE_inc(PL_curcop);
10060 /* update debugger info */
10061 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10062 SV *sv = NEWSV(88,0);
10064 sv_upgrade(sv, SVt_PVMG);
10065 sv_setsv(sv,PL_linestr);
10066 (void)SvIOK_on(sv);
10068 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10071 /* having changed the buffer, we must update PL_bufend */
10072 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10073 PL_last_lop = PL_last_uni = Nullch;
10076 /* at this point, we have successfully read the delimited string */
10078 if (!PL_encoding || UTF) {
10080 sv_catpvn(sv, s, termlen);
10083 if (has_utf8 || PL_encoding)
10086 PL_multi_end = CopLINE(PL_curcop);
10088 /* if we allocated too much space, give some back */
10089 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10090 SvLEN_set(sv, SvCUR(sv) + 1);
10091 SvPV_renew(sv, SvLEN(sv));
10094 /* decide whether this is the first or second quoted string we've read
10107 takes: pointer to position in buffer
10108 returns: pointer to new position in buffer
10109 side-effects: builds ops for the constant in yylval.op
10111 Read a number in any of the formats that Perl accepts:
10113 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10114 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10117 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10119 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10122 If it reads a number without a decimal point or an exponent, it will
10123 try converting the number to an integer and see if it can do so
10124 without loss of precision.
10128 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10130 register const char *s = start; /* current position in buffer */
10131 register char *d; /* destination in temp buffer */
10132 register char *e; /* end of temp buffer */
10133 NV nv; /* number read, as a double */
10134 SV *sv = Nullsv; /* place to put the converted number */
10135 bool floatit; /* boolean: int or float? */
10136 const char *lastub = 0; /* position of last underbar */
10137 static char const number_too_long[] = "Number too long";
10139 /* We use the first character to decide what type of number this is */
10143 Perl_croak(aTHX_ "panic: scan_num");
10145 /* if it starts with a 0, it could be an octal number, a decimal in
10146 0.13 disguise, or a hexadecimal number, or a binary number. */
10150 u holds the "number so far"
10151 shift the power of 2 of the base
10152 (hex == 4, octal == 3, binary == 1)
10153 overflowed was the number more than we can hold?
10155 Shift is used when we add a digit. It also serves as an "are
10156 we in octal/hex/binary?" indicator to disallow hex characters
10157 when in octal mode.
10162 bool overflowed = FALSE;
10163 bool just_zero = TRUE; /* just plain 0 or binary number? */
10164 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10165 static const char* const bases[5] =
10166 { "", "binary", "", "octal", "hexadecimal" };
10167 static const char* const Bases[5] =
10168 { "", "Binary", "", "Octal", "Hexadecimal" };
10169 static const char* const maxima[5] =
10171 "0b11111111111111111111111111111111",
10175 const char *base, *Base, *max;
10177 /* check for hex */
10182 } else if (s[1] == 'b') {
10187 /* check for a decimal in disguise */
10188 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10190 /* so it must be octal */
10197 if (ckWARN(WARN_SYNTAX))
10198 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10199 "Misplaced _ in number");
10203 base = bases[shift];
10204 Base = Bases[shift];
10205 max = maxima[shift];
10207 /* read the rest of the number */
10209 /* x is used in the overflow test,
10210 b is the digit we're adding on. */
10215 /* if we don't mention it, we're done */
10219 /* _ are ignored -- but warned about if consecutive */
10221 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10222 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10223 "Misplaced _ in number");
10227 /* 8 and 9 are not octal */
10228 case '8': case '9':
10230 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10234 case '2': case '3': case '4':
10235 case '5': case '6': case '7':
10237 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10240 case '0': case '1':
10241 b = *s++ & 15; /* ASCII digit -> value of digit */
10245 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10246 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10247 /* make sure they said 0x */
10250 b = (*s++ & 7) + 9;
10252 /* Prepare to put the digit we have onto the end
10253 of the number so far. We check for overflows.
10259 x = u << shift; /* make room for the digit */
10261 if ((x >> shift) != u
10262 && !(PL_hints & HINT_NEW_BINARY)) {
10265 if (ckWARN_d(WARN_OVERFLOW))
10266 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10267 "Integer overflow in %s number",
10270 u = x | b; /* add the digit to the end */
10273 n *= nvshift[shift];
10274 /* If an NV has not enough bits in its
10275 * mantissa to represent an UV this summing of
10276 * small low-order numbers is a waste of time
10277 * (because the NV cannot preserve the
10278 * low-order bits anyway): we could just
10279 * remember when did we overflow and in the
10280 * end just multiply n by the right
10288 /* if we get here, we had success: make a scalar value from
10293 /* final misplaced underbar check */
10294 if (s[-1] == '_') {
10295 if (ckWARN(WARN_SYNTAX))
10296 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10301 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
10302 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10303 "%s number > %s non-portable",
10309 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
10310 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10311 "%s number > %s non-portable",
10316 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10317 sv = new_constant(start, s - start, "integer",
10319 else if (PL_hints & HINT_NEW_BINARY)
10320 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10325 handle decimal numbers.
10326 we're also sent here when we read a 0 as the first digit
10328 case '1': case '2': case '3': case '4': case '5':
10329 case '6': case '7': case '8': case '9': case '.':
10332 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10335 /* read next group of digits and _ and copy into d */
10336 while (isDIGIT(*s) || *s == '_') {
10337 /* skip underscores, checking for misplaced ones
10341 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10342 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10343 "Misplaced _ in number");
10347 /* check for end of fixed-length buffer */
10349 Perl_croak(aTHX_ number_too_long);
10350 /* if we're ok, copy the character */
10355 /* final misplaced underbar check */
10356 if (lastub && s == lastub + 1) {
10357 if (ckWARN(WARN_SYNTAX))
10358 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10361 /* read a decimal portion if there is one. avoid
10362 3..5 being interpreted as the number 3. followed
10365 if (*s == '.' && s[1] != '.') {
10370 if (ckWARN(WARN_SYNTAX))
10371 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10372 "Misplaced _ in number");
10376 /* copy, ignoring underbars, until we run out of digits.
10378 for (; isDIGIT(*s) || *s == '_'; s++) {
10379 /* fixed length buffer check */
10381 Perl_croak(aTHX_ number_too_long);
10383 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10384 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10385 "Misplaced _ in number");
10391 /* fractional part ending in underbar? */
10392 if (s[-1] == '_') {
10393 if (ckWARN(WARN_SYNTAX))
10394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10395 "Misplaced _ in number");
10397 if (*s == '.' && isDIGIT(s[1])) {
10398 /* oops, it's really a v-string, but without the "v" */
10404 /* read exponent part, if present */
10405 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10409 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10410 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10412 /* stray preinitial _ */
10414 if (ckWARN(WARN_SYNTAX))
10415 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10416 "Misplaced _ in number");
10420 /* allow positive or negative exponent */
10421 if (*s == '+' || *s == '-')
10424 /* stray initial _ */
10426 if (ckWARN(WARN_SYNTAX))
10427 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10428 "Misplaced _ in number");
10432 /* read digits of exponent */
10433 while (isDIGIT(*s) || *s == '_') {
10436 Perl_croak(aTHX_ number_too_long);
10440 if (ckWARN(WARN_SYNTAX) &&
10441 ((lastub && s == lastub + 1) ||
10442 (!isDIGIT(s[1]) && s[1] != '_')))
10443 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10444 "Misplaced _ in number");
10451 /* make an sv from the string */
10455 We try to do an integer conversion first if no characters
10456 indicating "float" have been found.
10461 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10463 if (flags == IS_NUMBER_IN_UV) {
10465 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10468 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10469 if (uv <= (UV) IV_MIN)
10470 sv_setiv(sv, -(IV)uv);
10477 /* terminate the string */
10479 nv = Atof(PL_tokenbuf);
10483 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10484 (PL_hints & HINT_NEW_INTEGER) )
10485 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10486 (floatit ? "float" : "integer"),
10490 /* if it starts with a v, it could be a v-string */
10493 sv = NEWSV(92,5); /* preallocate storage space */
10494 s = scan_vstring(s,sv);
10498 /* make the op for the constant and return */
10501 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10503 lvalp->opval = Nullop;
10509 S_scan_formline(pTHX_ register char *s)
10511 register char *eol;
10513 SV *stuff = newSVpvn("",0);
10514 bool needargs = FALSE;
10515 bool eofmt = FALSE;
10517 while (!needargs) {
10519 #ifdef PERL_STRICT_CR
10520 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10522 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10524 if (*t == '\n' || t == PL_bufend) {
10529 if (PL_in_eval && !PL_rsfp) {
10530 eol = (char *) memchr(s,'\n',PL_bufend-s);
10535 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10537 for (t = s; t < eol; t++) {
10538 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10540 goto enough; /* ~~ must be first line in formline */
10542 if (*t == '@' || *t == '^')
10546 sv_catpvn(stuff, s, eol-s);
10547 #ifndef PERL_STRICT_CR
10548 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10549 char *end = SvPVX(stuff) + SvCUR(stuff);
10552 SvCUR_set(stuff, SvCUR(stuff) - 1);
10561 s = filter_gets(PL_linestr, PL_rsfp, 0);
10562 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10563 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10564 PL_last_lop = PL_last_uni = Nullch;
10573 if (SvCUR(stuff)) {
10576 PL_lex_state = LEX_NORMAL;
10577 PL_nextval[PL_nexttoke].ival = 0;
10581 PL_lex_state = LEX_FORMLINE;
10583 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10585 else if (PL_encoding)
10586 sv_recode_to_utf8(stuff, PL_encoding);
10588 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10590 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10594 SvREFCNT_dec(stuff);
10596 PL_lex_formbrack = 0;
10607 PL_cshlen = strlen(PL_cshname);
10612 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10614 const I32 oldsavestack_ix = PL_savestack_ix;
10615 CV* outsidecv = PL_compcv;
10618 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10620 SAVEI32(PL_subline);
10621 save_item(PL_subname);
10622 SAVESPTR(PL_compcv);
10624 PL_compcv = (CV*)NEWSV(1104,0);
10625 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10626 CvFLAGS(PL_compcv) |= flags;
10628 PL_subline = CopLINE(PL_curcop);
10629 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10630 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10631 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10633 return oldsavestack_ix;
10637 #pragma segment Perl_yylex
10640 Perl_yywarn(pTHX_ const char *s)
10642 PL_in_eval |= EVAL_WARNONLY;
10644 PL_in_eval &= ~EVAL_WARNONLY;
10649 Perl_yyerror(pTHX_ const char *s)
10651 const char *where = NULL;
10652 const char *context = NULL;
10656 if (!yychar || (yychar == ';' && !PL_rsfp))
10658 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10659 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10660 PL_oldbufptr != PL_bufptr) {
10663 The code below is removed for NetWare because it abends/crashes on NetWare
10664 when the script has error such as not having the closing quotes like:
10665 if ($var eq "value)
10666 Checking of white spaces is anyway done in NetWare code.
10669 while (isSPACE(*PL_oldoldbufptr))
10672 context = PL_oldoldbufptr;
10673 contlen = PL_bufptr - PL_oldoldbufptr;
10675 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10676 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10679 The code below is removed for NetWare because it abends/crashes on NetWare
10680 when the script has error such as not having the closing quotes like:
10681 if ($var eq "value)
10682 Checking of white spaces is anyway done in NetWare code.
10685 while (isSPACE(*PL_oldbufptr))
10688 context = PL_oldbufptr;
10689 contlen = PL_bufptr - PL_oldbufptr;
10691 else if (yychar > 255)
10692 where = "next token ???";
10693 else if (yychar == -2) { /* YYEMPTY */
10694 if (PL_lex_state == LEX_NORMAL ||
10695 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10696 where = "at end of line";
10697 else if (PL_lex_inpat)
10698 where = "within pattern";
10700 where = "within string";
10703 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
10705 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10706 else if (isPRINT_LC(yychar))
10707 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10709 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10710 where = SvPVX_const(where_sv);
10712 msg = sv_2mortal(newSVpv(s, 0));
10713 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10714 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10716 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10718 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10719 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10720 Perl_sv_catpvf(aTHX_ msg,
10721 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10722 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10725 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
10726 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
10729 if (PL_error_count >= 10) {
10730 if (PL_in_eval && SvCUR(ERRSV))
10731 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10732 ERRSV, OutCopFILE(PL_curcop));
10734 Perl_croak(aTHX_ "%s has too many errors.\n",
10735 OutCopFILE(PL_curcop));
10738 PL_in_my_stash = Nullhv;
10742 #pragma segment Main
10746 S_swallow_bom(pTHX_ U8 *s)
10748 const STRLEN slen = SvCUR(PL_linestr);
10751 if (s[1] == 0xFE) {
10752 /* UTF-16 little-endian? (or UTF32-LE?) */
10753 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10754 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
10755 #ifndef PERL_NO_UTF16_FILTER
10756 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
10759 if (PL_bufend > (char*)s) {
10763 filter_add(utf16rev_textfilter, NULL);
10764 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10765 utf16_to_utf8_reversed(s, news,
10766 PL_bufend - (char*)s - 1,
10768 sv_setpvn(PL_linestr, (const char*)news, newlen);
10770 SvUTF8_on(PL_linestr);
10771 s = (U8*)SvPVX(PL_linestr);
10772 PL_bufend = SvPVX(PL_linestr) + newlen;
10775 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
10780 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10781 #ifndef PERL_NO_UTF16_FILTER
10782 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10785 if (PL_bufend > (char *)s) {
10789 filter_add(utf16_textfilter, NULL);
10790 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10791 utf16_to_utf8(s, news,
10792 PL_bufend - (char*)s,
10794 sv_setpvn(PL_linestr, (const char*)news, newlen);
10796 SvUTF8_on(PL_linestr);
10797 s = (U8*)SvPVX(PL_linestr);
10798 PL_bufend = SvPVX(PL_linestr) + newlen;
10801 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
10806 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10807 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10808 s += 3; /* UTF-8 */
10814 if (s[2] == 0xFE && s[3] == 0xFF) {
10815 /* UTF-32 big-endian */
10816 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
10819 else if (s[2] == 0 && s[3] != 0) {
10822 * are a good indicator of UTF-16BE. */
10823 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10828 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10831 * are a good indicator of UTF-16LE. */
10832 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10841 * Restore a source filter.
10845 restore_rsfp(pTHX_ void *f)
10847 PerlIO *fp = (PerlIO*)f;
10849 if (PL_rsfp == PerlIO_stdin())
10850 PerlIO_clearerr(PL_rsfp);
10851 else if (PL_rsfp && (PL_rsfp != fp))
10852 PerlIO_close(PL_rsfp);
10856 #ifndef PERL_NO_UTF16_FILTER
10858 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10860 const STRLEN old = SvCUR(sv);
10861 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10862 DEBUG_P(PerlIO_printf(Perl_debug_log,
10863 "utf16_textfilter(%p): %d %d (%d)\n",
10864 utf16_textfilter, idx, maxlen, (int) count));
10868 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10869 Copy(SvPVX_const(sv), tmps, old, char);
10870 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
10871 SvCUR(sv) - old, &newlen);
10872 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10874 DEBUG_P({sv_dump(sv);});
10879 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10881 const STRLEN old = SvCUR(sv);
10882 const I32 count = FILTER_READ(idx+1, sv, maxlen);
10883 DEBUG_P(PerlIO_printf(Perl_debug_log,
10884 "utf16rev_textfilter(%p): %d %d (%d)\n",
10885 utf16rev_textfilter, idx, maxlen, (int) count));
10889 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10890 Copy(SvPVX_const(sv), tmps, old, char);
10891 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
10892 SvCUR(sv) - old, &newlen);
10893 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10895 DEBUG_P({ sv_dump(sv); });
10901 Returns a pointer to the next character after the parsed
10902 vstring, as well as updating the passed in sv.
10904 Function must be called like
10907 s = scan_vstring(s,sv);
10909 The sv should already be large enough to store the vstring
10910 passed in, for performance reasons.
10915 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
10917 const char *pos = s;
10918 const char *start = s;
10919 if (*pos == 'v') pos++; /* get past 'v' */
10920 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10922 if ( *pos != '.') {
10923 /* this may not be a v-string if followed by => */
10924 const char *next = pos;
10925 while (next < PL_bufend && isSPACE(*next))
10927 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
10928 /* return string not v-string */
10929 sv_setpvn(sv,(char *)s,pos-s);
10930 return (char *)pos;
10934 if (!isALPHA(*pos)) {
10936 U8 tmpbuf[UTF8_MAXBYTES+1];
10939 if (*s == 'v') s++; /* get past 'v' */
10941 sv_setpvn(sv, "", 0);
10946 /* this is atoi() that tolerates underscores */
10947 const char *end = pos;
10949 while (--end >= s) {
10954 rev += (*end - '0') * mult;
10956 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
10957 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10958 "Integer overflow in decimal number");
10962 if (rev > 0x7FFFFFFF)
10963 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10965 /* Append native character for the rev point */
10966 tmpend = uvchr_to_utf8(tmpbuf, rev);
10967 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10968 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10970 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
10976 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10980 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10988 * c-indentation-style: bsd
10989 * c-basic-offset: 4
10990 * indent-tabs-mode: t
10993 * ex: set ts=8 sts=4 sw=4 noet: