3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static char ident_too_long[] = "Identifier too long";
30 static char c_without_g[] = "Use of /c modifier is meaningless without /g";
31 static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
33 static void restore_rsfp(pTHX_ void *f);
34 #ifndef PERL_NO_UTF16_FILTER
35 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 #define XFAKEBRACK 128
42 #ifdef USE_UTF8_SCRIPTS
43 # define UTF (!IN_BYTES)
45 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
48 /* In variables named $^X, these are the legal values for X.
49 * 1999-02-27 mjd-perl-patch@plover.com */
50 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52 /* On MacOS, respect nonbreaking spaces */
53 #ifdef MACOS_TRADITIONAL
54 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
56 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
59 /* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
61 * can get by with a single comparison (if the compiler is smart enough).
64 /* #define LEX_NOTPARSING 11 is done in perl.h. */
67 #define LEX_INTERPNORMAL 9
68 #define LEX_INTERPCASEMOD 8
69 #define LEX_INTERPPUSH 7
70 #define LEX_INTERPSTART 6
71 #define LEX_INTERPEND 5
72 #define LEX_INTERPENDMAYBE 4
73 #define LEX_INTERPCONCAT 3
74 #define LEX_INTERPCONST 2
75 #define LEX_FORMLINE 1
76 #define LEX_KNOWNEXT 0
79 static char* lex_state_names[] = {
100 /* CLINE is a macro that ensures PL_copline has a sane value */
105 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
108 * Convenience functions to return different tokens and prime the
109 * lexer for the next token. They all take an argument.
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
121 * FUN1 : not used, except for not, which isn't a UNIOP
122 * BOop : bitwise or or xor
124 * SHop : shift operator
125 * PWop : power operator
126 * PMop : pattern-matching operator
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
132 * Also see LOP and lop() below.
135 #ifdef DEBUGGING /* Serve -DT. */
136 # define REPORT(retval) tokereport(s,(int)retval)
138 # define REPORT(retval) (retval)
141 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
162 /* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
167 #define UNI2(f,x) return ( \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
176 #define UNI(f) UNI2(f,XTERM)
177 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
179 #define UNIBRACK(f) return ( \
182 PL_last_uni = PL_oldbufptr, \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
187 /* grandfather return to old style */
188 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
192 /* how to interpret the yylval associated with the token */
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
202 static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
269 /* dump the returned token in rv, plus any optional arg in yylval */
272 S_tokereport(pTHX_ char* s, I32 rv)
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
278 SV* report = newSVpvn("<== ", 4);
280 for (p = debug_tokens; p->token; p++) {
281 if (p->token == (int)rv) {
288 Perl_sv_catpv(aTHX_ report, name);
289 else if ((char)rv > ' ' && (char)rv < '~')
290 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
292 Perl_sv_catpv(aTHX_ report, "EOF");
294 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
297 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
300 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
302 case TOKENTYPE_OPNUM:
303 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
304 PL_op_name[yylval.ival]);
307 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
309 case TOKENTYPE_OPVAL:
311 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
312 PL_op_name[yylval.opval->op_type]);
314 Perl_sv_catpv(aTHX_ report, "(opval=null)");
317 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
318 if (s - PL_bufptr > 0)
319 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
321 if (PL_oldbufptr && *PL_oldbufptr)
322 sv_catpv(report, PL_tokenbuf);
324 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
334 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
335 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
339 S_ao(pTHX_ int toketype)
341 if (*PL_bufptr == '=') {
343 if (toketype == ANDAND)
344 yylval.ival = OP_ANDASSIGN;
345 else if (toketype == OROR)
346 yylval.ival = OP_ORASSIGN;
347 else if (toketype == DORDOR)
348 yylval.ival = OP_DORASSIGN;
356 * When Perl expects an operator and finds something else, no_op
357 * prints the warning. It always prints "<something> found where
358 * operator expected. It prints "Missing semicolon on previous line?"
359 * if the surprise occurs at the start of the line. "do you need to
360 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
361 * where the compiler doesn't know if foo is a method call or a function.
362 * It prints "Missing operator before end of line" if there's nothing
363 * after the missing operator, or "... before <...>" if there is something
364 * after the missing operator.
368 S_no_op(pTHX_ char *what, char *s)
370 char *oldbp = PL_bufptr;
371 bool is_first = (PL_oldbufptr == PL_linestart);
377 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
378 if (ckWARN_d(WARN_SYNTAX)) {
380 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
381 "\t(Missing semicolon on previous line?)\n");
382 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
384 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
385 if (t < PL_bufptr && isSPACE(*t))
386 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
387 "\t(Do you need to predeclare %.*s?)\n",
388 t - PL_oldoldbufptr, PL_oldoldbufptr);
392 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
393 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
401 * Complain about missing quote/regexp/heredoc terminator.
402 * If it's called with (char *)NULL then it cauterizes the line buffer.
403 * If we're in a delimited string and the delimiter is a control
404 * character, it's reformatted into a two-char sequence like ^C.
409 S_missingterm(pTHX_ char *s)
414 char *nl = strrchr(s,'\n');
420 iscntrl(PL_multi_close)
422 PL_multi_close < 32 || PL_multi_close == 127
426 tmpbuf[1] = toCTRL(PL_multi_close);
432 *tmpbuf = (char)PL_multi_close;
436 q = strchr(s,'"') ? '\'' : '"';
437 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
445 Perl_deprecate(pTHX_ char *s)
447 if (ckWARN(WARN_DEPRECATED))
448 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
452 Perl_deprecate_old(pTHX_ char *s)
454 /* This function should NOT be called for any new deprecated warnings */
455 /* Use Perl_deprecate instead */
457 /* It is here to maintain backward compatibility with the pre-5.8 */
458 /* warnings category hierarchy. The "deprecated" category used to */
459 /* live under the "syntax" category. It is now a top-level category */
460 /* in its own right. */
462 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
463 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
464 "Use of %s is deprecated", s);
469 * Deprecate a comma-less variable list.
475 deprecate_old("comma-less variable list");
479 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
480 * utf16-to-utf8-reversed.
483 #ifdef PERL_CR_FILTER
487 register char *s = SvPVX(sv);
488 register char *e = s + SvCUR(sv);
489 /* outer loop optimized to do nothing if there are no CR-LFs */
491 if (*s++ == '\r' && *s == '\n') {
492 /* hit a CR-LF, need to copy the rest */
493 register char *d = s - 1;
496 if (*s == '\r' && s[1] == '\n')
507 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
509 I32 count = FILTER_READ(idx+1, sv, maxlen);
510 if (count > 0 && !maxlen)
518 * Initialize variables. Uses the Perl save_stack to save its state (for
519 * recursive calls to the parser).
523 Perl_lex_start(pTHX_ SV *line)
528 SAVEI32(PL_lex_dojoin);
529 SAVEI32(PL_lex_brackets);
530 SAVEI32(PL_lex_casemods);
531 SAVEI32(PL_lex_starts);
532 SAVEI32(PL_lex_state);
533 SAVEVPTR(PL_lex_inpat);
534 SAVEI32(PL_lex_inwhat);
535 if (PL_lex_state == LEX_KNOWNEXT) {
536 I32 toke = PL_nexttoke;
537 while (--toke >= 0) {
538 SAVEI32(PL_nexttype[toke]);
539 SAVEVPTR(PL_nextval[toke]);
541 SAVEI32(PL_nexttoke);
543 SAVECOPLINE(PL_curcop);
546 SAVEPPTR(PL_oldbufptr);
547 SAVEPPTR(PL_oldoldbufptr);
548 SAVEPPTR(PL_last_lop);
549 SAVEPPTR(PL_last_uni);
550 SAVEPPTR(PL_linestart);
551 SAVESPTR(PL_linestr);
552 SAVEGENERICPV(PL_lex_brackstack);
553 SAVEGENERICPV(PL_lex_casestack);
554 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
555 SAVESPTR(PL_lex_stuff);
556 SAVEI32(PL_lex_defer);
557 SAVEI32(PL_sublex_info.sub_inwhat);
558 SAVESPTR(PL_lex_repl);
560 SAVEINT(PL_lex_expect);
562 PL_lex_state = LEX_NORMAL;
566 New(899, PL_lex_brackstack, 120, char);
567 New(899, PL_lex_casestack, 12, char);
569 *PL_lex_casestack = '\0';
572 PL_lex_stuff = Nullsv;
573 PL_lex_repl = Nullsv;
577 PL_sublex_info.sub_inwhat = 0;
579 if (SvREADONLY(PL_linestr))
580 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
581 s = SvPV(PL_linestr, len);
582 if (!len || s[len-1] != ';') {
583 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
584 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
585 sv_catpvn(PL_linestr, "\n;", 2);
587 SvTEMP_off(PL_linestr);
588 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
589 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
590 PL_last_lop = PL_last_uni = Nullch;
596 * Finalizer for lexing operations. Must be called when the parser is
597 * done with the lexer.
603 PL_doextract = FALSE;
608 * This subroutine has nothing to do with tilting, whether at windmills
609 * or pinball tables. Its name is short for "increment line". It
610 * increments the current line number in CopLINE(PL_curcop) and checks
611 * to see whether the line starts with a comment of the form
612 * # line 500 "foo.pm"
613 * If so, it sets the current line number and file to the values in the comment.
617 S_incline(pTHX_ char *s)
624 CopLINE_inc(PL_curcop);
627 while (SPACE_OR_TAB(*s)) s++;
628 if (strnEQ(s, "line", 4))
632 if (SPACE_OR_TAB(*s))
636 while (SPACE_OR_TAB(*s)) s++;
642 while (SPACE_OR_TAB(*s))
644 if (*s == '"' && (t = strchr(s+1, '"'))) {
649 for (t = s; !isSPACE(*t); t++) ;
652 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
654 if (*e != '\n' && *e != '\0')
655 return; /* false alarm */
660 CopFILE_free(PL_curcop);
661 CopFILE_set(PL_curcop, s);
664 CopLINE_set(PL_curcop, atoi(n)-1);
669 * Called to gobble the appropriate amount and type of whitespace.
670 * Skips comments as well.
674 S_skipspace(pTHX_ register char *s)
676 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
677 while (s < PL_bufend && SPACE_OR_TAB(*s))
683 SSize_t oldprevlen, oldoldprevlen;
684 SSize_t oldloplen = 0, oldunilen = 0;
685 while (s < PL_bufend && isSPACE(*s)) {
686 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
691 if (s < PL_bufend && *s == '#') {
692 while (s < PL_bufend && *s != '\n')
696 if (PL_in_eval && !PL_rsfp) {
703 /* only continue to recharge the buffer if we're at the end
704 * of the buffer, we're not reading from a source filter, and
705 * we're in normal lexing mode
707 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
708 PL_lex_state == LEX_FORMLINE)
711 /* try to recharge the buffer */
712 if ((s = filter_gets(PL_linestr, PL_rsfp,
713 (prevlen = SvCUR(PL_linestr)))) == Nullch)
715 /* end of file. Add on the -p or -n magic */
718 ";}continue{print or die qq(-p destination: $!\\n);}");
719 PL_minus_n = PL_minus_p = 0;
721 else if (PL_minus_n) {
722 sv_setpvn(PL_linestr, ";}", 2);
726 sv_setpvn(PL_linestr,";", 1);
728 /* reset variables for next time we lex */
729 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
732 PL_last_lop = PL_last_uni = Nullch;
734 /* Close the filehandle. Could be from -P preprocessor,
735 * STDIN, or a regular file. If we were reading code from
736 * STDIN (because the commandline held no -e or filename)
737 * then we don't close it, we reset it so the code can
738 * read from STDIN too.
741 if (PL_preprocess && !PL_in_eval)
742 (void)PerlProc_pclose(PL_rsfp);
743 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
744 PerlIO_clearerr(PL_rsfp);
746 (void)PerlIO_close(PL_rsfp);
751 /* not at end of file, so we only read another line */
752 /* make corresponding updates to old pointers, for yyerror() */
753 oldprevlen = PL_oldbufptr - PL_bufend;
754 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
756 oldunilen = PL_last_uni - PL_bufend;
758 oldloplen = PL_last_lop - PL_bufend;
759 PL_linestart = PL_bufptr = s + prevlen;
760 PL_bufend = s + SvCUR(PL_linestr);
762 PL_oldbufptr = s + oldprevlen;
763 PL_oldoldbufptr = s + oldoldprevlen;
765 PL_last_uni = s + oldunilen;
767 PL_last_lop = s + oldloplen;
770 /* debugger active and we're not compiling the debugger code,
771 * so store the line into the debugger's array of lines
773 if (PERLDB_LINE && PL_curstash != PL_debstash) {
774 SV *sv = NEWSV(85,0);
776 sv_upgrade(sv, SVt_PVMG);
777 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
780 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
787 * Check the unary operators to ensure there's no ambiguity in how they're
788 * used. An ambiguous piece of code would be:
790 * This doesn't mean rand() + 5. Because rand() is a unary operator,
791 * the +5 is its argument.
800 if (PL_oldoldbufptr != PL_last_uni)
802 while (isSPACE(*PL_last_uni))
804 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
805 if ((t = strchr(s, '(')) && t < PL_bufptr)
807 if (ckWARN_d(WARN_AMBIGUOUS)){
810 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
811 "Warning: Use of \"%s\" without parentheses is ambiguous",
818 * LOP : macro to build a list operator. Its behaviour has been replaced
819 * with a subroutine, S_lop() for which LOP is just another name.
822 #define LOP(f,x) return lop(f,x,s)
826 * Build a list operator (or something that might be one). The rules:
827 * - if we have a next token, then it's a list operator [why?]
828 * - if the next thing is an opening paren, then it's a function
829 * - else it's a list operator
833 S_lop(pTHX_ I32 f, int x, char *s)
839 PL_last_lop = PL_oldbufptr;
840 PL_last_lop_op = (OPCODE)f;
842 return REPORT(LSTOP);
849 return REPORT(LSTOP);
854 * When the lexer realizes it knows the next token (for instance,
855 * it is reordering tokens for the parser) then it can call S_force_next
856 * to know what token to return the next time the lexer is called. Caller
857 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
858 * handles the token correctly.
862 S_force_next(pTHX_ I32 type)
864 PL_nexttype[PL_nexttoke] = type;
866 if (PL_lex_state != LEX_KNOWNEXT) {
867 PL_lex_defer = PL_lex_state;
868 PL_lex_expect = PL_expect;
869 PL_lex_state = LEX_KNOWNEXT;
874 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
876 SV *sv = newSVpvn(start,len);
877 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
884 * When the lexer knows the next thing is a word (for instance, it has
885 * just seen -> and it knows that the next char is a word char, then
886 * it calls S_force_word to stick the next word into the PL_next lookahead.
889 * char *start : buffer position (must be within PL_linestr)
890 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
891 * int check_keyword : if true, Perl checks to make sure the word isn't
892 * a keyword (do this if the word is a label, e.g. goto FOO)
893 * int allow_pack : if true, : characters will also be allowed (require,
895 * int allow_initial_tick : used by the "sub" lexer only.
899 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
904 start = skipspace(start);
906 if (isIDFIRST_lazy_if(s,UTF) ||
907 (allow_pack && *s == ':') ||
908 (allow_initial_tick && *s == '\'') )
910 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
911 if (check_keyword && keyword(PL_tokenbuf, len))
913 if (token == METHOD) {
918 PL_expect = XOPERATOR;
921 PL_nextval[PL_nexttoke].opval
922 = (OP*)newSVOP(OP_CONST,0,
923 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
924 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
932 * Called when the lexer wants $foo *foo &foo etc, but the program
933 * text only contains the "foo" portion. The first argument is a pointer
934 * to the "foo", and the second argument is the type symbol to prefix.
935 * Forces the next token to be a "WORD".
936 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
940 S_force_ident(pTHX_ register char *s, int kind)
943 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
944 PL_nextval[PL_nexttoke].opval = o;
947 o->op_private = OPpCONST_ENTERED;
948 /* XXX see note in pp_entereval() for why we forgo typo
949 warnings if the symbol must be introduced in an eval.
951 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
952 kind == '$' ? SVt_PV :
953 kind == '@' ? SVt_PVAV :
954 kind == '%' ? SVt_PVHV :
962 Perl_str_to_version(pTHX_ SV *sv)
967 char *start = SvPVx(sv,len);
968 bool utf = SvUTF8(sv) ? TRUE : FALSE;
969 char *end = start + len;
970 while (start < end) {
974 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
979 retval += ((NV)n)/nshift;
988 * Forces the next token to be a version number.
989 * If the next token appears to be an invalid version number, (e.g. "v2b"),
990 * and if "guessing" is TRUE, then no new token is created (and the caller
991 * must use an alternative parsing method).
995 S_force_version(pTHX_ char *s, int guessing)
997 OP *version = Nullop;
1006 while (isDIGIT(*d) || *d == '_' || *d == '.')
1008 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1010 s = scan_num(s, &yylval);
1011 version = yylval.opval;
1012 ver = cSVOPx(version)->op_sv;
1013 if (SvPOK(ver) && !SvNIOK(ver)) {
1014 (void)SvUPGRADE(ver, SVt_PVNV);
1015 SvNVX(ver) = str_to_version(ver);
1016 SvNOK_on(ver); /* hint that it is a version */
1023 /* NOTE: The parser sees the package name and the VERSION swapped */
1024 PL_nextval[PL_nexttoke].opval = version;
1032 * Tokenize a quoted string passed in as an SV. It finds the next
1033 * chunk, up to end of string or a backslash. It may make a new
1034 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1039 S_tokeq(pTHX_ SV *sv)
1042 register char *send;
1050 s = SvPV_force(sv, len);
1051 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1054 while (s < send && *s != '\\')
1059 if ( PL_hints & HINT_NEW_STRING ) {
1060 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
1066 if (s + 1 < send && (s[1] == '\\'))
1067 s++; /* all that, just for this */
1072 SvCUR_set(sv, d - SvPVX(sv));
1074 if ( PL_hints & HINT_NEW_STRING )
1075 return new_constant(NULL, 0, "q", sv, pv, "q");
1080 * Now come three functions related to double-quote context,
1081 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1082 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1083 * interact with PL_lex_state, and create fake ( ... ) argument lists
1084 * to handle functions and concatenation.
1085 * They assume that whoever calls them will be setting up a fake
1086 * join call, because each subthing puts a ',' after it. This lets
1089 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1091 * (I'm not sure whether the spurious commas at the end of lcfirst's
1092 * arguments and join's arguments are created or not).
1097 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1099 * Pattern matching will set PL_lex_op to the pattern-matching op to
1100 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1102 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1104 * Everything else becomes a FUNC.
1106 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1107 * had an OP_CONST or OP_READLINE). This just sets us up for a
1108 * call to S_sublex_push().
1112 S_sublex_start(pTHX)
1114 register I32 op_type = yylval.ival;
1116 if (op_type == OP_NULL) {
1117 yylval.opval = PL_lex_op;
1121 if (op_type == OP_CONST || op_type == OP_READLINE) {
1122 SV *sv = tokeq(PL_lex_stuff);
1124 if (SvTYPE(sv) == SVt_PVIV) {
1125 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1131 nsv = newSVpvn(p, len);
1137 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1138 PL_lex_stuff = Nullsv;
1139 /* Allow <FH> // "foo" */
1140 if (op_type == OP_READLINE)
1141 PL_expect = XTERMORDORDOR;
1145 PL_sublex_info.super_state = PL_lex_state;
1146 PL_sublex_info.sub_inwhat = op_type;
1147 PL_sublex_info.sub_op = PL_lex_op;
1148 PL_lex_state = LEX_INTERPPUSH;
1152 yylval.opval = PL_lex_op;
1162 * Create a new scope to save the lexing state. The scope will be
1163 * ended in S_sublex_done. Returns a '(', starting the function arguments
1164 * to the uc, lc, etc. found before.
1165 * Sets PL_lex_state to LEX_INTERPCONCAT.
1173 PL_lex_state = PL_sublex_info.super_state;
1174 SAVEI32(PL_lex_dojoin);
1175 SAVEI32(PL_lex_brackets);
1176 SAVEI32(PL_lex_casemods);
1177 SAVEI32(PL_lex_starts);
1178 SAVEI32(PL_lex_state);
1179 SAVEVPTR(PL_lex_inpat);
1180 SAVEI32(PL_lex_inwhat);
1181 SAVECOPLINE(PL_curcop);
1182 SAVEPPTR(PL_bufptr);
1183 SAVEPPTR(PL_bufend);
1184 SAVEPPTR(PL_oldbufptr);
1185 SAVEPPTR(PL_oldoldbufptr);
1186 SAVEPPTR(PL_last_lop);
1187 SAVEPPTR(PL_last_uni);
1188 SAVEPPTR(PL_linestart);
1189 SAVESPTR(PL_linestr);
1190 SAVEGENERICPV(PL_lex_brackstack);
1191 SAVEGENERICPV(PL_lex_casestack);
1193 PL_linestr = PL_lex_stuff;
1194 PL_lex_stuff = Nullsv;
1196 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1197 = SvPVX(PL_linestr);
1198 PL_bufend += SvCUR(PL_linestr);
1199 PL_last_lop = PL_last_uni = Nullch;
1200 SAVEFREESV(PL_linestr);
1202 PL_lex_dojoin = FALSE;
1203 PL_lex_brackets = 0;
1204 New(899, PL_lex_brackstack, 120, char);
1205 New(899, PL_lex_casestack, 12, char);
1206 PL_lex_casemods = 0;
1207 *PL_lex_casestack = '\0';
1209 PL_lex_state = LEX_INTERPCONCAT;
1210 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1212 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1213 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1214 PL_lex_inpat = PL_sublex_info.sub_op;
1216 PL_lex_inpat = Nullop;
1223 * Restores lexer state after a S_sublex_push.
1229 if (!PL_lex_starts++) {
1230 SV *sv = newSVpvn("",0);
1231 if (SvUTF8(PL_linestr))
1233 PL_expect = XOPERATOR;
1234 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1238 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1239 PL_lex_state = LEX_INTERPCASEMOD;
1243 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1244 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1245 PL_linestr = PL_lex_repl;
1247 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1248 PL_bufend += SvCUR(PL_linestr);
1249 PL_last_lop = PL_last_uni = Nullch;
1250 SAVEFREESV(PL_linestr);
1251 PL_lex_dojoin = FALSE;
1252 PL_lex_brackets = 0;
1253 PL_lex_casemods = 0;
1254 *PL_lex_casestack = '\0';
1256 if (SvEVALED(PL_lex_repl)) {
1257 PL_lex_state = LEX_INTERPNORMAL;
1259 /* we don't clear PL_lex_repl here, so that we can check later
1260 whether this is an evalled subst; that means we rely on the
1261 logic to ensure sublex_done() is called again only via the
1262 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1265 PL_lex_state = LEX_INTERPCONCAT;
1266 PL_lex_repl = Nullsv;
1272 PL_bufend = SvPVX(PL_linestr);
1273 PL_bufend += SvCUR(PL_linestr);
1274 PL_expect = XOPERATOR;
1275 PL_sublex_info.sub_inwhat = 0;
1283 Extracts a pattern, double-quoted string, or transliteration. This
1286 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1287 processing a pattern (PL_lex_inpat is true), a transliteration
1288 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1290 Returns a pointer to the character scanned up to. Iff this is
1291 advanced from the start pointer supplied (ie if anything was
1292 successfully parsed), will leave an OP for the substring scanned
1293 in yylval. Caller must intuit reason for not parsing further
1294 by looking at the next characters herself.
1298 double-quoted style: \r and \n
1299 regexp special ones: \D \s
1301 backrefs: \1 (deprecated in substitution replacements)
1302 case and quoting: \U \Q \E
1303 stops on @ and $, but not for $ as tail anchor
1305 In transliterations:
1306 characters are VERY literal, except for - not at the start or end
1307 of the string, which indicates a range. scan_const expands the
1308 range to the full set of intermediate characters.
1310 In double-quoted strings:
1312 double-quoted style: \r and \n
1314 backrefs: \1 (deprecated)
1315 case and quoting: \U \Q \E
1318 scan_const does *not* construct ops to handle interpolated strings.
1319 It stops processing as soon as it finds an embedded $ or @ variable
1320 and leaves it to the caller to work out what's going on.
1322 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1324 $ in pattern could be $foo or could be tail anchor. Assumption:
1325 it's a tail anchor if $ is the last thing in the string, or if it's
1326 followed by one of ")| \n\t"
1328 \1 (backreferences) are turned into $1
1330 The structure of the code is
1331 while (there's a character to process) {
1332 handle transliteration ranges
1333 skip regexp comments
1334 skip # initiated comments in //x patterns
1335 check for embedded @foo
1336 check for embedded scalars
1338 leave intact backslashes from leave (below)
1339 deprecate \1 in strings and sub replacements
1340 handle string-changing backslashes \l \U \Q \E, etc.
1341 switch (what was escaped) {
1342 handle - in a transliteration (becomes a literal -)
1343 handle \132 octal characters
1344 handle 0x15 hex characters
1345 handle \cV (control V)
1346 handle printf backslashes (\f, \r, \n, etc)
1348 } (end if backslash)
1349 } (end while character to read)
1354 S_scan_const(pTHX_ char *start)
1356 register char *send = PL_bufend; /* end of the constant */
1357 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1358 register char *s = start; /* start of the constant */
1359 register char *d = SvPVX(sv); /* destination for copies */
1360 bool dorange = FALSE; /* are we in a translit range? */
1361 bool didrange = FALSE; /* did we just finish a range? */
1362 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1363 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1366 const char *leaveit = /* set of acceptably-backslashed characters */
1368 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1371 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1372 /* If we are doing a trans and we know we want UTF8 set expectation */
1373 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1374 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1378 while (s < send || dorange) {
1379 /* get transliterations out of the way (they're most literal) */
1380 if (PL_lex_inwhat == OP_TRANS) {
1381 /* expand a range A-Z to the full set of characters. AIE! */
1383 I32 i; /* current expanded character */
1384 I32 min; /* first character in range */
1385 I32 max; /* last character in range */
1388 char *c = (char*)utf8_hop((U8*)d, -1);
1392 *c = (char)UTF_TO_NATIVE(0xff);
1393 /* mark the range as done, and continue */
1399 i = d - SvPVX(sv); /* remember current offset */
1400 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1401 d = SvPVX(sv) + i; /* refresh d after realloc */
1402 d -= 2; /* eat the first char and the - */
1404 min = (U8)*d; /* first char in range */
1405 max = (U8)d[1]; /* last char in range */
1409 "Invalid range \"%c-%c\" in transliteration operator",
1410 (char)min, (char)max);
1414 if ((isLOWER(min) && isLOWER(max)) ||
1415 (isUPPER(min) && isUPPER(max))) {
1417 for (i = min; i <= max; i++)
1419 *d++ = NATIVE_TO_NEED(has_utf8,i);
1421 for (i = min; i <= max; i++)
1423 *d++ = NATIVE_TO_NEED(has_utf8,i);
1428 for (i = min; i <= max; i++)
1431 /* mark the range as done, and continue */
1437 /* range begins (ignore - as first or last char) */
1438 else if (*s == '-' && s+1 < send && s != start) {
1440 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1443 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1455 /* if we get here, we're not doing a transliteration */
1457 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1458 except for the last char, which will be done separately. */
1459 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1461 while (s+1 < send && *s != ')')
1462 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1464 else if (s[2] == '{' /* This should match regcomp.c */
1465 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1468 char *regparse = s + (s[2] == '{' ? 3 : 4);
1471 while (count && (c = *regparse)) {
1472 if (c == '\\' && regparse[1])
1480 if (*regparse != ')')
1481 regparse--; /* Leave one char for continuation. */
1482 while (s < regparse)
1483 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1487 /* likewise skip #-initiated comments in //x patterns */
1488 else if (*s == '#' && PL_lex_inpat &&
1489 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1490 while (s+1 < send && *s != '\n')
1491 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1494 /* check for embedded arrays
1495 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1497 else if (*s == '@' && s[1]
1498 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1501 /* check for embedded scalars. only stop if we're sure it's a
1504 else if (*s == '$') {
1505 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1507 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1508 break; /* in regexp, $ might be tail anchor */
1511 /* End of else if chain - OP_TRANS rejoin rest */
1514 if (*s == '\\' && s+1 < send) {
1517 /* some backslashes we leave behind */
1518 if (*leaveit && *s && strchr(leaveit, *s)) {
1519 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1520 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1524 /* deprecate \1 in strings and substitution replacements */
1525 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1526 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1528 if (ckWARN(WARN_SYNTAX))
1529 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1534 /* string-change backslash escapes */
1535 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1540 /* if we get here, it's either a quoted -, or a digit */
1543 /* quoted - in transliterations */
1545 if (PL_lex_inwhat == OP_TRANS) {
1552 if (ckWARN(WARN_MISC) &&
1555 Perl_warner(aTHX_ packWARN(WARN_MISC),
1556 "Unrecognized escape \\%c passed through",
1558 /* default action is to copy the quoted character */
1559 goto default_action;
1562 /* \132 indicates an octal constant */
1563 case '0': case '1': case '2': case '3':
1564 case '4': case '5': case '6': case '7':
1568 uv = grok_oct(s, &len, &flags, NULL);
1571 goto NUM_ESCAPE_INSERT;
1573 /* \x24 indicates a hex constant */
1577 char* e = strchr(s, '}');
1578 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1579 PERL_SCAN_DISALLOW_PREFIX;
1584 yyerror("Missing right brace on \\x{}");
1588 uv = grok_hex(s, &len, &flags, NULL);
1594 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1595 uv = grok_hex(s, &len, &flags, NULL);
1601 /* Insert oct or hex escaped character.
1602 * There will always enough room in sv since such
1603 * escapes will be longer than any UTF-8 sequence
1604 * they can end up as. */
1606 /* We need to map to chars to ASCII before doing the tests
1609 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1610 if (!has_utf8 && uv > 255) {
1611 /* Might need to recode whatever we have
1612 * accumulated so far if it contains any
1615 * (Can't we keep track of that and avoid
1616 * this rescan? --jhi)
1620 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1621 if (!NATIVE_IS_INVARIANT(*c)) {
1626 STRLEN offset = d - SvPVX(sv);
1628 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1632 while (src >= (U8 *)SvPVX(sv)) {
1633 if (!NATIVE_IS_INVARIANT(*src)) {
1634 U8 ch = NATIVE_TO_ASCII(*src);
1635 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1636 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1646 if (has_utf8 || uv > 255) {
1647 d = (char*)uvchr_to_utf8((U8*)d, uv);
1649 if (PL_lex_inwhat == OP_TRANS &&
1650 PL_sublex_info.sub_op) {
1651 PL_sublex_info.sub_op->op_private |=
1652 (PL_lex_repl ? OPpTRANS_FROM_UTF
1665 /* \N{LATIN SMALL LETTER A} is a named character */
1669 char* e = strchr(s, '}');
1675 yyerror("Missing right brace on \\N{}");
1679 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1681 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1682 PERL_SCAN_DISALLOW_PREFIX;
1685 uv = grok_hex(s, &len, &flags, NULL);
1687 goto NUM_ESCAPE_INSERT;
1689 res = newSVpvn(s + 1, e - s - 1);
1690 res = new_constant( Nullch, 0, "charnames",
1691 res, Nullsv, "\\N{...}" );
1693 sv_utf8_upgrade(res);
1694 str = SvPV(res,len);
1695 #ifdef EBCDIC_NEVER_MIND
1696 /* charnames uses pack U and that has been
1697 * recently changed to do the below uni->native
1698 * mapping, so this would be redundant (and wrong,
1699 * the code point would be doubly converted).
1700 * But leave this in just in case the pack U change
1701 * gets revoked, but the semantics is still
1702 * desireable for charnames. --jhi */
1704 UV uv = utf8_to_uvchr((U8*)str, 0);
1707 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1709 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1710 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1711 str = SvPV(res, len);
1715 if (!has_utf8 && SvUTF8(res)) {
1716 char *ostart = SvPVX(sv);
1717 SvCUR_set(sv, d - ostart);
1720 sv_utf8_upgrade(sv);
1721 /* this just broke our allocation above... */
1722 SvGROW(sv, (STRLEN)(send - start));
1723 d = SvPVX(sv) + SvCUR(sv);
1726 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1727 char *odest = SvPVX(sv);
1729 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1730 d = SvPVX(sv) + (d - odest);
1732 Copy(str, d, len, char);
1739 yyerror("Missing braces on \\N{}");
1742 /* \c is a control character */
1751 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1754 yyerror("Missing control char name in \\c");
1758 /* printf-style backslashes, formfeeds, newlines, etc */
1760 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1763 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1766 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1769 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1772 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1775 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1778 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1784 } /* end if (backslash) */
1787 /* If we started with encoded form, or already know we want it
1788 and then encode the next character */
1789 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1791 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1792 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1795 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1796 STRLEN off = d - SvPVX(sv);
1797 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1799 d = (char*)uvchr_to_utf8((U8*)d, uv);
1803 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1805 } /* while loop to process each character */
1807 /* terminate the string and set up the sv */
1809 SvCUR_set(sv, d - SvPVX(sv));
1810 if (SvCUR(sv) >= SvLEN(sv))
1811 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1814 if (PL_encoding && !has_utf8) {
1815 sv_recode_to_utf8(sv, PL_encoding);
1821 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1822 PL_sublex_info.sub_op->op_private |=
1823 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1827 /* shrink the sv if we allocated more than we used */
1828 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1829 SvLEN_set(sv, SvCUR(sv) + 1);
1830 Renew(SvPVX(sv), SvLEN(sv), char);
1833 /* return the substring (via yylval) only if we parsed anything */
1834 if (s > PL_bufptr) {
1835 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1836 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1838 ( PL_lex_inwhat == OP_TRANS
1840 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1843 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1850 * Returns TRUE if there's more to the expression (e.g., a subscript),
1853 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1855 * ->[ and ->{ return TRUE
1856 * { and [ outside a pattern are always subscripts, so return TRUE
1857 * if we're outside a pattern and it's not { or [, then return FALSE
1858 * if we're in a pattern and the first char is a {
1859 * {4,5} (any digits around the comma) returns FALSE
1860 * if we're in a pattern and the first char is a [
1862 * [SOMETHING] has a funky algorithm to decide whether it's a
1863 * character class or not. It has to deal with things like
1864 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1865 * anything else returns TRUE
1868 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1871 S_intuit_more(pTHX_ register char *s)
1873 if (PL_lex_brackets)
1875 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1877 if (*s != '{' && *s != '[')
1882 /* In a pattern, so maybe we have {n,m}. */
1899 /* On the other hand, maybe we have a character class */
1902 if (*s == ']' || *s == '^')
1905 /* this is terrifying, and it works */
1906 int weight = 2; /* let's weigh the evidence */
1908 unsigned char un_char = 255, last_un_char;
1909 char *send = strchr(s,']');
1910 char tmpbuf[sizeof PL_tokenbuf * 4];
1912 if (!send) /* has to be an expression */
1915 Zero(seen,256,char);
1918 else if (isDIGIT(*s)) {
1920 if (isDIGIT(s[1]) && s[2] == ']')
1926 for (; s < send; s++) {
1927 last_un_char = un_char;
1928 un_char = (unsigned char)*s;
1933 weight -= seen[un_char] * 10;
1934 if (isALNUM_lazy_if(s+1,UTF)) {
1935 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1936 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1941 else if (*s == '$' && s[1] &&
1942 strchr("[#!%*<>()-=",s[1])) {
1943 if (/*{*/ strchr("])} =",s[2]))
1952 if (strchr("wds]",s[1]))
1954 else if (seen['\''] || seen['"'])
1956 else if (strchr("rnftbxcav",s[1]))
1958 else if (isDIGIT(s[1])) {
1960 while (s[1] && isDIGIT(s[1]))
1970 if (strchr("aA01! ",last_un_char))
1972 if (strchr("zZ79~",s[1]))
1974 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1975 weight -= 5; /* cope with negative subscript */
1978 if (!isALNUM(last_un_char)
1979 && !(last_un_char == '$' || last_un_char == '@'
1980 || last_un_char == '&')
1981 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
1986 if (keyword(tmpbuf, d - tmpbuf))
1989 if (un_char == last_un_char + 1)
1991 weight -= seen[un_char];
1996 if (weight >= 0) /* probably a character class */
2006 * Does all the checking to disambiguate
2008 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2009 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2011 * First argument is the stuff after the first token, e.g. "bar".
2013 * Not a method if bar is a filehandle.
2014 * Not a method if foo is a subroutine prototyped to take a filehandle.
2015 * Not a method if it's really "Foo $bar"
2016 * Method if it's "foo $bar"
2017 * Not a method if it's really "print foo $bar"
2018 * Method if it's really "foo package::" (interpreted as package->foo)
2019 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2020 * Not a method if bar is a filehandle or package, but is quoted with
2025 S_intuit_method(pTHX_ char *start, GV *gv)
2027 char *s = start + (*start == '$');
2028 char tmpbuf[sizeof PL_tokenbuf];
2036 if ((cv = GvCVu(gv))) {
2037 char *proto = SvPVX(cv);
2047 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2048 /* start is the beginning of the possible filehandle/object,
2049 * and s is the end of it
2050 * tmpbuf is a copy of it
2053 if (*start == '$') {
2054 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2059 return *s == '(' ? FUNCMETH : METHOD;
2061 if (!keyword(tmpbuf, len)) {
2062 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2067 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
2068 if (indirgv && GvCVu(indirgv))
2070 /* filehandle or package name makes it a method */
2071 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2073 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2074 return 0; /* no assumptions -- "=>" quotes bearword */
2076 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2077 newSVpvn(tmpbuf,len));
2078 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2082 return *s == '(' ? FUNCMETH : METHOD;
2090 * Return a string of Perl code to load the debugger. If PERL5DB
2091 * is set, it will return the contents of that, otherwise a
2092 * compile-time require of perl5db.pl.
2099 char *pdb = PerlEnv_getenv("PERL5DB");
2103 SETERRNO(0,SS_NORMAL);
2104 return "BEGIN { require 'perl5db.pl' }";
2110 /* Encoded script support. filter_add() effectively inserts a
2111 * 'pre-processing' function into the current source input stream.
2112 * Note that the filter function only applies to the current source file
2113 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2115 * The datasv parameter (which may be NULL) can be used to pass
2116 * private data to this instance of the filter. The filter function
2117 * can recover the SV using the FILTER_DATA macro and use it to
2118 * store private buffers and state information.
2120 * The supplied datasv parameter is upgraded to a PVIO type
2121 * and the IoDIRP/IoANY field is used to store the function pointer,
2122 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2123 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2124 * private use must be set using malloc'd pointers.
2128 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2133 if (!PL_rsfp_filters)
2134 PL_rsfp_filters = newAV();
2136 datasv = NEWSV(255,0);
2137 if (!SvUPGRADE(datasv, SVt_PVIO))
2138 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
2139 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
2140 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2141 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2142 (void*)funcp, SvPV_nolen(datasv)));
2143 av_unshift(PL_rsfp_filters, 1);
2144 av_store(PL_rsfp_filters, 0, datasv) ;
2149 /* Delete most recently added instance of this filter function. */
2151 Perl_filter_del(pTHX_ filter_t funcp)
2154 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
2155 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2157 /* if filter is on top of stack (usual case) just pop it off */
2158 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2159 if (IoANY(datasv) == (void *)funcp) {
2160 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2161 IoANY(datasv) = (void *)NULL;
2162 sv_free(av_pop(PL_rsfp_filters));
2166 /* we need to search for the correct entry and clear it */
2167 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2171 /* Invoke the idxth filter function for the current rsfp. */
2172 /* maxlen 0 = read one text line */
2174 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2179 if (!PL_rsfp_filters)
2181 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2182 /* Provide a default input filter to make life easy. */
2183 /* Note that we append to the line. This is handy. */
2184 DEBUG_P(PerlIO_printf(Perl_debug_log,
2185 "filter_read %d: from rsfp\n", idx));
2189 int old_len = SvCUR(buf_sv) ;
2191 /* ensure buf_sv is large enough */
2192 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2193 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2194 if (PerlIO_error(PL_rsfp))
2195 return -1; /* error */
2197 return 0 ; /* end of file */
2199 SvCUR_set(buf_sv, old_len + len) ;
2202 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2203 if (PerlIO_error(PL_rsfp))
2204 return -1; /* error */
2206 return 0 ; /* end of file */
2209 return SvCUR(buf_sv);
2211 /* Skip this filter slot if filter has been deleted */
2212 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2213 DEBUG_P(PerlIO_printf(Perl_debug_log,
2214 "filter_read %d: skipped (filter deleted)\n",
2216 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2218 /* Get function pointer hidden within datasv */
2219 funcp = (filter_t)IoANY(datasv);
2220 DEBUG_P(PerlIO_printf(Perl_debug_log,
2221 "filter_read %d: via function %p (%s)\n",
2222 idx, (void*)funcp, SvPV_nolen(datasv)));
2223 /* Call function. The function is expected to */
2224 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2225 /* Return: <0:error, =0:eof, >0:not eof */
2226 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2230 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2232 #ifdef PERL_CR_FILTER
2233 if (!PL_rsfp_filters) {
2234 filter_add(S_cr_textfilter,NULL);
2237 if (PL_rsfp_filters) {
2239 SvCUR_set(sv, 0); /* start with empty line */
2240 if (FILTER_READ(0, sv, 0) > 0)
2241 return ( SvPVX(sv) ) ;
2246 return (sv_gets(sv, fp, append));
2250 S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
2254 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2258 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2259 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2261 return GvHV(gv); /* Foo:: */
2264 /* use constant CLASS => 'MyClass' */
2265 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2267 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2268 pkgname = SvPV_nolen(sv);
2272 return gv_stashpv(pkgname, FALSE);
2276 static char* exp_name[] =
2277 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2278 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2285 Works out what to call the token just pulled out of the input
2286 stream. The yacc parser takes care of taking the ops we return and
2287 stitching them into a tree.
2293 if read an identifier
2294 if we're in a my declaration
2295 croak if they tried to say my($foo::bar)
2296 build the ops for a my() declaration
2297 if it's an access to a my() variable
2298 are we in a sort block?
2299 croak if my($a); $a <=> $b
2300 build ops for access to a my() variable
2301 if in a dq string, and they've said @foo and we can't find @foo
2303 build ops for a bareword
2304 if we already built the token before, use it.
2309 #pragma segment Perl_yylex
2314 register char *s = PL_bufptr;
2321 I32 orig_keyword = 0;
2324 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2325 lex_state_names[PL_lex_state]);
2327 /* check if there's an identifier for us to look at */
2328 if (PL_pending_ident)
2329 return REPORT(S_pending_ident(aTHX));
2331 /* no identifier pending identification */
2333 switch (PL_lex_state) {
2335 case LEX_NORMAL: /* Some compilers will produce faster */
2336 case LEX_INTERPNORMAL: /* code if we comment these out. */
2340 /* when we've already built the next token, just pull it out of the queue */
2343 yylval = PL_nextval[PL_nexttoke];
2345 PL_lex_state = PL_lex_defer;
2346 PL_expect = PL_lex_expect;
2347 PL_lex_defer = LEX_NORMAL;
2349 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2350 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2351 (IV)PL_nexttype[PL_nexttoke]); });
2353 return REPORT(PL_nexttype[PL_nexttoke]);
2355 /* interpolated case modifiers like \L \U, including \Q and \E.
2356 when we get here, PL_bufptr is at the \
2358 case LEX_INTERPCASEMOD:
2360 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2361 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2363 /* handle \E or end of string */
2364 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2368 if (PL_lex_casemods) {
2369 oldmod = PL_lex_casestack[--PL_lex_casemods];
2370 PL_lex_casestack[PL_lex_casemods] = '\0';
2372 if (PL_bufptr != PL_bufend
2373 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2375 PL_lex_state = LEX_INTERPCONCAT;
2379 if (PL_bufptr != PL_bufend)
2381 PL_lex_state = LEX_INTERPCONCAT;
2385 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2386 "### Saw case modifier at '%s'\n", PL_bufptr); });
2388 if (s[1] == '\\' && s[2] == 'E') {
2390 PL_lex_state = LEX_INTERPCONCAT;
2394 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2395 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2396 if ((*s == 'L' || *s == 'U') &&
2397 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2398 PL_lex_casestack[--PL_lex_casemods] = '\0';
2401 if (PL_lex_casemods > 10)
2402 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2403 PL_lex_casestack[PL_lex_casemods++] = *s;
2404 PL_lex_casestack[PL_lex_casemods] = '\0';
2405 PL_lex_state = LEX_INTERPCONCAT;
2406 PL_nextval[PL_nexttoke].ival = 0;
2409 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2411 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2413 PL_nextval[PL_nexttoke].ival = OP_LC;
2415 PL_nextval[PL_nexttoke].ival = OP_UC;
2417 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2419 Perl_croak(aTHX_ "panic: yylex");
2423 if (PL_lex_starts) {
2426 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2427 if (PL_lex_casemods == 1 && PL_lex_inpat)
2436 case LEX_INTERPPUSH:
2437 return REPORT(sublex_push());
2439 case LEX_INTERPSTART:
2440 if (PL_bufptr == PL_bufend)
2441 return REPORT(sublex_done());
2442 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2443 "### Interpolated variable at '%s'\n", PL_bufptr); });
2445 PL_lex_dojoin = (*PL_bufptr == '@');
2446 PL_lex_state = LEX_INTERPNORMAL;
2447 if (PL_lex_dojoin) {
2448 PL_nextval[PL_nexttoke].ival = 0;
2450 force_ident("\"", '$');
2451 PL_nextval[PL_nexttoke].ival = 0;
2453 PL_nextval[PL_nexttoke].ival = 0;
2455 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2458 if (PL_lex_starts++) {
2460 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2461 if (!PL_lex_casemods && PL_lex_inpat)
2468 case LEX_INTERPENDMAYBE:
2469 if (intuit_more(PL_bufptr)) {
2470 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2476 if (PL_lex_dojoin) {
2477 PL_lex_dojoin = FALSE;
2478 PL_lex_state = LEX_INTERPCONCAT;
2481 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2482 && SvEVALED(PL_lex_repl))
2484 if (PL_bufptr != PL_bufend)
2485 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2486 PL_lex_repl = Nullsv;
2489 case LEX_INTERPCONCAT:
2491 if (PL_lex_brackets)
2492 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2494 if (PL_bufptr == PL_bufend)
2495 return REPORT(sublex_done());
2497 if (SvIVX(PL_linestr) == '\'') {
2498 SV *sv = newSVsv(PL_linestr);
2501 else if ( PL_hints & HINT_NEW_RE )
2502 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2503 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2507 s = scan_const(PL_bufptr);
2509 PL_lex_state = LEX_INTERPCASEMOD;
2511 PL_lex_state = LEX_INTERPSTART;
2514 if (s != PL_bufptr) {
2515 PL_nextval[PL_nexttoke] = yylval;
2518 if (PL_lex_starts++) {
2519 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2520 if (!PL_lex_casemods && PL_lex_inpat)
2533 PL_lex_state = LEX_NORMAL;
2534 s = scan_formline(PL_bufptr);
2535 if (!PL_lex_formbrack)
2541 PL_oldoldbufptr = PL_oldbufptr;
2544 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
2545 exp_name[PL_expect], s);
2551 if (isIDFIRST_lazy_if(s,UTF))
2553 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2556 goto fake_eof; /* emulate EOF on ^D or ^Z */
2561 if (PL_lex_brackets) {
2562 if (PL_lex_formbrack)
2563 yyerror("Format not terminated");
2565 yyerror("Missing right curly or square bracket");
2567 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2568 "### Tokener got EOF\n");
2572 if (s++ < PL_bufend)
2573 goto retry; /* ignore stray nulls */
2576 if (!PL_in_eval && !PL_preambled) {
2577 PL_preambled = TRUE;
2578 sv_setpv(PL_linestr,incl_perldb());
2579 if (SvCUR(PL_linestr))
2580 sv_catpvn(PL_linestr,";", 1);
2582 while(AvFILLp(PL_preambleav) >= 0) {
2583 SV *tmpsv = av_shift(PL_preambleav);
2584 sv_catsv(PL_linestr, tmpsv);
2585 sv_catpvn(PL_linestr, ";", 1);
2588 sv_free((SV*)PL_preambleav);
2589 PL_preambleav = NULL;
2591 if (PL_minus_n || PL_minus_p) {
2592 sv_catpv(PL_linestr, "LINE: while (<>) {");
2594 sv_catpv(PL_linestr,"chomp;");
2597 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2598 || *PL_splitstr == '"')
2599 && strchr(PL_splitstr + 1, *PL_splitstr))
2600 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2602 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2603 bytes can be used as quoting characters. :-) */
2604 /* The count here deliberately includes the NUL
2605 that terminates the C string constant. This
2606 embeds the opening NUL into the string. */
2607 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2612 sv_catpvn(PL_linestr, s, 1);
2613 sv_catpvn(PL_linestr, s, 1);
2615 /* This loop will embed the trailing NUL of
2616 PL_linestr as the last thing it does before
2618 sv_catpvn(PL_linestr, ");", 2);
2622 sv_catpv(PL_linestr,"our @F=split(' ');");
2625 sv_catpvn(PL_linestr, "\n", 1);
2626 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2627 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2628 PL_last_lop = PL_last_uni = Nullch;
2629 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2630 SV *sv = NEWSV(85,0);
2632 sv_upgrade(sv, SVt_PVMG);
2633 sv_setsv(sv,PL_linestr);
2636 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2641 bof = PL_rsfp ? TRUE : FALSE;
2642 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2645 if (PL_preprocess && !PL_in_eval)
2646 (void)PerlProc_pclose(PL_rsfp);
2647 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2648 PerlIO_clearerr(PL_rsfp);
2650 (void)PerlIO_close(PL_rsfp);
2652 PL_doextract = FALSE;
2654 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2655 sv_setpv(PL_linestr,PL_minus_p
2656 ? ";}continue{print;}" : ";}");
2657 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2658 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2659 PL_last_lop = PL_last_uni = Nullch;
2660 PL_minus_n = PL_minus_p = 0;
2663 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2664 PL_last_lop = PL_last_uni = Nullch;
2665 sv_setpv(PL_linestr,"");
2666 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2668 /* If it looks like the start of a BOM or raw UTF-16,
2669 * check if it in fact is. */
2675 #ifdef PERLIO_IS_STDIO
2676 # ifdef __GNU_LIBRARY__
2677 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2678 # define FTELL_FOR_PIPE_IS_BROKEN
2682 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2683 # define FTELL_FOR_PIPE_IS_BROKEN
2688 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2689 /* This loses the possibility to detect the bof
2690 * situation on perl -P when the libc5 is being used.
2691 * Workaround? Maybe attach some extra state to PL_rsfp?
2694 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2696 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2699 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2700 s = swallow_bom((U8*)s);
2704 /* Incest with pod. */
2705 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2706 sv_setpv(PL_linestr, "");
2707 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2708 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2709 PL_last_lop = PL_last_uni = Nullch;
2710 PL_doextract = FALSE;
2714 } while (PL_doextract);
2715 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2716 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2717 SV *sv = NEWSV(85,0);
2719 sv_upgrade(sv, SVt_PVMG);
2720 sv_setsv(sv,PL_linestr);
2723 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2725 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2726 PL_last_lop = PL_last_uni = Nullch;
2727 if (CopLINE(PL_curcop) == 1) {
2728 while (s < PL_bufend && isSPACE(*s))
2730 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2734 if (*s == '#' && *(s+1) == '!')
2736 #ifdef ALTERNATE_SHEBANG
2738 static char as[] = ALTERNATE_SHEBANG;
2739 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2740 d = s + (sizeof(as) - 1);
2742 #endif /* ALTERNATE_SHEBANG */
2751 while (*d && !isSPACE(*d))
2755 #ifdef ARG_ZERO_IS_SCRIPT
2756 if (ipathend > ipath) {
2758 * HP-UX (at least) sets argv[0] to the script name,
2759 * which makes $^X incorrect. And Digital UNIX and Linux,
2760 * at least, set argv[0] to the basename of the Perl
2761 * interpreter. So, having found "#!", we'll set it right.
2763 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2764 assert(SvPOK(x) || SvGMAGICAL(x));
2765 if (sv_eq(x, CopFILESV(PL_curcop))) {
2766 sv_setpvn(x, ipath, ipathend - ipath);
2772 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2773 char *lstart = SvPV(x,llen);
2775 bstart += blen - llen;
2776 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2777 sv_setpvn(x, ipath, ipathend - ipath);
2782 TAINT_NOT; /* $^X is always tainted, but that's OK */
2784 #endif /* ARG_ZERO_IS_SCRIPT */
2789 d = instr(s,"perl -");
2791 d = instr(s,"perl");
2793 /* avoid getting into infinite loops when shebang
2794 * line contains "Perl" rather than "perl" */
2796 for (d = ipathend-4; d >= ipath; --d) {
2797 if ((*d == 'p' || *d == 'P')
2798 && !ibcmp(d, "perl", 4))
2808 #ifdef ALTERNATE_SHEBANG
2810 * If the ALTERNATE_SHEBANG on this system starts with a
2811 * character that can be part of a Perl expression, then if
2812 * we see it but not "perl", we're probably looking at the
2813 * start of Perl code, not a request to hand off to some
2814 * other interpreter. Similarly, if "perl" is there, but
2815 * not in the first 'word' of the line, we assume the line
2816 * contains the start of the Perl program.
2818 if (d && *s != '#') {
2820 while (*c && !strchr("; \t\r\n\f\v#", *c))
2823 d = Nullch; /* "perl" not in first word; ignore */
2825 *s = '#'; /* Don't try to parse shebang line */
2827 #endif /* ALTERNATE_SHEBANG */
2828 #ifndef MACOS_TRADITIONAL
2833 !instr(s,"indir") &&
2834 instr(PL_origargv[0],"perl"))
2840 while (s < PL_bufend && isSPACE(*s))
2842 if (s < PL_bufend) {
2843 Newz(899,newargv,PL_origargc+3,char*);
2845 while (s < PL_bufend && !isSPACE(*s))
2848 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2851 newargv = PL_origargv;
2854 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2856 Perl_croak(aTHX_ "Can't exec %s", ipath);
2860 U32 oldpdb = PL_perldb;
2861 bool oldn = PL_minus_n;
2862 bool oldp = PL_minus_p;
2864 while (*d && !isSPACE(*d)) d++;
2865 while (SPACE_OR_TAB(*d)) d++;
2868 bool switches_done = PL_doswitches;
2870 if (*d == 'M' || *d == 'm') {
2872 while (*d && !isSPACE(*d)) d++;
2873 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2876 d = moreswitches(d);
2878 if (PL_doswitches && !switches_done) {
2879 int argc = PL_origargc;
2880 char **argv = PL_origargv;
2883 } while (argc && argv[0][0] == '-' && argv[0][1]);
2884 init_argv_symbols(argc,argv);
2886 if ((PERLDB_LINE && !oldpdb) ||
2887 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2888 /* if we have already added "LINE: while (<>) {",
2889 we must not do it again */
2891 sv_setpv(PL_linestr, "");
2892 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2893 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2894 PL_last_lop = PL_last_uni = Nullch;
2895 PL_preambled = FALSE;
2897 (void)gv_fetchfile(PL_origfilename);
2900 if (PL_doswitches && !switches_done) {
2901 int argc = PL_origargc;
2902 char **argv = PL_origargv;
2905 } while (argc && argv[0][0] == '-' && argv[0][1]);
2906 init_argv_symbols(argc,argv);
2912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2914 PL_lex_state = LEX_FORMLINE;
2919 #ifdef PERL_STRICT_CR
2920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2924 case ' ': case '\t': case '\f': case 013:
2925 #ifdef MACOS_TRADITIONAL
2932 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2933 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2934 /* handle eval qq[#line 1 "foo"\n ...] */
2935 CopLINE_dec(PL_curcop);
2939 while (s < d && *s != '\n')
2943 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2944 Perl_croak(aTHX_ "panic: input overflow");
2946 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2948 PL_lex_state = LEX_FORMLINE;
2958 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2965 while (s < PL_bufend && SPACE_OR_TAB(*s))
2968 if (strnEQ(s,"=>",2)) {
2969 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2970 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2971 "### Saw unary minus before =>, forcing word '%s'\n", s);
2973 OPERATOR('-'); /* unary minus */
2975 PL_last_uni = PL_oldbufptr;
2977 case 'r': ftst = OP_FTEREAD; break;
2978 case 'w': ftst = OP_FTEWRITE; break;
2979 case 'x': ftst = OP_FTEEXEC; break;
2980 case 'o': ftst = OP_FTEOWNED; break;
2981 case 'R': ftst = OP_FTRREAD; break;
2982 case 'W': ftst = OP_FTRWRITE; break;
2983 case 'X': ftst = OP_FTREXEC; break;
2984 case 'O': ftst = OP_FTROWNED; break;
2985 case 'e': ftst = OP_FTIS; break;
2986 case 'z': ftst = OP_FTZERO; break;
2987 case 's': ftst = OP_FTSIZE; break;
2988 case 'f': ftst = OP_FTFILE; break;
2989 case 'd': ftst = OP_FTDIR; break;
2990 case 'l': ftst = OP_FTLINK; break;
2991 case 'p': ftst = OP_FTPIPE; break;
2992 case 'S': ftst = OP_FTSOCK; break;
2993 case 'u': ftst = OP_FTSUID; break;
2994 case 'g': ftst = OP_FTSGID; break;
2995 case 'k': ftst = OP_FTSVTX; break;
2996 case 'b': ftst = OP_FTBLK; break;
2997 case 'c': ftst = OP_FTCHR; break;
2998 case 't': ftst = OP_FTTTY; break;
2999 case 'T': ftst = OP_FTTEXT; break;
3000 case 'B': ftst = OP_FTBINARY; break;
3001 case 'M': case 'A': case 'C':
3002 gv_fetchpv("\024",TRUE, SVt_PV);
3004 case 'M': ftst = OP_FTMTIME; break;
3005 case 'A': ftst = OP_FTATIME; break;
3006 case 'C': ftst = OP_FTCTIME; break;
3014 PL_last_lop_op = (OPCODE)ftst;
3015 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3016 "### Saw file test %c\n", (int)ftst);
3021 /* Assume it was a minus followed by a one-letter named
3022 * subroutine call (or a -bareword), then. */
3023 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3024 "### '-%c' looked like a file test but was not\n",
3033 if (PL_expect == XOPERATOR)
3038 else if (*s == '>') {
3041 if (isIDFIRST_lazy_if(s,UTF)) {
3042 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3050 if (PL_expect == XOPERATOR)
3053 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3055 OPERATOR('-'); /* unary minus */
3062 if (PL_expect == XOPERATOR)
3067 if (PL_expect == XOPERATOR)
3070 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3076 if (PL_expect != XOPERATOR) {
3077 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3078 PL_expect = XOPERATOR;
3079 force_ident(PL_tokenbuf, '*');
3092 if (PL_expect == XOPERATOR) {
3096 PL_tokenbuf[0] = '%';
3097 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3098 if (!PL_tokenbuf[1]) {
3101 PL_pending_ident = '%';
3120 switch (PL_expect) {
3123 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3125 PL_bufptr = s; /* update in case we back off */
3131 PL_expect = XTERMBLOCK;
3135 while (isIDFIRST_lazy_if(s,UTF)) {
3136 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3137 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3138 if (tmp < 0) tmp = -tmp;
3154 d = scan_str(d,TRUE,TRUE);
3156 /* MUST advance bufptr here to avoid bogus
3157 "at end of line" context messages from yyerror().
3159 PL_bufptr = s + len;
3160 yyerror("Unterminated attribute parameter in attribute list");
3163 return REPORT(0); /* EOF indicator */
3167 SV *sv = newSVpvn(s, len);
3168 sv_catsv(sv, PL_lex_stuff);
3169 attrs = append_elem(OP_LIST, attrs,
3170 newSVOP(OP_CONST, 0, sv));
3171 SvREFCNT_dec(PL_lex_stuff);
3172 PL_lex_stuff = Nullsv;
3175 if (len == 6 && strnEQ(s, "unique", len)) {
3176 if (PL_in_my == KEY_our)
3178 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3180 ; /* skip to avoid loading attributes.pm */
3183 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3186 /* NOTE: any CV attrs applied here need to be part of
3187 the CVf_BUILTIN_ATTRS define in cv.h! */
3188 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3189 CvLVALUE_on(PL_compcv);
3190 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3191 CvLOCKED_on(PL_compcv);
3192 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3193 CvMETHOD_on(PL_compcv);
3194 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3195 CvASSERTION_on(PL_compcv);
3196 /* After we've set the flags, it could be argued that
3197 we don't need to do the attributes.pm-based setting
3198 process, and shouldn't bother appending recognized
3199 flags. To experiment with that, uncomment the
3200 following "else". (Note that's already been
3201 uncommented. That keeps the above-applied built-in
3202 attributes from being intercepted (and possibly
3203 rejected) by a package's attribute routines, but is
3204 justified by the performance win for the common case
3205 of applying only built-in attributes.) */
3207 attrs = append_elem(OP_LIST, attrs,
3208 newSVOP(OP_CONST, 0,
3212 if (*s == ':' && s[1] != ':')
3215 break; /* require real whitespace or :'s */
3217 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3218 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3219 char q = ((*s == '\'') ? '"' : '\'');
3220 /* If here for an expression, and parsed no attrs, back off. */
3221 if (tmp == '=' && !attrs) {
3225 /* MUST advance bufptr here to avoid bogus "at end of line"
3226 context messages from yyerror().
3230 yyerror("Unterminated attribute list");
3232 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3240 PL_nextval[PL_nexttoke].opval = attrs;
3248 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3249 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3266 if (PL_lex_brackets <= 0)
3267 yyerror("Unmatched right square bracket");
3270 if (PL_lex_state == LEX_INTERPNORMAL) {
3271 if (PL_lex_brackets == 0) {
3272 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3273 PL_lex_state = LEX_INTERPEND;
3280 if (PL_lex_brackets > 100) {
3281 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3283 switch (PL_expect) {
3285 if (PL_lex_formbrack) {
3289 if (PL_oldoldbufptr == PL_last_lop)
3290 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3292 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3293 OPERATOR(HASHBRACK);
3295 while (s < PL_bufend && SPACE_OR_TAB(*s))
3298 PL_tokenbuf[0] = '\0';
3299 if (d < PL_bufend && *d == '-') {
3300 PL_tokenbuf[0] = '-';
3302 while (d < PL_bufend && SPACE_OR_TAB(*d))
3305 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3306 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
3311 char minus = (PL_tokenbuf[0] == '-');
3312 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3320 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3325 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3330 if (PL_oldoldbufptr == PL_last_lop)
3331 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3333 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3336 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3338 /* This hack is to get the ${} in the message. */
3340 yyerror("syntax error");
3343 OPERATOR(HASHBRACK);
3345 /* This hack serves to disambiguate a pair of curlies
3346 * as being a block or an anon hash. Normally, expectation
3347 * determines that, but in cases where we're not in a
3348 * position to expect anything in particular (like inside
3349 * eval"") we have to resolve the ambiguity. This code
3350 * covers the case where the first term in the curlies is a
3351 * quoted string. Most other cases need to be explicitly
3352 * disambiguated by prepending a `+' before the opening
3353 * curly in order to force resolution as an anon hash.
3355 * XXX should probably propagate the outer expectation
3356 * into eval"" to rely less on this hack, but that could
3357 * potentially break current behavior of eval"".
3361 if (*s == '\'' || *s == '"' || *s == '`') {
3362 /* common case: get past first string, handling escapes */
3363 for (t++; t < PL_bufend && *t != *s;)
3364 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3368 else if (*s == 'q') {
3371 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3374 /* skip q//-like construct */
3376 char open, close, term;
3379 while (t < PL_bufend && isSPACE(*t))
3381 /* check for q => */
3382 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3383 OPERATOR(HASHBRACK);
3387 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3391 for (t++; t < PL_bufend; t++) {
3392 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3394 else if (*t == open)
3398 for (t++; t < PL_bufend; t++) {
3399 if (*t == '\\' && t+1 < PL_bufend)
3401 else if (*t == close && --brackets <= 0)
3403 else if (*t == open)
3410 /* skip plain q word */
3411 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3414 else if (isALNUM_lazy_if(t,UTF)) {
3416 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3419 while (t < PL_bufend && isSPACE(*t))
3421 /* if comma follows first term, call it an anon hash */
3422 /* XXX it could be a comma expression with loop modifiers */
3423 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3424 || (*t == '=' && t[1] == '>')))
3425 OPERATOR(HASHBRACK);
3426 if (PL_expect == XREF)
3429 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3435 yylval.ival = CopLINE(PL_curcop);
3436 if (isSPACE(*s) || *s == '#')
3437 PL_copline = NOLINE; /* invalidate current command line number */
3442 if (PL_lex_brackets <= 0)
3443 yyerror("Unmatched right curly bracket");
3445 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3446 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3447 PL_lex_formbrack = 0;
3448 if (PL_lex_state == LEX_INTERPNORMAL) {
3449 if (PL_lex_brackets == 0) {
3450 if (PL_expect & XFAKEBRACK) {
3451 PL_expect &= XENUMMASK;
3452 PL_lex_state = LEX_INTERPEND;
3454 return yylex(); /* ignore fake brackets */
3456 if (*s == '-' && s[1] == '>')
3457 PL_lex_state = LEX_INTERPENDMAYBE;
3458 else if (*s != '[' && *s != '{')
3459 PL_lex_state = LEX_INTERPEND;
3462 if (PL_expect & XFAKEBRACK) {
3463 PL_expect &= XENUMMASK;
3465 return yylex(); /* ignore fake brackets */
3475 if (PL_expect == XOPERATOR) {
3476 if (ckWARN(WARN_SEMICOLON)
3477 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3479 CopLINE_dec(PL_curcop);
3480 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3481 CopLINE_inc(PL_curcop);
3486 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3488 PL_expect = XOPERATOR;
3489 force_ident(PL_tokenbuf, '&');
3493 yylval.ival = (OPpENTERSUB_AMPER<<8);
3512 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3513 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3515 if (PL_expect == XSTATE && isALPHA(tmp) &&
3516 (s == PL_linestart+1 || s[-2] == '\n') )
3518 if (PL_in_eval && !PL_rsfp) {
3523 if (strnEQ(s,"=cut",4)) {
3537 PL_doextract = TRUE;
3540 if (PL_lex_brackets < PL_lex_formbrack) {
3542 #ifdef PERL_STRICT_CR
3543 for (t = s; SPACE_OR_TAB(*t); t++) ;
3545 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3547 if (*t == '\n' || *t == '#') {
3559 /* was this !=~ where !~ was meant?
3560 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3562 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3565 while (t < PL_bufend && isSPACE(*t))
3568 if (*t == '/' || *t == '?' ||
3569 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3570 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3571 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3572 "!=~ should be !~");
3581 if (PL_expect != XOPERATOR) {
3582 if (s[1] != '<' && !strchr(s,'>'))
3585 s = scan_heredoc(s);
3587 s = scan_inputsymbol(s);
3588 TERM(sublex_start());
3593 SHop(OP_LEFT_SHIFT);
3607 SHop(OP_RIGHT_SHIFT);
3616 if (PL_expect == XOPERATOR) {
3617 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3620 return REPORT(','); /* grandfather non-comma-format format */
3624 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3625 PL_tokenbuf[0] = '@';
3626 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3627 sizeof PL_tokenbuf - 1, FALSE);
3628 if (PL_expect == XOPERATOR)
3629 no_op("Array length", s);
3630 if (!PL_tokenbuf[1])
3632 PL_expect = XOPERATOR;
3633 PL_pending_ident = '#';
3637 PL_tokenbuf[0] = '$';
3638 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3639 sizeof PL_tokenbuf - 1, FALSE);
3640 if (PL_expect == XOPERATOR)
3642 if (!PL_tokenbuf[1]) {
3644 yyerror("Final $ should be \\$ or $name");
3648 /* This kludge not intended to be bulletproof. */
3649 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3650 yylval.opval = newSVOP(OP_CONST, 0,
3651 newSViv(PL_compiling.cop_arybase));
3652 yylval.opval->op_private = OPpCONST_ARYBASE;
3658 if (PL_lex_state == LEX_NORMAL)
3661 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3664 PL_tokenbuf[0] = '@';
3665 if (ckWARN(WARN_SYNTAX)) {
3667 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3670 PL_bufptr = skipspace(PL_bufptr);
3671 while (t < PL_bufend && *t != ']')
3673 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3674 "Multidimensional syntax %.*s not supported",
3675 (t - PL_bufptr) + 1, PL_bufptr);
3679 else if (*s == '{') {
3680 PL_tokenbuf[0] = '%';
3681 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3682 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3684 char tmpbuf[sizeof PL_tokenbuf];
3686 for (t++; isSPACE(*t); t++) ;
3687 if (isIDFIRST_lazy_if(t,UTF)) {
3688 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3689 for (; isSPACE(*t); t++) ;
3690 if (*t == ';' && get_cv(tmpbuf, FALSE))
3691 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3692 "You need to quote \"%s\"", tmpbuf);
3698 PL_expect = XOPERATOR;
3699 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3700 bool islop = (PL_last_lop == PL_oldoldbufptr);
3701 if (!islop || PL_last_lop_op == OP_GREPSTART)
3702 PL_expect = XOPERATOR;
3703 else if (strchr("$@\"'`q", *s))
3704 PL_expect = XTERM; /* e.g. print $fh "foo" */
3705 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3706 PL_expect = XTERM; /* e.g. print $fh &sub */
3707 else if (isIDFIRST_lazy_if(s,UTF)) {
3708 char tmpbuf[sizeof PL_tokenbuf];
3709 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3710 if ((tmp = keyword(tmpbuf, len))) {
3711 /* binary operators exclude handle interpretations */
3723 PL_expect = XTERM; /* e.g. print $fh length() */
3728 PL_expect = XTERM; /* e.g. print $fh subr() */
3731 else if (isDIGIT(*s))
3732 PL_expect = XTERM; /* e.g. print $fh 3 */
3733 else if (*s == '.' && isDIGIT(s[1]))
3734 PL_expect = XTERM; /* e.g. print $fh .3 */
3735 else if ((*s == '?' || *s == '-' || *s == '+')
3736 && !isSPACE(s[1]) && s[1] != '=')
3737 PL_expect = XTERM; /* e.g. print $fh -1 */
3738 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3739 PL_expect = XTERM; /* e.g. print $fh /.../
3740 XXX except DORDOR operator */
3741 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3742 PL_expect = XTERM; /* print $fh <<"EOF" */
3744 PL_pending_ident = '$';
3748 if (PL_expect == XOPERATOR)
3750 PL_tokenbuf[0] = '@';
3751 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3752 if (!PL_tokenbuf[1]) {
3755 if (PL_lex_state == LEX_NORMAL)
3757 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3759 PL_tokenbuf[0] = '%';
3761 /* Warn about @ where they meant $. */
3762 if (ckWARN(WARN_SYNTAX)) {
3763 if (*s == '[' || *s == '{') {
3765 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3767 if (*t == '}' || *t == ']') {
3769 PL_bufptr = skipspace(PL_bufptr);
3770 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3771 "Scalar value %.*s better written as $%.*s",
3772 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3777 PL_pending_ident = '@';
3780 case '/': /* may be division, defined-or, or pattern */
3781 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3785 case '?': /* may either be conditional or pattern */
3786 if(PL_expect == XOPERATOR) {
3794 /* A // operator. */
3804 /* Disable warning on "study /blah/" */
3805 if (PL_oldoldbufptr == PL_last_uni
3806 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3807 || memNE(PL_last_uni, "study", 5)
3808 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3811 s = scan_pat(s,OP_MATCH);
3812 TERM(sublex_start());
3816 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3817 #ifdef PERL_STRICT_CR
3820 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3822 && (s == PL_linestart || s[-1] == '\n') )
3824 PL_lex_formbrack = 0;
3828 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3834 yylval.ival = OPf_SPECIAL;
3840 if (PL_expect != XOPERATOR)
3845 case '0': case '1': case '2': case '3': case '4':
3846 case '5': case '6': case '7': case '8': case '9':
3847 s = scan_num(s, &yylval);
3848 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3849 "### Saw number in '%s'\n", s);
3851 if (PL_expect == XOPERATOR)
3856 s = scan_str(s,FALSE,FALSE);
3857 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3858 "### Saw string before '%s'\n", s);
3860 if (PL_expect == XOPERATOR) {
3861 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3864 return REPORT(','); /* grandfather non-comma-format format */
3870 missingterm((char*)0);
3871 yylval.ival = OP_CONST;
3872 TERM(sublex_start());
3875 s = scan_str(s,FALSE,FALSE);
3876 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3877 "### Saw string before '%s'\n", s);
3879 if (PL_expect == XOPERATOR) {
3880 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3883 return REPORT(','); /* grandfather non-comma-format format */
3889 missingterm((char*)0);
3890 yylval.ival = OP_CONST;
3891 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3892 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3893 yylval.ival = OP_STRINGIFY;
3897 TERM(sublex_start());
3900 s = scan_str(s,FALSE,FALSE);
3901 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3902 "### Saw backtick string before '%s'\n", s);
3904 if (PL_expect == XOPERATOR)
3905 no_op("Backticks",s);
3907 missingterm((char*)0);
3908 yylval.ival = OP_BACKTICK;
3910 TERM(sublex_start());
3914 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3915 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3917 if (PL_expect == XOPERATOR)
3918 no_op("Backslash",s);
3922 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3926 while (isDIGIT(*start) || *start == '_')
3928 if (*start == '.' && isDIGIT(start[1])) {
3929 s = scan_num(s, &yylval);
3932 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3933 else if (!isALPHA(*start) && (PL_expect == XTERM
3934 || PL_expect == XREF || PL_expect == XSTATE
3935 || PL_expect == XTERMORDORDOR)) {
3939 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3942 s = scan_num(s, &yylval);
3949 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3989 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3991 /* Some keywords can be followed by any delimiter, including ':' */
3992 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3993 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3994 (PL_tokenbuf[0] == 'q' &&
3995 strchr("qwxr", PL_tokenbuf[1])))));
3997 /* x::* is just a word, unless x is "CORE" */
3998 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4002 while (d < PL_bufend && isSPACE(*d))
4003 d++; /* no comments skipped here, or s### is misparsed */
4005 /* Is this a label? */
4006 if (!tmp && PL_expect == XSTATE
4007 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4009 yylval.pval = savepv(PL_tokenbuf);
4014 /* Check for keywords */
4015 tmp = keyword(PL_tokenbuf, len);
4017 /* Is this a word before a => operator? */
4018 if (*d == '=' && d[1] == '>') {
4021 = (OP*)newSVOP(OP_CONST, 0,
4022 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4023 yylval.opval->op_private = OPpCONST_BARE;
4027 if (tmp < 0) { /* second-class keyword? */
4028 GV *ogv = Nullgv; /* override (winner) */
4029 GV *hgv = Nullgv; /* hidden (loser) */
4030 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4032 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
4035 if (GvIMPORTED_CV(gv))
4037 else if (! CvMETHOD(cv))
4041 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4042 (gv = *gvp) != (GV*)&PL_sv_undef &&
4043 GvCVu(gv) && GvIMPORTED_CV(gv))
4050 tmp = 0; /* overridden by import or by GLOBAL */
4053 && -tmp==KEY_lock /* XXX generalizable kludge */
4055 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4057 tmp = 0; /* any sub overrides "weak" keyword */
4062 && PL_expect != XOPERATOR
4063 && PL_expect != XTERMORDORDOR)
4065 /* any sub overrides the "err" keyword, except when really an
4066 * operator is expected */
4069 else { /* no override */
4071 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4072 Perl_warner(aTHX_ packWARN(WARN_MISC),
4073 "dump() better written as CORE::dump()");
4077 if (ckWARN(WARN_AMBIGUOUS) && hgv
4078 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
4079 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4080 "Ambiguous call resolved as CORE::%s(), %s",
4081 GvENAME(hgv), "qualify as such or use &");
4088 default: /* not a keyword */
4092 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4094 /* Get the rest if it looks like a package qualifier */
4096 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4098 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4101 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4102 *s == '\'' ? "'" : "::");
4107 if (PL_expect == XOPERATOR) {
4108 if (PL_bufptr == PL_linestart) {
4109 CopLINE_dec(PL_curcop);
4110 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4111 CopLINE_inc(PL_curcop);
4114 no_op("Bareword",s);
4117 /* Look for a subroutine with this name in current package,
4118 unless name is "Foo::", in which case Foo is a bearword
4119 (and a package name). */
4122 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4124 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4125 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4126 "Bareword \"%s\" refers to nonexistent package",
4129 PL_tokenbuf[len] = '\0';
4136 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
4139 /* if we saw a global override before, get the right name */
4142 sv = newSVpvn("CORE::GLOBAL::",14);
4143 sv_catpv(sv,PL_tokenbuf);
4146 /* If len is 0, newSVpv does strlen(), which is correct.
4147 If len is non-zero, then it will be the true length,
4148 and so the scalar will be created correctly. */
4149 sv = newSVpv(PL_tokenbuf,len);
4152 /* Presume this is going to be a bareword of some sort. */
4155 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4156 yylval.opval->op_private = OPpCONST_BARE;
4157 /* UTF-8 package name? */
4158 if (UTF && !IN_BYTES &&
4159 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
4162 /* And if "Foo::", then that's what it certainly is. */
4167 /* See if it's the indirect object for a list operator. */
4169 if (PL_oldoldbufptr &&
4170 PL_oldoldbufptr < PL_bufptr &&
4171 (PL_oldoldbufptr == PL_last_lop
4172 || PL_oldoldbufptr == PL_last_uni) &&
4173 /* NO SKIPSPACE BEFORE HERE! */
4174 (PL_expect == XREF ||
4175 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4177 bool immediate_paren = *s == '(';
4179 /* (Now we can afford to cross potential line boundary.) */
4182 /* Two barewords in a row may indicate method call. */
4184 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
4187 /* If not a declared subroutine, it's an indirect object. */
4188 /* (But it's an indir obj regardless for sort.) */
4190 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4191 ((!gv || !GvCVu(gv)) &&
4192 (PL_last_lop_op != OP_MAPSTART &&
4193 PL_last_lop_op != OP_GREPSTART))))
4195 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4200 PL_expect = XOPERATOR;
4203 /* Is this a word before a => operator? */
4204 if (*s == '=' && s[1] == '>' && !pkgname) {
4206 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4207 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4208 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4212 /* If followed by a paren, it's certainly a subroutine. */
4215 if (gv && GvCVu(gv)) {
4216 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4217 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4222 PL_nextval[PL_nexttoke].opval = yylval.opval;
4223 PL_expect = XOPERATOR;
4229 /* If followed by var or block, call it a method (unless sub) */
4231 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4232 PL_last_lop = PL_oldbufptr;
4233 PL_last_lop_op = OP_METHOD;
4237 /* If followed by a bareword, see if it looks like indir obj. */
4240 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4241 && (tmp = intuit_method(s,gv)))
4244 /* Not a method, so call it a subroutine (if defined) */
4246 if (gv && GvCVu(gv)) {
4248 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4249 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4250 "Ambiguous use of -%s resolved as -&%s()",
4251 PL_tokenbuf, PL_tokenbuf);
4252 /* Check for a constant sub */
4254 if ((sv = cv_const_sv(cv))) {
4256 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4257 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4258 yylval.opval->op_private = 0;
4262 /* Resolve to GV now. */
4263 op_free(yylval.opval);
4264 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4265 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4266 PL_last_lop = PL_oldbufptr;
4267 PL_last_lop_op = OP_ENTERSUB;
4268 /* Is there a prototype? */
4271 char *proto = SvPV((SV*)cv, len);
4274 if (*proto == '$' && proto[1] == '\0')
4276 while (*proto == ';')
4278 if (*proto == '&' && *s == '{') {
4279 sv_setpv(PL_subname, PL_curstash ?
4280 "__ANON__" : "__ANON__::__ANON__");
4284 PL_nextval[PL_nexttoke].opval = yylval.opval;
4290 /* Call it a bare word */
4292 if (PL_hints & HINT_STRICT_SUBS)
4293 yylval.opval->op_private |= OPpCONST_STRICT;
4296 if (ckWARN(WARN_RESERVED)) {
4297 if (lastchar != '-') {
4298 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4299 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4300 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4307 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4308 && ckWARN_d(WARN_AMBIGUOUS)) {
4309 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4310 "Operator or semicolon missing before %c%s",
4311 lastchar, PL_tokenbuf);
4312 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4313 "Ambiguous use of %c resolved as operator %c",
4314 lastchar, lastchar);
4320 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4321 newSVpv(CopFILE(PL_curcop),0));
4325 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4326 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4329 case KEY___PACKAGE__:
4330 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4332 ? newSVpv(HvNAME(PL_curstash), 0)
4341 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4342 char *pname = "main";
4343 if (PL_tokenbuf[2] == 'D')
4344 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
4345 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4348 GvIOp(gv) = newIO();
4349 IoIFP(GvIOp(gv)) = PL_rsfp;
4350 #if defined(HAS_FCNTL) && defined(F_SETFD)
4352 int fd = PerlIO_fileno(PL_rsfp);
4353 fcntl(fd,F_SETFD,fd >= 3);
4356 /* Mark this internal pseudo-handle as clean */
4357 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4359 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4360 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4361 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4363 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4364 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4365 /* if the script was opened in binmode, we need to revert
4366 * it to text mode for compatibility; but only iff it has CRs
4367 * XXX this is a questionable hack at best. */
4368 if (PL_bufend-PL_bufptr > 2
4369 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4372 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4373 loc = PerlIO_tell(PL_rsfp);
4374 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4377 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4379 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4380 #endif /* NETWARE */
4381 #ifdef PERLIO_IS_STDIO /* really? */
4382 # if defined(__BORLANDC__)
4383 /* XXX see note in do_binmode() */
4384 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4388 PerlIO_seek(PL_rsfp, loc, 0);
4392 #ifdef PERLIO_LAYERS
4395 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4396 else if (PL_encoding) {
4403 XPUSHs(PL_encoding);
4405 call_method("name", G_SCALAR);
4409 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4410 Perl_form(aTHX_ ":encoding(%"SVf")",
4428 if (PL_expect == XSTATE) {
4435 if (*s == ':' && s[1] == ':') {
4438 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4439 if (!(tmp = keyword(PL_tokenbuf, len)))
4440 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4454 LOP(OP_ACCEPT,XTERM);
4460 LOP(OP_ATAN2,XTERM);
4466 LOP(OP_BINMODE,XTERM);
4469 LOP(OP_BLESS,XTERM);
4478 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4495 if (!PL_cryptseen) {
4496 PL_cryptseen = TRUE;
4500 LOP(OP_CRYPT,XTERM);
4503 LOP(OP_CHMOD,XTERM);
4506 LOP(OP_CHOWN,XTERM);
4509 LOP(OP_CONNECT,XTERM);
4525 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4529 PL_hints |= HINT_BLOCK_SCOPE;
4539 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4540 LOP(OP_DBMOPEN,XTERM);
4546 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4553 yylval.ival = CopLINE(PL_curcop);
4567 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4568 UNIBRACK(OP_ENTEREVAL);
4586 case KEY_endhostent:
4592 case KEY_endservent:
4595 case KEY_endprotoent:
4606 yylval.ival = CopLINE(PL_curcop);
4608 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4610 if ((PL_bufend - p) >= 3 &&
4611 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4613 else if ((PL_bufend - p) >= 4 &&
4614 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4617 if (isIDFIRST_lazy_if(p,UTF)) {
4618 p = scan_ident(p, PL_bufend,
4619 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4623 Perl_croak(aTHX_ "Missing $ on loop variable");
4628 LOP(OP_FORMLINE,XTERM);
4634 LOP(OP_FCNTL,XTERM);
4640 LOP(OP_FLOCK,XTERM);
4649 LOP(OP_GREPSTART, XREF);
4652 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4667 case KEY_getpriority:
4668 LOP(OP_GETPRIORITY,XTERM);
4670 case KEY_getprotobyname:
4673 case KEY_getprotobynumber:
4674 LOP(OP_GPBYNUMBER,XTERM);
4676 case KEY_getprotoent:
4688 case KEY_getpeername:
4689 UNI(OP_GETPEERNAME);
4691 case KEY_gethostbyname:
4694 case KEY_gethostbyaddr:
4695 LOP(OP_GHBYADDR,XTERM);
4697 case KEY_gethostent:
4700 case KEY_getnetbyname:
4703 case KEY_getnetbyaddr:
4704 LOP(OP_GNBYADDR,XTERM);
4709 case KEY_getservbyname:
4710 LOP(OP_GSBYNAME,XTERM);
4712 case KEY_getservbyport:
4713 LOP(OP_GSBYPORT,XTERM);
4715 case KEY_getservent:
4718 case KEY_getsockname:
4719 UNI(OP_GETSOCKNAME);
4721 case KEY_getsockopt:
4722 LOP(OP_GSOCKOPT,XTERM);
4744 yylval.ival = CopLINE(PL_curcop);
4748 LOP(OP_INDEX,XTERM);
4754 LOP(OP_IOCTL,XTERM);
4766 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4798 LOP(OP_LISTEN,XTERM);
4807 s = scan_pat(s,OP_MATCH);
4808 TERM(sublex_start());
4811 LOP(OP_MAPSTART, XREF);
4814 LOP(OP_MKDIR,XTERM);
4817 LOP(OP_MSGCTL,XTERM);
4820 LOP(OP_MSGGET,XTERM);
4823 LOP(OP_MSGRCV,XTERM);
4826 LOP(OP_MSGSND,XTERM);
4832 if (isIDFIRST_lazy_if(s,UTF)) {
4833 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4834 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4836 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4837 if (!PL_in_my_stash) {
4840 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4848 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4855 if (PL_expect != XSTATE)
4856 yyerror("\"no\" not allowed in expression");
4857 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4858 s = force_version(s, FALSE);
4863 if (*s == '(' || (s = skipspace(s), *s == '('))
4870 if (isIDFIRST_lazy_if(s,UTF)) {
4872 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4873 for (t=d; *t && isSPACE(*t); t++) ;
4874 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4876 && !(t[0] == '=' && t[1] == '>')
4878 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4879 "Precedence problem: open %.*s should be open(%.*s)",
4880 d - s, s, d - s, s);
4886 yylval.ival = OP_OR;
4896 LOP(OP_OPEN_DIR,XTERM);
4899 checkcomma(s,PL_tokenbuf,"filehandle");
4903 checkcomma(s,PL_tokenbuf,"filehandle");
4922 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4926 LOP(OP_PIPE_OP,XTERM);
4929 s = scan_str(s,FALSE,FALSE);
4931 missingterm((char*)0);
4932 yylval.ival = OP_CONST;
4933 TERM(sublex_start());
4939 s = scan_str(s,FALSE,FALSE);
4941 missingterm((char*)0);
4943 if (SvCUR(PL_lex_stuff)) {
4946 d = SvPV_force(PL_lex_stuff, len);
4949 for (; isSPACE(*d) && len; --len, ++d) ;
4952 if (!warned && ckWARN(WARN_QW)) {
4953 for (; !isSPACE(*d) && len; --len, ++d) {
4955 Perl_warner(aTHX_ packWARN(WARN_QW),
4956 "Possible attempt to separate words with commas");
4959 else if (*d == '#') {
4960 Perl_warner(aTHX_ packWARN(WARN_QW),
4961 "Possible attempt to put comments in qw() list");
4967 for (; !isSPACE(*d) && len; --len, ++d) ;
4969 sv = newSVpvn(b, d-b);
4970 if (DO_UTF8(PL_lex_stuff))
4972 words = append_elem(OP_LIST, words,
4973 newSVOP(OP_CONST, 0, tokeq(sv)));
4977 PL_nextval[PL_nexttoke].opval = words;
4982 SvREFCNT_dec(PL_lex_stuff);
4983 PL_lex_stuff = Nullsv;
4989 s = scan_str(s,FALSE,FALSE);
4991 missingterm((char*)0);
4992 yylval.ival = OP_STRINGIFY;
4993 if (SvIVX(PL_lex_stuff) == '\'')
4994 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
4995 TERM(sublex_start());
4998 s = scan_pat(s,OP_QR);
4999 TERM(sublex_start());
5002 s = scan_str(s,FALSE,FALSE);
5004 missingterm((char*)0);
5005 yylval.ival = OP_BACKTICK;
5007 TERM(sublex_start());
5015 s = force_version(s, FALSE);
5017 else if (*s != 'v' || !isDIGIT(s[1])
5018 || (s = force_version(s, TRUE), *s == 'v'))
5020 *PL_tokenbuf = '\0';
5021 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5022 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5023 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5025 yyerror("<> should be quotes");
5033 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5037 LOP(OP_RENAME,XTERM);
5046 LOP(OP_RINDEX,XTERM);
5056 UNIDOR(OP_READLINE);
5069 LOP(OP_REVERSE,XTERM);
5072 UNIDOR(OP_READLINK);
5080 TERM(sublex_start());
5082 TOKEN(1); /* force error */
5091 LOP(OP_SELECT,XTERM);
5097 LOP(OP_SEMCTL,XTERM);
5100 LOP(OP_SEMGET,XTERM);
5103 LOP(OP_SEMOP,XTERM);
5109 LOP(OP_SETPGRP,XTERM);
5111 case KEY_setpriority:
5112 LOP(OP_SETPRIORITY,XTERM);
5114 case KEY_sethostent:
5120 case KEY_setservent:
5123 case KEY_setprotoent:
5133 LOP(OP_SEEKDIR,XTERM);
5135 case KEY_setsockopt:
5136 LOP(OP_SSOCKOPT,XTERM);
5142 LOP(OP_SHMCTL,XTERM);
5145 LOP(OP_SHMGET,XTERM);
5148 LOP(OP_SHMREAD,XTERM);
5151 LOP(OP_SHMWRITE,XTERM);
5154 LOP(OP_SHUTDOWN,XTERM);
5163 LOP(OP_SOCKET,XTERM);
5165 case KEY_socketpair:
5166 LOP(OP_SOCKPAIR,XTERM);
5169 checkcomma(s,PL_tokenbuf,"subroutine name");
5171 if (*s == ';' || *s == ')') /* probably a close */
5172 Perl_croak(aTHX_ "sort is now a reserved word");
5174 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5178 LOP(OP_SPLIT,XTERM);
5181 LOP(OP_SPRINTF,XTERM);
5184 LOP(OP_SPLICE,XTERM);
5199 LOP(OP_SUBSTR,XTERM);
5205 char tmpbuf[sizeof PL_tokenbuf];
5206 SSize_t tboffset = 0;
5207 expectation attrful;
5208 bool have_name, have_proto, bad_proto;
5213 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5214 (*s == ':' && s[1] == ':'))
5217 attrful = XATTRBLOCK;
5218 /* remember buffer pos'n for later force_word */
5219 tboffset = s - PL_oldbufptr;
5220 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5221 if (strchr(tmpbuf, ':'))
5222 sv_setpv(PL_subname, tmpbuf);
5224 sv_setsv(PL_subname,PL_curstname);
5225 sv_catpvn(PL_subname,"::",2);
5226 sv_catpvn(PL_subname,tmpbuf,len);
5233 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5234 PL_expect = XTERMBLOCK;
5235 attrful = XATTRTERM;
5236 sv_setpv(PL_subname,"?");
5240 if (key == KEY_format) {
5242 PL_lex_formbrack = PL_lex_brackets + 1;
5244 (void) force_word(PL_oldbufptr + tboffset, WORD,
5249 /* Look for a prototype */
5253 s = scan_str(s,FALSE,FALSE);
5255 Perl_croak(aTHX_ "Prototype not terminated");
5256 /* strip spaces and check for bad characters */
5257 d = SvPVX(PL_lex_stuff);
5260 for (p = d; *p; ++p) {
5263 if (!strchr("$@%*;[]&\\", *p))
5268 if (bad_proto && ckWARN(WARN_SYNTAX))
5269 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5270 "Illegal character in prototype for %"SVf" : %s",
5272 SvCUR(PL_lex_stuff) = tmp;
5280 if (*s == ':' && s[1] != ':')
5281 PL_expect = attrful;
5282 else if (*s != '{' && key == KEY_sub) {
5284 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5286 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5290 PL_nextval[PL_nexttoke].opval =
5291 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5292 PL_lex_stuff = Nullsv;
5296 sv_setpv(PL_subname,
5297 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5300 (void) force_word(PL_oldbufptr + tboffset, WORD,
5309 LOP(OP_SYSTEM,XREF);
5312 LOP(OP_SYMLINK,XTERM);
5315 LOP(OP_SYSCALL,XTERM);
5318 LOP(OP_SYSOPEN,XTERM);
5321 LOP(OP_SYSSEEK,XTERM);
5324 LOP(OP_SYSREAD,XTERM);
5327 LOP(OP_SYSWRITE,XTERM);
5331 TERM(sublex_start());
5352 LOP(OP_TRUNCATE,XTERM);
5364 yylval.ival = CopLINE(PL_curcop);
5368 yylval.ival = CopLINE(PL_curcop);
5372 LOP(OP_UNLINK,XTERM);
5378 LOP(OP_UNPACK,XTERM);
5381 LOP(OP_UTIME,XTERM);
5387 LOP(OP_UNSHIFT,XTERM);
5390 if (PL_expect != XSTATE)
5391 yyerror("\"use\" not allowed in expression");
5393 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5394 s = force_version(s, TRUE);
5395 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5396 PL_nextval[PL_nexttoke].opval = Nullop;
5399 else if (*s == 'v') {
5400 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5401 s = force_version(s, FALSE);
5405 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5406 s = force_version(s, FALSE);
5418 yylval.ival = CopLINE(PL_curcop);
5422 PL_hints |= HINT_BLOCK_SCOPE;
5429 LOP(OP_WAITPID,XTERM);
5438 ctl_l[0] = toCTRL('L');
5440 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5443 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5448 if (PL_expect == XOPERATOR)
5454 yylval.ival = OP_XOR;
5459 TERM(sublex_start());
5464 #pragma segment Main
5468 S_pending_ident(pTHX)
5471 register I32 tmp = 0;
5472 /* pit holds the identifier we read and pending_ident is reset */
5473 char pit = PL_pending_ident;
5474 PL_pending_ident = 0;
5476 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5477 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5479 /* if we're in a my(), we can't allow dynamics here.
5480 $foo'bar has already been turned into $foo::bar, so
5481 just check for colons.
5483 if it's a legal name, the OP is a PADANY.
5486 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5487 if (strchr(PL_tokenbuf,':'))
5488 yyerror(Perl_form(aTHX_ "No package name allowed for "
5489 "variable %s in \"our\"",
5491 tmp = allocmy(PL_tokenbuf);
5494 if (strchr(PL_tokenbuf,':'))
5495 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5497 yylval.opval = newOP(OP_PADANY, 0);
5498 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5504 build the ops for accesses to a my() variable.
5506 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5507 then used in a comparison. This catches most, but not
5508 all cases. For instance, it catches
5509 sort { my($a); $a <=> $b }
5511 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5512 (although why you'd do that is anyone's guess).
5515 if (!strchr(PL_tokenbuf,':')) {
5517 tmp = pad_findmy(PL_tokenbuf);
5518 if (tmp != NOT_IN_PAD) {
5519 /* might be an "our" variable" */
5520 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5521 /* build ops for a bareword */
5522 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
5523 sv_catpvn(sym, "::", 2);
5524 sv_catpv(sym, PL_tokenbuf+1);
5525 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5526 yylval.opval->op_private = OPpCONST_ENTERED;
5529 ? (GV_ADDMULTI | GV_ADDINEVAL)
5532 ((PL_tokenbuf[0] == '$') ? SVt_PV
5533 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5538 /* if it's a sort block and they're naming $a or $b */
5539 if (PL_last_lop_op == OP_SORT &&
5540 PL_tokenbuf[0] == '$' &&
5541 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5544 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5545 d < PL_bufend && *d != '\n';
5548 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5549 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5555 yylval.opval = newOP(OP_PADANY, 0);
5556 yylval.opval->op_targ = tmp;
5562 Whine if they've said @foo in a doublequoted string,
5563 and @foo isn't a variable we can find in the symbol
5566 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5567 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5568 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5569 && ckWARN(WARN_AMBIGUOUS))
5571 /* Downgraded from fatal to warning 20000522 mjd */
5572 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5573 "Possible unintended interpolation of %s in string",
5578 /* build ops for a bareword */
5579 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5580 yylval.opval->op_private = OPpCONST_ENTERED;
5581 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5582 ((PL_tokenbuf[0] == '$') ? SVt_PV
5583 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5589 * The following code was generated by perl_keyword.pl.
5593 Perl_keyword (pTHX_ char *name, I32 len)
5597 case 1: /* 5 tokens of length 1 */
5639 case 2: /* 18 tokens of length 2 */
5807 case 3: /* 28 tokens of length 3 */
5811 if (name[1] == 'N' &&
5874 if (name[1] == 'i' &&
5914 if (name[1] == 'o' &&
5923 if (name[1] == 'e' &&
5932 if (name[1] == 'n' &&
5941 if (name[1] == 'o' &&
5950 if (name[1] == 'a' &&
5959 if (name[1] == 'o' &&
6025 if (name[1] == 'e' &&
6057 if (name[1] == 'i' &&
6066 if (name[1] == 's' &&
6075 if (name[1] == 'e' &&
6084 if (name[1] == 'o' &&
6096 case 4: /* 40 tokens of length 4 */
6100 if (name[1] == 'O' &&
6110 if (name[1] == 'N' &&
6120 if (name[1] == 'i' &&
6130 if (name[1] == 'h' &&
6140 if (name[1] == 'u' &&
6153 if (name[2] == 'c' &&
6162 if (name[2] == 's' &&
6171 if (name[2] == 'a' &&
6207 if (name[1] == 'o' &&
6220 if (name[2] == 't' &&
6229 if (name[2] == 'o' &&
6238 if (name[2] == 't' &&
6247 if (name[2] == 'e' &&
6260 if (name[1] == 'o' &&
6273 if (name[2] == 'y' &&
6282 if (name[2] == 'l' &&
6298 if (name[2] == 's' &&
6307 if (name[2] == 'n' &&
6316 if (name[2] == 'c' &&
6329 if (name[1] == 'e' &&
6339 if (name[1] == 'p' &&
6352 if (name[2] == 'c' &&
6361 if (name[2] == 'p' &&
6370 if (name[2] == 's' &&
6386 if (name[2] == 'n' &&
6456 if (name[2] == 'r' &&
6465 if (name[2] == 'r' &&
6474 if (name[2] == 'a' &&
6490 if (name[2] == 'l' &&
6557 case 5: /* 36 tokens of length 5 */
6561 if (name[1] == 'E' &&
6572 if (name[1] == 'H' &&
6586 if (name[2] == 'a' &&
6596 if (name[2] == 'a' &&
6610 if (name[1] == 'l' &&
6627 if (name[3] == 'i' &&
6636 if (name[3] == 'o' &&
6672 if (name[2] == 'o' &&
6682 if (name[2] == 'y' &&
6696 if (name[1] == 'l' &&
6710 if (name[2] == 'n' &&
6720 if (name[2] == 'o' &&
6737 if (name[2] == 'd' &&
6747 if (name[2] == 'c' &&
6764 if (name[2] == 'c' &&
6774 if (name[2] == 't' &&
6788 if (name[1] == 'k' &&
6799 if (name[1] == 'r' &&
6813 if (name[2] == 's' &&
6823 if (name[2] == 'd' &&
6840 if (name[2] == 'm' &&
6850 if (name[2] == 'i' &&
6860 if (name[2] == 'e' &&
6870 if (name[2] == 'l' &&
6880 if (name[2] == 'a' &&
6890 if (name[2] == 'u' &&
6904 if (name[1] == 'i' &&
6918 if (name[2] == 'a' &&
6931 if (name[3] == 'e' &&
6970 if (name[2] == 'i' &&
6987 if (name[2] == 'i' &&
6997 if (name[2] == 'i' &&
7014 case 6: /* 33 tokens of length 6 */
7018 if (name[1] == 'c' &&
7033 if (name[2] == 'l' &&
7044 if (name[2] == 'r' &&
7059 if (name[1] == 'e' &&
7074 if (name[2] == 's' &&
7079 if(ckWARN_d(WARN_SYNTAX))
7080 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7086 if (name[2] == 'i' &&
7104 if (name[2] == 'l' &&
7115 if (name[2] == 'r' &&
7130 if (name[1] == 'm' &&
7145 if (name[2] == 'n' &&
7156 if (name[2] == 's' &&
7171 if (name[1] == 's' &&
7177 if (name[4] == 't' &&
7186 if (name[4] == 'e' &&
7195 if (name[4] == 'c' &&
7204 if (name[4] == 'n' &&
7220 if (name[1] == 'r' &&
7238 if (name[3] == 'a' &&
7248 if (name[3] == 'u' &&
7262 if (name[2] == 'n' &&
7280 if (name[2] == 'a' &&
7294 if (name[3] == 'e' &&
7307 if (name[4] == 't' &&
7316 if (name[4] == 'e' &&
7338 if (name[4] == 't' &&
7347 if (name[4] == 'e' &&
7363 if (name[2] == 'c' &&
7374 if (name[2] == 'l' &&
7385 if (name[2] == 'b' &&
7396 if (name[2] == 's' &&
7419 if (name[4] == 's' &&
7428 if (name[4] == 'n' &&
7441 if (name[3] == 'a' &&
7458 if (name[1] == 'a' &&
7473 case 7: /* 28 tokens of length 7 */
7477 if (name[1] == 'E' &&
7490 if (name[1] == '_' &&
7503 if (name[1] == 'i' &&
7510 return -KEY_binmode;
7516 if (name[1] == 'o' &&
7523 return -KEY_connect;
7532 if (name[2] == 'm' &&
7538 return -KEY_dbmopen;
7544 if (name[2] == 'f' &&
7560 if (name[1] == 'o' &&
7573 if (name[1] == 'e' &&
7580 if (name[5] == 'r' &&
7583 return -KEY_getpgrp;
7589 if (name[5] == 'i' &&
7592 return -KEY_getppid;
7605 if (name[1] == 'c' &&
7612 return -KEY_lcfirst;
7618 if (name[1] == 'p' &&
7625 return -KEY_opendir;
7631 if (name[1] == 'a' &&
7649 if (name[3] == 'd' &&
7654 return -KEY_readdir;
7660 if (name[3] == 'u' &&
7671 if (name[3] == 'e' &&
7676 return -KEY_reverse;
7695 if (name[3] == 'k' &&
7700 return -KEY_seekdir;
7706 if (name[3] == 'p' &&
7711 return -KEY_setpgrp;
7721 if (name[2] == 'm' &&
7727 return -KEY_shmread;
7733 if (name[2] == 'r' &&
7739 return -KEY_sprintf;
7748 if (name[3] == 'l' &&
7753 return -KEY_symlink;
7762 if (name[4] == 'a' &&
7766 return -KEY_syscall;
7772 if (name[4] == 'p' &&
7776 return -KEY_sysopen;
7782 if (name[4] == 'e' &&
7786 return -KEY_sysread;
7792 if (name[4] == 'e' &&
7796 return -KEY_sysseek;
7814 if (name[1] == 'e' &&
7821 return -KEY_telldir;
7830 if (name[2] == 'f' &&
7836 return -KEY_ucfirst;
7842 if (name[2] == 's' &&
7848 return -KEY_unshift;
7858 if (name[1] == 'a' &&
7865 return -KEY_waitpid;
7874 case 8: /* 26 tokens of length 8 */
7878 if (name[1] == 'U' &&
7886 return KEY_AUTOLOAD;
7897 if (name[3] == 'A' &&
7903 return KEY___DATA__;
7909 if (name[3] == 'I' &&
7915 return -KEY___FILE__;
7921 if (name[3] == 'I' &&
7927 return -KEY___LINE__;
7943 if (name[2] == 'o' &&
7950 return -KEY_closedir;
7956 if (name[2] == 'n' &&
7963 return -KEY_continue;
7973 if (name[1] == 'b' &&
7981 return -KEY_dbmclose;
7987 if (name[1] == 'n' &&
7993 if (name[4] == 'r' &&
7998 return -KEY_endgrent;
8004 if (name[4] == 'w' &&
8009 return -KEY_endpwent;
8022 if (name[1] == 'o' &&
8030 return -KEY_formline;
8036 if (name[1] == 'e' &&
8047 if (name[6] == 'n' &&
8050 return -KEY_getgrent;
8056 if (name[6] == 'i' &&
8059 return -KEY_getgrgid;
8065 if (name[6] == 'a' &&
8068 return -KEY_getgrnam;
8081 if (name[4] == 'o' &&
8086 return -KEY_getlogin;
8097 if (name[6] == 'n' &&
8100 return -KEY_getpwent;
8106 if (name[6] == 'a' &&
8109 return -KEY_getpwnam;
8115 if (name[6] == 'i' &&
8118 return -KEY_getpwuid;
8138 if (name[1] == 'e' &&
8145 if (name[5] == 'i' &&
8152 return -KEY_readline;
8159 return -KEY_readlink;
8172 if (name[5] == 'i' &&
8176 return -KEY_readpipe;
8197 if (name[4] == 'r' &&
8202 return -KEY_setgrent;
8208 if (name[4] == 'w' &&
8213 return -KEY_setpwent;
8229 if (name[3] == 'w' &&
8235 return -KEY_shmwrite;
8241 if (name[3] == 't' &&
8247 return -KEY_shutdown;
8257 if (name[2] == 's' &&
8264 return -KEY_syswrite;
8274 if (name[1] == 'r' &&
8282 return -KEY_truncate;
8291 case 9: /* 8 tokens of length 9 */
8295 if (name[1] == 'n' &&
8304 return -KEY_endnetent;
8310 if (name[1] == 'e' &&
8319 return -KEY_getnetent;
8325 if (name[1] == 'o' &&
8334 return -KEY_localtime;
8340 if (name[1] == 'r' &&
8349 return KEY_prototype;
8355 if (name[1] == 'u' &&
8364 return -KEY_quotemeta;
8370 if (name[1] == 'e' &&
8379 return -KEY_rewinddir;
8385 if (name[1] == 'e' &&
8394 return -KEY_setnetent;
8400 if (name[1] == 'a' &&
8409 return -KEY_wantarray;
8418 case 10: /* 9 tokens of length 10 */
8422 if (name[1] == 'n' &&
8428 if (name[4] == 'o' &&
8435 return -KEY_endhostent;
8441 if (name[4] == 'e' &&
8448 return -KEY_endservent;
8461 if (name[1] == 'e' &&
8467 if (name[4] == 'o' &&
8474 return -KEY_gethostent;
8483 if (name[5] == 'r' &&
8489 return -KEY_getservent;
8495 if (name[5] == 'c' &&
8501 return -KEY_getsockopt;
8526 if (name[4] == 'o' &&
8533 return -KEY_sethostent;
8542 if (name[5] == 'r' &&
8548 return -KEY_setservent;
8554 if (name[5] == 'c' &&
8560 return -KEY_setsockopt;
8577 if (name[2] == 'c' &&
8586 return -KEY_socketpair;
8599 case 11: /* 8 tokens of length 11 */
8603 if (name[1] == '_' &&
8614 return -KEY___PACKAGE__;
8620 if (name[1] == 'n' &&
8631 return -KEY_endprotoent;
8637 if (name[1] == 'e' &&
8646 if (name[5] == 'e' &&
8653 return -KEY_getpeername;
8662 if (name[6] == 'o' &&
8668 return -KEY_getpriority;
8674 if (name[6] == 't' &&
8680 return -KEY_getprotoent;
8694 if (name[4] == 'o' &&
8702 return -KEY_getsockname;
8715 if (name[1] == 'e' &&
8723 if (name[6] == 'o' &&
8729 return -KEY_setpriority;
8735 if (name[6] == 't' &&
8741 return -KEY_setprotoent;
8757 case 12: /* 2 tokens of length 12 */
8758 if (name[0] == 'g' &&
8770 if (name[9] == 'd' &&
8773 { /* getnetbyaddr */
8774 return -KEY_getnetbyaddr;
8780 if (name[9] == 'a' &&
8783 { /* getnetbyname */
8784 return -KEY_getnetbyname;
8796 case 13: /* 4 tokens of length 13 */
8797 if (name[0] == 'g' &&
8804 if (name[4] == 'o' &&
8813 if (name[10] == 'd' &&
8816 { /* gethostbyaddr */
8817 return -KEY_gethostbyaddr;
8823 if (name[10] == 'a' &&
8826 { /* gethostbyname */
8827 return -KEY_gethostbyname;
8840 if (name[4] == 'e' &&
8849 if (name[10] == 'a' &&
8852 { /* getservbyname */
8853 return -KEY_getservbyname;
8859 if (name[10] == 'o' &&
8862 { /* getservbyport */
8863 return -KEY_getservbyport;
8882 case 14: /* 1 tokens of length 14 */
8883 if (name[0] == 'g' &&
8897 { /* getprotobyname */
8898 return -KEY_getprotobyname;
8903 case 16: /* 1 tokens of length 16 */
8904 if (name[0] == 'g' &&
8920 { /* getprotobynumber */
8921 return -KEY_getprotobynumber;
8935 S_checkcomma(pTHX_ register char *s, char *name, char *what)
8939 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8940 if (ckWARN(WARN_SYNTAX)) {
8942 for (w = s+2; *w && level; w++) {
8949 for (; *w && isSPACE(*w); w++) ;
8950 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
8951 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8952 "%s (...) interpreted as function",name);
8955 while (s < PL_bufend && isSPACE(*s))
8959 while (s < PL_bufend && isSPACE(*s))
8961 if (isIDFIRST_lazy_if(s,UTF)) {
8963 while (isALNUM_lazy_if(s,UTF))
8965 while (s < PL_bufend && isSPACE(*s))
8970 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
8974 Perl_croak(aTHX_ "No comma allowed after %s", what);
8979 /* Either returns sv, or mortalizes sv and returns a new SV*.
8980 Best used as sv=new_constant(..., sv, ...).
8981 If s, pv are NULL, calls subroutine with one argument,
8982 and type is used with error messages only. */
8985 S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
8989 HV *table = GvHV(PL_hintgv); /* ^H */
8993 const char *why1, *why2, *why3;
8995 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8998 why2 = strEQ(key,"charnames")
8999 ? "(possibly a missing \"use charnames ...\")"
9001 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9002 (type ? type: "undef"), why2);
9004 /* This is convoluted and evil ("goto considered harmful")
9005 * but I do not understand the intricacies of all the different
9006 * failure modes of %^H in here. The goal here is to make
9007 * the most probable error message user-friendly. --jhi */
9012 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9013 (type ? type: "undef"), why1, why2, why3);
9015 yyerror(SvPVX(msg));
9019 cvp = hv_fetch(table, key, strlen(key), FALSE);
9020 if (!cvp || !SvOK(*cvp)) {
9023 why3 = "} is not defined";
9026 sv_2mortal(sv); /* Parent created it permanently */
9029 pv = sv_2mortal(newSVpvn(s, len));
9031 typesv = sv_2mortal(newSVpv(type, 0));
9033 typesv = &PL_sv_undef;
9035 PUSHSTACKi(PERLSI_OVERLOAD);
9047 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9051 /* Check the eval first */
9052 if (!PL_in_eval && SvTRUE(ERRSV)) {
9054 sv_catpv(ERRSV, "Propagated");
9055 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
9057 res = SvREFCNT_inc(sv);
9061 (void)SvREFCNT_inc(res);
9070 why1 = "Call to &{$^H{";
9072 why3 = "}} did not return a defined value";
9080 /* Returns a NUL terminated string, with the length of the string written to
9084 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9086 register char *d = dest;
9087 register char *e = d + destlen - 3; /* two-character token, ending NUL */
9090 Perl_croak(aTHX_ ident_too_long);
9091 if (isALNUM(*s)) /* UTF handled below */
9093 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9098 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9102 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9103 char *t = s + UTF8SKIP(s);
9104 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9106 if (d + (t - s) > e)
9107 Perl_croak(aTHX_ ident_too_long);
9108 Copy(s, d, t - s, char);
9121 S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
9131 e = d + destlen - 3; /* two-character token, ending NUL */
9133 while (isDIGIT(*s)) {
9135 Perl_croak(aTHX_ ident_too_long);
9142 Perl_croak(aTHX_ ident_too_long);
9143 if (isALNUM(*s)) /* UTF handled below */
9145 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9150 else if (*s == ':' && s[1] == ':') {
9154 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9155 char *t = s + UTF8SKIP(s);
9156 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9158 if (d + (t - s) > e)
9159 Perl_croak(aTHX_ ident_too_long);
9160 Copy(s, d, t - s, char);
9171 if (PL_lex_state != LEX_NORMAL)
9172 PL_lex_state = LEX_INTERPENDMAYBE;
9175 if (*s == '$' && s[1] &&
9176 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9189 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9194 if (isSPACE(s[-1])) {
9197 if (!SPACE_OR_TAB(ch)) {
9203 if (isIDFIRST_lazy_if(d,UTF)) {
9207 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9209 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9212 Copy(s, d, e - s, char);
9217 while ((isALNUM(*s) || *s == ':') && d < e)
9220 Perl_croak(aTHX_ ident_too_long);
9223 while (s < send && SPACE_OR_TAB(*s)) s++;
9224 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9225 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9226 const char *brack = *s == '[' ? "[...]" : "{...}";
9227 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9228 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9229 funny, dest, brack, funny, dest, brack);
9232 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9236 /* Handle extended ${^Foo} variables
9237 * 1999-02-27 mjd-perl-patch@plover.com */
9238 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9242 while (isALNUM(*s) && d < e) {
9246 Perl_croak(aTHX_ ident_too_long);
9251 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9252 PL_lex_state = LEX_INTERPEND;
9257 if (PL_lex_state == LEX_NORMAL) {
9258 if (ckWARN(WARN_AMBIGUOUS) &&
9259 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9261 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9262 "Ambiguous use of %c{%s} resolved to %c%s",
9263 funny, dest, funny, dest);
9268 s = bracket; /* let the parser handle it */
9272 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9273 PL_lex_state = LEX_INTERPEND;
9278 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9283 *pmfl |= PMf_GLOBAL;
9285 *pmfl |= PMf_CONTINUE;
9289 *pmfl |= PMf_MULTILINE;
9291 *pmfl |= PMf_SINGLELINE;
9293 *pmfl |= PMf_EXTENDED;
9297 S_scan_pat(pTHX_ char *start, I32 type)
9302 s = scan_str(start,FALSE,FALSE);
9304 Perl_croak(aTHX_ "Search pattern not terminated");
9306 pm = (PMOP*)newPMOP(type, 0);
9307 if (PL_multi_open == '?')
9308 pm->op_pmflags |= PMf_ONCE;
9310 while (*s && strchr("iomsx", *s))
9311 pmflag(&pm->op_pmflags,*s++);
9314 while (*s && strchr("iogcmsx", *s))
9315 pmflag(&pm->op_pmflags,*s++);
9317 /* issue a warning if /c is specified,but /g is not */
9318 if (ckWARN(WARN_REGEXP) &&
9319 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9321 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
9324 pm->op_pmpermflags = pm->op_pmflags;
9326 PL_lex_op = (OP*)pm;
9327 yylval.ival = OP_MATCH;
9332 S_scan_subst(pTHX_ char *start)
9339 yylval.ival = OP_NULL;
9341 s = scan_str(start,FALSE,FALSE);
9344 Perl_croak(aTHX_ "Substitution pattern not terminated");
9346 if (s[-1] == PL_multi_open)
9349 first_start = PL_multi_start;
9350 s = scan_str(s,FALSE,FALSE);
9353 SvREFCNT_dec(PL_lex_stuff);
9354 PL_lex_stuff = Nullsv;
9356 Perl_croak(aTHX_ "Substitution replacement not terminated");
9358 PL_multi_start = first_start; /* so whole substitution is taken together */
9360 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9366 else if (strchr("iogcmsx", *s))
9367 pmflag(&pm->op_pmflags,*s++);
9372 /* /c is not meaningful with s/// */
9373 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
9375 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
9380 PL_sublex_info.super_bufptr = s;
9381 PL_sublex_info.super_bufend = PL_bufend;
9383 pm->op_pmflags |= PMf_EVAL;
9384 repl = newSVpvn("",0);
9386 sv_catpv(repl, es ? "eval " : "do ");
9387 sv_catpvn(repl, "{ ", 2);
9388 sv_catsv(repl, PL_lex_repl);
9389 sv_catpvn(repl, " };", 2);
9391 SvREFCNT_dec(PL_lex_repl);
9395 pm->op_pmpermflags = pm->op_pmflags;
9396 PL_lex_op = (OP*)pm;
9397 yylval.ival = OP_SUBST;
9402 S_scan_trans(pTHX_ char *start)
9411 yylval.ival = OP_NULL;
9413 s = scan_str(start,FALSE,FALSE);
9415 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9416 if (s[-1] == PL_multi_open)
9419 s = scan_str(s,FALSE,FALSE);
9422 SvREFCNT_dec(PL_lex_stuff);
9423 PL_lex_stuff = Nullsv;
9425 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9428 complement = del = squash = 0;
9432 complement = OPpTRANS_COMPLEMENT;
9435 del = OPpTRANS_DELETE;
9438 squash = OPpTRANS_SQUASH;
9447 New(803, tbl, complement&&!del?258:256, short);
9448 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9449 o->op_private &= ~OPpTRANS_ALL;
9450 o->op_private |= del|squash|complement|
9451 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9452 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9455 yylval.ival = OP_TRANS;
9460 S_scan_heredoc(pTHX_ register char *s)
9463 I32 op_type = OP_SCALAR;
9470 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9474 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9477 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9478 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9481 s = delimcpy(d, e, s, PL_bufend, term, &len);
9491 if (!isALNUM_lazy_if(s,UTF))
9492 deprecate_old("bare << to mean <<\"\"");
9493 for (; isALNUM_lazy_if(s,UTF); s++) {
9498 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9499 Perl_croak(aTHX_ "Delimiter for here document is too long");
9502 len = d - PL_tokenbuf;
9503 #ifndef PERL_STRICT_CR
9504 d = strchr(s, '\r');
9508 while (s < PL_bufend) {
9514 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9523 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
9528 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
9529 herewas = newSVpvn(s,PL_bufend-s);
9531 s--, herewas = newSVpvn(s,d-s);
9532 s += SvCUR(herewas);
9534 tmpstr = NEWSV(87,79);
9535 sv_upgrade(tmpstr, SVt_PVIV);
9540 else if (term == '`') {
9541 op_type = OP_BACKTICK;
9542 SvIVX(tmpstr) = '\\';
9546 PL_multi_start = CopLINE(PL_curcop);
9547 PL_multi_open = PL_multi_close = '<';
9548 term = *PL_tokenbuf;
9549 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9550 char *bufptr = PL_sublex_info.super_bufptr;
9551 char *bufend = PL_sublex_info.super_bufend;
9552 char *olds = s - SvCUR(herewas);
9553 s = strchr(bufptr, '\n');
9557 while (s < bufend &&
9558 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9560 CopLINE_inc(PL_curcop);
9563 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9564 missingterm(PL_tokenbuf);
9566 sv_setpvn(herewas,bufptr,d-bufptr+1);
9567 sv_setpvn(tmpstr,d+1,s-d);
9569 sv_catpvn(herewas,s,bufend-s);
9570 Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
9577 while (s < PL_bufend &&
9578 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9580 CopLINE_inc(PL_curcop);
9582 if (s >= PL_bufend) {
9583 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9584 missingterm(PL_tokenbuf);
9586 sv_setpvn(tmpstr,d+1,s-d);
9588 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9590 sv_catpvn(herewas,s,PL_bufend-s);
9591 sv_setsv(PL_linestr,herewas);
9592 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9593 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9594 PL_last_lop = PL_last_uni = Nullch;
9597 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9598 while (s >= PL_bufend) { /* multiple line string? */
9600 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9601 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9602 missingterm(PL_tokenbuf);
9604 CopLINE_inc(PL_curcop);
9605 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9606 PL_last_lop = PL_last_uni = Nullch;
9607 #ifndef PERL_STRICT_CR
9608 if (PL_bufend - PL_linestart >= 2) {
9609 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9610 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9612 PL_bufend[-2] = '\n';
9614 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
9616 else if (PL_bufend[-1] == '\r')
9617 PL_bufend[-1] = '\n';
9619 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9620 PL_bufend[-1] = '\n';
9622 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9623 SV *sv = NEWSV(88,0);
9625 sv_upgrade(sv, SVt_PVMG);
9626 sv_setsv(sv,PL_linestr);
9629 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9631 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9632 STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
9633 *(SvPVX(PL_linestr) + off ) = ' ';
9634 sv_catsv(PL_linestr,herewas);
9635 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9636 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9640 sv_catsv(tmpstr,PL_linestr);
9645 PL_multi_end = CopLINE(PL_curcop);
9646 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9647 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
9648 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
9650 SvREFCNT_dec(herewas);
9652 if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
9654 else if (PL_encoding)
9655 sv_recode_to_utf8(tmpstr, PL_encoding);
9657 PL_lex_stuff = tmpstr;
9658 yylval.ival = op_type;
9663 takes: current position in input buffer
9664 returns: new position in input buffer
9665 side-effects: yylval and lex_op are set.
9670 <FH> read from filehandle
9671 <pkg::FH> read from package qualified filehandle
9672 <pkg'FH> read from package qualified filehandle
9673 <$fh> read from filehandle in $fh
9679 S_scan_inputsymbol(pTHX_ char *start)
9681 register char *s = start; /* current position in buffer */
9687 d = PL_tokenbuf; /* start of temp holding space */
9688 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9689 end = strchr(s, '\n');
9692 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9694 /* die if we didn't have space for the contents of the <>,
9695 or if it didn't end, or if we see a newline
9698 if (len >= sizeof PL_tokenbuf)
9699 Perl_croak(aTHX_ "Excessively long <> operator");
9701 Perl_croak(aTHX_ "Unterminated <> operator");
9706 Remember, only scalar variables are interpreted as filehandles by
9707 this code. Anything more complex (e.g., <$fh{$num}>) will be
9708 treated as a glob() call.
9709 This code makes use of the fact that except for the $ at the front,
9710 a scalar variable and a filehandle look the same.
9712 if (*d == '$' && d[1]) d++;
9714 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9715 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9718 /* If we've tried to read what we allow filehandles to look like, and
9719 there's still text left, then it must be a glob() and not a getline.
9720 Use scan_str to pull out the stuff between the <> and treat it
9721 as nothing more than a string.
9724 if (d - PL_tokenbuf != len) {
9725 yylval.ival = OP_GLOB;
9727 s = scan_str(start,FALSE,FALSE);
9729 Perl_croak(aTHX_ "Glob not terminated");
9733 bool readline_overriden = FALSE;
9734 GV *gv_readline = Nullgv;
9736 /* we're in a filehandle read situation */
9739 /* turn <> into <ARGV> */
9741 Copy("ARGV",d,5,char);
9743 /* Check whether readline() is overriden */
9744 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
9745 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9747 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9748 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9749 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9750 readline_overriden = TRUE;
9752 /* if <$fh>, create the ops to turn the variable into a
9758 /* try to find it in the pad for this block, otherwise find
9759 add symbol table ops
9761 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
9762 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
9763 SV *sym = sv_2mortal(
9764 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
9765 sv_catpvn(sym, "::", 2);
9771 OP *o = newOP(OP_PADSV, 0);
9773 PL_lex_op = readline_overriden
9774 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9775 append_elem(OP_LIST, o,
9776 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9777 : (OP*)newUNOP(OP_READLINE, 0, o);
9786 ? (GV_ADDMULTI | GV_ADDINEVAL)
9789 PL_lex_op = readline_overriden
9790 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9791 append_elem(OP_LIST,
9792 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9793 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9794 : (OP*)newUNOP(OP_READLINE, 0,
9795 newUNOP(OP_RV2SV, 0,
9796 newGVOP(OP_GV, 0, gv)));
9798 if (!readline_overriden)
9799 PL_lex_op->op_flags |= OPf_SPECIAL;
9800 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
9801 yylval.ival = OP_NULL;
9804 /* If it's none of the above, it must be a literal filehandle
9805 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9807 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9808 PL_lex_op = readline_overriden
9809 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9810 append_elem(OP_LIST,
9811 newGVOP(OP_GV, 0, gv),
9812 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9813 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9814 yylval.ival = OP_NULL;
9823 takes: start position in buffer
9824 keep_quoted preserve \ on the embedded delimiter(s)
9825 keep_delims preserve the delimiters around the string
9826 returns: position to continue reading from buffer
9827 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9828 updates the read buffer.
9830 This subroutine pulls a string out of the input. It is called for:
9831 q single quotes q(literal text)
9832 ' single quotes 'literal text'
9833 qq double quotes qq(interpolate $here please)
9834 " double quotes "interpolate $here please"
9835 qx backticks qx(/bin/ls -l)
9836 ` backticks `/bin/ls -l`
9837 qw quote words @EXPORT_OK = qw( func() $spam )
9838 m// regexp match m/this/
9839 s/// regexp substitute s/this/that/
9840 tr/// string transliterate tr/this/that/
9841 y/// string transliterate y/this/that/
9842 ($*@) sub prototypes sub foo ($)
9843 (stuff) sub attr parameters sub foo : attr(stuff)
9844 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9846 In most of these cases (all but <>, patterns and transliterate)
9847 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9848 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9849 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9852 It skips whitespace before the string starts, and treats the first
9853 character as the delimiter. If the delimiter is one of ([{< then
9854 the corresponding "close" character )]}> is used as the closing
9855 delimiter. It allows quoting of delimiters, and if the string has
9856 balanced delimiters ([{<>}]) it allows nesting.
9858 On success, the SV with the resulting string is put into lex_stuff or,
9859 if that is already non-NULL, into lex_repl. The second case occurs only
9860 when parsing the RHS of the special constructs s/// and tr/// (y///).
9861 For convenience, the terminating delimiter character is stuffed into
9866 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9868 SV *sv; /* scalar value: string */
9869 char *tmps; /* temp string, used for delimiter matching */
9870 register char *s = start; /* current position in the buffer */
9871 register char term; /* terminating character */
9872 register char *to; /* current position in the sv's data */
9873 I32 brackets = 1; /* bracket nesting level */
9874 bool has_utf8 = FALSE; /* is there any utf8 content? */
9875 I32 termcode; /* terminating char. code */
9876 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9877 STRLEN termlen; /* length of terminating string */
9878 char *last = NULL; /* last position for nesting bracket */
9880 /* skip space before the delimiter */
9884 /* mark where we are, in case we need to report errors */
9887 /* after skipping whitespace, the next character is the terminator */
9890 termcode = termstr[0] = term;
9894 termcode = utf8_to_uvchr((U8*)s, &termlen);
9895 Copy(s, termstr, termlen, U8);
9896 if (!UTF8_IS_INVARIANT(term))
9900 /* mark where we are */
9901 PL_multi_start = CopLINE(PL_curcop);
9902 PL_multi_open = term;
9904 /* find corresponding closing delimiter */
9905 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9906 termcode = termstr[0] = term = tmps[5];
9908 PL_multi_close = term;
9910 /* create a new SV to hold the contents. 87 is leak category, I'm
9911 assuming. 79 is the SV's initial length. What a random number. */
9913 sv_upgrade(sv, SVt_PVIV);
9914 SvIVX(sv) = termcode;
9915 (void)SvPOK_only(sv); /* validate pointer */
9917 /* move past delimiter and try to read a complete string */
9919 sv_catpvn(sv, s, termlen);
9922 if (PL_encoding && !UTF) {
9926 int offset = s - SvPVX(PL_linestr);
9927 bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9928 &offset, (char*)termstr, termlen);
9929 char *ns = SvPVX(PL_linestr) + offset;
9930 char *svlast = SvEND(sv) - 1;
9932 for (; s < ns; s++) {
9933 if (*s == '\n' && !PL_rsfp)
9934 CopLINE_inc(PL_curcop);
9937 goto read_more_line;
9939 /* handle quoted delimiters */
9940 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9942 for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
9944 if ((svlast-1 - t) % 2) {
9948 SvCUR_set(sv, SvCUR(sv) - 1);
9953 if (PL_multi_open == PL_multi_close) {
9960 for (w = t = last; t < svlast; w++, t++) {
9961 /* At here, all closes are "was quoted" one,
9962 so we don't check PL_multi_close. */
9964 if (!keep_quoted && *(t+1) == PL_multi_open)
9969 else if (*t == PL_multi_open)
9977 SvCUR_set(sv, w - SvPVX(sv));
9980 if (--brackets <= 0)
9986 SvCUR_set(sv, SvCUR(sv) - 1);
9992 /* extend sv if need be */
9993 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9994 /* set 'to' to the next character in the sv's string */
9995 to = SvPVX(sv)+SvCUR(sv);
9997 /* if open delimiter is the close delimiter read unbridle */
9998 if (PL_multi_open == PL_multi_close) {
9999 for (; s < PL_bufend; s++,to++) {
10000 /* embedded newlines increment the current line number */
10001 if (*s == '\n' && !PL_rsfp)
10002 CopLINE_inc(PL_curcop);
10003 /* handle quoted delimiters */
10004 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10005 if (!keep_quoted && s[1] == term)
10007 /* any other quotes are simply copied straight through */
10011 /* terminate when run out of buffer (the for() condition), or
10012 have found the terminator */
10013 else if (*s == term) {
10016 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10019 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10025 /* if the terminator isn't the same as the start character (e.g.,
10026 matched brackets), we have to allow more in the quoting, and
10027 be prepared for nested brackets.
10030 /* read until we run out of string, or we find the terminator */
10031 for (; s < PL_bufend; s++,to++) {
10032 /* embedded newlines increment the line count */
10033 if (*s == '\n' && !PL_rsfp)
10034 CopLINE_inc(PL_curcop);
10035 /* backslashes can escape the open or closing characters */
10036 if (*s == '\\' && s+1 < PL_bufend) {
10037 if (!keep_quoted &&
10038 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10043 /* allow nested opens and closes */
10044 else if (*s == PL_multi_close && --brackets <= 0)
10046 else if (*s == PL_multi_open)
10048 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10053 /* terminate the copied string and update the sv's end-of-string */
10055 SvCUR_set(sv, to - SvPVX(sv));
10058 * this next chunk reads more into the buffer if we're not done yet
10062 break; /* handle case where we are done yet :-) */
10064 #ifndef PERL_STRICT_CR
10065 if (to - SvPVX(sv) >= 2) {
10066 if ((to[-2] == '\r' && to[-1] == '\n') ||
10067 (to[-2] == '\n' && to[-1] == '\r'))
10071 SvCUR_set(sv, to - SvPVX(sv));
10073 else if (to[-1] == '\r')
10076 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
10081 /* if we're out of file, or a read fails, bail and reset the current
10082 line marker so we can report where the unterminated string began
10085 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10087 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10090 /* we read a line, so increment our line counter */
10091 CopLINE_inc(PL_curcop);
10093 /* update debugger info */
10094 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10095 SV *sv = NEWSV(88,0);
10097 sv_upgrade(sv, SVt_PVMG);
10098 sv_setsv(sv,PL_linestr);
10099 (void)SvIOK_on(sv);
10101 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10104 /* having changed the buffer, we must update PL_bufend */
10105 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10106 PL_last_lop = PL_last_uni = Nullch;
10109 /* at this point, we have successfully read the delimited string */
10111 if (!PL_encoding || UTF) {
10113 sv_catpvn(sv, s, termlen);
10116 if (has_utf8 || PL_encoding)
10119 PL_multi_end = CopLINE(PL_curcop);
10121 /* if we allocated too much space, give some back */
10122 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10123 SvLEN_set(sv, SvCUR(sv) + 1);
10124 Renew(SvPVX(sv), SvLEN(sv), char);
10127 /* decide whether this is the first or second quoted string we've read
10140 takes: pointer to position in buffer
10141 returns: pointer to new position in buffer
10142 side-effects: builds ops for the constant in yylval.op
10144 Read a number in any of the formats that Perl accepts:
10146 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10147 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10150 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10152 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10155 If it reads a number without a decimal point or an exponent, it will
10156 try converting the number to an integer and see if it can do so
10157 without loss of precision.
10161 Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
10163 register char *s = start; /* current position in buffer */
10164 register char *d; /* destination in temp buffer */
10165 register char *e; /* end of temp buffer */
10166 NV nv; /* number read, as a double */
10167 SV *sv = Nullsv; /* place to put the converted number */
10168 bool floatit; /* boolean: int or float? */
10169 char *lastub = 0; /* position of last underbar */
10170 static char number_too_long[] = "Number too long";
10172 /* We use the first character to decide what type of number this is */
10176 Perl_croak(aTHX_ "panic: scan_num");
10178 /* if it starts with a 0, it could be an octal number, a decimal in
10179 0.13 disguise, or a hexadecimal number, or a binary number. */
10183 u holds the "number so far"
10184 shift the power of 2 of the base
10185 (hex == 4, octal == 3, binary == 1)
10186 overflowed was the number more than we can hold?
10188 Shift is used when we add a digit. It also serves as an "are
10189 we in octal/hex/binary?" indicator to disallow hex characters
10190 when in octal mode.
10195 bool overflowed = FALSE;
10196 bool just_zero = TRUE; /* just plain 0 or binary number? */
10197 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10198 static char* bases[5] = { "", "binary", "", "octal",
10200 static char* Bases[5] = { "", "Binary", "", "Octal",
10202 static char *maxima[5] = { "",
10203 "0b11111111111111111111111111111111",
10207 char *base, *Base, *max;
10209 /* check for hex */
10214 } else if (s[1] == 'b') {
10219 /* check for a decimal in disguise */
10220 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10222 /* so it must be octal */
10229 if (ckWARN(WARN_SYNTAX))
10230 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10231 "Misplaced _ in number");
10235 base = bases[shift];
10236 Base = Bases[shift];
10237 max = maxima[shift];
10239 /* read the rest of the number */
10241 /* x is used in the overflow test,
10242 b is the digit we're adding on. */
10247 /* if we don't mention it, we're done */
10251 /* _ are ignored -- but warned about if consecutive */
10253 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10254 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10255 "Misplaced _ in number");
10259 /* 8 and 9 are not octal */
10260 case '8': case '9':
10262 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10266 case '2': case '3': case '4':
10267 case '5': case '6': case '7':
10269 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10272 case '0': case '1':
10273 b = *s++ & 15; /* ASCII digit -> value of digit */
10277 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10278 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10279 /* make sure they said 0x */
10282 b = (*s++ & 7) + 9;
10284 /* Prepare to put the digit we have onto the end
10285 of the number so far. We check for overflows.
10291 x = u << shift; /* make room for the digit */
10293 if ((x >> shift) != u
10294 && !(PL_hints & HINT_NEW_BINARY)) {
10297 if (ckWARN_d(WARN_OVERFLOW))
10298 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10299 "Integer overflow in %s number",
10302 u = x | b; /* add the digit to the end */
10305 n *= nvshift[shift];
10306 /* If an NV has not enough bits in its
10307 * mantissa to represent an UV this summing of
10308 * small low-order numbers is a waste of time
10309 * (because the NV cannot preserve the
10310 * low-order bits anyway): we could just
10311 * remember when did we overflow and in the
10312 * end just multiply n by the right
10320 /* if we get here, we had success: make a scalar value from
10325 /* final misplaced underbar check */
10326 if (s[-1] == '_') {
10327 if (ckWARN(WARN_SYNTAX))
10328 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10333 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
10334 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10335 "%s number > %s non-portable",
10341 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
10342 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10343 "%s number > %s non-portable",
10348 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10349 sv = new_constant(start, s - start, "integer",
10351 else if (PL_hints & HINT_NEW_BINARY)
10352 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10357 handle decimal numbers.
10358 we're also sent here when we read a 0 as the first digit
10360 case '1': case '2': case '3': case '4': case '5':
10361 case '6': case '7': case '8': case '9': case '.':
10364 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10367 /* read next group of digits and _ and copy into d */
10368 while (isDIGIT(*s) || *s == '_') {
10369 /* skip underscores, checking for misplaced ones
10373 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10374 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10375 "Misplaced _ in number");
10379 /* check for end of fixed-length buffer */
10381 Perl_croak(aTHX_ number_too_long);
10382 /* if we're ok, copy the character */
10387 /* final misplaced underbar check */
10388 if (lastub && s == lastub + 1) {
10389 if (ckWARN(WARN_SYNTAX))
10390 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10393 /* read a decimal portion if there is one. avoid
10394 3..5 being interpreted as the number 3. followed
10397 if (*s == '.' && s[1] != '.') {
10402 if (ckWARN(WARN_SYNTAX))
10403 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10404 "Misplaced _ in number");
10408 /* copy, ignoring underbars, until we run out of digits.
10410 for (; isDIGIT(*s) || *s == '_'; s++) {
10411 /* fixed length buffer check */
10413 Perl_croak(aTHX_ number_too_long);
10415 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
10416 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10417 "Misplaced _ in number");
10423 /* fractional part ending in underbar? */
10424 if (s[-1] == '_') {
10425 if (ckWARN(WARN_SYNTAX))
10426 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10427 "Misplaced _ in number");
10429 if (*s == '.' && isDIGIT(s[1])) {
10430 /* oops, it's really a v-string, but without the "v" */
10436 /* read exponent part, if present */
10437 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10441 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10442 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10444 /* stray preinitial _ */
10446 if (ckWARN(WARN_SYNTAX))
10447 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10448 "Misplaced _ in number");
10452 /* allow positive or negative exponent */
10453 if (*s == '+' || *s == '-')
10456 /* stray initial _ */
10458 if (ckWARN(WARN_SYNTAX))
10459 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10460 "Misplaced _ in number");
10464 /* read digits of exponent */
10465 while (isDIGIT(*s) || *s == '_') {
10468 Perl_croak(aTHX_ number_too_long);
10472 if (ckWARN(WARN_SYNTAX) &&
10473 ((lastub && s == lastub + 1) ||
10474 (!isDIGIT(s[1]) && s[1] != '_')))
10475 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10476 "Misplaced _ in number");
10483 /* make an sv from the string */
10487 We try to do an integer conversion first if no characters
10488 indicating "float" have been found.
10493 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10495 if (flags == IS_NUMBER_IN_UV) {
10497 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10500 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10501 if (uv <= (UV) IV_MIN)
10502 sv_setiv(sv, -(IV)uv);
10509 /* terminate the string */
10511 nv = Atof(PL_tokenbuf);
10515 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10516 (PL_hints & HINT_NEW_INTEGER) )
10517 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10518 (floatit ? "float" : "integer"),
10522 /* if it starts with a v, it could be a v-string */
10525 sv = NEWSV(92,5); /* preallocate storage space */
10526 s = scan_vstring(s,sv);
10530 /* make the op for the constant and return */
10533 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10535 lvalp->opval = Nullop;
10541 S_scan_formline(pTHX_ register char *s)
10543 register char *eol;
10545 SV *stuff = newSVpvn("",0);
10546 bool needargs = FALSE;
10547 bool eofmt = FALSE;
10549 while (!needargs) {
10552 #ifdef PERL_STRICT_CR
10553 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10555 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10557 if (*t == '\n' || t == PL_bufend) {
10562 if (PL_in_eval && !PL_rsfp) {
10563 eol = memchr(s,'\n',PL_bufend-s);
10568 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10570 for (t = s; t < eol; t++) {
10571 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10573 goto enough; /* ~~ must be first line in formline */
10575 if (*t == '@' || *t == '^')
10579 sv_catpvn(stuff, s, eol-s);
10580 #ifndef PERL_STRICT_CR
10581 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10582 char *end = SvPVX(stuff) + SvCUR(stuff);
10594 s = filter_gets(PL_linestr, PL_rsfp, 0);
10595 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10596 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10597 PL_last_lop = PL_last_uni = Nullch;
10606 if (SvCUR(stuff)) {
10609 PL_lex_state = LEX_NORMAL;
10610 PL_nextval[PL_nexttoke].ival = 0;
10614 PL_lex_state = LEX_FORMLINE;
10616 if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff)))
10618 else if (PL_encoding)
10619 sv_recode_to_utf8(stuff, PL_encoding);
10621 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10623 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10627 SvREFCNT_dec(stuff);
10629 PL_lex_formbrack = 0;
10640 PL_cshlen = strlen(PL_cshname);
10645 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10647 I32 oldsavestack_ix = PL_savestack_ix;
10648 CV* outsidecv = PL_compcv;
10651 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10653 SAVEI32(PL_subline);
10654 save_item(PL_subname);
10655 SAVESPTR(PL_compcv);
10657 PL_compcv = (CV*)NEWSV(1104,0);
10658 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10659 CvFLAGS(PL_compcv) |= flags;
10661 PL_subline = CopLINE(PL_curcop);
10662 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10663 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10664 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10666 return oldsavestack_ix;
10670 #pragma segment Perl_yylex
10673 Perl_yywarn(pTHX_ char *s)
10675 PL_in_eval |= EVAL_WARNONLY;
10677 PL_in_eval &= ~EVAL_WARNONLY;
10682 Perl_yyerror(pTHX_ char *s)
10684 char *where = NULL;
10685 char *context = NULL;
10689 if (!yychar || (yychar == ';' && !PL_rsfp))
10691 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
10692 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
10695 The code below is removed for NetWare because it abends/crashes on NetWare
10696 when the script has error such as not having the closing quotes like:
10697 if ($var eq "value)
10698 Checking of white spaces is anyway done in NetWare code.
10701 while (isSPACE(*PL_oldoldbufptr))
10704 context = PL_oldoldbufptr;
10705 contlen = PL_bufptr - PL_oldoldbufptr;
10707 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
10708 PL_oldbufptr != PL_bufptr) {
10711 The code below is removed for NetWare because it abends/crashes on NetWare
10712 when the script has error such as not having the closing quotes like:
10713 if ($var eq "value)
10714 Checking of white spaces is anyway done in NetWare code.
10717 while (isSPACE(*PL_oldbufptr))
10720 context = PL_oldbufptr;
10721 contlen = PL_bufptr - PL_oldbufptr;
10723 else if (yychar > 255)
10724 where = "next token ???";
10725 else if (yychar == -2) { /* YYEMPTY */
10726 if (PL_lex_state == LEX_NORMAL ||
10727 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10728 where = "at end of line";
10729 else if (PL_lex_inpat)
10730 where = "within pattern";
10732 where = "within string";
10735 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
10737 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10738 else if (isPRINT_LC(yychar))
10739 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10741 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10742 where = SvPVX(where_sv);
10744 msg = sv_2mortal(newSVpv(s, 0));
10745 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10746 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10748 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10750 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10751 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10752 Perl_sv_catpvf(aTHX_ msg,
10753 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10754 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10757 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
10758 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
10761 if (PL_error_count >= 10) {
10762 if (PL_in_eval && SvCUR(ERRSV))
10763 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10764 ERRSV, OutCopFILE(PL_curcop));
10766 Perl_croak(aTHX_ "%s has too many errors.\n",
10767 OutCopFILE(PL_curcop));
10770 PL_in_my_stash = Nullhv;
10774 #pragma segment Main
10778 S_swallow_bom(pTHX_ U8 *s)
10781 slen = SvCUR(PL_linestr);
10784 if (s[1] == 0xFE) {
10785 /* UTF-16 little-endian? (or UTF32-LE?) */
10786 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10787 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
10788 #ifndef PERL_NO_UTF16_FILTER
10789 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
10792 if (PL_bufend > (char*)s) {
10796 filter_add(utf16rev_textfilter, NULL);
10797 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10798 utf16_to_utf8_reversed(s, news,
10799 PL_bufend - (char*)s - 1,
10801 sv_setpvn(PL_linestr, (const char*)news, newlen);
10803 SvUTF8_on(PL_linestr);
10804 s = (U8*)SvPVX(PL_linestr);
10805 PL_bufend = SvPVX(PL_linestr) + newlen;
10808 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
10813 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10814 #ifndef PERL_NO_UTF16_FILTER
10815 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10818 if (PL_bufend > (char *)s) {
10822 filter_add(utf16_textfilter, NULL);
10823 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
10824 utf16_to_utf8(s, news,
10825 PL_bufend - (char*)s,
10827 sv_setpvn(PL_linestr, (const char*)news, newlen);
10829 SvUTF8_on(PL_linestr);
10830 s = (U8*)SvPVX(PL_linestr);
10831 PL_bufend = SvPVX(PL_linestr) + newlen;
10834 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
10839 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10840 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10841 s += 3; /* UTF-8 */
10847 if (s[2] == 0xFE && s[3] == 0xFF) {
10848 /* UTF-32 big-endian */
10849 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
10852 else if (s[2] == 0 && s[3] != 0) {
10855 * are a good indicator of UTF-16BE. */
10856 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10861 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10864 * are a good indicator of UTF-16LE. */
10865 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10874 * Restore a source filter.
10878 restore_rsfp(pTHX_ void *f)
10880 PerlIO *fp = (PerlIO*)f;
10882 if (PL_rsfp == PerlIO_stdin())
10883 PerlIO_clearerr(PL_rsfp);
10884 else if (PL_rsfp && (PL_rsfp != fp))
10885 PerlIO_close(PL_rsfp);
10889 #ifndef PERL_NO_UTF16_FILTER
10891 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10893 STRLEN old = SvCUR(sv);
10894 I32 count = FILTER_READ(idx+1, sv, maxlen);
10895 DEBUG_P(PerlIO_printf(Perl_debug_log,
10896 "utf16_textfilter(%p): %d %d (%d)\n",
10897 utf16_textfilter, idx, maxlen, (int) count));
10901 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10902 Copy(SvPVX(sv), tmps, old, char);
10903 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
10904 SvCUR(sv) - old, &newlen);
10905 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10907 DEBUG_P({sv_dump(sv);});
10912 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10914 STRLEN old = SvCUR(sv);
10915 I32 count = FILTER_READ(idx+1, sv, maxlen);
10916 DEBUG_P(PerlIO_printf(Perl_debug_log,
10917 "utf16rev_textfilter(%p): %d %d (%d)\n",
10918 utf16rev_textfilter, idx, maxlen, (int) count));
10922 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
10923 Copy(SvPVX(sv), tmps, old, char);
10924 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
10925 SvCUR(sv) - old, &newlen);
10926 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
10928 DEBUG_P({ sv_dump(sv); });
10934 Returns a pointer to the next character after the parsed
10935 vstring, as well as updating the passed in sv.
10937 Function must be called like
10940 s = scan_vstring(s,sv);
10942 The sv should already be large enough to store the vstring
10943 passed in, for performance reasons.
10948 Perl_scan_vstring(pTHX_ char *s, SV *sv)
10952 if (*pos == 'v') pos++; /* get past 'v' */
10953 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
10955 if ( *pos != '.') {
10956 /* this may not be a v-string if followed by => */
10958 while (next < PL_bufend && isSPACE(*next))
10960 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
10961 /* return string not v-string */
10962 sv_setpvn(sv,(char *)s,pos-s);
10967 if (!isALPHA(*pos)) {
10969 U8 tmpbuf[UTF8_MAXBYTES+1];
10972 if (*s == 'v') s++; /* get past 'v' */
10974 sv_setpvn(sv, "", 0);
10979 /* this is atoi() that tolerates underscores */
10982 while (--end >= s) {
10987 rev += (*end - '0') * mult;
10989 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
10990 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10991 "Integer overflow in decimal number");
10995 if (rev > 0x7FFFFFFF)
10996 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10998 /* Append native character for the rev point */
10999 tmpend = uvchr_to_utf8(tmpbuf, rev);
11000 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11001 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11003 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11009 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11013 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);