3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static char ident_too_long[] = "Identifier too long";
30 static char c_without_g[] = "Use of /c modifier is meaningless without /g";
31 static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
33 static void restore_rsfp(pTHX_ void *f);
34 #ifndef PERL_NO_UTF16_FILTER
35 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 #define XFAKEBRACK 128
42 #ifdef USE_UTF8_SCRIPTS
43 # define UTF (!IN_BYTES)
45 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
48 /* In variables named $^X, these are the legal values for X.
49 * 1999-02-27 mjd-perl-patch@plover.com */
50 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52 /* On MacOS, respect nonbreaking spaces */
53 #ifdef MACOS_TRADITIONAL
54 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
56 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
59 /* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
61 * can get by with a single comparison (if the compiler is smart enough).
64 /* #define LEX_NOTPARSING 11 is done in perl.h. */
67 #define LEX_INTERPNORMAL 9
68 #define LEX_INTERPCASEMOD 8
69 #define LEX_INTERPPUSH 7
70 #define LEX_INTERPSTART 6
71 #define LEX_INTERPEND 5
72 #define LEX_INTERPENDMAYBE 4
73 #define LEX_INTERPCONCAT 3
74 #define LEX_INTERPCONST 2
75 #define LEX_FORMLINE 1
76 #define LEX_KNOWNEXT 0
79 static char* lex_state_names[] = {
100 /* CLINE is a macro that ensures PL_copline has a sane value */
105 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
108 * Convenience functions to return different tokens and prime the
109 * lexer for the next token. They all take an argument.
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
121 * FUN1 : not used, except for not, which isn't a UNIOP
122 * BOop : bitwise or or xor
124 * SHop : shift operator
125 * PWop : power operator
126 * PMop : pattern-matching operator
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
132 * Also see LOP and lop() below.
135 #ifdef DEBUGGING /* Serve -DT. */
136 # define REPORT(retval) tokereport(s,(int)retval)
138 # define REPORT(retval) (retval)
141 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
162 /* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
167 #define UNI2(f,x) return ( \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
176 #define UNI(f) UNI2(f,XTERM)
177 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
179 #define UNIBRACK(f) return ( \
182 PL_last_uni = PL_oldbufptr, \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
187 /* grandfather return to old style */
188 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
192 /* how to interpret the yylval associated with the token */
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
202 static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
269 /* dump the returned token in rv, plus any optional arg in yylval */
272 S_tokereport(pTHX_ char* s, I32 rv)
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
278 SV* report = newSVpvn("<== ", 4);
280 for (p = debug_tokens; p->token; p++) {
281 if (p->token == (int)rv) {
288 Perl_sv_catpv(aTHX_ report, name);
289 else if ((char)rv > ' ' && (char)rv < '~')
290 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
292 Perl_sv_catpv(aTHX_ report, "EOF");
294 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
297 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
300 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
302 case TOKENTYPE_OPNUM:
303 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
304 PL_op_name[yylval.ival]);
307 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
309 case TOKENTYPE_OPVAL:
311 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
312 PL_op_name[yylval.opval->op_type]);
314 Perl_sv_catpv(aTHX_ report, "(opval=null)");
317 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
318 if (s - PL_bufptr > 0)
319 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
321 if (PL_oldbufptr && *PL_oldbufptr)
322 sv_catpv(report, PL_tokenbuf);
324 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
334 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
335 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
339 S_ao(pTHX_ int toketype)
341 if (*PL_bufptr == '=') {
343 if (toketype == ANDAND)
344 yylval.ival = OP_ANDASSIGN;
345 else if (toketype == OROR)
346 yylval.ival = OP_ORASSIGN;
347 else if (toketype == DORDOR)
348 yylval.ival = OP_DORASSIGN;
356 * When Perl expects an operator and finds something else, no_op
357 * prints the warning. It always prints "<something> found where
358 * operator expected. It prints "Missing semicolon on previous line?"
359 * if the surprise occurs at the start of the line. "do you need to
360 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
361 * where the compiler doesn't know if foo is a method call or a function.
362 * It prints "Missing operator before end of line" if there's nothing
363 * after the missing operator, or "... before <...>" if there is something
364 * after the missing operator.
368 S_no_op(pTHX_ char *what, char *s)
370 char *oldbp = PL_bufptr;
371 bool is_first = (PL_oldbufptr == PL_linestart);
377 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
378 if (ckWARN_d(WARN_SYNTAX)) {
380 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
381 "\t(Missing semicolon on previous line?)\n");
382 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
384 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
385 if (t < PL_bufptr && isSPACE(*t))
386 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
387 "\t(Do you need to predeclare %.*s?)\n",
388 t - PL_oldoldbufptr, PL_oldoldbufptr);
392 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
393 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
401 * Complain about missing quote/regexp/heredoc terminator.
402 * If it's called with (char *)NULL then it cauterizes the line buffer.
403 * If we're in a delimited string and the delimiter is a control
404 * character, it's reformatted into a two-char sequence like ^C.
409 S_missingterm(pTHX_ char *s)
414 char *nl = strrchr(s,'\n');
420 iscntrl(PL_multi_close)
422 PL_multi_close < 32 || PL_multi_close == 127
426 tmpbuf[1] = toCTRL(PL_multi_close);
432 *tmpbuf = (char)PL_multi_close;
436 q = strchr(s,'"') ? '\'' : '"';
437 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
445 Perl_deprecate(pTHX_ char *s)
447 if (ckWARN(WARN_DEPRECATED))
448 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
452 Perl_deprecate_old(pTHX_ char *s)
454 /* This function should NOT be called for any new deprecated warnings */
455 /* Use Perl_deprecate instead */
457 /* It is here to maintain backward compatibility with the pre-5.8 */
458 /* warnings category hierarchy. The "deprecated" category used to */
459 /* live under the "syntax" category. It is now a top-level category */
460 /* in its own right. */
462 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
463 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
464 "Use of %s is deprecated", s);
469 * Deprecate a comma-less variable list.
475 deprecate_old("comma-less variable list");
479 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
480 * utf16-to-utf8-reversed.
483 #ifdef PERL_CR_FILTER
487 register char *s = SvPVX(sv);
488 register char *e = s + SvCUR(sv);
489 /* outer loop optimized to do nothing if there are no CR-LFs */
491 if (*s++ == '\r' && *s == '\n') {
492 /* hit a CR-LF, need to copy the rest */
493 register char *d = s - 1;
496 if (*s == '\r' && s[1] == '\n')
507 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
509 I32 count = FILTER_READ(idx+1, sv, maxlen);
510 if (count > 0 && !maxlen)
518 * Initialize variables. Uses the Perl save_stack to save its state (for
519 * recursive calls to the parser).
523 Perl_lex_start(pTHX_ SV *line)
528 SAVEI32(PL_lex_dojoin);
529 SAVEI32(PL_lex_brackets);
530 SAVEI32(PL_lex_casemods);
531 SAVEI32(PL_lex_starts);
532 SAVEI32(PL_lex_state);
533 SAVEVPTR(PL_lex_inpat);
534 SAVEI32(PL_lex_inwhat);
535 if (PL_lex_state == LEX_KNOWNEXT) {
536 I32 toke = PL_nexttoke;
537 while (--toke >= 0) {
538 SAVEI32(PL_nexttype[toke]);
539 SAVEVPTR(PL_nextval[toke]);
541 SAVEI32(PL_nexttoke);
543 SAVECOPLINE(PL_curcop);
546 SAVEPPTR(PL_oldbufptr);
547 SAVEPPTR(PL_oldoldbufptr);
548 SAVEPPTR(PL_last_lop);
549 SAVEPPTR(PL_last_uni);
550 SAVEPPTR(PL_linestart);
551 SAVESPTR(PL_linestr);
552 SAVEGENERICPV(PL_lex_brackstack);
553 SAVEGENERICPV(PL_lex_casestack);
554 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
555 SAVESPTR(PL_lex_stuff);
556 SAVEI32(PL_lex_defer);
557 SAVEI32(PL_sublex_info.sub_inwhat);
558 SAVESPTR(PL_lex_repl);
560 SAVEINT(PL_lex_expect);
562 PL_lex_state = LEX_NORMAL;
566 New(899, PL_lex_brackstack, 120, char);
567 New(899, PL_lex_casestack, 12, char);
569 *PL_lex_casestack = '\0';
572 PL_lex_stuff = Nullsv;
573 PL_lex_repl = Nullsv;
577 PL_sublex_info.sub_inwhat = 0;
579 if (SvREADONLY(PL_linestr))
580 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
581 s = SvPV(PL_linestr, len);
582 if (!len || s[len-1] != ';') {
583 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
584 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
585 sv_catpvn(PL_linestr, "\n;", 2);
587 SvTEMP_off(PL_linestr);
588 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
589 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
590 PL_last_lop = PL_last_uni = Nullch;
596 * Finalizer for lexing operations. Must be called when the parser is
597 * done with the lexer.
603 PL_doextract = FALSE;
608 * This subroutine has nothing to do with tilting, whether at windmills
609 * or pinball tables. Its name is short for "increment line". It
610 * increments the current line number in CopLINE(PL_curcop) and checks
611 * to see whether the line starts with a comment of the form
612 * # line 500 "foo.pm"
613 * If so, it sets the current line number and file to the values in the comment.
617 S_incline(pTHX_ char *s)
624 CopLINE_inc(PL_curcop);
627 while (SPACE_OR_TAB(*s)) s++;
628 if (strnEQ(s, "line", 4))
632 if (SPACE_OR_TAB(*s))
636 while (SPACE_OR_TAB(*s)) s++;
642 while (SPACE_OR_TAB(*s))
644 if (*s == '"' && (t = strchr(s+1, '"'))) {
649 for (t = s; !isSPACE(*t); t++) ;
652 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
654 if (*e != '\n' && *e != '\0')
655 return; /* false alarm */
660 CopFILE_free(PL_curcop);
661 CopFILE_set(PL_curcop, s);
664 CopLINE_set(PL_curcop, atoi(n)-1);
669 * Called to gobble the appropriate amount and type of whitespace.
670 * Skips comments as well.
674 S_skipspace(pTHX_ register char *s)
676 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
677 while (s < PL_bufend && SPACE_OR_TAB(*s))
683 SSize_t oldprevlen, oldoldprevlen;
684 SSize_t oldloplen = 0, oldunilen = 0;
685 while (s < PL_bufend && isSPACE(*s)) {
686 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
691 if (s < PL_bufend && *s == '#') {
692 while (s < PL_bufend && *s != '\n')
696 if (PL_in_eval && !PL_rsfp) {
703 /* only continue to recharge the buffer if we're at the end
704 * of the buffer, we're not reading from a source filter, and
705 * we're in normal lexing mode
707 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
708 PL_lex_state == LEX_FORMLINE)
711 /* try to recharge the buffer */
712 if ((s = filter_gets(PL_linestr, PL_rsfp,
713 (prevlen = SvCUR(PL_linestr)))) == Nullch)
715 /* end of file. Add on the -p or -n magic */
718 ";}continue{print or die qq(-p destination: $!\\n);}");
719 PL_minus_n = PL_minus_p = 0;
721 else if (PL_minus_n) {
722 sv_setpvn(PL_linestr, ";}", 2);
726 sv_setpvn(PL_linestr,";", 1);
728 /* reset variables for next time we lex */
729 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
732 PL_last_lop = PL_last_uni = Nullch;
734 /* Close the filehandle. Could be from -P preprocessor,
735 * STDIN, or a regular file. If we were reading code from
736 * STDIN (because the commandline held no -e or filename)
737 * then we don't close it, we reset it so the code can
738 * read from STDIN too.
741 if (PL_preprocess && !PL_in_eval)
742 (void)PerlProc_pclose(PL_rsfp);
743 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
744 PerlIO_clearerr(PL_rsfp);
746 (void)PerlIO_close(PL_rsfp);
751 /* not at end of file, so we only read another line */
752 /* make corresponding updates to old pointers, for yyerror() */
753 oldprevlen = PL_oldbufptr - PL_bufend;
754 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
756 oldunilen = PL_last_uni - PL_bufend;
758 oldloplen = PL_last_lop - PL_bufend;
759 PL_linestart = PL_bufptr = s + prevlen;
760 PL_bufend = s + SvCUR(PL_linestr);
762 PL_oldbufptr = s + oldprevlen;
763 PL_oldoldbufptr = s + oldoldprevlen;
765 PL_last_uni = s + oldunilen;
767 PL_last_lop = s + oldloplen;
770 /* debugger active and we're not compiling the debugger code,
771 * so store the line into the debugger's array of lines
773 if (PERLDB_LINE && PL_curstash != PL_debstash) {
774 SV *sv = NEWSV(85,0);
776 sv_upgrade(sv, SVt_PVMG);
777 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
780 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
787 * Check the unary operators to ensure there's no ambiguity in how they're
788 * used. An ambiguous piece of code would be:
790 * This doesn't mean rand() + 5. Because rand() is a unary operator,
791 * the +5 is its argument.
800 if (PL_oldoldbufptr != PL_last_uni)
802 while (isSPACE(*PL_last_uni))
804 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
805 if ((t = strchr(s, '(')) && t < PL_bufptr)
807 if (ckWARN_d(WARN_AMBIGUOUS)){
810 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
811 "Warning: Use of \"%s\" without parentheses is ambiguous",
818 * LOP : macro to build a list operator. Its behaviour has been replaced
819 * with a subroutine, S_lop() for which LOP is just another name.
822 #define LOP(f,x) return lop(f,x,s)
826 * Build a list operator (or something that might be one). The rules:
827 * - if we have a next token, then it's a list operator [why?]
828 * - if the next thing is an opening paren, then it's a function
829 * - else it's a list operator
833 S_lop(pTHX_ I32 f, int x, char *s)
839 PL_last_lop = PL_oldbufptr;
840 PL_last_lop_op = (OPCODE)f;
842 return REPORT(LSTOP);
849 return REPORT(LSTOP);
854 * When the lexer realizes it knows the next token (for instance,
855 * it is reordering tokens for the parser) then it can call S_force_next
856 * to know what token to return the next time the lexer is called. Caller
857 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
858 * handles the token correctly.
862 S_force_next(pTHX_ I32 type)
864 PL_nexttype[PL_nexttoke] = type;
866 if (PL_lex_state != LEX_KNOWNEXT) {
867 PL_lex_defer = PL_lex_state;
868 PL_lex_expect = PL_expect;
869 PL_lex_state = LEX_KNOWNEXT;
874 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
876 SV *sv = newSVpvn(start,len);
877 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
884 * When the lexer knows the next thing is a word (for instance, it has
885 * just seen -> and it knows that the next char is a word char, then
886 * it calls S_force_word to stick the next word into the PL_next lookahead.
889 * char *start : buffer position (must be within PL_linestr)
890 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
891 * int check_keyword : if true, Perl checks to make sure the word isn't
892 * a keyword (do this if the word is a label, e.g. goto FOO)
893 * int allow_pack : if true, : characters will also be allowed (require,
895 * int allow_initial_tick : used by the "sub" lexer only.
899 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
904 start = skipspace(start);
906 if (isIDFIRST_lazy_if(s,UTF) ||
907 (allow_pack && *s == ':') ||
908 (allow_initial_tick && *s == '\'') )
910 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
911 if (check_keyword && keyword(PL_tokenbuf, len))
913 if (token == METHOD) {
918 PL_expect = XOPERATOR;
921 PL_nextval[PL_nexttoke].opval
922 = (OP*)newSVOP(OP_CONST,0,
923 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
924 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
932 * Called when the lexer wants $foo *foo &foo etc, but the program
933 * text only contains the "foo" portion. The first argument is a pointer
934 * to the "foo", and the second argument is the type symbol to prefix.
935 * Forces the next token to be a "WORD".
936 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
940 S_force_ident(pTHX_ register char *s, int kind)
943 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
944 PL_nextval[PL_nexttoke].opval = o;
947 o->op_private = OPpCONST_ENTERED;
948 /* XXX see note in pp_entereval() for why we forgo typo
949 warnings if the symbol must be introduced in an eval.
951 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
952 kind == '$' ? SVt_PV :
953 kind == '@' ? SVt_PVAV :
954 kind == '%' ? SVt_PVHV :
962 Perl_str_to_version(pTHX_ SV *sv)
967 char *start = SvPVx(sv,len);
968 bool utf = SvUTF8(sv) ? TRUE : FALSE;
969 char *end = start + len;
970 while (start < end) {
974 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
979 retval += ((NV)n)/nshift;
988 * Forces the next token to be a version number.
989 * If the next token appears to be an invalid version number, (e.g. "v2b"),
990 * and if "guessing" is TRUE, then no new token is created (and the caller
991 * must use an alternative parsing method).
995 S_force_version(pTHX_ char *s, int guessing)
997 OP *version = Nullop;
1006 while (isDIGIT(*d) || *d == '_' || *d == '.')
1008 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1010 s = scan_num(s, &yylval);
1011 version = yylval.opval;
1012 ver = cSVOPx(version)->op_sv;
1013 if (SvPOK(ver) && !SvNIOK(ver)) {
1014 (void)SvUPGRADE(ver, SVt_PVNV);
1015 SvNVX(ver) = str_to_version(ver);
1016 SvNOK_on(ver); /* hint that it is a version */
1023 /* NOTE: The parser sees the package name and the VERSION swapped */
1024 PL_nextval[PL_nexttoke].opval = version;
1032 * Tokenize a quoted string passed in as an SV. It finds the next
1033 * chunk, up to end of string or a backslash. It may make a new
1034 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1039 S_tokeq(pTHX_ SV *sv)
1042 register char *send;
1050 s = SvPV_force(sv, len);
1051 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1054 while (s < send && *s != '\\')
1059 if ( PL_hints & HINT_NEW_STRING ) {
1060 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
1066 if (s + 1 < send && (s[1] == '\\'))
1067 s++; /* all that, just for this */
1072 SvCUR_set(sv, d - SvPVX(sv));
1074 if ( PL_hints & HINT_NEW_STRING )
1075 return new_constant(NULL, 0, "q", sv, pv, "q");
1080 * Now come three functions related to double-quote context,
1081 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1082 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1083 * interact with PL_lex_state, and create fake ( ... ) argument lists
1084 * to handle functions and concatenation.
1085 * They assume that whoever calls them will be setting up a fake
1086 * join call, because each subthing puts a ',' after it. This lets
1089 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1091 * (I'm not sure whether the spurious commas at the end of lcfirst's
1092 * arguments and join's arguments are created or not).
1097 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1099 * Pattern matching will set PL_lex_op to the pattern-matching op to
1100 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1102 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1104 * Everything else becomes a FUNC.
1106 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1107 * had an OP_CONST or OP_READLINE). This just sets us up for a
1108 * call to S_sublex_push().
1112 S_sublex_start(pTHX)
1114 register I32 op_type = yylval.ival;
1116 if (op_type == OP_NULL) {
1117 yylval.opval = PL_lex_op;
1121 if (op_type == OP_CONST || op_type == OP_READLINE) {
1122 SV *sv = tokeq(PL_lex_stuff);
1124 if (SvTYPE(sv) == SVt_PVIV) {
1125 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1131 nsv = newSVpvn(p, len);
1137 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1138 PL_lex_stuff = Nullsv;
1139 /* Allow <FH> // "foo" */
1140 if (op_type == OP_READLINE)
1141 PL_expect = XTERMORDORDOR;
1145 PL_sublex_info.super_state = PL_lex_state;
1146 PL_sublex_info.sub_inwhat = op_type;
1147 PL_sublex_info.sub_op = PL_lex_op;
1148 PL_lex_state = LEX_INTERPPUSH;
1152 yylval.opval = PL_lex_op;
1162 * Create a new scope to save the lexing state. The scope will be
1163 * ended in S_sublex_done. Returns a '(', starting the function arguments
1164 * to the uc, lc, etc. found before.
1165 * Sets PL_lex_state to LEX_INTERPCONCAT.
1173 PL_lex_state = PL_sublex_info.super_state;
1174 SAVEI32(PL_lex_dojoin);
1175 SAVEI32(PL_lex_brackets);
1176 SAVEI32(PL_lex_casemods);
1177 SAVEI32(PL_lex_starts);
1178 SAVEI32(PL_lex_state);
1179 SAVEVPTR(PL_lex_inpat);
1180 SAVEI32(PL_lex_inwhat);
1181 SAVECOPLINE(PL_curcop);
1182 SAVEPPTR(PL_bufptr);
1183 SAVEPPTR(PL_bufend);
1184 SAVEPPTR(PL_oldbufptr);
1185 SAVEPPTR(PL_oldoldbufptr);
1186 SAVEPPTR(PL_last_lop);
1187 SAVEPPTR(PL_last_uni);
1188 SAVEPPTR(PL_linestart);
1189 SAVESPTR(PL_linestr);
1190 SAVEGENERICPV(PL_lex_brackstack);
1191 SAVEGENERICPV(PL_lex_casestack);
1193 PL_linestr = PL_lex_stuff;
1194 PL_lex_stuff = Nullsv;
1196 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1197 = SvPVX(PL_linestr);
1198 PL_bufend += SvCUR(PL_linestr);
1199 PL_last_lop = PL_last_uni = Nullch;
1200 SAVEFREESV(PL_linestr);
1202 PL_lex_dojoin = FALSE;
1203 PL_lex_brackets = 0;
1204 New(899, PL_lex_brackstack, 120, char);
1205 New(899, PL_lex_casestack, 12, char);
1206 PL_lex_casemods = 0;
1207 *PL_lex_casestack = '\0';
1209 PL_lex_state = LEX_INTERPCONCAT;
1210 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1212 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1213 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1214 PL_lex_inpat = PL_sublex_info.sub_op;
1216 PL_lex_inpat = Nullop;
1223 * Restores lexer state after a S_sublex_push.
1229 if (!PL_lex_starts++) {
1230 SV *sv = newSVpvn("",0);
1231 if (SvUTF8(PL_linestr))
1233 PL_expect = XOPERATOR;
1234 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1238 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1239 PL_lex_state = LEX_INTERPCASEMOD;
1243 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1244 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1245 PL_linestr = PL_lex_repl;
1247 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1248 PL_bufend += SvCUR(PL_linestr);
1249 PL_last_lop = PL_last_uni = Nullch;
1250 SAVEFREESV(PL_linestr);
1251 PL_lex_dojoin = FALSE;
1252 PL_lex_brackets = 0;
1253 PL_lex_casemods = 0;
1254 *PL_lex_casestack = '\0';
1256 if (SvEVALED(PL_lex_repl)) {
1257 PL_lex_state = LEX_INTERPNORMAL;
1259 /* we don't clear PL_lex_repl here, so that we can check later
1260 whether this is an evalled subst; that means we rely on the
1261 logic to ensure sublex_done() is called again only via the
1262 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1265 PL_lex_state = LEX_INTERPCONCAT;
1266 PL_lex_repl = Nullsv;
1272 PL_bufend = SvPVX(PL_linestr);
1273 PL_bufend += SvCUR(PL_linestr);
1274 PL_expect = XOPERATOR;
1275 PL_sublex_info.sub_inwhat = 0;
1283 Extracts a pattern, double-quoted string, or transliteration. This
1286 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1287 processing a pattern (PL_lex_inpat is true), a transliteration
1288 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1290 Returns a pointer to the character scanned up to. Iff this is
1291 advanced from the start pointer supplied (ie if anything was
1292 successfully parsed), will leave an OP for the substring scanned
1293 in yylval. Caller must intuit reason for not parsing further
1294 by looking at the next characters herself.
1298 double-quoted style: \r and \n
1299 regexp special ones: \D \s
1301 backrefs: \1 (deprecated in substitution replacements)
1302 case and quoting: \U \Q \E
1303 stops on @ and $, but not for $ as tail anchor
1305 In transliterations:
1306 characters are VERY literal, except for - not at the start or end
1307 of the string, which indicates a range. scan_const expands the
1308 range to the full set of intermediate characters.
1310 In double-quoted strings:
1312 double-quoted style: \r and \n
1314 backrefs: \1 (deprecated)
1315 case and quoting: \U \Q \E
1318 scan_const does *not* construct ops to handle interpolated strings.
1319 It stops processing as soon as it finds an embedded $ or @ variable
1320 and leaves it to the caller to work out what's going on.
1322 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1324 $ in pattern could be $foo or could be tail anchor. Assumption:
1325 it's a tail anchor if $ is the last thing in the string, or if it's
1326 followed by one of ")| \n\t"
1328 \1 (backreferences) are turned into $1
1330 The structure of the code is
1331 while (there's a character to process) {
1332 handle transliteration ranges
1333 skip regexp comments
1334 skip # initiated comments in //x patterns
1335 check for embedded @foo
1336 check for embedded scalars
1338 leave intact backslashes from leave (below)
1339 deprecate \1 in strings and sub replacements
1340 handle string-changing backslashes \l \U \Q \E, etc.
1341 switch (what was escaped) {
1342 handle - in a transliteration (becomes a literal -)
1343 handle \132 octal characters
1344 handle 0x15 hex characters
1345 handle \cV (control V)
1346 handle printf backslashes (\f, \r, \n, etc)
1348 } (end if backslash)
1349 } (end while character to read)
1354 S_scan_const(pTHX_ char *start)
1356 register char *send = PL_bufend; /* end of the constant */
1357 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1358 register char *s = start; /* start of the constant */
1359 register char *d = SvPVX(sv); /* destination for copies */
1360 bool dorange = FALSE; /* are we in a translit range? */
1361 bool didrange = FALSE; /* did we just finish a range? */
1362 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1363 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1366 const char *leaveit = /* set of acceptably-backslashed characters */
1368 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1371 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1372 /* If we are doing a trans and we know we want UTF8 set expectation */
1373 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1374 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1378 while (s < send || dorange) {
1379 /* get transliterations out of the way (they're most literal) */
1380 if (PL_lex_inwhat == OP_TRANS) {
1381 /* expand a range A-Z to the full set of characters. AIE! */
1383 I32 i; /* current expanded character */
1384 I32 min; /* first character in range */
1385 I32 max; /* last character in range */
1388 char *c = (char*)utf8_hop((U8*)d, -1);
1392 *c = (char)UTF_TO_NATIVE(0xff);
1393 /* mark the range as done, and continue */
1399 i = d - SvPVX(sv); /* remember current offset */
1400 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1401 d = SvPVX(sv) + i; /* refresh d after realloc */
1402 d -= 2; /* eat the first char and the - */
1404 min = (U8)*d; /* first char in range */
1405 max = (U8)d[1]; /* last char in range */
1409 "Invalid range \"%c-%c\" in transliteration operator",
1410 (char)min, (char)max);
1414 if ((isLOWER(min) && isLOWER(max)) ||
1415 (isUPPER(min) && isUPPER(max))) {
1417 for (i = min; i <= max; i++)
1419 *d++ = NATIVE_TO_NEED(has_utf8,i);
1421 for (i = min; i <= max; i++)
1423 *d++ = NATIVE_TO_NEED(has_utf8,i);
1428 for (i = min; i <= max; i++)
1431 /* mark the range as done, and continue */
1437 /* range begins (ignore - as first or last char) */
1438 else if (*s == '-' && s+1 < send && s != start) {
1440 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1443 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1455 /* if we get here, we're not doing a transliteration */
1457 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1458 except for the last char, which will be done separately. */
1459 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1461 while (s+1 < send && *s != ')')
1462 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1464 else if (s[2] == '{' /* This should match regcomp.c */
1465 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1468 char *regparse = s + (s[2] == '{' ? 3 : 4);
1471 while (count && (c = *regparse)) {
1472 if (c == '\\' && regparse[1])
1480 if (*regparse != ')')
1481 regparse--; /* Leave one char for continuation. */
1482 while (s < regparse)
1483 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1487 /* likewise skip #-initiated comments in //x patterns */
1488 else if (*s == '#' && PL_lex_inpat &&
1489 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1490 while (s+1 < send && *s != '\n')
1491 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1494 /* check for embedded arrays
1495 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1497 else if (*s == '@' && s[1]
1498 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1501 /* check for embedded scalars. only stop if we're sure it's a
1504 else if (*s == '$') {
1505 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1507 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1508 break; /* in regexp, $ might be tail anchor */
1511 /* End of else if chain - OP_TRANS rejoin rest */
1514 if (*s == '\\' && s+1 < send) {
1517 /* some backslashes we leave behind */
1518 if (*leaveit && *s && strchr(leaveit, *s)) {
1519 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1520 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1524 /* deprecate \1 in strings and substitution replacements */
1525 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1526 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1528 if (ckWARN(WARN_SYNTAX))
1529 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1534 /* string-change backslash escapes */
1535 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1540 /* if we get here, it's either a quoted -, or a digit */
1543 /* quoted - in transliterations */
1545 if (PL_lex_inwhat == OP_TRANS) {
1552 if (ckWARN(WARN_MISC) &&
1555 Perl_warner(aTHX_ packWARN(WARN_MISC),
1556 "Unrecognized escape \\%c passed through",
1558 /* default action is to copy the quoted character */
1559 goto default_action;
1562 /* \132 indicates an octal constant */
1563 case '0': case '1': case '2': case '3':
1564 case '4': case '5': case '6': case '7':
1568 uv = grok_oct(s, &len, &flags, NULL);
1571 goto NUM_ESCAPE_INSERT;
1573 /* \x24 indicates a hex constant */
1577 char* e = strchr(s, '}');
1578 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1579 PERL_SCAN_DISALLOW_PREFIX;
1584 yyerror("Missing right brace on \\x{}");
1588 uv = grok_hex(s, &len, &flags, NULL);
1594 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1595 uv = grok_hex(s, &len, &flags, NULL);
1601 /* Insert oct or hex escaped character.
1602 * There will always enough room in sv since such
1603 * escapes will be longer than any UTF-8 sequence
1604 * they can end up as. */
1606 /* We need to map to chars to ASCII before doing the tests
1609 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1610 if (!has_utf8 && uv > 255) {
1611 /* Might need to recode whatever we have
1612 * accumulated so far if it contains any
1615 * (Can't we keep track of that and avoid
1616 * this rescan? --jhi)
1620 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1621 if (!NATIVE_IS_INVARIANT(*c)) {
1626 STRLEN offset = d - SvPVX(sv);
1628 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1632 while (src >= (U8 *)SvPVX(sv)) {
1633 if (!NATIVE_IS_INVARIANT(*src)) {
1634 U8 ch = NATIVE_TO_ASCII(*src);
1635 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1636 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1646 if (has_utf8 || uv > 255) {
1647 d = (char*)uvchr_to_utf8((U8*)d, uv);
1649 if (PL_lex_inwhat == OP_TRANS &&
1650 PL_sublex_info.sub_op) {
1651 PL_sublex_info.sub_op->op_private |=
1652 (PL_lex_repl ? OPpTRANS_FROM_UTF
1665 /* \N{LATIN SMALL LETTER A} is a named character */
1669 char* e = strchr(s, '}');
1675 yyerror("Missing right brace on \\N{}");
1679 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1681 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1682 PERL_SCAN_DISALLOW_PREFIX;
1685 uv = grok_hex(s, &len, &flags, NULL);
1687 goto NUM_ESCAPE_INSERT;
1689 res = newSVpvn(s + 1, e - s - 1);
1690 res = new_constant( Nullch, 0, "charnames",
1691 res, Nullsv, "\\N{...}" );
1693 sv_utf8_upgrade(res);
1694 str = SvPV(res,len);
1695 #ifdef EBCDIC_NEVER_MIND
1696 /* charnames uses pack U and that has been
1697 * recently changed to do the below uni->native
1698 * mapping, so this would be redundant (and wrong,
1699 * the code point would be doubly converted).
1700 * But leave this in just in case the pack U change
1701 * gets revoked, but the semantics is still
1702 * desireable for charnames. --jhi */
1704 UV uv = utf8_to_uvchr((U8*)str, 0);
1707 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1709 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1710 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1711 str = SvPV(res, len);
1715 if (!has_utf8 && SvUTF8(res)) {
1716 char *ostart = SvPVX(sv);
1717 SvCUR_set(sv, d - ostart);
1720 sv_utf8_upgrade(sv);
1721 /* this just broke our allocation above... */
1722 SvGROW(sv, (STRLEN)(send - start));
1723 d = SvPVX(sv) + SvCUR(sv);
1726 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1727 char *odest = SvPVX(sv);
1729 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1730 d = SvPVX(sv) + (d - odest);
1732 Copy(str, d, len, char);
1739 yyerror("Missing braces on \\N{}");
1742 /* \c is a control character */
1751 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1754 yyerror("Missing control char name in \\c");
1758 /* printf-style backslashes, formfeeds, newlines, etc */
1760 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1763 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1766 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1769 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1772 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1775 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1778 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1784 } /* end if (backslash) */
1787 /* If we started with encoded form, or already know we want it
1788 and then encode the next character */
1789 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1791 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1792 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1795 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1796 STRLEN off = d - SvPVX(sv);
1797 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1799 d = (char*)uvchr_to_utf8((U8*)d, uv);
1803 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1805 } /* while loop to process each character */
1807 /* terminate the string and set up the sv */
1809 SvCUR_set(sv, d - SvPVX(sv));
1810 if (SvCUR(sv) >= SvLEN(sv))
1811 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1814 if (PL_encoding && !has_utf8) {
1815 sv_recode_to_utf8(sv, PL_encoding);
1821 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1822 PL_sublex_info.sub_op->op_private |=
1823 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1827 /* shrink the sv if we allocated more than we used */
1828 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1829 SvLEN_set(sv, SvCUR(sv) + 1);
1830 Renew(SvPVX(sv), SvLEN(sv), char);
1833 /* return the substring (via yylval) only if we parsed anything */
1834 if (s > PL_bufptr) {
1835 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1836 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1838 ( PL_lex_inwhat == OP_TRANS
1840 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1843 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1850 * Returns TRUE if there's more to the expression (e.g., a subscript),
1853 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1855 * ->[ and ->{ return TRUE
1856 * { and [ outside a pattern are always subscripts, so return TRUE
1857 * if we're outside a pattern and it's not { or [, then return FALSE
1858 * if we're in a pattern and the first char is a {
1859 * {4,5} (any digits around the comma) returns FALSE
1860 * if we're in a pattern and the first char is a [
1862 * [SOMETHING] has a funky algorithm to decide whether it's a
1863 * character class or not. It has to deal with things like
1864 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1865 * anything else returns TRUE
1868 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1871 S_intuit_more(pTHX_ register char *s)
1873 if (PL_lex_brackets)
1875 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1877 if (*s != '{' && *s != '[')
1882 /* In a pattern, so maybe we have {n,m}. */
1899 /* On the other hand, maybe we have a character class */
1902 if (*s == ']' || *s == '^')
1905 /* this is terrifying, and it works */
1906 int weight = 2; /* let's weigh the evidence */
1908 unsigned char un_char = 255, last_un_char;
1909 char *send = strchr(s,']');
1910 char tmpbuf[sizeof PL_tokenbuf * 4];
1912 if (!send) /* has to be an expression */
1915 Zero(seen,256,char);
1918 else if (isDIGIT(*s)) {
1920 if (isDIGIT(s[1]) && s[2] == ']')
1926 for (; s < send; s++) {
1927 last_un_char = un_char;
1928 un_char = (unsigned char)*s;
1933 weight -= seen[un_char] * 10;
1934 if (isALNUM_lazy_if(s+1,UTF)) {
1935 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1936 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1941 else if (*s == '$' && s[1] &&
1942 strchr("[#!%*<>()-=",s[1])) {
1943 if (/*{*/ strchr("])} =",s[2]))
1952 if (strchr("wds]",s[1]))
1954 else if (seen['\''] || seen['"'])
1956 else if (strchr("rnftbxcav",s[1]))
1958 else if (isDIGIT(s[1])) {
1960 while (s[1] && isDIGIT(s[1]))
1970 if (strchr("aA01! ",last_un_char))
1972 if (strchr("zZ79~",s[1]))
1974 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1975 weight -= 5; /* cope with negative subscript */
1978 if (!isALNUM(last_un_char)
1979 && !(last_un_char == '$' || last_un_char == '@'
1980 || last_un_char == '&')
1981 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
1986 if (keyword(tmpbuf, d - tmpbuf))
1989 if (un_char == last_un_char + 1)
1991 weight -= seen[un_char];
1996 if (weight >= 0) /* probably a character class */
2006 * Does all the checking to disambiguate
2008 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2009 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2011 * First argument is the stuff after the first token, e.g. "bar".
2013 * Not a method if bar is a filehandle.
2014 * Not a method if foo is a subroutine prototyped to take a filehandle.
2015 * Not a method if it's really "Foo $bar"
2016 * Method if it's "foo $bar"
2017 * Not a method if it's really "print foo $bar"
2018 * Method if it's really "foo package::" (interpreted as package->foo)
2019 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2020 * Not a method if bar is a filehandle or package, but is quoted with
2025 S_intuit_method(pTHX_ char *start, GV *gv)
2027 char *s = start + (*start == '$');
2028 char tmpbuf[sizeof PL_tokenbuf];
2036 if ((cv = GvCVu(gv))) {
2037 char *proto = SvPVX(cv);
2047 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2048 /* start is the beginning of the possible filehandle/object,
2049 * and s is the end of it
2050 * tmpbuf is a copy of it
2053 if (*start == '$') {
2054 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2059 return *s == '(' ? FUNCMETH : METHOD;
2061 if (!keyword(tmpbuf, len)) {
2062 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2067 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
2068 if (indirgv && GvCVu(indirgv))
2070 /* filehandle or package name makes it a method */
2071 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2073 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2074 return 0; /* no assumptions -- "=>" quotes bearword */
2076 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2077 newSVpvn(tmpbuf,len));
2078 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2082 return *s == '(' ? FUNCMETH : METHOD;
2090 * Return a string of Perl code to load the debugger. If PERL5DB
2091 * is set, it will return the contents of that, otherwise a
2092 * compile-time require of perl5db.pl.
2099 char *pdb = PerlEnv_getenv("PERL5DB");
2103 SETERRNO(0,SS_NORMAL);
2104 return "BEGIN { require 'perl5db.pl' }";
2110 /* Encoded script support. filter_add() effectively inserts a
2111 * 'pre-processing' function into the current source input stream.
2112 * Note that the filter function only applies to the current source file
2113 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2115 * The datasv parameter (which may be NULL) can be used to pass
2116 * private data to this instance of the filter. The filter function
2117 * can recover the SV using the FILTER_DATA macro and use it to
2118 * store private buffers and state information.
2120 * The supplied datasv parameter is upgraded to a PVIO type
2121 * and the IoDIRP/IoANY field is used to store the function pointer,
2122 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2123 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2124 * private use must be set using malloc'd pointers.
2128 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2133 if (!PL_rsfp_filters)
2134 PL_rsfp_filters = newAV();
2136 datasv = NEWSV(255,0);
2137 if (!SvUPGRADE(datasv, SVt_PVIO))
2138 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
2139 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
2140 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2141 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2142 (void*)funcp, SvPV_nolen(datasv)));
2143 av_unshift(PL_rsfp_filters, 1);
2144 av_store(PL_rsfp_filters, 0, datasv) ;
2149 /* Delete most recently added instance of this filter function. */
2151 Perl_filter_del(pTHX_ filter_t funcp)
2154 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
2155 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2157 /* if filter is on top of stack (usual case) just pop it off */
2158 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2159 if (IoANY(datasv) == (void *)funcp) {
2160 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2161 IoANY(datasv) = (void *)NULL;
2162 sv_free(av_pop(PL_rsfp_filters));
2166 /* we need to search for the correct entry and clear it */
2167 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2171 /* Invoke the idxth filter function for the current rsfp. */
2172 /* maxlen 0 = read one text line */
2174 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2179 if (!PL_rsfp_filters)
2181 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2182 /* Provide a default input filter to make life easy. */
2183 /* Note that we append to the line. This is handy. */
2184 DEBUG_P(PerlIO_printf(Perl_debug_log,
2185 "filter_read %d: from rsfp\n", idx));
2189 int old_len = SvCUR(buf_sv) ;
2191 /* ensure buf_sv is large enough */
2192 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2193 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2194 if (PerlIO_error(PL_rsfp))
2195 return -1; /* error */
2197 return 0 ; /* end of file */
2199 SvCUR_set(buf_sv, old_len + len) ;
2202 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2203 if (PerlIO_error(PL_rsfp))
2204 return -1; /* error */
2206 return 0 ; /* end of file */
2209 return SvCUR(buf_sv);
2211 /* Skip this filter slot if filter has been deleted */
2212 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2213 DEBUG_P(PerlIO_printf(Perl_debug_log,
2214 "filter_read %d: skipped (filter deleted)\n",
2216 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2218 /* Get function pointer hidden within datasv */
2219 funcp = (filter_t)IoANY(datasv);
2220 DEBUG_P(PerlIO_printf(Perl_debug_log,
2221 "filter_read %d: via function %p (%s)\n",
2222 idx, (void*)funcp, SvPV_nolen(datasv)));
2223 /* Call function. The function is expected to */
2224 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2225 /* Return: <0:error, =0:eof, >0:not eof */
2226 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2230 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2232 #ifdef PERL_CR_FILTER
2233 if (!PL_rsfp_filters) {
2234 filter_add(S_cr_textfilter,NULL);
2237 if (PL_rsfp_filters) {
2239 SvCUR_set(sv, 0); /* start with empty line */
2240 if (FILTER_READ(0, sv, 0) > 0)
2241 return ( SvPVX(sv) ) ;
2246 return (sv_gets(sv, fp, append));
2250 S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
2254 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2258 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2259 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2261 return GvHV(gv); /* Foo:: */
2264 /* use constant CLASS => 'MyClass' */
2265 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2267 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2268 pkgname = SvPV_nolen(sv);
2272 return gv_stashpv(pkgname, FALSE);
2276 static char* exp_name[] =
2277 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2278 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2285 Works out what to call the token just pulled out of the input
2286 stream. The yacc parser takes care of taking the ops we return and
2287 stitching them into a tree.
2293 if read an identifier
2294 if we're in a my declaration
2295 croak if they tried to say my($foo::bar)
2296 build the ops for a my() declaration
2297 if it's an access to a my() variable
2298 are we in a sort block?
2299 croak if my($a); $a <=> $b
2300 build ops for access to a my() variable
2301 if in a dq string, and they've said @foo and we can't find @foo
2303 build ops for a bareword
2304 if we already built the token before, use it.
2309 #pragma segment Perl_yylex
2314 register char *s = PL_bufptr;
2321 I32 orig_keyword = 0;
2324 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2325 lex_state_names[PL_lex_state]);
2327 /* check if there's an identifier for us to look at */
2328 if (PL_pending_ident)
2329 return REPORT(S_pending_ident(aTHX));
2331 /* no identifier pending identification */
2333 switch (PL_lex_state) {
2335 case LEX_NORMAL: /* Some compilers will produce faster */
2336 case LEX_INTERPNORMAL: /* code if we comment these out. */
2340 /* when we've already built the next token, just pull it out of the queue */
2343 yylval = PL_nextval[PL_nexttoke];
2345 PL_lex_state = PL_lex_defer;
2346 PL_expect = PL_lex_expect;
2347 PL_lex_defer = LEX_NORMAL;
2349 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2350 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2351 (IV)PL_nexttype[PL_nexttoke]); });
2353 return REPORT(PL_nexttype[PL_nexttoke]);
2355 /* interpolated case modifiers like \L \U, including \Q and \E.
2356 when we get here, PL_bufptr is at the \
2358 case LEX_INTERPCASEMOD:
2360 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2361 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2363 /* handle \E or end of string */
2364 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2368 if (PL_lex_casemods) {
2369 oldmod = PL_lex_casestack[--PL_lex_casemods];
2370 PL_lex_casestack[PL_lex_casemods] = '\0';
2372 if (PL_bufptr != PL_bufend
2373 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2375 PL_lex_state = LEX_INTERPCONCAT;
2379 if (PL_bufptr != PL_bufend)
2381 PL_lex_state = LEX_INTERPCONCAT;
2385 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2386 "### Saw case modifier at '%s'\n", PL_bufptr); });
2388 if (s[1] == '\\' && s[2] == 'E') {
2390 PL_lex_state = LEX_INTERPCONCAT;
2394 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2395 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2396 if ((*s == 'L' || *s == 'U') &&
2397 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2398 PL_lex_casestack[--PL_lex_casemods] = '\0';
2401 if (PL_lex_casemods > 10)
2402 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2403 PL_lex_casestack[PL_lex_casemods++] = *s;
2404 PL_lex_casestack[PL_lex_casemods] = '\0';
2405 PL_lex_state = LEX_INTERPCONCAT;
2406 PL_nextval[PL_nexttoke].ival = 0;
2409 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2411 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2413 PL_nextval[PL_nexttoke].ival = OP_LC;
2415 PL_nextval[PL_nexttoke].ival = OP_UC;
2417 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2419 Perl_croak(aTHX_ "panic: yylex");
2423 if (PL_lex_starts) {
2426 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2427 if (PL_lex_casemods == 1 && PL_lex_inpat)
2436 case LEX_INTERPPUSH:
2437 return REPORT(sublex_push());
2439 case LEX_INTERPSTART:
2440 if (PL_bufptr == PL_bufend)
2441 return REPORT(sublex_done());
2442 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2443 "### Interpolated variable at '%s'\n", PL_bufptr); });
2445 PL_lex_dojoin = (*PL_bufptr == '@');
2446 PL_lex_state = LEX_INTERPNORMAL;
2447 if (PL_lex_dojoin) {
2448 PL_nextval[PL_nexttoke].ival = 0;
2450 force_ident("\"", '$');
2451 PL_nextval[PL_nexttoke].ival = 0;
2453 PL_nextval[PL_nexttoke].ival = 0;
2455 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2458 if (PL_lex_starts++) {
2460 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2461 if (!PL_lex_casemods && PL_lex_inpat)
2468 case LEX_INTERPENDMAYBE:
2469 if (intuit_more(PL_bufptr)) {
2470 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2476 if (PL_lex_dojoin) {
2477 PL_lex_dojoin = FALSE;
2478 PL_lex_state = LEX_INTERPCONCAT;
2481 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2482 && SvEVALED(PL_lex_repl))
2484 if (PL_bufptr != PL_bufend)
2485 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2486 PL_lex_repl = Nullsv;
2489 case LEX_INTERPCONCAT:
2491 if (PL_lex_brackets)
2492 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2494 if (PL_bufptr == PL_bufend)
2495 return REPORT(sublex_done());
2497 if (SvIVX(PL_linestr) == '\'') {
2498 SV *sv = newSVsv(PL_linestr);
2501 else if ( PL_hints & HINT_NEW_RE )
2502 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2503 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2507 s = scan_const(PL_bufptr);
2509 PL_lex_state = LEX_INTERPCASEMOD;
2511 PL_lex_state = LEX_INTERPSTART;
2514 if (s != PL_bufptr) {
2515 PL_nextval[PL_nexttoke] = yylval;
2518 if (PL_lex_starts++) {
2519 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2520 if (!PL_lex_casemods && PL_lex_inpat)
2533 PL_lex_state = LEX_NORMAL;
2534 s = scan_formline(PL_bufptr);
2535 if (!PL_lex_formbrack)
2541 PL_oldoldbufptr = PL_oldbufptr;
2544 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
2545 exp_name[PL_expect], s);
2551 if (isIDFIRST_lazy_if(s,UTF))
2553 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2556 goto fake_eof; /* emulate EOF on ^D or ^Z */
2561 if (PL_lex_brackets) {
2562 if (PL_lex_formbrack)
2563 yyerror("Format not terminated");
2565 yyerror("Missing right curly or square bracket");
2567 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2568 "### Tokener got EOF\n");
2572 if (s++ < PL_bufend)
2573 goto retry; /* ignore stray nulls */
2576 if (!PL_in_eval && !PL_preambled) {
2577 PL_preambled = TRUE;
2578 sv_setpv(PL_linestr,incl_perldb());
2579 if (SvCUR(PL_linestr))
2580 sv_catpvn(PL_linestr,";", 1);
2582 while(AvFILLp(PL_preambleav) >= 0) {
2583 SV *tmpsv = av_shift(PL_preambleav);
2584 sv_catsv(PL_linestr, tmpsv);
2585 sv_catpvn(PL_linestr, ";", 1);
2588 sv_free((SV*)PL_preambleav);
2589 PL_preambleav = NULL;
2591 if (PL_minus_n || PL_minus_p) {
2592 sv_catpv(PL_linestr, "LINE: while (<>) {");
2594 sv_catpv(PL_linestr,"chomp;");
2597 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2598 || *PL_splitstr == '"')
2599 && strchr(PL_splitstr + 1, *PL_splitstr))
2600 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2602 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2603 bytes can be used as quoting characters. :-) */
2604 /* The count here deliberately includes the NUL
2605 that terminates the C string constant. This
2606 embeds the opening NUL into the string. */
2607 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2612 sv_catpvn(PL_linestr, s, 1);
2613 sv_catpvn(PL_linestr, s, 1);
2615 /* This loop will embed the trailing NUL of
2616 PL_linestr as the last thing it does before
2618 sv_catpvn(PL_linestr, ");", 2);
2622 sv_catpv(PL_linestr,"our @F=split(' ');");
2625 sv_catpvn(PL_linestr, "\n", 1);
2626 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2627 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2628 PL_last_lop = PL_last_uni = Nullch;
2629 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2630 SV *sv = NEWSV(85,0);
2632 sv_upgrade(sv, SVt_PVMG);
2633 sv_setsv(sv,PL_linestr);
2636 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2641 bof = PL_rsfp ? TRUE : FALSE;
2642 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2645 if (PL_preprocess && !PL_in_eval)
2646 (void)PerlProc_pclose(PL_rsfp);
2647 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2648 PerlIO_clearerr(PL_rsfp);
2650 (void)PerlIO_close(PL_rsfp);
2652 PL_doextract = FALSE;
2654 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2655 sv_setpv(PL_linestr,PL_minus_p
2656 ? ";}continue{print;}" : ";}");
2657 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2658 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2659 PL_last_lop = PL_last_uni = Nullch;
2660 PL_minus_n = PL_minus_p = 0;
2663 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2664 PL_last_lop = PL_last_uni = Nullch;
2665 sv_setpv(PL_linestr,"");
2666 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2668 /* If it looks like the start of a BOM or raw UTF-16,
2669 * check if it in fact is. */
2675 #ifdef PERLIO_IS_STDIO
2676 # ifdef __GNU_LIBRARY__
2677 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2678 # define FTELL_FOR_PIPE_IS_BROKEN
2682 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2683 # define FTELL_FOR_PIPE_IS_BROKEN
2688 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2689 /* This loses the possibility to detect the bof
2690 * situation on perl -P when the libc5 is being used.
2691 * Workaround? Maybe attach some extra state to PL_rsfp?
2694 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2696 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2699 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2700 s = swallow_bom((U8*)s);
2704 /* Incest with pod. */
2705 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2706 sv_setpv(PL_linestr, "");
2707 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2708 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2709 PL_last_lop = PL_last_uni = Nullch;
2710 PL_doextract = FALSE;
2714 } while (PL_doextract);
2715 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2716 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2717 SV *sv = NEWSV(85,0);
2719 sv_upgrade(sv, SVt_PVMG);
2720 sv_setsv(sv,PL_linestr);
2723 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2725 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2726 PL_last_lop = PL_last_uni = Nullch;
2727 if (CopLINE(PL_curcop) == 1) {
2728 while (s < PL_bufend && isSPACE(*s))
2730 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2734 if (*s == '#' && *(s+1) == '!')
2736 #ifdef ALTERNATE_SHEBANG
2738 static char as[] = ALTERNATE_SHEBANG;
2739 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2740 d = s + (sizeof(as) - 1);
2742 #endif /* ALTERNATE_SHEBANG */
2751 while (*d && !isSPACE(*d))
2755 #ifdef ARG_ZERO_IS_SCRIPT
2756 if (ipathend > ipath) {
2758 * HP-UX (at least) sets argv[0] to the script name,
2759 * which makes $^X incorrect. And Digital UNIX and Linux,
2760 * at least, set argv[0] to the basename of the Perl
2761 * interpreter. So, having found "#!", we'll set it right.
2763 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
2764 assert(SvPOK(x) || SvGMAGICAL(x));
2765 if (sv_eq(x, CopFILESV(PL_curcop))) {
2766 sv_setpvn(x, ipath, ipathend - ipath);
2772 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2773 char *lstart = SvPV(x,llen);
2775 bstart += blen - llen;
2776 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2777 sv_setpvn(x, ipath, ipathend - ipath);
2782 TAINT_NOT; /* $^X is always tainted, but that's OK */
2784 #endif /* ARG_ZERO_IS_SCRIPT */
2789 d = instr(s,"perl -");
2791 d = instr(s,"perl");
2793 /* avoid getting into infinite loops when shebang
2794 * line contains "Perl" rather than "perl" */
2796 for (d = ipathend-4; d >= ipath; --d) {
2797 if ((*d == 'p' || *d == 'P')
2798 && !ibcmp(d, "perl", 4))
2808 #ifdef ALTERNATE_SHEBANG
2810 * If the ALTERNATE_SHEBANG on this system starts with a
2811 * character that can be part of a Perl expression, then if
2812 * we see it but not "perl", we're probably looking at the
2813 * start of Perl code, not a request to hand off to some
2814 * other interpreter. Similarly, if "perl" is there, but
2815 * not in the first 'word' of the line, we assume the line
2816 * contains the start of the Perl program.
2818 if (d && *s != '#') {
2820 while (*c && !strchr("; \t\r\n\f\v#", *c))
2823 d = Nullch; /* "perl" not in first word; ignore */
2825 *s = '#'; /* Don't try to parse shebang line */
2827 #endif /* ALTERNATE_SHEBANG */
2828 #ifndef MACOS_TRADITIONAL
2833 !instr(s,"indir") &&
2834 instr(PL_origargv[0],"perl"))
2840 while (s < PL_bufend && isSPACE(*s))
2842 if (s < PL_bufend) {
2843 Newz(899,newargv,PL_origargc+3,char*);
2845 while (s < PL_bufend && !isSPACE(*s))
2848 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2851 newargv = PL_origargv;
2854 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2856 Perl_croak(aTHX_ "Can't exec %s", ipath);
2860 U32 oldpdb = PL_perldb;
2861 bool oldn = PL_minus_n;
2862 bool oldp = PL_minus_p;
2864 while (*d && !isSPACE(*d)) d++;
2865 while (SPACE_OR_TAB(*d)) d++;
2868 bool switches_done = PL_doswitches;
2870 if (*d == 'M' || *d == 'm') {
2872 while (*d && !isSPACE(*d)) d++;
2873 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2876 d = moreswitches(d);
2878 if (PL_doswitches && !switches_done) {
2879 int argc = PL_origargc;
2880 char **argv = PL_origargv;
2883 } while (argc && argv[0][0] == '-' && argv[0][1]);
2884 init_argv_symbols(argc,argv);
2886 if ((PERLDB_LINE && !oldpdb) ||
2887 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2888 /* if we have already added "LINE: while (<>) {",
2889 we must not do it again */
2891 sv_setpv(PL_linestr, "");
2892 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2893 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2894 PL_last_lop = PL_last_uni = Nullch;
2895 PL_preambled = FALSE;
2897 (void)gv_fetchfile(PL_origfilename);
2900 if (PL_doswitches && !switches_done) {
2901 int argc = PL_origargc;
2902 char **argv = PL_origargv;
2905 } while (argc && argv[0][0] == '-' && argv[0][1]);
2906 init_argv_symbols(argc,argv);
2912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2914 PL_lex_state = LEX_FORMLINE;
2919 #ifdef PERL_STRICT_CR
2920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2924 case ' ': case '\t': case '\f': case 013:
2925 #ifdef MACOS_TRADITIONAL
2932 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2933 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2934 /* handle eval qq[#line 1 "foo"\n ...] */
2935 CopLINE_dec(PL_curcop);
2939 while (s < d && *s != '\n')
2943 else if (s > d) /* Found by Ilya: feed random input to Perl. */
2944 Perl_croak(aTHX_ "panic: input overflow");
2946 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2948 PL_lex_state = LEX_FORMLINE;
2958 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2965 while (s < PL_bufend && SPACE_OR_TAB(*s))
2968 if (strnEQ(s,"=>",2)) {
2969 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2970 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2971 "### Saw unary minus before =>, forcing word '%s'\n", s);
2973 OPERATOR('-'); /* unary minus */
2975 PL_last_uni = PL_oldbufptr;
2977 case 'r': ftst = OP_FTEREAD; break;
2978 case 'w': ftst = OP_FTEWRITE; break;
2979 case 'x': ftst = OP_FTEEXEC; break;
2980 case 'o': ftst = OP_FTEOWNED; break;
2981 case 'R': ftst = OP_FTRREAD; break;
2982 case 'W': ftst = OP_FTRWRITE; break;
2983 case 'X': ftst = OP_FTREXEC; break;
2984 case 'O': ftst = OP_FTROWNED; break;
2985 case 'e': ftst = OP_FTIS; break;
2986 case 'z': ftst = OP_FTZERO; break;
2987 case 's': ftst = OP_FTSIZE; break;
2988 case 'f': ftst = OP_FTFILE; break;
2989 case 'd': ftst = OP_FTDIR; break;
2990 case 'l': ftst = OP_FTLINK; break;
2991 case 'p': ftst = OP_FTPIPE; break;
2992 case 'S': ftst = OP_FTSOCK; break;
2993 case 'u': ftst = OP_FTSUID; break;
2994 case 'g': ftst = OP_FTSGID; break;
2995 case 'k': ftst = OP_FTSVTX; break;
2996 case 'b': ftst = OP_FTBLK; break;
2997 case 'c': ftst = OP_FTCHR; break;
2998 case 't': ftst = OP_FTTTY; break;
2999 case 'T': ftst = OP_FTTEXT; break;
3000 case 'B': ftst = OP_FTBINARY; break;
3001 case 'M': case 'A': case 'C':
3002 gv_fetchpv("\024",TRUE, SVt_PV);
3004 case 'M': ftst = OP_FTMTIME; break;
3005 case 'A': ftst = OP_FTATIME; break;
3006 case 'C': ftst = OP_FTCTIME; break;
3014 PL_last_lop_op = (OPCODE)ftst;
3015 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3016 "### Saw file test %c\n", (int)ftst);
3021 /* Assume it was a minus followed by a one-letter named
3022 * subroutine call (or a -bareword), then. */
3023 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3024 "### '-%c' looked like a file test but was not\n",
3033 if (PL_expect == XOPERATOR)
3038 else if (*s == '>') {
3041 if (isIDFIRST_lazy_if(s,UTF)) {
3042 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3050 if (PL_expect == XOPERATOR)
3053 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3055 OPERATOR('-'); /* unary minus */
3062 if (PL_expect == XOPERATOR)
3067 if (PL_expect == XOPERATOR)
3070 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3076 if (PL_expect != XOPERATOR) {
3077 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3078 PL_expect = XOPERATOR;
3079 force_ident(PL_tokenbuf, '*');
3092 if (PL_expect == XOPERATOR) {
3096 PL_tokenbuf[0] = '%';
3097 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3098 if (!PL_tokenbuf[1]) {
3101 PL_pending_ident = '%';
3120 switch (PL_expect) {
3123 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3125 PL_bufptr = s; /* update in case we back off */
3131 PL_expect = XTERMBLOCK;
3135 while (isIDFIRST_lazy_if(s,UTF)) {
3136 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3137 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3138 if (tmp < 0) tmp = -tmp;
3154 d = scan_str(d,TRUE,TRUE);
3156 /* MUST advance bufptr here to avoid bogus
3157 "at end of line" context messages from yyerror().
3159 PL_bufptr = s + len;
3160 yyerror("Unterminated attribute parameter in attribute list");
3163 return REPORT(0); /* EOF indicator */
3167 SV *sv = newSVpvn(s, len);
3168 sv_catsv(sv, PL_lex_stuff);
3169 attrs = append_elem(OP_LIST, attrs,
3170 newSVOP(OP_CONST, 0, sv));
3171 SvREFCNT_dec(PL_lex_stuff);
3172 PL_lex_stuff = Nullsv;
3175 if (len == 6 && strnEQ(s, "unique", len)) {
3176 if (PL_in_my == KEY_our)
3178 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3180 ; /* skip to avoid loading attributes.pm */
3183 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3186 /* NOTE: any CV attrs applied here need to be part of
3187 the CVf_BUILTIN_ATTRS define in cv.h! */
3188 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3189 CvLVALUE_on(PL_compcv);
3190 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3191 CvLOCKED_on(PL_compcv);
3192 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3193 CvMETHOD_on(PL_compcv);
3194 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3195 CvASSERTION_on(PL_compcv);
3196 /* After we've set the flags, it could be argued that
3197 we don't need to do the attributes.pm-based setting
3198 process, and shouldn't bother appending recognized
3199 flags. To experiment with that, uncomment the
3200 following "else". (Note that's already been
3201 uncommented. That keeps the above-applied built-in
3202 attributes from being intercepted (and possibly
3203 rejected) by a package's attribute routines, but is
3204 justified by the performance win for the common case
3205 of applying only built-in attributes.) */
3207 attrs = append_elem(OP_LIST, attrs,
3208 newSVOP(OP_CONST, 0,
3212 if (*s == ':' && s[1] != ':')
3215 break; /* require real whitespace or :'s */
3217 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3218 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
3219 char q = ((*s == '\'') ? '"' : '\'');
3220 /* If here for an expression, and parsed no attrs, back off. */
3221 if (tmp == '=' && !attrs) {
3225 /* MUST advance bufptr here to avoid bogus "at end of line"
3226 context messages from yyerror().
3230 yyerror("Unterminated attribute list");
3232 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3240 PL_nextval[PL_nexttoke].opval = attrs;
3248 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3249 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3266 if (PL_lex_brackets <= 0)
3267 yyerror("Unmatched right square bracket");
3270 if (PL_lex_state == LEX_INTERPNORMAL) {
3271 if (PL_lex_brackets == 0) {
3272 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3273 PL_lex_state = LEX_INTERPEND;
3280 if (PL_lex_brackets > 100) {
3281 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3283 switch (PL_expect) {
3285 if (PL_lex_formbrack) {
3289 if (PL_oldoldbufptr == PL_last_lop)
3290 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3292 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3293 OPERATOR(HASHBRACK);
3295 while (s < PL_bufend && SPACE_OR_TAB(*s))
3298 PL_tokenbuf[0] = '\0';
3299 if (d < PL_bufend && *d == '-') {
3300 PL_tokenbuf[0] = '-';
3302 while (d < PL_bufend && SPACE_OR_TAB(*d))
3305 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3306 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
3311 char minus = (PL_tokenbuf[0] == '-');
3312 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3320 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3325 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3330 if (PL_oldoldbufptr == PL_last_lop)
3331 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3333 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3336 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3338 /* This hack is to get the ${} in the message. */
3340 yyerror("syntax error");
3343 OPERATOR(HASHBRACK);
3345 /* This hack serves to disambiguate a pair of curlies
3346 * as being a block or an anon hash. Normally, expectation
3347 * determines that, but in cases where we're not in a
3348 * position to expect anything in particular (like inside
3349 * eval"") we have to resolve the ambiguity. This code
3350 * covers the case where the first term in the curlies is a
3351 * quoted string. Most other cases need to be explicitly
3352 * disambiguated by prepending a `+' before the opening
3353 * curly in order to force resolution as an anon hash.
3355 * XXX should probably propagate the outer expectation
3356 * into eval"" to rely less on this hack, but that could
3357 * potentially break current behavior of eval"".
3361 if (*s == '\'' || *s == '"' || *s == '`') {
3362 /* common case: get past first string, handling escapes */
3363 for (t++; t < PL_bufend && *t != *s;)
3364 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3368 else if (*s == 'q') {
3371 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3374 /* skip q//-like construct */
3376 char open, close, term;
3379 while (t < PL_bufend && isSPACE(*t))
3381 /* check for q => */
3382 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3383 OPERATOR(HASHBRACK);
3387 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3391 for (t++; t < PL_bufend; t++) {
3392 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3394 else if (*t == open)
3398 for (t++; t < PL_bufend; t++) {
3399 if (*t == '\\' && t+1 < PL_bufend)
3401 else if (*t == close && --brackets <= 0)
3403 else if (*t == open)
3410 /* skip plain q word */
3411 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3414 else if (isALNUM_lazy_if(t,UTF)) {
3416 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3419 while (t < PL_bufend && isSPACE(*t))
3421 /* if comma follows first term, call it an anon hash */
3422 /* XXX it could be a comma expression with loop modifiers */
3423 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3424 || (*t == '=' && t[1] == '>')))
3425 OPERATOR(HASHBRACK);
3426 if (PL_expect == XREF)
3429 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3435 yylval.ival = CopLINE(PL_curcop);
3436 if (isSPACE(*s) || *s == '#')
3437 PL_copline = NOLINE; /* invalidate current command line number */
3442 if (PL_lex_brackets <= 0)
3443 yyerror("Unmatched right curly bracket");
3445 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3446 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3447 PL_lex_formbrack = 0;
3448 if (PL_lex_state == LEX_INTERPNORMAL) {
3449 if (PL_lex_brackets == 0) {
3450 if (PL_expect & XFAKEBRACK) {
3451 PL_expect &= XENUMMASK;
3452 PL_lex_state = LEX_INTERPEND;
3454 return yylex(); /* ignore fake brackets */
3456 if (*s == '-' && s[1] == '>')
3457 PL_lex_state = LEX_INTERPENDMAYBE;
3458 else if (*s != '[' && *s != '{')
3459 PL_lex_state = LEX_INTERPEND;
3462 if (PL_expect & XFAKEBRACK) {
3463 PL_expect &= XENUMMASK;
3465 return yylex(); /* ignore fake brackets */
3475 if (PL_expect == XOPERATOR) {
3476 if (ckWARN(WARN_SEMICOLON)
3477 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3479 CopLINE_dec(PL_curcop);
3480 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3481 CopLINE_inc(PL_curcop);
3486 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3488 PL_expect = XOPERATOR;
3489 force_ident(PL_tokenbuf, '&');
3493 yylval.ival = (OPpENTERSUB_AMPER<<8);
3512 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3513 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
3515 if (PL_expect == XSTATE && isALPHA(tmp) &&
3516 (s == PL_linestart+1 || s[-2] == '\n') )
3518 if (PL_in_eval && !PL_rsfp) {
3523 if (strnEQ(s,"=cut",4)) {
3537 PL_doextract = TRUE;
3540 if (PL_lex_brackets < PL_lex_formbrack) {
3542 #ifdef PERL_STRICT_CR
3543 for (t = s; SPACE_OR_TAB(*t); t++) ;
3545 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3547 if (*t == '\n' || *t == '#') {
3559 /* was this !=~ where !~ was meant?
3560 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3562 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3565 while (t < PL_bufend && isSPACE(*t))
3568 if (*t == '/' || *t == '?' ||
3569 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3570 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3571 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3572 "!=~ should be !~");
3581 if (PL_expect != XOPERATOR) {
3582 if (s[1] != '<' && !strchr(s,'>'))
3585 s = scan_heredoc(s);
3587 s = scan_inputsymbol(s);
3588 TERM(sublex_start());
3593 SHop(OP_LEFT_SHIFT);
3607 SHop(OP_RIGHT_SHIFT);
3616 if (PL_expect == XOPERATOR) {
3617 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3620 return REPORT(','); /* grandfather non-comma-format format */
3624 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3625 PL_tokenbuf[0] = '@';
3626 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3627 sizeof PL_tokenbuf - 1, FALSE);
3628 if (PL_expect == XOPERATOR)
3629 no_op("Array length", s);
3630 if (!PL_tokenbuf[1])
3632 PL_expect = XOPERATOR;
3633 PL_pending_ident = '#';
3637 PL_tokenbuf[0] = '$';
3638 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3639 sizeof PL_tokenbuf - 1, FALSE);
3640 if (PL_expect == XOPERATOR)
3642 if (!PL_tokenbuf[1]) {
3644 yyerror("Final $ should be \\$ or $name");
3648 /* This kludge not intended to be bulletproof. */
3649 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3650 yylval.opval = newSVOP(OP_CONST, 0,
3651 newSViv(PL_compiling.cop_arybase));
3652 yylval.opval->op_private = OPpCONST_ARYBASE;
3658 if (PL_lex_state == LEX_NORMAL)
3661 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3664 PL_tokenbuf[0] = '@';
3665 if (ckWARN(WARN_SYNTAX)) {
3667 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3670 PL_bufptr = skipspace(PL_bufptr);
3671 while (t < PL_bufend && *t != ']')
3673 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3674 "Multidimensional syntax %.*s not supported",
3675 (t - PL_bufptr) + 1, PL_bufptr);
3679 else if (*s == '{') {
3680 PL_tokenbuf[0] = '%';
3681 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3682 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3684 char tmpbuf[sizeof PL_tokenbuf];
3686 for (t++; isSPACE(*t); t++) ;
3687 if (isIDFIRST_lazy_if(t,UTF)) {
3688 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3689 for (; isSPACE(*t); t++) ;
3690 if (*t == ';' && get_cv(tmpbuf, FALSE))
3691 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3692 "You need to quote \"%s\"", tmpbuf);
3698 PL_expect = XOPERATOR;
3699 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3700 bool islop = (PL_last_lop == PL_oldoldbufptr);
3701 if (!islop || PL_last_lop_op == OP_GREPSTART)
3702 PL_expect = XOPERATOR;
3703 else if (strchr("$@\"'`q", *s))
3704 PL_expect = XTERM; /* e.g. print $fh "foo" */
3705 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3706 PL_expect = XTERM; /* e.g. print $fh &sub */
3707 else if (isIDFIRST_lazy_if(s,UTF)) {
3708 char tmpbuf[sizeof PL_tokenbuf];
3709 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3710 if ((tmp = keyword(tmpbuf, len))) {
3711 /* binary operators exclude handle interpretations */
3723 PL_expect = XTERM; /* e.g. print $fh length() */
3728 PL_expect = XTERM; /* e.g. print $fh subr() */
3731 else if (isDIGIT(*s))
3732 PL_expect = XTERM; /* e.g. print $fh 3 */
3733 else if (*s == '.' && isDIGIT(s[1]))
3734 PL_expect = XTERM; /* e.g. print $fh .3 */
3735 else if ((*s == '?' || *s == '-' || *s == '+')
3736 && !isSPACE(s[1]) && s[1] != '=')
3737 PL_expect = XTERM; /* e.g. print $fh -1 */
3738 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3739 PL_expect = XTERM; /* e.g. print $fh /.../
3740 XXX except DORDOR operator */
3741 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3742 PL_expect = XTERM; /* print $fh <<"EOF" */
3744 PL_pending_ident = '$';
3748 if (PL_expect == XOPERATOR)
3750 PL_tokenbuf[0] = '@';
3751 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3752 if (!PL_tokenbuf[1]) {
3755 if (PL_lex_state == LEX_NORMAL)
3757 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3759 PL_tokenbuf[0] = '%';
3761 /* Warn about @ where they meant $. */
3762 if (ckWARN(WARN_SYNTAX)) {
3763 if (*s == '[' || *s == '{') {
3765 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3767 if (*t == '}' || *t == ']') {
3769 PL_bufptr = skipspace(PL_bufptr);
3770 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3771 "Scalar value %.*s better written as $%.*s",
3772 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3777 PL_pending_ident = '@';
3780 case '/': /* may be division, defined-or, or pattern */
3781 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3785 case '?': /* may either be conditional or pattern */
3786 if(PL_expect == XOPERATOR) {
3794 /* A // operator. */
3804 /* Disable warning on "study /blah/" */
3805 if (PL_oldoldbufptr == PL_last_uni
3806 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3807 || memNE(PL_last_uni, "study", 5)
3808 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3811 s = scan_pat(s,OP_MATCH);
3812 TERM(sublex_start());
3816 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3817 #ifdef PERL_STRICT_CR
3820 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3822 && (s == PL_linestart || s[-1] == '\n') )
3824 PL_lex_formbrack = 0;
3828 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3834 yylval.ival = OPf_SPECIAL;
3840 if (PL_expect != XOPERATOR)
3845 case '0': case '1': case '2': case '3': case '4':
3846 case '5': case '6': case '7': case '8': case '9':
3847 s = scan_num(s, &yylval);
3848 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3849 "### Saw number in '%s'\n", s);
3851 if (PL_expect == XOPERATOR)
3856 s = scan_str(s,FALSE,FALSE);
3857 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3858 "### Saw string before '%s'\n", s);
3860 if (PL_expect == XOPERATOR) {
3861 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3864 return REPORT(','); /* grandfather non-comma-format format */
3870 missingterm((char*)0);
3871 yylval.ival = OP_CONST;
3872 TERM(sublex_start());
3875 s = scan_str(s,FALSE,FALSE);
3876 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3877 "### Saw string before '%s'\n", s);
3879 if (PL_expect == XOPERATOR) {
3880 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3883 return REPORT(','); /* grandfather non-comma-format format */
3889 missingterm((char*)0);
3890 yylval.ival = OP_CONST;
3891 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3892 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
3893 yylval.ival = OP_STRINGIFY;
3897 TERM(sublex_start());
3900 s = scan_str(s,FALSE,FALSE);
3901 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3902 "### Saw backtick string before '%s'\n", s);
3904 if (PL_expect == XOPERATOR)
3905 no_op("Backticks",s);
3907 missingterm((char*)0);
3908 yylval.ival = OP_BACKTICK;
3910 TERM(sublex_start());
3914 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3915 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
3917 if (PL_expect == XOPERATOR)
3918 no_op("Backslash",s);
3922 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3926 while (isDIGIT(*start) || *start == '_')
3928 if (*start == '.' && isDIGIT(start[1])) {
3929 s = scan_num(s, &yylval);
3932 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3933 else if (!isALPHA(*start) && (PL_expect == XTERM
3934 || PL_expect == XREF || PL_expect == XSTATE
3935 || PL_expect == XTERMORDORDOR)) {
3939 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3942 s = scan_num(s, &yylval);
3949 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3989 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3991 /* Some keywords can be followed by any delimiter, including ':' */
3992 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3993 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3994 (PL_tokenbuf[0] == 'q' &&
3995 strchr("qwxr", PL_tokenbuf[1])))));
3997 /* x::* is just a word, unless x is "CORE" */
3998 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4002 while (d < PL_bufend && isSPACE(*d))
4003 d++; /* no comments skipped here, or s### is misparsed */
4005 /* Is this a label? */
4006 if (!tmp && PL_expect == XSTATE
4007 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4009 yylval.pval = savepv(PL_tokenbuf);
4014 /* Check for keywords */
4015 tmp = keyword(PL_tokenbuf, len);
4017 /* Is this a word before a => operator? */
4018 if (*d == '=' && d[1] == '>') {
4021 = (OP*)newSVOP(OP_CONST, 0,
4022 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4023 yylval.opval->op_private = OPpCONST_BARE;
4027 if (tmp < 0) { /* second-class keyword? */
4028 GV *ogv = Nullgv; /* override (winner) */
4029 GV *hgv = Nullgv; /* hidden (loser) */
4030 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4032 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
4035 if (GvIMPORTED_CV(gv))
4037 else if (! CvMETHOD(cv))
4041 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4042 (gv = *gvp) != (GV*)&PL_sv_undef &&
4043 GvCVu(gv) && GvIMPORTED_CV(gv))
4050 tmp = 0; /* overridden by import or by GLOBAL */
4053 && -tmp==KEY_lock /* XXX generalizable kludge */
4055 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4057 tmp = 0; /* any sub overrides "weak" keyword */
4062 && PL_expect != XOPERATOR
4063 && PL_expect != XTERMORDORDOR)
4065 /* any sub overrides the "err" keyword, except when really an
4066 * operator is expected */
4069 else { /* no override */
4071 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4072 Perl_warner(aTHX_ packWARN(WARN_MISC),
4073 "dump() better written as CORE::dump()");
4077 if (ckWARN(WARN_AMBIGUOUS) && hgv
4078 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
4079 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4080 "Ambiguous call resolved as CORE::%s(), %s",
4081 GvENAME(hgv), "qualify as such or use &");
4088 default: /* not a keyword */
4092 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4094 /* Get the rest if it looks like a package qualifier */
4096 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4098 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4101 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4102 *s == '\'' ? "'" : "::");
4107 if (PL_expect == XOPERATOR) {
4108 if (PL_bufptr == PL_linestart) {
4109 CopLINE_dec(PL_curcop);
4110 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4111 CopLINE_inc(PL_curcop);
4114 no_op("Bareword",s);
4117 /* Look for a subroutine with this name in current package,
4118 unless name is "Foo::", in which case Foo is a bearword
4119 (and a package name). */
4122 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4124 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4125 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4126 "Bareword \"%s\" refers to nonexistent package",
4129 PL_tokenbuf[len] = '\0';
4136 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
4139 /* if we saw a global override before, get the right name */
4142 sv = newSVpvn("CORE::GLOBAL::",14);
4143 sv_catpv(sv,PL_tokenbuf);
4146 /* If len is 0, newSVpv does strlen(), which is correct.
4147 If len is non-zero, then it will be the true length,
4148 and so the scalar will be created correctly. */
4149 sv = newSVpv(PL_tokenbuf,len);
4152 /* Presume this is going to be a bareword of some sort. */
4155 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4156 yylval.opval->op_private = OPpCONST_BARE;
4157 /* UTF-8 package name? */
4158 if (UTF && !IN_BYTES &&
4159 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
4162 /* And if "Foo::", then that's what it certainly is. */
4167 /* See if it's the indirect object for a list operator. */
4169 if (PL_oldoldbufptr &&
4170 PL_oldoldbufptr < PL_bufptr &&
4171 (PL_oldoldbufptr == PL_last_lop
4172 || PL_oldoldbufptr == PL_last_uni) &&
4173 /* NO SKIPSPACE BEFORE HERE! */
4174 (PL_expect == XREF ||
4175 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4177 bool immediate_paren = *s == '(';
4179 /* (Now we can afford to cross potential line boundary.) */
4182 /* Two barewords in a row may indicate method call. */
4184 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
4187 /* If not a declared subroutine, it's an indirect object. */
4188 /* (But it's an indir obj regardless for sort.) */
4190 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4191 ((!gv || !GvCVu(gv)) &&
4192 (PL_last_lop_op != OP_MAPSTART &&
4193 PL_last_lop_op != OP_GREPSTART))))
4195 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4200 PL_expect = XOPERATOR;
4203 /* Is this a word before a => operator? */
4204 if (*s == '=' && s[1] == '>' && !pkgname) {
4206 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4207 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4208 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4212 /* If followed by a paren, it's certainly a subroutine. */
4215 if (gv && GvCVu(gv)) {
4216 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4217 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
4222 PL_nextval[PL_nexttoke].opval = yylval.opval;
4223 PL_expect = XOPERATOR;
4229 /* If followed by var or block, call it a method (unless sub) */
4231 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
4232 PL_last_lop = PL_oldbufptr;
4233 PL_last_lop_op = OP_METHOD;
4237 /* If followed by a bareword, see if it looks like indir obj. */
4240 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4241 && (tmp = intuit_method(s,gv)))
4244 /* Not a method, so call it a subroutine (if defined) */
4246 if (gv && GvCVu(gv)) {
4248 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4249 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4250 "Ambiguous use of -%s resolved as -&%s()",
4251 PL_tokenbuf, PL_tokenbuf);
4252 /* Check for a constant sub */
4254 if ((sv = cv_const_sv(cv))) {
4256 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4257 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4258 yylval.opval->op_private = 0;
4262 /* Resolve to GV now. */
4263 op_free(yylval.opval);
4264 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4265 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4266 PL_last_lop = PL_oldbufptr;
4267 PL_last_lop_op = OP_ENTERSUB;
4268 /* Is there a prototype? */
4271 char *proto = SvPV((SV*)cv, len);
4274 if (*proto == '$' && proto[1] == '\0')
4276 while (*proto == ';')
4278 if (*proto == '&' && *s == '{') {
4279 sv_setpv(PL_subname, PL_curstash ?
4280 "__ANON__" : "__ANON__::__ANON__");
4284 PL_nextval[PL_nexttoke].opval = yylval.opval;
4290 /* Call it a bare word */
4292 if (PL_hints & HINT_STRICT_SUBS)
4293 yylval.opval->op_private |= OPpCONST_STRICT;
4296 if (ckWARN(WARN_RESERVED)) {
4297 if (lastchar != '-') {
4298 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4299 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4300 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4307 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4308 && ckWARN_d(WARN_AMBIGUOUS)) {
4309 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4310 "Operator or semicolon missing before %c%s",
4311 lastchar, PL_tokenbuf);
4312 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4313 "Ambiguous use of %c resolved as operator %c",
4314 lastchar, lastchar);
4320 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4321 newSVpv(CopFILE(PL_curcop),0));
4325 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4326 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4329 case KEY___PACKAGE__:
4330 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4332 ? newSVpv(HvNAME(PL_curstash), 0)
4341 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4342 char *pname = "main";
4343 if (PL_tokenbuf[2] == 'D')
4344 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
4345 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4348 GvIOp(gv) = newIO();
4349 IoIFP(GvIOp(gv)) = PL_rsfp;
4350 #if defined(HAS_FCNTL) && defined(F_SETFD)
4352 int fd = PerlIO_fileno(PL_rsfp);
4353 fcntl(fd,F_SETFD,fd >= 3);
4356 /* Mark this internal pseudo-handle as clean */
4357 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4359 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4360 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4361 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4363 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4364 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4365 /* if the script was opened in binmode, we need to revert
4366 * it to text mode for compatibility; but only iff it has CRs
4367 * XXX this is a questionable hack at best. */
4368 if (PL_bufend-PL_bufptr > 2
4369 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4372 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4373 loc = PerlIO_tell(PL_rsfp);
4374 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4377 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4379 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4380 #endif /* NETWARE */
4381 #ifdef PERLIO_IS_STDIO /* really? */
4382 # if defined(__BORLANDC__)
4383 /* XXX see note in do_binmode() */
4384 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4388 PerlIO_seek(PL_rsfp, loc, 0);
4392 #ifdef PERLIO_LAYERS
4395 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4396 else if (PL_encoding) {
4403 XPUSHs(PL_encoding);
4405 call_method("name", G_SCALAR);
4409 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4410 Perl_form(aTHX_ ":encoding(%"SVf")",
4428 if (PL_expect == XSTATE) {
4435 if (*s == ':' && s[1] == ':') {
4438 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4439 if (!(tmp = keyword(PL_tokenbuf, len)))
4440 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4454 LOP(OP_ACCEPT,XTERM);
4460 LOP(OP_ATAN2,XTERM);
4466 LOP(OP_BINMODE,XTERM);
4469 LOP(OP_BLESS,XTERM);
4478 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4495 if (!PL_cryptseen) {
4496 PL_cryptseen = TRUE;
4500 LOP(OP_CRYPT,XTERM);
4503 LOP(OP_CHMOD,XTERM);
4506 LOP(OP_CHOWN,XTERM);
4509 LOP(OP_CONNECT,XTERM);
4525 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4529 PL_hints |= HINT_BLOCK_SCOPE;
4539 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4540 LOP(OP_DBMOPEN,XTERM);
4546 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4553 yylval.ival = CopLINE(PL_curcop);
4567 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4568 UNIBRACK(OP_ENTEREVAL);
4586 case KEY_endhostent:
4592 case KEY_endservent:
4595 case KEY_endprotoent:
4606 yylval.ival = CopLINE(PL_curcop);
4608 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4610 if ((PL_bufend - p) >= 3 &&
4611 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4613 else if ((PL_bufend - p) >= 4 &&
4614 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4617 if (isIDFIRST_lazy_if(p,UTF)) {
4618 p = scan_ident(p, PL_bufend,
4619 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4623 Perl_croak(aTHX_ "Missing $ on loop variable");
4628 LOP(OP_FORMLINE,XTERM);
4634 LOP(OP_FCNTL,XTERM);
4640 LOP(OP_FLOCK,XTERM);
4649 LOP(OP_GREPSTART, XREF);
4652 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4667 case KEY_getpriority:
4668 LOP(OP_GETPRIORITY,XTERM);
4670 case KEY_getprotobyname:
4673 case KEY_getprotobynumber:
4674 LOP(OP_GPBYNUMBER,XTERM);
4676 case KEY_getprotoent:
4688 case KEY_getpeername:
4689 UNI(OP_GETPEERNAME);
4691 case KEY_gethostbyname:
4694 case KEY_gethostbyaddr:
4695 LOP(OP_GHBYADDR,XTERM);
4697 case KEY_gethostent:
4700 case KEY_getnetbyname:
4703 case KEY_getnetbyaddr:
4704 LOP(OP_GNBYADDR,XTERM);
4709 case KEY_getservbyname:
4710 LOP(OP_GSBYNAME,XTERM);
4712 case KEY_getservbyport:
4713 LOP(OP_GSBYPORT,XTERM);
4715 case KEY_getservent:
4718 case KEY_getsockname:
4719 UNI(OP_GETSOCKNAME);
4721 case KEY_getsockopt:
4722 LOP(OP_GSOCKOPT,XTERM);
4744 yylval.ival = CopLINE(PL_curcop);
4748 LOP(OP_INDEX,XTERM);
4754 LOP(OP_IOCTL,XTERM);
4766 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4798 LOP(OP_LISTEN,XTERM);
4807 s = scan_pat(s,OP_MATCH);
4808 TERM(sublex_start());
4811 LOP(OP_MAPSTART, XREF);
4814 LOP(OP_MKDIR,XTERM);
4817 LOP(OP_MSGCTL,XTERM);
4820 LOP(OP_MSGGET,XTERM);
4823 LOP(OP_MSGRCV,XTERM);
4826 LOP(OP_MSGSND,XTERM);
4832 if (isIDFIRST_lazy_if(s,UTF)) {
4833 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4834 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4836 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4837 if (!PL_in_my_stash) {
4840 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4848 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4855 if (PL_expect != XSTATE)
4856 yyerror("\"no\" not allowed in expression");
4857 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4858 s = force_version(s, FALSE);
4863 if (*s == '(' || (s = skipspace(s), *s == '('))
4870 if (isIDFIRST_lazy_if(s,UTF)) {
4872 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4873 for (t=d; *t && isSPACE(*t); t++) ;
4874 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4876 && !(t[0] == '=' && t[1] == '>')
4878 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
4879 "Precedence problem: open %.*s should be open(%.*s)",
4880 d - s, s, d - s, s);
4886 yylval.ival = OP_OR;
4896 LOP(OP_OPEN_DIR,XTERM);
4899 checkcomma(s,PL_tokenbuf,"filehandle");
4903 checkcomma(s,PL_tokenbuf,"filehandle");
4922 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4926 LOP(OP_PIPE_OP,XTERM);
4929 s = scan_str(s,FALSE,FALSE);
4931 missingterm((char*)0);
4932 yylval.ival = OP_CONST;
4933 TERM(sublex_start());
4939 s = scan_str(s,FALSE,FALSE);
4941 missingterm((char*)0);
4943 if (SvCUR(PL_lex_stuff)) {
4946 d = SvPV_force(PL_lex_stuff, len);
4949 for (; isSPACE(*d) && len; --len, ++d) ;
4952 if (!warned && ckWARN(WARN_QW)) {
4953 for (; !isSPACE(*d) && len; --len, ++d) {
4955 Perl_warner(aTHX_ packWARN(WARN_QW),
4956 "Possible attempt to separate words with commas");
4959 else if (*d == '#') {
4960 Perl_warner(aTHX_ packWARN(WARN_QW),
4961 "Possible attempt to put comments in qw() list");
4967 for (; !isSPACE(*d) && len; --len, ++d) ;
4969 sv = newSVpvn(b, d-b);
4970 if (DO_UTF8(PL_lex_stuff))
4972 words = append_elem(OP_LIST, words,
4973 newSVOP(OP_CONST, 0, tokeq(sv)));
4977 PL_nextval[PL_nexttoke].opval = words;
4982 SvREFCNT_dec(PL_lex_stuff);
4983 PL_lex_stuff = Nullsv;
4989 s = scan_str(s,FALSE,FALSE);
4991 missingterm((char*)0);
4992 yylval.ival = OP_STRINGIFY;
4993 if (SvIVX(PL_lex_stuff) == '\'')
4994 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
4995 TERM(sublex_start());
4998 s = scan_pat(s,OP_QR);
4999 TERM(sublex_start());
5002 s = scan_str(s,FALSE,FALSE);
5004 missingterm((char*)0);
5005 yylval.ival = OP_BACKTICK;
5007 TERM(sublex_start());
5015 s = force_version(s, FALSE);
5017 else if (*s != 'v' || !isDIGIT(s[1])
5018 || (s = force_version(s, TRUE), *s == 'v'))
5020 *PL_tokenbuf = '\0';
5021 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5022 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5023 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5025 yyerror("<> should be quotes");
5033 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5037 LOP(OP_RENAME,XTERM);
5046 LOP(OP_RINDEX,XTERM);
5056 UNIDOR(OP_READLINE);
5069 LOP(OP_REVERSE,XTERM);
5072 UNIDOR(OP_READLINK);
5080 TERM(sublex_start());
5082 TOKEN(1); /* force error */
5091 LOP(OP_SELECT,XTERM);
5097 LOP(OP_SEMCTL,XTERM);
5100 LOP(OP_SEMGET,XTERM);
5103 LOP(OP_SEMOP,XTERM);
5109 LOP(OP_SETPGRP,XTERM);
5111 case KEY_setpriority:
5112 LOP(OP_SETPRIORITY,XTERM);
5114 case KEY_sethostent:
5120 case KEY_setservent:
5123 case KEY_setprotoent:
5133 LOP(OP_SEEKDIR,XTERM);
5135 case KEY_setsockopt:
5136 LOP(OP_SSOCKOPT,XTERM);
5142 LOP(OP_SHMCTL,XTERM);
5145 LOP(OP_SHMGET,XTERM);
5148 LOP(OP_SHMREAD,XTERM);
5151 LOP(OP_SHMWRITE,XTERM);
5154 LOP(OP_SHUTDOWN,XTERM);
5163 LOP(OP_SOCKET,XTERM);
5165 case KEY_socketpair:
5166 LOP(OP_SOCKPAIR,XTERM);
5169 checkcomma(s,PL_tokenbuf,"subroutine name");
5171 if (*s == ';' || *s == ')') /* probably a close */
5172 Perl_croak(aTHX_ "sort is now a reserved word");
5174 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5178 LOP(OP_SPLIT,XTERM);
5181 LOP(OP_SPRINTF,XTERM);
5184 LOP(OP_SPLICE,XTERM);
5199 LOP(OP_SUBSTR,XTERM);
5205 char tmpbuf[sizeof PL_tokenbuf];
5206 SSize_t tboffset = 0;
5207 expectation attrful;
5208 bool have_name, have_proto, bad_proto;
5213 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5214 (*s == ':' && s[1] == ':'))
5217 attrful = XATTRBLOCK;
5218 /* remember buffer pos'n for later force_word */
5219 tboffset = s - PL_oldbufptr;
5220 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5221 if (strchr(tmpbuf, ':'))
5222 sv_setpv(PL_subname, tmpbuf);
5224 sv_setsv(PL_subname,PL_curstname);
5225 sv_catpvn(PL_subname,"::",2);
5226 sv_catpvn(PL_subname,tmpbuf,len);
5233 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5234 PL_expect = XTERMBLOCK;
5235 attrful = XATTRTERM;
5236 sv_setpv(PL_subname,"?");
5240 if (key == KEY_format) {
5242 PL_lex_formbrack = PL_lex_brackets + 1;
5244 (void) force_word(PL_oldbufptr + tboffset, WORD,
5249 /* Look for a prototype */
5253 s = scan_str(s,FALSE,FALSE);
5255 Perl_croak(aTHX_ "Prototype not terminated");
5256 /* strip spaces and check for bad characters */
5257 d = SvPVX(PL_lex_stuff);
5260 for (p = d; *p; ++p) {
5263 if (!strchr("$@%*;[]&\\", *p))
5268 if (bad_proto && ckWARN(WARN_SYNTAX))
5269 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5270 "Illegal character in prototype for %"SVf" : %s",
5272 SvCUR(PL_lex_stuff) = tmp;
5280 if (*s == ':' && s[1] != ':')
5281 PL_expect = attrful;
5282 else if (*s != '{' && key == KEY_sub) {
5284 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5286 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5290 PL_nextval[PL_nexttoke].opval =
5291 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5292 PL_lex_stuff = Nullsv;
5296 sv_setpv(PL_subname,
5297 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5300 (void) force_word(PL_oldbufptr + tboffset, WORD,
5309 LOP(OP_SYSTEM,XREF);
5312 LOP(OP_SYMLINK,XTERM);
5315 LOP(OP_SYSCALL,XTERM);
5318 LOP(OP_SYSOPEN,XTERM);
5321 LOP(OP_SYSSEEK,XTERM);
5324 LOP(OP_SYSREAD,XTERM);
5327 LOP(OP_SYSWRITE,XTERM);
5331 TERM(sublex_start());
5352 LOP(OP_TRUNCATE,XTERM);
5364 yylval.ival = CopLINE(PL_curcop);
5368 yylval.ival = CopLINE(PL_curcop);
5372 LOP(OP_UNLINK,XTERM);
5378 LOP(OP_UNPACK,XTERM);
5381 LOP(OP_UTIME,XTERM);
5387 LOP(OP_UNSHIFT,XTERM);
5390 if (PL_expect != XSTATE)
5391 yyerror("\"use\" not allowed in expression");
5393 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5394 s = force_version(s, TRUE);
5395 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5396 PL_nextval[PL_nexttoke].opval = Nullop;
5399 else if (*s == 'v') {
5400 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5401 s = force_version(s, FALSE);
5405 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5406 s = force_version(s, FALSE);
5418 yylval.ival = CopLINE(PL_curcop);
5422 PL_hints |= HINT_BLOCK_SCOPE;
5429 LOP(OP_WAITPID,XTERM);
5438 ctl_l[0] = toCTRL('L');
5440 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5443 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5448 if (PL_expect == XOPERATOR)
5454 yylval.ival = OP_XOR;
5459 TERM(sublex_start());
5464 #pragma segment Main
5468 S_pending_ident(pTHX)
5471 register I32 tmp = 0;
5472 /* pit holds the identifier we read and pending_ident is reset */
5473 char pit = PL_pending_ident;
5474 PL_pending_ident = 0;
5476 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5477 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5479 /* if we're in a my(), we can't allow dynamics here.
5480 $foo'bar has already been turned into $foo::bar, so
5481 just check for colons.
5483 if it's a legal name, the OP is a PADANY.
5486 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5487 if (strchr(PL_tokenbuf,':'))
5488 yyerror(Perl_form(aTHX_ "No package name allowed for "
5489 "variable %s in \"our\"",
5491 tmp = allocmy(PL_tokenbuf);
5494 if (strchr(PL_tokenbuf,':'))
5495 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5497 yylval.opval = newOP(OP_PADANY, 0);
5498 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5504 build the ops for accesses to a my() variable.
5506 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5507 then used in a comparison. This catches most, but not
5508 all cases. For instance, it catches
5509 sort { my($a); $a <=> $b }
5511 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5512 (although why you'd do that is anyone's guess).
5515 if (!strchr(PL_tokenbuf,':')) {
5517 tmp = pad_findmy(PL_tokenbuf);
5518 if (tmp != NOT_IN_PAD) {
5519 /* might be an "our" variable" */
5520 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5521 /* build ops for a bareword */
5522 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
5523 sv_catpvn(sym, "::", 2);
5524 sv_catpv(sym, PL_tokenbuf+1);
5525 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5526 yylval.opval->op_private = OPpCONST_ENTERED;
5529 ? (GV_ADDMULTI | GV_ADDINEVAL)
5532 ((PL_tokenbuf[0] == '$') ? SVt_PV
5533 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5538 /* if it's a sort block and they're naming $a or $b */
5539 if (PL_last_lop_op == OP_SORT &&
5540 PL_tokenbuf[0] == '$' &&
5541 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5544 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5545 d < PL_bufend && *d != '\n';
5548 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5549 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5555 yylval.opval = newOP(OP_PADANY, 0);
5556 yylval.opval->op_targ = tmp;
5562 Whine if they've said @foo in a doublequoted string,
5563 and @foo isn't a variable we can find in the symbol
5566 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5567 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5568 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5569 && ckWARN(WARN_AMBIGUOUS))
5571 /* Downgraded from fatal to warning 20000522 mjd */
5572 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5573 "Possible unintended interpolation of %s in string",
5578 /* build ops for a bareword */
5579 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5580 yylval.opval->op_private = OPpCONST_ENTERED;
5581 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5582 ((PL_tokenbuf[0] == '$') ? SVt_PV
5583 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5588 /* Weights are the number of occurrences of that keyword in about 190M of
5589 input to Perl_keyword from a lot of real perl. This routine is about 20%
5590 faster than the routine it replaces. */
5593 Perl_keyword (pTHX_ char *name, I32 len) {
5594 /* Initially switch on the length of the name. */
5597 /* Names all of length 1. */
5599 /* Offset 0 gives the best switch position. */
5603 return KEY_m; /* Weight 148776 */
5608 return KEY_q; /* Weight 69076 */
5613 return KEY_s; /* Weight 403691 */
5618 return -KEY_x; /* Weight 38549 */
5623 return KEY_y; /* Weight 567 */
5629 /* Names all of length 2. */
5630 /* do eq ge gt if lc le lt my ne no or qq qr qw qx tr uc */
5631 /* Offset 0 gives the best switch position. */
5634 if (name[1] == 'o') {
5635 return KEY_do; /* Weight 96004 */
5639 if (name[1] == 'q') {
5640 return -KEY_eq; /* Weight 797065 */
5644 if (name[1] == 'e') {
5645 return -KEY_ge; /* Weight 5666 */
5647 if (name[1] == 't') {
5648 return -KEY_gt; /* Weight 897 */
5652 if (name[1] == 'f') {
5653 return KEY_if; /* Weight 2482605 */
5657 if (name[1] == 'c') {
5658 return -KEY_lc; /* Weight 38487 */
5660 if (name[1] == 'e') {
5661 return -KEY_le; /* Weight 4052 */
5663 if (name[1] == 't') {
5664 return -KEY_lt; /* Weight 335 */
5668 if (name[1] == 'y') {
5669 return KEY_my; /* Weight 3785925 */
5673 if (name[1] == 'e') {
5674 return -KEY_ne; /* Weight 112906 */
5676 if (name[1] == 'o') {
5677 return KEY_no; /* Weight 61989 */
5681 if (name[1] == 'r') {
5682 return -KEY_or; /* Weight 405163 */
5686 if (name[1] == 'w') {
5687 return KEY_qw; /* Weight 415641 */
5689 if (name[1] == 'q') {
5690 return KEY_qq; /* Weight 55149 */
5692 if (name[1] == 'r') {
5693 return KEY_qr; /* Weight 28519 */
5695 if (name[1] == 'x') {
5696 return KEY_qx; /* Weight 177 */
5700 if (name[1] == 'r') {
5701 return KEY_tr; /* Weight 22665 */
5705 if (name[1] == 'c') {
5706 return -KEY_uc; /* Weight 16961 */
5712 /* Names all of length 3. */
5713 /* END abs and chr cmp cos die eof err exp for hex int log map not oct ord
5714 our pop pos ref sin sub tie use vec xor */
5715 /* Offset 0 gives the best switch position. */
5718 if (name[0] == 'N' && name[1] == 'D') {
5719 return KEY_END; /* Weight 3565 */
5723 if (name[0] == 'n' && name[1] == 'd') {
5724 return -KEY_and; /* Weight 284867 */
5726 if (name[0] == 'b' && name[1] == 's') {
5727 return -KEY_abs; /* Weight 7767 */
5731 if (name[0] == 'h' && name[1] == 'r') {
5732 return -KEY_chr; /* Weight 35654 */
5734 if (name[0] == 'm' && name[1] == 'p') {
5735 return -KEY_cmp; /* Weight 6808 */
5737 if (name[0] == 'o' && name[1] == 's') {
5738 return -KEY_cos; /* Weight 447 */
5742 if (name[0] == 'i' && name[1] == 'e') {
5743 return -KEY_die; /* Weight 192203 */
5747 if (name[0] == 'o' && name[1] == 'f') {
5748 return -KEY_eof; /* Weight 1618 */
5750 if (name[0] == 'r' && name[1] == 'r') {
5751 return -KEY_err; /* Weight 522 */
5753 if (name[0] == 'x' && name[1] == 'p') {
5754 return -KEY_exp; /* Weight 423 */
5758 if (name[0] == 'o' && name[1] == 'r') {
5759 return KEY_for; /* Weight 118158 */
5763 if (name[0] == 'e' && name[1] == 'x') {
5764 return -KEY_hex; /* Weight 3629 */
5768 if (name[0] == 'n' && name[1] == 't') {
5769 return -KEY_int; /* Weight 18549 */
5773 if (name[0] == 'o' && name[1] == 'g') {
5778 if (name[0] == 'a' && name[1] == 'p') {
5779 return KEY_map; /* Weight 115207 */
5783 if (name[0] == 'o' && name[1] == 't') {
5784 return -KEY_not; /* Weight 55868 */
5788 if (name[0] == 'u' && name[1] == 'r') {
5789 return KEY_our; /* Weight 194417 */
5791 if (name[0] == 'r' && name[1] == 'd') {
5792 return -KEY_ord; /* Weight 22221 */
5794 if (name[0] == 'c' && name[1] == 't') {
5795 return -KEY_oct; /* Weight 4195 */
5799 if (name[0] == 'o' && name[1] == 'p') {
5800 return -KEY_pop; /* Weight 46933 */
5802 if (name[0] == 'o' && name[1] == 's') {
5803 return KEY_pos; /* Weight 5503 */
5807 if (name[0] == 'e' && name[1] == 'f') {
5808 return -KEY_ref; /* Weight 347102 */
5812 if (name[0] == 'u' && name[1] == 'b') {
5813 return KEY_sub; /* Weight 2053554 */
5815 if (name[0] == 'i' && name[1] == 'n') {
5816 return -KEY_sin; /* Weight 499 */
5820 if (name[0] == 'i' && name[1] == 'e') {
5821 return KEY_tie; /* Weight 10131 */
5825 if (name[0] == 's' && name[1] == 'e') {
5826 return KEY_use; /* Weight 686081 */
5830 if (name[0] == 'e' && name[1] == 'c') {
5831 return -KEY_vec; /* Weight 110566 */
5835 if (name[0] == 'o' && name[1] == 'r') {
5836 return -KEY_xor; /* Weight 619 */
5842 /* Names all of length 4. */
5843 /* CORE INIT bind chop dump each else eval exec exit fork getc glob goto
5844 grep join keys kill last link lock next open pack pipe push rand read
5845 recv redo seek send sort sqrt stat tell tied time wait warn */
5846 /* Offset 0 gives the best switch position. */
5849 if (!memcmp(name, "ORE", 3)) {
5851 return -KEY_CORE; /* Weight 47391 */
5855 if (!memcmp(name, "NIT", 3)) {
5857 return KEY_INIT; /* Weight 418 */
5861 if (!memcmp(name, "ind", 3)) {
5863 return -KEY_bind; /* Weight 290 */
5867 if (!memcmp(name, "hop", 3)) {
5869 return -KEY_chop; /* Weight 10172 */
5873 if (!memcmp(name, "ump", 3)) {
5875 return -KEY_dump; /* Weight 274 */
5879 if (!memcmp(name, "lse", 3)) {
5881 return KEY_else; /* Weight 527806 */
5883 if (!memcmp(name, "val", 3)) {
5885 return KEY_eval; /* Weight 136977 */
5887 if (!memcmp(name, "ach", 3)) {
5889 return -KEY_each; /* Weight 18414 */
5891 if (!memcmp(name, "xit", 3)) {
5893 return -KEY_exit; /* Weight 8262 */
5895 if (!memcmp(name, "xec", 3)) {
5897 return -KEY_exec; /* Weight 429 */
5901 if (!memcmp(name, "ork", 3)) {
5903 return -KEY_fork; /* Weight 327 */
5907 if (!memcmp(name, "oto", 3)) {
5909 return KEY_goto; /* Weight 109258 */
5911 if (!memcmp(name, "rep", 3)) {
5913 return KEY_grep; /* Weight 75912 */
5915 if (!memcmp(name, "lob", 3)) {
5917 return KEY_glob; /* Weight 2172 */
5919 if (!memcmp(name, "etc", 3)) {
5921 return -KEY_getc; /* Weight 981 */
5925 if (!memcmp(name, "oin", 3)) {
5927 return -KEY_join; /* Weight 130820 */
5931 if (!memcmp(name, "eys", 3)) {
5933 return -KEY_keys; /* Weight 131427 */
5935 if (!memcmp(name, "ill", 3)) {
5937 return -KEY_kill; /* Weight 382 */
5941 if (!memcmp(name, "ast", 3)) {
5943 return KEY_last; /* Weight 95078 */
5945 if (!memcmp(name, "ock", 3)) {
5947 return -KEY_lock; /* Weight 4210 */
5949 if (!memcmp(name, "ink", 3)) {
5951 return -KEY_link; /* Weight 425 */
5955 if (!memcmp(name, "ext", 3)) {
5957 return KEY_next; /* Weight 153355 */
5961 if (!memcmp(name, "pen", 3)) {
5963 return -KEY_open; /* Weight 39060 */
5967 if (!memcmp(name, "ush", 3)) {
5969 return -KEY_push; /* Weight 256975 */
5971 if (!memcmp(name, "ack", 3)) {
5973 return -KEY_pack; /* Weight 14491 */
5975 if (!memcmp(name, "ipe", 3)) {
5977 return -KEY_pipe; /* Weight 344 */
5981 if (!memcmp(name, "ead", 3)) {
5983 return -KEY_read; /* Weight 9434 */
5985 if (!memcmp(name, "edo", 3)) {
5987 return KEY_redo; /* Weight 5219 */
5989 if (!memcmp(name, "and", 3)) {
5991 return -KEY_rand; /* Weight 1824 */
5993 if (!memcmp(name, "ecv", 3)) {
5995 return -KEY_recv; /* Weight 250 */
5999 if (!memcmp(name, "tat", 3)) {
6001 return -KEY_stat; /* Weight 36702 */
6003 if (!memcmp(name, "ort", 3)) {
6005 return KEY_sort; /* Weight 36394 */
6007 if (!memcmp(name, "eek", 3)) {
6009 return -KEY_seek; /* Weight 2174 */
6011 if (!memcmp(name, "qrt", 3)) {
6013 return -KEY_sqrt; /* Weight 766 */
6015 if (!memcmp(name, "end", 3)) {
6017 return -KEY_send; /* Weight 496 */
6021 if (!memcmp(name, "ime", 3)) {
6023 return -KEY_time; /* Weight 32168 */
6025 if (!memcmp(name, "ied", 3)) {
6027 return KEY_tied; /* Weight 9749 */
6029 if (!memcmp(name, "ell", 3)) {
6031 return -KEY_tell; /* Weight 2578 */
6035 if (!memcmp(name, "arn", 3)) {
6037 return -KEY_warn; /* Weight 91372 */
6039 if (!memcmp(name, "ait", 3)) {
6047 /* Names all of length 5. */
6048 /* BEGIN CHECK alarm atan2 bless chdir chmod chomp chown close crypt elsif
6049 fcntl flock index ioctl local lstat mkdir print reset rmdir semop shift
6050 sleep split srand study times umask undef untie until utime while write
6052 /* Offset 3 gives the best switch position. */
6055 if (!memcmp(name, "CHECK", 5)) {
6057 return KEY_CHECK; /* Weight 538 */
6061 if (!memcmp(name, "BEGIN", 5)) {
6063 return KEY_BEGIN; /* Weight 24125 */
6067 if (!memcmp(name, "local", 5)) {
6069 return KEY_local; /* Weight 262973 */
6071 if (!memcmp(name, "lstat", 5)) {
6073 return -KEY_lstat; /* Weight 13859 */
6077 if (!memcmp(name, "flock", 5)) {
6079 return -KEY_flock; /* Weight 260 */
6083 if (!memcmp(name, "study", 5)) {
6085 return KEY_study; /* Weight 1933 */
6089 if (!memcmp(name, "undef", 5)) {
6091 return KEY_undef; /* Weight 311156 */
6093 if (!memcmp(name, "index", 5)) {
6095 return -KEY_index; /* Weight 51465 */
6097 if (!memcmp(name, "sleep", 5)) {
6099 return -KEY_sleep; /* Weight 519 */
6101 if (!memcmp(name, "times", 5)) {
6103 return -KEY_times; /* Weight 310 */
6105 if (!memcmp(name, "reset", 5)) {
6107 return -KEY_reset; /* Weight 127 */
6111 if (!memcmp(name, "shift", 5)) {
6113 return -KEY_shift; /* Weight 904125 */
6117 if (!memcmp(name, "elsif", 5)) {
6119 return KEY_elsif; /* Weight 322365 */
6121 if (!memcmp(name, "split", 5)) {
6123 return KEY_split; /* Weight 93678 */
6125 if (!memcmp(name, "chdir", 5)) {
6127 return -KEY_chdir; /* Weight 20317 */
6129 if (!memcmp(name, "mkdir", 5)) {
6131 return -KEY_mkdir; /* Weight 2951 */
6133 if (!memcmp(name, "rmdir", 5)) {
6135 return -KEY_rmdir; /* Weight 2493 */
6137 if (!memcmp(name, "until", 5)) {
6139 return KEY_until; /* Weight 818 */
6141 if (!memcmp(name, "untie", 5)) {
6143 return KEY_untie; /* Weight 420 */
6147 if (!memcmp(name, "while", 5)) {
6149 return KEY_while; /* Weight 120305 */
6153 if (!memcmp(name, "chomp", 5)) {
6155 return -KEY_chomp; /* Weight 22337 */
6157 if (!memcmp(name, "utime", 5)) {
6159 return -KEY_utime; /* Weight 3849 */
6163 if (!memcmp(name, "print", 5)) {
6165 return KEY_print; /* Weight 220904 */
6167 if (!memcmp(name, "atan2", 5)) {
6169 return -KEY_atan2; /* Weight 350 */
6171 if (!memcmp(name, "srand", 5)) {
6173 return -KEY_srand; /* Weight 41 */
6177 if (!memcmp(name, "chmod", 5)) {
6179 return -KEY_chmod; /* Weight 18455 */
6181 if (!memcmp(name, "semop", 5)) {
6187 if (!memcmp(name, "crypt", 5)) {
6189 return -KEY_crypt; /* Weight 8 */
6193 if (!memcmp(name, "alarm", 5)) {
6199 if (!memcmp(name, "bless", 5)) {
6201 return -KEY_bless; /* Weight 62111 */
6203 if (!memcmp(name, "close", 5)) {
6205 return -KEY_close; /* Weight 44077 */
6207 if (!memcmp(name, "umask", 5)) {
6209 return -KEY_umask; /* Weight 1658 */
6213 if (!memcmp(name, "write", 5)) {
6215 return -KEY_write; /* Weight 2525 */
6217 if (!memcmp(name, "fcntl", 5)) {
6219 return -KEY_fcntl; /* Weight 1257 */
6221 if (!memcmp(name, "ioctl", 5)) {
6223 return -KEY_ioctl; /* Weight 967 */
6227 if (!memcmp(name, "chown", 5)) {
6229 return -KEY_chown; /* Weight 34 */
6235 /* Names all of length 6. */
6236 /* accept caller chroot delete elseif exists fileno format gmtime length
6237 listen msgctl msgget msgrcv msgsnd printf rename return rindex scalar
6238 select semctl semget shmctl shmget socket splice substr system unless
6239 unlink unpack values */
6240 /* Offset 3 gives the best switch position. */
6243 if (!memcmp(name, "unpack", 6)) {
6245 return -KEY_unpack; /* Weight 7849 */
6247 if (!memcmp(name, "rename", 6)) {
6249 return -KEY_rename; /* Weight 4918 */
6253 if (!memcmp(name, "semctl", 6)) {
6255 return -KEY_semctl; /* Weight 17 */
6257 if (!memcmp(name, "msgctl", 6)) {
6261 if (!memcmp(name, "shmctl", 6)) {
6267 if (!memcmp(name, "rindex", 6)) {
6269 return -KEY_rindex; /* Weight 5005 */
6273 if (!memcmp(name, "unless", 6)) {
6275 return KEY_unless; /* Weight 913955 */
6277 if (!memcmp(name, "delete", 6)) {
6279 return KEY_delete; /* Weight 74966 */
6281 if (!memcmp(name, "select", 6)) {
6283 return -KEY_select; /* Weight 12209 */
6285 if (!memcmp(name, "fileno", 6)) {
6287 return -KEY_fileno; /* Weight 8591 */
6289 if (!memcmp(name, "accept", 6)) {
6291 return -KEY_accept; /* Weight 233 */
6293 if (!memcmp(name, "elseif", 6)) {
6295 /* This is somewhat hacky. */
6296 if(ckWARN_d(WARN_SYNTAX))
6297 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
6302 if (!memcmp(name, "length", 6)) {
6304 return -KEY_length; /* Weight 163975 */
6306 if (!memcmp(name, "msgget", 6)) {
6310 if (!memcmp(name, "semget", 6)) {
6314 if (!memcmp(name, "shmget", 6)) {
6320 if (!memcmp(name, "splice", 6)) {
6322 return -KEY_splice; /* Weight 25143 */
6324 if (!memcmp(name, "unlink", 6)) {
6326 return -KEY_unlink; /* Weight 18616 */
6328 if (!memcmp(name, "gmtime", 6)) {
6330 return -KEY_gmtime; /* Weight 4040 */
6334 if (!memcmp(name, "socket", 6)) {
6340 if (!memcmp(name, "caller", 6)) {
6342 return -KEY_caller; /* Weight 148457 */
6344 if (!memcmp(name, "scalar", 6)) {
6346 return KEY_scalar; /* Weight 43953 */
6350 if (!memcmp(name, "format", 6)) {
6352 return KEY_format; /* Weight 1735 */
6356 if (!memcmp(name, "printf", 6)) {
6358 return KEY_printf; /* Weight 6874 */
6362 if (!memcmp(name, "chroot", 6)) {
6368 if (!memcmp(name, "msgrcv", 6)) {
6374 if (!memcmp(name, "exists", 6)) {
6376 return KEY_exists; /* Weight 145939 */
6378 if (!memcmp(name, "substr", 6)) {
6380 return -KEY_substr; /* Weight 121344 */
6382 if (!memcmp(name, "msgsnd", 6)) {
6388 if (!memcmp(name, "system", 6)) {
6390 return -KEY_system; /* Weight 4326 */
6392 if (!memcmp(name, "listen", 6)) {
6398 if (!memcmp(name, "return", 6)) {
6400 return KEY_return; /* Weight 1401629 */
6402 if (!memcmp(name, "values", 6)) {
6404 return -KEY_values; /* Weight 10110 */
6410 /* Names all of length 7. */
6411 /* DESTROY __END__ binmode connect dbmopen defined foreach getpgrp getppid
6412 lcfirst opendir package readdir require reverse seekdir setpgrp shmread
6413 sprintf symlink syscall sysopen sysread sysseek telldir ucfirst unshift
6415 /* Offset 3 gives the best switch position. */
6418 if (!memcmp(name, "__END__", 7)) {
6420 return KEY___END__; /* Weight 112636 */
6424 if (!memcmp(name, "DESTROY", 7)) {
6426 return KEY_DESTROY; /* Weight 7 */
6430 if (!memcmp(name, "syscall", 7)) {
6432 return -KEY_syscall; /* Weight 560 */
6436 if (!memcmp(name, "readdir", 7)) {
6438 return -KEY_readdir; /* Weight 11716 */
6442 if (!memcmp(name, "foreach", 7)) {
6444 return KEY_foreach; /* Weight 281720 */
6446 if (!memcmp(name, "reverse", 7)) {
6448 return -KEY_reverse; /* Weight 10571 */
6452 if (!memcmp(name, "unshift", 7)) {
6454 return -KEY_unshift; /* Weight 36504 */
6458 if (!memcmp(name, "defined", 7)) {
6460 return KEY_defined; /* Weight 694277 */
6462 if (!memcmp(name, "sprintf", 7)) {
6464 return -KEY_sprintf; /* Weight 72704 */
6466 if (!memcmp(name, "ucfirst", 7)) {
6468 return -KEY_ucfirst; /* Weight 1012 */
6470 if (!memcmp(name, "lcfirst", 7)) {
6472 return -KEY_lcfirst; /* Weight 165 */
6476 if (!memcmp(name, "package", 7)) {
6478 return KEY_package; /* Weight 245661 */
6480 if (!memcmp(name, "seekdir", 7)) {
6482 return -KEY_seekdir; /* Weight 20 */
6486 if (!memcmp(name, "symlink", 7)) {
6488 return -KEY_symlink; /* Weight 386 */
6490 if (!memcmp(name, "telldir", 7)) {
6492 return -KEY_telldir; /* Weight 294 */
6496 if (!memcmp(name, "binmode", 7)) {
6498 return -KEY_binmode; /* Weight 12301 */
6502 if (!memcmp(name, "opendir", 7)) {
6504 return -KEY_opendir; /* Weight 9007 */
6506 if (!memcmp(name, "connect", 7)) {
6508 return -KEY_connect; /* Weight 526 */
6512 if (!memcmp(name, "sysopen", 7)) {
6514 return -KEY_sysopen; /* Weight 1230 */
6516 if (!memcmp(name, "dbmopen", 7)) {
6518 return -KEY_dbmopen;
6522 if (!memcmp(name, "getppid", 7)) {
6524 return -KEY_getppid; /* Weight 10 */
6526 if (!memcmp(name, "getpgrp", 7)) {
6528 return -KEY_getpgrp;
6530 if (!memcmp(name, "setpgrp", 7)) {
6532 return -KEY_setpgrp;
6536 if (!memcmp(name, "sysread", 7)) {
6538 return -KEY_sysread; /* Weight 3729 */
6540 if (!memcmp(name, "shmread", 7)) {
6542 return -KEY_shmread;
6546 if (!memcmp(name, "sysseek", 7)) {
6548 return -KEY_sysseek; /* Weight 721 */
6552 if (!memcmp(name, "waitpid", 7)) {
6554 return -KEY_waitpid; /* Weight 414 */
6558 if (!memcmp(name, "require", 7)) {
6560 return KEY_require; /* Weight 375220 */
6566 /* Names all of length 8. */
6567 /* AUTOLOAD __DATA__ __FILE__ __LINE__ closedir continue dbmclose endgrent
6568 endpwent formline getgrent getgrgid getgrnam getlogin getpwent getpwnam
6569 getpwuid readline readlink readpipe setgrent setpwent shmwrite shutdown
6570 syswrite truncate */
6571 /* Offset 3 gives the best switch position. */
6574 if (!memcmp(name, "__DATA__", 8)) {
6576 return KEY___DATA__; /* Weight 395 */
6580 if (!memcmp(name, "__FILE__", 8)) {
6582 return -KEY___FILE__; /* Weight 888 */
6584 if (!memcmp(name, "__LINE__", 8)) {
6586 return -KEY___LINE__; /* Weight 209 */
6590 if (!memcmp(name, "AUTOLOAD", 8)) {
6592 return KEY_AUTOLOAD; /* Weight 2713 */
6596 if (!memcmp(name, "dbmclose", 8)) {
6598 return -KEY_dbmclose;
6602 if (!memcmp(name, "readlink", 8)) {
6604 return -KEY_readlink; /* Weight 1537 */
6606 if (!memcmp(name, "readline", 8)) {
6608 return -KEY_readline; /* Weight 19 */
6610 if (!memcmp(name, "readpipe", 8)) {
6612 return -KEY_readpipe;
6616 if (!memcmp(name, "getgrgid", 8)) {
6618 return -KEY_getgrgid; /* Weight 67 */
6620 if (!memcmp(name, "getgrnam", 8)) {
6622 return -KEY_getgrnam; /* Weight 11 */
6624 if (!memcmp(name, "endgrent", 8)) {
6626 return -KEY_endgrent;
6628 if (!memcmp(name, "getgrent", 8)) {
6630 return -KEY_getgrent;
6632 if (!memcmp(name, "setgrent", 8)) {
6634 return -KEY_setgrent;
6638 if (!memcmp(name, "getlogin", 8)) {
6640 return -KEY_getlogin; /* Weight 158 */
6644 if (!memcmp(name, "formline", 8)) {
6646 return -KEY_formline; /* Weight 959 */
6650 if (!memcmp(name, "truncate", 8)) {
6652 return -KEY_truncate; /* Weight 1351 */
6656 if (!memcmp(name, "getpwuid", 8)) {
6658 return -KEY_getpwuid; /* Weight 681 */
6660 if (!memcmp(name, "getpwnam", 8)) {
6662 return -KEY_getpwnam; /* Weight 483 */
6664 if (!memcmp(name, "getpwent", 8)) {
6666 return -KEY_getpwent; /* Weight 12 */
6668 if (!memcmp(name, "endpwent", 8)) {
6670 return -KEY_endpwent;
6672 if (!memcmp(name, "setpwent", 8)) {
6674 return -KEY_setpwent;
6678 if (!memcmp(name, "closedir", 8)) {
6680 return -KEY_closedir; /* Weight 11986 */
6684 if (!memcmp(name, "continue", 8)) {
6686 return -KEY_continue; /* Weight 2925 */
6688 if (!memcmp(name, "shutdown", 8)) {
6690 return -KEY_shutdown;
6694 if (!memcmp(name, "syswrite", 8)) {
6696 return -KEY_syswrite; /* Weight 4437 */
6698 if (!memcmp(name, "shmwrite", 8)) {
6700 return -KEY_shmwrite;
6706 /* Names all of length 9. */
6707 /* endnetent getnetent localtime prototype quotemeta rewinddir setnetent
6709 /* Offset 0 gives the best switch position. */
6712 if (!memcmp(name, "ndnetent", 8)) {
6714 return -KEY_endnetent;
6718 if (!memcmp(name, "etnetent", 8)) {
6720 return -KEY_getnetent;
6724 if (!memcmp(name, "ocaltime", 8)) {
6726 return -KEY_localtime; /* Weight 7993 */
6730 if (!memcmp(name, "rototype", 8)) {
6732 return KEY_prototype; /* Weight 1602 */
6736 if (!memcmp(name, "uotemeta", 8)) {
6738 return -KEY_quotemeta; /* Weight 3120 */
6742 if (!memcmp(name, "ewinddir", 8)) {
6744 return -KEY_rewinddir; /* Weight 218 */
6748 if (!memcmp(name, "etnetent", 8)) {
6750 return -KEY_setnetent; /* Weight 1 */
6754 if (!memcmp(name, "antarray", 8)) {
6756 return -KEY_wantarray; /* Weight 43024 */
6762 /* Names all of length 10. */
6763 /* endhostent endservent gethostent getservent getsockopt sethostent
6764 setservent setsockopt socketpair */
6765 /* Offset 6 gives the best switch position. */
6768 if (!memcmp(name, "setsockopt", 10)) {
6770 return -KEY_setsockopt; /* Weight 356 */
6772 if (!memcmp(name, "getsockopt", 10)) {
6774 return -KEY_getsockopt; /* Weight 243 */
6778 if (!memcmp(name, "socketpair", 10)) {
6780 return -KEY_socketpair;
6784 if (!memcmp(name, "gethostent", 10)) {
6786 return -KEY_gethostent; /* Weight 3 */
6788 if (!memcmp(name, "endhostent", 10)) {
6790 return -KEY_endhostent;
6792 if (!memcmp(name, "sethostent", 10)) {
6794 return -KEY_sethostent;
6798 if (!memcmp(name, "getservent", 10)) {
6800 return -KEY_getservent; /* Weight 4 */
6802 if (!memcmp(name, "endservent", 10)) {
6804 return -KEY_endservent;
6806 if (!memcmp(name, "setservent", 10)) {
6808 return -KEY_setservent;
6814 /* Names all of length 11. */
6815 /* __PACKAGE__ endprotoent getpeername getpriority getprotoent getsockname
6816 setpriority setprotoent */
6817 /* Offset 5 gives the best switch position. */
6820 if (!memcmp(name, "__PACKAGE__", 11)) {
6822 return -KEY___PACKAGE__; /* Weight 36767 */
6826 if (!memcmp(name, "getsockname", 11)) {
6828 return -KEY_getsockname; /* Weight 235 */
6832 if (!memcmp(name, "getpeername", 11)) {
6834 return -KEY_getpeername; /* Weight 713 */
6838 if (!memcmp(name, "getpriority", 11)) {
6840 return -KEY_getpriority; /* Weight 5 */
6842 if (!memcmp(name, "setpriority", 11)) {
6844 return -KEY_setpriority; /* Weight 2 */
6848 if (!memcmp(name, "endprotoent", 11)) {
6850 return -KEY_endprotoent;
6852 if (!memcmp(name, "getprotoent", 11)) {
6854 return -KEY_getprotoent;
6856 if (!memcmp(name, "setprotoent", 11)) {
6858 return -KEY_setprotoent;
6864 /* Names all of length 12. */
6865 /* getnetbyaddr getnetbyname */
6866 /* Offset 9 gives the best switch position. */
6869 if (!memcmp(name, "getnetbyname", 12)) {
6871 return -KEY_getnetbyname;
6875 if (!memcmp(name, "getnetbyaddr", 12)) {
6877 return -KEY_getnetbyaddr;
6883 /* Names all of length 13. */
6884 /* gethostbyaddr gethostbyname getservbyname getservbyport */
6885 /* Offset 10 gives the best switch position. */
6888 if (!memcmp(name, "gethostbyname", 13)) {
6890 return -KEY_gethostbyname; /* Weight 970 */
6892 if (!memcmp(name, "getservbyname", 13)) {
6894 return -KEY_getservbyname; /* Weight 299 */
6898 if (!memcmp(name, "gethostbyaddr", 13)) {
6900 return -KEY_gethostbyaddr; /* Weight 68 */
6904 if (!memcmp(name, "getservbyport", 13)) {
6906 return -KEY_getservbyport;
6912 if (!memcmp(name, "getprotobyname", 14)) {
6913 return -KEY_getprotobyname; /* Weight 755 */
6917 if (!memcmp(name, "getprotobynumber", 16)) {
6918 return -KEY_getprotobynumber; /* Weight 232 */
6926 S_checkcomma(pTHX_ register char *s, char *name, char *what)
6930 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
6931 if (ckWARN(WARN_SYNTAX)) {
6933 for (w = s+2; *w && level; w++) {
6940 for (; *w && isSPACE(*w); w++) ;
6941 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
6942 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6943 "%s (...) interpreted as function",name);
6946 while (s < PL_bufend && isSPACE(*s))
6950 while (s < PL_bufend && isSPACE(*s))
6952 if (isIDFIRST_lazy_if(s,UTF)) {
6954 while (isALNUM_lazy_if(s,UTF))
6956 while (s < PL_bufend && isSPACE(*s))
6961 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
6965 Perl_croak(aTHX_ "No comma allowed after %s", what);
6970 /* Either returns sv, or mortalizes sv and returns a new SV*.
6971 Best used as sv=new_constant(..., sv, ...).
6972 If s, pv are NULL, calls subroutine with one argument,
6973 and type is used with error messages only. */
6976 S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
6980 HV *table = GvHV(PL_hintgv); /* ^H */
6984 const char *why1, *why2, *why3;
6986 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
6989 why2 = strEQ(key,"charnames")
6990 ? "(possibly a missing \"use charnames ...\")"
6992 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
6993 (type ? type: "undef"), why2);
6995 /* This is convoluted and evil ("goto considered harmful")
6996 * but I do not understand the intricacies of all the different
6997 * failure modes of %^H in here. The goal here is to make
6998 * the most probable error message user-friendly. --jhi */
7003 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
7004 (type ? type: "undef"), why1, why2, why3);
7006 yyerror(SvPVX(msg));
7010 cvp = hv_fetch(table, key, strlen(key), FALSE);
7011 if (!cvp || !SvOK(*cvp)) {
7014 why3 = "} is not defined";
7017 sv_2mortal(sv); /* Parent created it permanently */
7020 pv = sv_2mortal(newSVpvn(s, len));
7022 typesv = sv_2mortal(newSVpv(type, 0));
7024 typesv = &PL_sv_undef;
7026 PUSHSTACKi(PERLSI_OVERLOAD);
7038 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
7042 /* Check the eval first */
7043 if (!PL_in_eval && SvTRUE(ERRSV)) {
7045 sv_catpv(ERRSV, "Propagated");
7046 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
7048 res = SvREFCNT_inc(sv);
7052 (void)SvREFCNT_inc(res);
7061 why1 = "Call to &{$^H{";
7063 why3 = "}} did not return a defined value";
7071 /* Returns a NUL terminated string, with the length of the string written to
7075 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
7077 register char *d = dest;
7078 register char *e = d + destlen - 3; /* two-character token, ending NUL */
7081 Perl_croak(aTHX_ ident_too_long);
7082 if (isALNUM(*s)) /* UTF handled below */
7084 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
7089 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
7093 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
7094 char *t = s + UTF8SKIP(s);
7095 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
7097 if (d + (t - s) > e)
7098 Perl_croak(aTHX_ ident_too_long);
7099 Copy(s, d, t - s, char);
7112 S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
7122 e = d + destlen - 3; /* two-character token, ending NUL */
7124 while (isDIGIT(*s)) {
7126 Perl_croak(aTHX_ ident_too_long);
7133 Perl_croak(aTHX_ ident_too_long);
7134 if (isALNUM(*s)) /* UTF handled below */
7136 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
7141 else if (*s == ':' && s[1] == ':') {
7145 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
7146 char *t = s + UTF8SKIP(s);
7147 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
7149 if (d + (t - s) > e)
7150 Perl_croak(aTHX_ ident_too_long);
7151 Copy(s, d, t - s, char);
7162 if (PL_lex_state != LEX_NORMAL)
7163 PL_lex_state = LEX_INTERPENDMAYBE;
7166 if (*s == '$' && s[1] &&
7167 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
7180 if (*d == '^' && *s && isCONTROLVAR(*s)) {
7185 if (isSPACE(s[-1])) {
7188 if (!SPACE_OR_TAB(ch)) {
7194 if (isIDFIRST_lazy_if(d,UTF)) {
7198 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
7200 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
7203 Copy(s, d, e - s, char);
7208 while ((isALNUM(*s) || *s == ':') && d < e)
7211 Perl_croak(aTHX_ ident_too_long);
7214 while (s < send && SPACE_OR_TAB(*s)) s++;
7215 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
7216 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
7217 const char *brack = *s == '[' ? "[...]" : "{...}";
7218 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7219 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
7220 funny, dest, brack, funny, dest, brack);
7223 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
7227 /* Handle extended ${^Foo} variables
7228 * 1999-02-27 mjd-perl-patch@plover.com */
7229 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
7233 while (isALNUM(*s) && d < e) {
7237 Perl_croak(aTHX_ ident_too_long);
7242 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
7243 PL_lex_state = LEX_INTERPEND;
7248 if (PL_lex_state == LEX_NORMAL) {
7249 if (ckWARN(WARN_AMBIGUOUS) &&
7250 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
7252 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7253 "Ambiguous use of %c{%s} resolved to %c%s",
7254 funny, dest, funny, dest);
7259 s = bracket; /* let the parser handle it */
7263 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
7264 PL_lex_state = LEX_INTERPEND;
7269 Perl_pmflag(pTHX_ U32* pmfl, int ch)
7274 *pmfl |= PMf_GLOBAL;
7276 *pmfl |= PMf_CONTINUE;
7280 *pmfl |= PMf_MULTILINE;
7282 *pmfl |= PMf_SINGLELINE;
7284 *pmfl |= PMf_EXTENDED;
7288 S_scan_pat(pTHX_ char *start, I32 type)
7293 s = scan_str(start,FALSE,FALSE);
7295 Perl_croak(aTHX_ "Search pattern not terminated");
7297 pm = (PMOP*)newPMOP(type, 0);
7298 if (PL_multi_open == '?')
7299 pm->op_pmflags |= PMf_ONCE;
7301 while (*s && strchr("iomsx", *s))
7302 pmflag(&pm->op_pmflags,*s++);
7305 while (*s && strchr("iogcmsx", *s))
7306 pmflag(&pm->op_pmflags,*s++);
7308 /* issue a warning if /c is specified,but /g is not */
7309 if (ckWARN(WARN_REGEXP) &&
7310 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
7312 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
7315 pm->op_pmpermflags = pm->op_pmflags;
7317 PL_lex_op = (OP*)pm;
7318 yylval.ival = OP_MATCH;
7323 S_scan_subst(pTHX_ char *start)
7330 yylval.ival = OP_NULL;
7332 s = scan_str(start,FALSE,FALSE);
7335 Perl_croak(aTHX_ "Substitution pattern not terminated");
7337 if (s[-1] == PL_multi_open)
7340 first_start = PL_multi_start;
7341 s = scan_str(s,FALSE,FALSE);
7344 SvREFCNT_dec(PL_lex_stuff);
7345 PL_lex_stuff = Nullsv;
7347 Perl_croak(aTHX_ "Substitution replacement not terminated");
7349 PL_multi_start = first_start; /* so whole substitution is taken together */
7351 pm = (PMOP*)newPMOP(OP_SUBST, 0);
7357 else if (strchr("iogcmsx", *s))
7358 pmflag(&pm->op_pmflags,*s++);
7363 /* /c is not meaningful with s/// */
7364 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
7366 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
7371 PL_sublex_info.super_bufptr = s;
7372 PL_sublex_info.super_bufend = PL_bufend;
7374 pm->op_pmflags |= PMf_EVAL;
7375 repl = newSVpvn("",0);
7377 sv_catpv(repl, es ? "eval " : "do ");
7378 sv_catpvn(repl, "{ ", 2);
7379 sv_catsv(repl, PL_lex_repl);
7380 sv_catpvn(repl, " };", 2);
7382 SvREFCNT_dec(PL_lex_repl);
7386 pm->op_pmpermflags = pm->op_pmflags;
7387 PL_lex_op = (OP*)pm;
7388 yylval.ival = OP_SUBST;
7393 S_scan_trans(pTHX_ char *start)
7402 yylval.ival = OP_NULL;
7404 s = scan_str(start,FALSE,FALSE);
7406 Perl_croak(aTHX_ "Transliteration pattern not terminated");
7407 if (s[-1] == PL_multi_open)
7410 s = scan_str(s,FALSE,FALSE);
7413 SvREFCNT_dec(PL_lex_stuff);
7414 PL_lex_stuff = Nullsv;
7416 Perl_croak(aTHX_ "Transliteration replacement not terminated");
7419 complement = del = squash = 0;
7423 complement = OPpTRANS_COMPLEMENT;
7426 del = OPpTRANS_DELETE;
7429 squash = OPpTRANS_SQUASH;
7438 New(803, tbl, complement&&!del?258:256, short);
7439 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7440 o->op_private &= ~OPpTRANS_ALL;
7441 o->op_private |= del|squash|complement|
7442 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7443 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
7446 yylval.ival = OP_TRANS;
7451 S_scan_heredoc(pTHX_ register char *s)
7454 I32 op_type = OP_SCALAR;
7461 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
7465 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
7468 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
7469 if (*peek == '`' || *peek == '\'' || *peek =='"') {
7472 s = delimcpy(d, e, s, PL_bufend, term, &len);
7482 if (!isALNUM_lazy_if(s,UTF))
7483 deprecate_old("bare << to mean <<\"\"");
7484 for (; isALNUM_lazy_if(s,UTF); s++) {
7489 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
7490 Perl_croak(aTHX_ "Delimiter for here document is too long");
7493 len = d - PL_tokenbuf;
7494 #ifndef PERL_STRICT_CR
7495 d = strchr(s, '\r');
7499 while (s < PL_bufend) {
7505 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
7514 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
7519 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
7520 herewas = newSVpvn(s,PL_bufend-s);
7522 s--, herewas = newSVpvn(s,d-s);
7523 s += SvCUR(herewas);
7525 tmpstr = NEWSV(87,79);
7526 sv_upgrade(tmpstr, SVt_PVIV);
7531 else if (term == '`') {
7532 op_type = OP_BACKTICK;
7533 SvIVX(tmpstr) = '\\';
7537 PL_multi_start = CopLINE(PL_curcop);
7538 PL_multi_open = PL_multi_close = '<';
7539 term = *PL_tokenbuf;
7540 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
7541 char *bufptr = PL_sublex_info.super_bufptr;
7542 char *bufend = PL_sublex_info.super_bufend;
7543 char *olds = s - SvCUR(herewas);
7544 s = strchr(bufptr, '\n');
7548 while (s < bufend &&
7549 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
7551 CopLINE_inc(PL_curcop);
7554 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
7555 missingterm(PL_tokenbuf);
7557 sv_setpvn(herewas,bufptr,d-bufptr+1);
7558 sv_setpvn(tmpstr,d+1,s-d);
7560 sv_catpvn(herewas,s,bufend-s);
7561 Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
7568 while (s < PL_bufend &&
7569 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
7571 CopLINE_inc(PL_curcop);
7573 if (s >= PL_bufend) {
7574 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
7575 missingterm(PL_tokenbuf);
7577 sv_setpvn(tmpstr,d+1,s-d);
7579 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
7581 sv_catpvn(herewas,s,PL_bufend-s);
7582 sv_setsv(PL_linestr,herewas);
7583 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
7584 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7585 PL_last_lop = PL_last_uni = Nullch;
7588 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
7589 while (s >= PL_bufend) { /* multiple line string? */
7591 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
7592 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
7593 missingterm(PL_tokenbuf);
7595 CopLINE_inc(PL_curcop);
7596 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7597 PL_last_lop = PL_last_uni = Nullch;
7598 #ifndef PERL_STRICT_CR
7599 if (PL_bufend - PL_linestart >= 2) {
7600 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
7601 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
7603 PL_bufend[-2] = '\n';
7605 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
7607 else if (PL_bufend[-1] == '\r')
7608 PL_bufend[-1] = '\n';
7610 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
7611 PL_bufend[-1] = '\n';
7613 if (PERLDB_LINE && PL_curstash != PL_debstash) {
7614 SV *sv = NEWSV(88,0);
7616 sv_upgrade(sv, SVt_PVMG);
7617 sv_setsv(sv,PL_linestr);
7620 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
7622 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
7623 STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
7624 *(SvPVX(PL_linestr) + off ) = ' ';
7625 sv_catsv(PL_linestr,herewas);
7626 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7627 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
7631 sv_catsv(tmpstr,PL_linestr);
7636 PL_multi_end = CopLINE(PL_curcop);
7637 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
7638 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
7639 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
7641 SvREFCNT_dec(herewas);
7643 if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
7645 else if (PL_encoding)
7646 sv_recode_to_utf8(tmpstr, PL_encoding);
7648 PL_lex_stuff = tmpstr;
7649 yylval.ival = op_type;
7654 takes: current position in input buffer
7655 returns: new position in input buffer
7656 side-effects: yylval and lex_op are set.
7661 <FH> read from filehandle
7662 <pkg::FH> read from package qualified filehandle
7663 <pkg'FH> read from package qualified filehandle
7664 <$fh> read from filehandle in $fh
7670 S_scan_inputsymbol(pTHX_ char *start)
7672 register char *s = start; /* current position in buffer */
7678 d = PL_tokenbuf; /* start of temp holding space */
7679 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
7680 end = strchr(s, '\n');
7683 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
7685 /* die if we didn't have space for the contents of the <>,
7686 or if it didn't end, or if we see a newline
7689 if (len >= sizeof PL_tokenbuf)
7690 Perl_croak(aTHX_ "Excessively long <> operator");
7692 Perl_croak(aTHX_ "Unterminated <> operator");
7697 Remember, only scalar variables are interpreted as filehandles by
7698 this code. Anything more complex (e.g., <$fh{$num}>) will be
7699 treated as a glob() call.
7700 This code makes use of the fact that except for the $ at the front,
7701 a scalar variable and a filehandle look the same.
7703 if (*d == '$' && d[1]) d++;
7705 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7706 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
7709 /* If we've tried to read what we allow filehandles to look like, and
7710 there's still text left, then it must be a glob() and not a getline.
7711 Use scan_str to pull out the stuff between the <> and treat it
7712 as nothing more than a string.
7715 if (d - PL_tokenbuf != len) {
7716 yylval.ival = OP_GLOB;
7718 s = scan_str(start,FALSE,FALSE);
7720 Perl_croak(aTHX_ "Glob not terminated");
7724 bool readline_overriden = FALSE;
7725 GV *gv_readline = Nullgv;
7727 /* we're in a filehandle read situation */
7730 /* turn <> into <ARGV> */
7732 Copy("ARGV",d,5,char);
7734 /* Check whether readline() is overriden */
7735 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
7736 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
7738 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
7739 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
7740 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
7741 readline_overriden = TRUE;
7743 /* if <$fh>, create the ops to turn the variable into a
7749 /* try to find it in the pad for this block, otherwise find
7750 add symbol table ops
7752 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
7753 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
7754 SV *sym = sv_2mortal(
7755 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
7756 sv_catpvn(sym, "::", 2);
7762 OP *o = newOP(OP_PADSV, 0);
7764 PL_lex_op = readline_overriden
7765 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7766 append_elem(OP_LIST, o,
7767 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
7768 : (OP*)newUNOP(OP_READLINE, 0, o);
7777 ? (GV_ADDMULTI | GV_ADDINEVAL)
7780 PL_lex_op = readline_overriden
7781 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7782 append_elem(OP_LIST,
7783 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
7784 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
7785 : (OP*)newUNOP(OP_READLINE, 0,
7786 newUNOP(OP_RV2SV, 0,
7787 newGVOP(OP_GV, 0, gv)));
7789 if (!readline_overriden)
7790 PL_lex_op->op_flags |= OPf_SPECIAL;
7791 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
7792 yylval.ival = OP_NULL;
7795 /* If it's none of the above, it must be a literal filehandle
7796 (<Foo::BAR> or <FOO>) so build a simple readline OP */
7798 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
7799 PL_lex_op = readline_overriden
7800 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7801 append_elem(OP_LIST,
7802 newGVOP(OP_GV, 0, gv),
7803 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
7804 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
7805 yylval.ival = OP_NULL;
7814 takes: start position in buffer
7815 keep_quoted preserve \ on the embedded delimiter(s)
7816 keep_delims preserve the delimiters around the string
7817 returns: position to continue reading from buffer
7818 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
7819 updates the read buffer.
7821 This subroutine pulls a string out of the input. It is called for:
7822 q single quotes q(literal text)
7823 ' single quotes 'literal text'
7824 qq double quotes qq(interpolate $here please)
7825 " double quotes "interpolate $here please"
7826 qx backticks qx(/bin/ls -l)
7827 ` backticks `/bin/ls -l`
7828 qw quote words @EXPORT_OK = qw( func() $spam )
7829 m// regexp match m/this/
7830 s/// regexp substitute s/this/that/
7831 tr/// string transliterate tr/this/that/
7832 y/// string transliterate y/this/that/
7833 ($*@) sub prototypes sub foo ($)
7834 (stuff) sub attr parameters sub foo : attr(stuff)
7835 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
7837 In most of these cases (all but <>, patterns and transliterate)
7838 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
7839 calls scan_str(). s/// makes yylex() call scan_subst() which calls
7840 scan_str(). tr/// and y/// make yylex() call scan_trans() which
7843 It skips whitespace before the string starts, and treats the first
7844 character as the delimiter. If the delimiter is one of ([{< then
7845 the corresponding "close" character )]}> is used as the closing
7846 delimiter. It allows quoting of delimiters, and if the string has
7847 balanced delimiters ([{<>}]) it allows nesting.
7849 On success, the SV with the resulting string is put into lex_stuff or,
7850 if that is already non-NULL, into lex_repl. The second case occurs only
7851 when parsing the RHS of the special constructs s/// and tr/// (y///).
7852 For convenience, the terminating delimiter character is stuffed into
7857 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
7859 SV *sv; /* scalar value: string */
7860 char *tmps; /* temp string, used for delimiter matching */
7861 register char *s = start; /* current position in the buffer */
7862 register char term; /* terminating character */
7863 register char *to; /* current position in the sv's data */
7864 I32 brackets = 1; /* bracket nesting level */
7865 bool has_utf8 = FALSE; /* is there any utf8 content? */
7866 I32 termcode; /* terminating char. code */
7867 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
7868 STRLEN termlen; /* length of terminating string */
7869 char *last = NULL; /* last position for nesting bracket */
7871 /* skip space before the delimiter */
7875 /* mark where we are, in case we need to report errors */
7878 /* after skipping whitespace, the next character is the terminator */
7881 termcode = termstr[0] = term;
7885 termcode = utf8_to_uvchr((U8*)s, &termlen);
7886 Copy(s, termstr, termlen, U8);
7887 if (!UTF8_IS_INVARIANT(term))
7891 /* mark where we are */
7892 PL_multi_start = CopLINE(PL_curcop);
7893 PL_multi_open = term;
7895 /* find corresponding closing delimiter */
7896 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
7897 termcode = termstr[0] = term = tmps[5];
7899 PL_multi_close = term;
7901 /* create a new SV to hold the contents. 87 is leak category, I'm
7902 assuming. 79 is the SV's initial length. What a random number. */
7904 sv_upgrade(sv, SVt_PVIV);
7905 SvIVX(sv) = termcode;
7906 (void)SvPOK_only(sv); /* validate pointer */
7908 /* move past delimiter and try to read a complete string */
7910 sv_catpvn(sv, s, termlen);
7913 if (PL_encoding && !UTF) {
7917 int offset = s - SvPVX(PL_linestr);
7918 bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
7919 &offset, (char*)termstr, termlen);
7920 char *ns = SvPVX(PL_linestr) + offset;
7921 char *svlast = SvEND(sv) - 1;
7923 for (; s < ns; s++) {
7924 if (*s == '\n' && !PL_rsfp)
7925 CopLINE_inc(PL_curcop);
7928 goto read_more_line;
7930 /* handle quoted delimiters */
7931 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
7933 for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
7935 if ((svlast-1 - t) % 2) {
7939 SvCUR_set(sv, SvCUR(sv) - 1);
7944 if (PL_multi_open == PL_multi_close) {
7951 for (w = t = last; t < svlast; w++, t++) {
7952 /* At here, all closes are "was quoted" one,
7953 so we don't check PL_multi_close. */
7955 if (!keep_quoted && *(t+1) == PL_multi_open)
7960 else if (*t == PL_multi_open)
7968 SvCUR_set(sv, w - SvPVX(sv));
7971 if (--brackets <= 0)
7977 SvCUR_set(sv, SvCUR(sv) - 1);
7983 /* extend sv if need be */
7984 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
7985 /* set 'to' to the next character in the sv's string */
7986 to = SvPVX(sv)+SvCUR(sv);
7988 /* if open delimiter is the close delimiter read unbridle */
7989 if (PL_multi_open == PL_multi_close) {
7990 for (; s < PL_bufend; s++,to++) {
7991 /* embedded newlines increment the current line number */
7992 if (*s == '\n' && !PL_rsfp)
7993 CopLINE_inc(PL_curcop);
7994 /* handle quoted delimiters */
7995 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
7996 if (!keep_quoted && s[1] == term)
7998 /* any other quotes are simply copied straight through */
8002 /* terminate when run out of buffer (the for() condition), or
8003 have found the terminator */
8004 else if (*s == term) {
8007 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
8010 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
8016 /* if the terminator isn't the same as the start character (e.g.,
8017 matched brackets), we have to allow more in the quoting, and
8018 be prepared for nested brackets.
8021 /* read until we run out of string, or we find the terminator */
8022 for (; s < PL_bufend; s++,to++) {
8023 /* embedded newlines increment the line count */
8024 if (*s == '\n' && !PL_rsfp)
8025 CopLINE_inc(PL_curcop);
8026 /* backslashes can escape the open or closing characters */
8027 if (*s == '\\' && s+1 < PL_bufend) {
8029 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
8034 /* allow nested opens and closes */
8035 else if (*s == PL_multi_close && --brackets <= 0)
8037 else if (*s == PL_multi_open)
8039 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
8044 /* terminate the copied string and update the sv's end-of-string */
8046 SvCUR_set(sv, to - SvPVX(sv));
8049 * this next chunk reads more into the buffer if we're not done yet
8053 break; /* handle case where we are done yet :-) */
8055 #ifndef PERL_STRICT_CR
8056 if (to - SvPVX(sv) >= 2) {
8057 if ((to[-2] == '\r' && to[-1] == '\n') ||
8058 (to[-2] == '\n' && to[-1] == '\r'))
8062 SvCUR_set(sv, to - SvPVX(sv));
8064 else if (to[-1] == '\r')
8067 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
8072 /* if we're out of file, or a read fails, bail and reset the current
8073 line marker so we can report where the unterminated string began
8076 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
8078 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
8081 /* we read a line, so increment our line counter */
8082 CopLINE_inc(PL_curcop);
8084 /* update debugger info */
8085 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8086 SV *sv = NEWSV(88,0);
8088 sv_upgrade(sv, SVt_PVMG);
8089 sv_setsv(sv,PL_linestr);
8092 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
8095 /* having changed the buffer, we must update PL_bufend */
8096 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8097 PL_last_lop = PL_last_uni = Nullch;
8100 /* at this point, we have successfully read the delimited string */
8102 if (!PL_encoding || UTF) {
8104 sv_catpvn(sv, s, termlen);
8107 if (has_utf8 || PL_encoding)
8110 PL_multi_end = CopLINE(PL_curcop);
8112 /* if we allocated too much space, give some back */
8113 if (SvCUR(sv) + 5 < SvLEN(sv)) {
8114 SvLEN_set(sv, SvCUR(sv) + 1);
8115 Renew(SvPVX(sv), SvLEN(sv), char);
8118 /* decide whether this is the first or second quoted string we've read
8131 takes: pointer to position in buffer
8132 returns: pointer to new position in buffer
8133 side-effects: builds ops for the constant in yylval.op
8135 Read a number in any of the formats that Perl accepts:
8137 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
8138 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
8141 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
8143 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
8146 If it reads a number without a decimal point or an exponent, it will
8147 try converting the number to an integer and see if it can do so
8148 without loss of precision.
8152 Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
8154 register char *s = start; /* current position in buffer */
8155 register char *d; /* destination in temp buffer */
8156 register char *e; /* end of temp buffer */
8157 NV nv; /* number read, as a double */
8158 SV *sv = Nullsv; /* place to put the converted number */
8159 bool floatit; /* boolean: int or float? */
8160 char *lastub = 0; /* position of last underbar */
8161 static char number_too_long[] = "Number too long";
8163 /* We use the first character to decide what type of number this is */
8167 Perl_croak(aTHX_ "panic: scan_num");
8169 /* if it starts with a 0, it could be an octal number, a decimal in
8170 0.13 disguise, or a hexadecimal number, or a binary number. */
8174 u holds the "number so far"
8175 shift the power of 2 of the base
8176 (hex == 4, octal == 3, binary == 1)
8177 overflowed was the number more than we can hold?
8179 Shift is used when we add a digit. It also serves as an "are
8180 we in octal/hex/binary?" indicator to disallow hex characters
8186 bool overflowed = FALSE;
8187 bool just_zero = TRUE; /* just plain 0 or binary number? */
8188 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
8189 static char* bases[5] = { "", "binary", "", "octal",
8191 static char* Bases[5] = { "", "Binary", "", "Octal",
8193 static char *maxima[5] = { "",
8194 "0b11111111111111111111111111111111",
8198 char *base, *Base, *max;
8205 } else if (s[1] == 'b') {
8210 /* check for a decimal in disguise */
8211 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
8213 /* so it must be octal */
8220 if (ckWARN(WARN_SYNTAX))
8221 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8222 "Misplaced _ in number");
8226 base = bases[shift];
8227 Base = Bases[shift];
8228 max = maxima[shift];
8230 /* read the rest of the number */
8232 /* x is used in the overflow test,
8233 b is the digit we're adding on. */
8238 /* if we don't mention it, we're done */
8242 /* _ are ignored -- but warned about if consecutive */
8244 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
8245 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8246 "Misplaced _ in number");
8250 /* 8 and 9 are not octal */
8253 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
8257 case '2': case '3': case '4':
8258 case '5': case '6': case '7':
8260 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
8264 b = *s++ & 15; /* ASCII digit -> value of digit */
8268 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
8269 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
8270 /* make sure they said 0x */
8275 /* Prepare to put the digit we have onto the end
8276 of the number so far. We check for overflows.
8282 x = u << shift; /* make room for the digit */
8284 if ((x >> shift) != u
8285 && !(PL_hints & HINT_NEW_BINARY)) {
8288 if (ckWARN_d(WARN_OVERFLOW))
8289 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
8290 "Integer overflow in %s number",
8293 u = x | b; /* add the digit to the end */
8296 n *= nvshift[shift];
8297 /* If an NV has not enough bits in its
8298 * mantissa to represent an UV this summing of
8299 * small low-order numbers is a waste of time
8300 * (because the NV cannot preserve the
8301 * low-order bits anyway): we could just
8302 * remember when did we overflow and in the
8303 * end just multiply n by the right
8311 /* if we get here, we had success: make a scalar value from
8316 /* final misplaced underbar check */
8318 if (ckWARN(WARN_SYNTAX))
8319 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
8324 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
8325 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
8326 "%s number > %s non-portable",
8332 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
8333 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
8334 "%s number > %s non-portable",
8339 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
8340 sv = new_constant(start, s - start, "integer",
8342 else if (PL_hints & HINT_NEW_BINARY)
8343 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
8348 handle decimal numbers.
8349 we're also sent here when we read a 0 as the first digit
8351 case '1': case '2': case '3': case '4': case '5':
8352 case '6': case '7': case '8': case '9': case '.':
8355 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
8358 /* read next group of digits and _ and copy into d */
8359 while (isDIGIT(*s) || *s == '_') {
8360 /* skip underscores, checking for misplaced ones
8364 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
8365 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8366 "Misplaced _ in number");
8370 /* check for end of fixed-length buffer */
8372 Perl_croak(aTHX_ number_too_long);
8373 /* if we're ok, copy the character */
8378 /* final misplaced underbar check */
8379 if (lastub && s == lastub + 1) {
8380 if (ckWARN(WARN_SYNTAX))
8381 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
8384 /* read a decimal portion if there is one. avoid
8385 3..5 being interpreted as the number 3. followed
8388 if (*s == '.' && s[1] != '.') {
8393 if (ckWARN(WARN_SYNTAX))
8394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8395 "Misplaced _ in number");
8399 /* copy, ignoring underbars, until we run out of digits.
8401 for (; isDIGIT(*s) || *s == '_'; s++) {
8402 /* fixed length buffer check */
8404 Perl_croak(aTHX_ number_too_long);
8406 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
8407 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8408 "Misplaced _ in number");
8414 /* fractional part ending in underbar? */
8416 if (ckWARN(WARN_SYNTAX))
8417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8418 "Misplaced _ in number");
8420 if (*s == '.' && isDIGIT(s[1])) {
8421 /* oops, it's really a v-string, but without the "v" */
8427 /* read exponent part, if present */
8428 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
8432 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
8433 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
8435 /* stray preinitial _ */
8437 if (ckWARN(WARN_SYNTAX))
8438 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8439 "Misplaced _ in number");
8443 /* allow positive or negative exponent */
8444 if (*s == '+' || *s == '-')
8447 /* stray initial _ */
8449 if (ckWARN(WARN_SYNTAX))
8450 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8451 "Misplaced _ in number");
8455 /* read digits of exponent */
8456 while (isDIGIT(*s) || *s == '_') {
8459 Perl_croak(aTHX_ number_too_long);
8463 if (ckWARN(WARN_SYNTAX) &&
8464 ((lastub && s == lastub + 1) ||
8465 (!isDIGIT(s[1]) && s[1] != '_')))
8466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8467 "Misplaced _ in number");
8474 /* make an sv from the string */
8478 We try to do an integer conversion first if no characters
8479 indicating "float" have been found.
8484 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
8486 if (flags == IS_NUMBER_IN_UV) {
8488 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
8491 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
8492 if (uv <= (UV) IV_MIN)
8493 sv_setiv(sv, -(IV)uv);
8500 /* terminate the string */
8502 nv = Atof(PL_tokenbuf);
8506 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
8507 (PL_hints & HINT_NEW_INTEGER) )
8508 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
8509 (floatit ? "float" : "integer"),
8513 /* if it starts with a v, it could be a v-string */
8516 sv = NEWSV(92,5); /* preallocate storage space */
8517 s = scan_vstring(s,sv);
8521 /* make the op for the constant and return */
8524 lvalp->opval = newSVOP(OP_CONST, 0, sv);
8526 lvalp->opval = Nullop;
8532 S_scan_formline(pTHX_ register char *s)
8536 SV *stuff = newSVpvn("",0);
8537 bool needargs = FALSE;
8543 #ifdef PERL_STRICT_CR
8544 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
8546 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
8548 if (*t == '\n' || t == PL_bufend) {
8553 if (PL_in_eval && !PL_rsfp) {
8554 eol = memchr(s,'\n',PL_bufend-s);
8559 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8561 for (t = s; t < eol; t++) {
8562 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
8564 goto enough; /* ~~ must be first line in formline */
8566 if (*t == '@' || *t == '^')
8570 sv_catpvn(stuff, s, eol-s);
8571 #ifndef PERL_STRICT_CR
8572 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
8573 char *end = SvPVX(stuff) + SvCUR(stuff);
8585 s = filter_gets(PL_linestr, PL_rsfp, 0);
8586 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
8587 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
8588 PL_last_lop = PL_last_uni = Nullch;
8600 PL_lex_state = LEX_NORMAL;
8601 PL_nextval[PL_nexttoke].ival = 0;
8605 PL_lex_state = LEX_FORMLINE;
8607 if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff)))
8609 else if (PL_encoding)
8610 sv_recode_to_utf8(stuff, PL_encoding);
8612 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
8614 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
8618 SvREFCNT_dec(stuff);
8620 PL_lex_formbrack = 0;
8631 PL_cshlen = strlen(PL_cshname);
8636 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8638 I32 oldsavestack_ix = PL_savestack_ix;
8639 CV* outsidecv = PL_compcv;
8642 assert(SvTYPE(PL_compcv) == SVt_PVCV);
8644 SAVEI32(PL_subline);
8645 save_item(PL_subname);
8646 SAVESPTR(PL_compcv);
8648 PL_compcv = (CV*)NEWSV(1104,0);
8649 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
8650 CvFLAGS(PL_compcv) |= flags;
8652 PL_subline = CopLINE(PL_curcop);
8653 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
8654 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
8655 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
8657 return oldsavestack_ix;
8661 #pragma segment Perl_yylex
8664 Perl_yywarn(pTHX_ char *s)
8666 PL_in_eval |= EVAL_WARNONLY;
8668 PL_in_eval &= ~EVAL_WARNONLY;
8673 Perl_yyerror(pTHX_ char *s)
8676 char *context = NULL;
8680 if (!yychar || (yychar == ';' && !PL_rsfp))
8682 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
8683 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
8686 The code below is removed for NetWare because it abends/crashes on NetWare
8687 when the script has error such as not having the closing quotes like:
8689 Checking of white spaces is anyway done in NetWare code.
8692 while (isSPACE(*PL_oldoldbufptr))
8695 context = PL_oldoldbufptr;
8696 contlen = PL_bufptr - PL_oldoldbufptr;
8698 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
8699 PL_oldbufptr != PL_bufptr) {
8702 The code below is removed for NetWare because it abends/crashes on NetWare
8703 when the script has error such as not having the closing quotes like:
8705 Checking of white spaces is anyway done in NetWare code.
8708 while (isSPACE(*PL_oldbufptr))
8711 context = PL_oldbufptr;
8712 contlen = PL_bufptr - PL_oldbufptr;
8714 else if (yychar > 255)
8715 where = "next token ???";
8716 else if (yychar == -2) { /* YYEMPTY */
8717 if (PL_lex_state == LEX_NORMAL ||
8718 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
8719 where = "at end of line";
8720 else if (PL_lex_inpat)
8721 where = "within pattern";
8723 where = "within string";
8726 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
8728 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
8729 else if (isPRINT_LC(yychar))
8730 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
8732 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
8733 where = SvPVX(where_sv);
8735 msg = sv_2mortal(newSVpv(s, 0));
8736 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
8737 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
8739 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
8741 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
8742 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
8743 Perl_sv_catpvf(aTHX_ msg,
8744 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
8745 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
8748 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
8749 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
8752 if (PL_error_count >= 10) {
8753 if (PL_in_eval && SvCUR(ERRSV))
8754 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
8755 ERRSV, OutCopFILE(PL_curcop));
8757 Perl_croak(aTHX_ "%s has too many errors.\n",
8758 OutCopFILE(PL_curcop));
8761 PL_in_my_stash = Nullhv;
8765 #pragma segment Main
8769 S_swallow_bom(pTHX_ U8 *s)
8772 slen = SvCUR(PL_linestr);
8776 /* UTF-16 little-endian? (or UTF32-LE?) */
8777 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
8778 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
8779 #ifndef PERL_NO_UTF16_FILTER
8780 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
8783 if (PL_bufend > (char*)s) {
8787 filter_add(utf16rev_textfilter, NULL);
8788 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
8789 utf16_to_utf8_reversed(s, news,
8790 PL_bufend - (char*)s - 1,
8792 sv_setpvn(PL_linestr, (const char*)news, newlen);
8794 SvUTF8_on(PL_linestr);
8795 s = (U8*)SvPVX(PL_linestr);
8796 PL_bufend = SvPVX(PL_linestr) + newlen;
8799 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
8804 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
8805 #ifndef PERL_NO_UTF16_FILTER
8806 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
8809 if (PL_bufend > (char *)s) {
8813 filter_add(utf16_textfilter, NULL);
8814 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
8815 utf16_to_utf8(s, news,
8816 PL_bufend - (char*)s,
8818 sv_setpvn(PL_linestr, (const char*)news, newlen);
8820 SvUTF8_on(PL_linestr);
8821 s = (U8*)SvPVX(PL_linestr);
8822 PL_bufend = SvPVX(PL_linestr) + newlen;
8825 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
8830 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
8831 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
8838 if (s[2] == 0xFE && s[3] == 0xFF) {
8839 /* UTF-32 big-endian */
8840 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
8843 else if (s[2] == 0 && s[3] != 0) {
8846 * are a good indicator of UTF-16BE. */
8847 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
8852 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
8855 * are a good indicator of UTF-16LE. */
8856 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
8865 * Restore a source filter.
8869 restore_rsfp(pTHX_ void *f)
8871 PerlIO *fp = (PerlIO*)f;
8873 if (PL_rsfp == PerlIO_stdin())
8874 PerlIO_clearerr(PL_rsfp);
8875 else if (PL_rsfp && (PL_rsfp != fp))
8876 PerlIO_close(PL_rsfp);
8880 #ifndef PERL_NO_UTF16_FILTER
8882 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
8884 STRLEN old = SvCUR(sv);
8885 I32 count = FILTER_READ(idx+1, sv, maxlen);
8886 DEBUG_P(PerlIO_printf(Perl_debug_log,
8887 "utf16_textfilter(%p): %d %d (%d)\n",
8888 utf16_textfilter, idx, maxlen, (int) count));
8892 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
8893 Copy(SvPVX(sv), tmps, old, char);
8894 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
8895 SvCUR(sv) - old, &newlen);
8896 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
8898 DEBUG_P({sv_dump(sv);});
8903 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
8905 STRLEN old = SvCUR(sv);
8906 I32 count = FILTER_READ(idx+1, sv, maxlen);
8907 DEBUG_P(PerlIO_printf(Perl_debug_log,
8908 "utf16rev_textfilter(%p): %d %d (%d)\n",
8909 utf16rev_textfilter, idx, maxlen, (int) count));
8913 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
8914 Copy(SvPVX(sv), tmps, old, char);
8915 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
8916 SvCUR(sv) - old, &newlen);
8917 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
8919 DEBUG_P({ sv_dump(sv); });
8925 Returns a pointer to the next character after the parsed
8926 vstring, as well as updating the passed in sv.
8928 Function must be called like
8931 s = scan_vstring(s,sv);
8933 The sv should already be large enough to store the vstring
8934 passed in, for performance reasons.
8939 Perl_scan_vstring(pTHX_ char *s, SV *sv)
8943 if (*pos == 'v') pos++; /* get past 'v' */
8944 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
8947 /* this may not be a v-string if followed by => */
8949 while (next < PL_bufend && isSPACE(*next))
8951 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
8952 /* return string not v-string */
8953 sv_setpvn(sv,(char *)s,pos-s);
8958 if (!isALPHA(*pos)) {
8960 U8 tmpbuf[UTF8_MAXBYTES+1];
8963 if (*s == 'v') s++; /* get past 'v' */
8965 sv_setpvn(sv, "", 0);
8970 /* this is atoi() that tolerates underscores */
8973 while (--end >= s) {
8978 rev += (*end - '0') * mult;
8980 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
8981 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
8982 "Integer overflow in decimal number");
8986 if (rev > 0x7FFFFFFF)
8987 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
8989 /* Append native character for the rev point */
8990 tmpend = uvchr_to_utf8(tmpbuf, rev);
8991 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
8992 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
8994 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
9000 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
9004 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);