3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
288 const char *name = Nullch;
289 enum token_type type = TOKENTYPE_NONE;
290 const struct debug_tokens *p;
291 SV* const report = newSVpvs("<== ");
293 for (p = debug_tokens; p->token; p++) {
294 if (p->token == (int)rv) {
301 Perl_sv_catpv(aTHX_ report, name);
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
305 sv_catpvs(report, "EOF");
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
322 case TOKENTYPE_OPVAL:
324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
325 PL_op_name[yylval.opval->op_type]);
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
333 sv_catpvs(report, "(opval=null)");
336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
342 /* print the buffer with suitable escapes */
345 S_printbuf(pTHX_ const char* fmt, const char* s)
347 SV* const tmp = newSVpvs("");
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
362 S_ao(pTHX_ int toketype)
365 if (*PL_bufptr == '=') {
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
392 S_no_op(pTHX_ const char *what, char *s)
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
403 if (ckWARN_d(WARN_SYNTAX)) {
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
434 S_missingterm(pTHX_ char *s)
440 char * const nl = strrchr(s,'\n');
446 iscntrl(PL_multi_close)
448 PL_multi_close < 32 || PL_multi_close == 127
452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
457 *tmpbuf = (char)PL_multi_close;
461 q = strchr(s,'"') ? '\'' : '"';
462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
465 #define FEATURE_IS_ENABLED(name) \
466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
473 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
476 HV * const hinthv = GvHV(PL_hintgv);
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
488 Perl_deprecate(pTHX_ const char *s)
490 if (ckWARN(WARN_DEPRECATED))
491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
495 Perl_deprecate_old(pTHX_ const char *s)
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
507 "Use of %s is deprecated", s);
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
515 #ifdef PERL_CR_FILTER
519 register const char *s = SvPVX_const(sv);
520 register const char * const e = s + SvCUR(sv);
521 /* outer loop optimized to do nothing if there are no CR-LFs */
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
528 if (*s == '\r' && s[1] == '\n')
539 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
542 if (count > 0 && !maxlen)
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
555 Perl_lex_start(pTHX_ SV *line)
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
566 SAVEVPTR(PL_lex_inpat);
567 SAVEI32(PL_lex_inwhat);
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
574 SAVEI32(PL_nexttoke);
576 SAVECOPLINE(PL_curcop);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
590 SAVEI32(PL_sublex_info.sub_inwhat);
591 SAVESPTR(PL_lex_repl);
593 SAVEINT(PL_lex_expect);
595 PL_lex_state = LEX_NORMAL;
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
602 *PL_lex_casestack = '\0';
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
610 PL_sublex_info.sub_inwhat = 0;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
614 s = SvPV_const(PL_linestr, len);
615 if (!len || s[len-1] != ';') {
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
618 sv_catpvs(PL_linestr, "\n;");
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
623 PL_last_lop = PL_last_uni = Nullch;
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
637 PL_doextract = FALSE;
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
644 * increments the current line number in CopLINE(PL_curcop) and checks
645 * to see whether the line starts with a comment of the form
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
651 S_incline(pTHX_ char *s)
659 CopLINE_inc(PL_curcop);
662 while (SPACE_OR_TAB(*s)) s++;
663 if (strnEQ(s, "line", 4))
667 if (SPACE_OR_TAB(*s))
671 while (SPACE_OR_TAB(*s)) s++;
677 while (SPACE_OR_TAB(*s))
679 if (*s == '"' && (t = strchr(s+1, '"'))) {
684 for (t = s; !isSPACE(*t); t++) ;
687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
696 const char * const cf = CopFILE(PL_curcop);
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
732 CopFILE_free(PL_curcop);
733 CopFILE_set(PL_curcop, s);
736 CopLINE_set(PL_curcop, atoi(n)-1);
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
746 S_skipspace(pTHX_ register char *s)
749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
750 while (s < PL_bufend && SPACE_OR_TAB(*s))
756 SSize_t oldprevlen, oldoldprevlen;
757 SSize_t oldloplen = 0, oldunilen = 0;
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
769 if (PL_in_eval && !PL_rsfp) {
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
784 /* try to recharge the buffer */
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
788 /* end of file. Add on the -p or -n magic */
791 ";}continue{print or die qq(-p destination: $!\\n);}");
792 PL_minus_n = PL_minus_p = 0;
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
799 sv_setpvn(PL_linestr,";", 1);
801 /* reset variables for next time we lex */
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
805 PL_last_lop = PL_last_uni = Nullch;
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
819 (void)PerlIO_close(PL_rsfp);
824 /* not at end of file, so we only read another line */
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
829 oldunilen = PL_last_uni - PL_bufend;
831 oldloplen = PL_last_lop - PL_bufend;
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
838 PL_last_uni = s + oldunilen;
840 PL_last_lop = s + oldloplen;
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
847 SV * const sv = NEWSV(85,0);
849 sv_upgrade(sv, SVt_PVMG);
850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
874 if (PL_oldoldbufptr != PL_last_uni)
876 while (isSPACE(*PL_last_uni))
878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
879 if ((t = strchr(s, '(')) && t < PL_bufptr)
882 /* XXX Things like this are just so nasty. We shouldn't be modifying
883 source code, even if we realquick set it back. */
884 if (ckWARN_d(WARN_AMBIGUOUS)){
887 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
888 "Warning: Use of \"%s\" without parentheses is ambiguous",
895 * LOP : macro to build a list operator. Its behaviour has been replaced
896 * with a subroutine, S_lop() for which LOP is just another name.
899 #define LOP(f,x) return lop(f,x,s)
903 * Build a list operator (or something that might be one). The rules:
904 * - if we have a next token, then it's a list operator [why?]
905 * - if the next thing is an opening paren, then it's a function
906 * - else it's a list operator
910 S_lop(pTHX_ I32 f, int x, char *s)
917 PL_last_lop = PL_oldbufptr;
918 PL_last_lop_op = (OPCODE)f;
920 return REPORT(LSTOP);
927 return REPORT(LSTOP);
932 * When the lexer realizes it knows the next token (for instance,
933 * it is reordering tokens for the parser) then it can call S_force_next
934 * to know what token to return the next time the lexer is called. Caller
935 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
936 * handles the token correctly.
940 S_force_next(pTHX_ I32 type)
943 PL_nexttype[PL_nexttoke] = type;
945 if (PL_lex_state != LEX_KNOWNEXT) {
946 PL_lex_defer = PL_lex_state;
947 PL_lex_expect = PL_expect;
948 PL_lex_state = LEX_KNOWNEXT;
953 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
956 SV * const sv = newSVpvn(start,len);
957 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
964 * When the lexer knows the next thing is a word (for instance, it has
965 * just seen -> and it knows that the next char is a word char, then
966 * it calls S_force_word to stick the next word into the PL_next lookahead.
969 * char *start : buffer position (must be within PL_linestr)
970 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
971 * int check_keyword : if true, Perl checks to make sure the word isn't
972 * a keyword (do this if the word is a label, e.g. goto FOO)
973 * int allow_pack : if true, : characters will also be allowed (require,
975 * int allow_initial_tick : used by the "sub" lexer only.
979 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
985 start = skipspace(start);
987 if (isIDFIRST_lazy_if(s,UTF) ||
988 (allow_pack && *s == ':') ||
989 (allow_initial_tick && *s == '\'') )
991 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
992 if (check_keyword && keyword(PL_tokenbuf, len))
994 if (token == METHOD) {
999 PL_expect = XOPERATOR;
1002 PL_nextval[PL_nexttoke].opval
1003 = (OP*)newSVOP(OP_CONST,0,
1004 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1005 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
1013 * Called when the lexer wants $foo *foo &foo etc, but the program
1014 * text only contains the "foo" portion. The first argument is a pointer
1015 * to the "foo", and the second argument is the type symbol to prefix.
1016 * Forces the next token to be a "WORD".
1017 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1021 S_force_ident(pTHX_ register const char *s, int kind)
1025 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1026 PL_nextval[PL_nexttoke].opval = o;
1029 o->op_private = OPpCONST_ENTERED;
1030 /* XXX see note in pp_entereval() for why we forgo typo
1031 warnings if the symbol must be introduced in an eval.
1033 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1034 kind == '$' ? SVt_PV :
1035 kind == '@' ? SVt_PVAV :
1036 kind == '%' ? SVt_PVHV :
1044 Perl_str_to_version(pTHX_ SV *sv)
1049 const char *start = SvPV_const(sv,len);
1050 const char * const end = start + len;
1051 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1052 while (start < end) {
1056 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1061 retval += ((NV)n)/nshift;
1070 * Forces the next token to be a version number.
1071 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1072 * and if "guessing" is TRUE, then no new token is created (and the caller
1073 * must use an alternative parsing method).
1077 S_force_version(pTHX_ char *s, int guessing)
1080 OP *version = Nullop;
1089 while (isDIGIT(*d) || *d == '_' || *d == '.')
1091 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1093 s = scan_num(s, &yylval);
1094 version = yylval.opval;
1095 ver = cSVOPx(version)->op_sv;
1096 if (SvPOK(ver) && !SvNIOK(ver)) {
1097 SvUPGRADE(ver, SVt_PVNV);
1098 SvNV_set(ver, str_to_version(ver));
1099 SvNOK_on(ver); /* hint that it is a version */
1106 /* NOTE: The parser sees the package name and the VERSION swapped */
1107 PL_nextval[PL_nexttoke].opval = version;
1115 * Tokenize a quoted string passed in as an SV. It finds the next
1116 * chunk, up to end of string or a backslash. It may make a new
1117 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1122 S_tokeq(pTHX_ SV *sv)
1126 register char *send;
1134 s = SvPV_force(sv, len);
1135 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1138 while (s < send && *s != '\\')
1143 if ( PL_hints & HINT_NEW_STRING ) {
1144 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1150 if (s + 1 < send && (s[1] == '\\'))
1151 s++; /* all that, just for this */
1156 SvCUR_set(sv, d - SvPVX_const(sv));
1158 if ( PL_hints & HINT_NEW_STRING )
1159 return new_constant(NULL, 0, "q", sv, pv, "q");
1164 * Now come three functions related to double-quote context,
1165 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1166 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1167 * interact with PL_lex_state, and create fake ( ... ) argument lists
1168 * to handle functions and concatenation.
1169 * They assume that whoever calls them will be setting up a fake
1170 * join call, because each subthing puts a ',' after it. This lets
1173 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1175 * (I'm not sure whether the spurious commas at the end of lcfirst's
1176 * arguments and join's arguments are created or not).
1181 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1183 * Pattern matching will set PL_lex_op to the pattern-matching op to
1184 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1186 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1188 * Everything else becomes a FUNC.
1190 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1191 * had an OP_CONST or OP_READLINE). This just sets us up for a
1192 * call to S_sublex_push().
1196 S_sublex_start(pTHX)
1199 register const I32 op_type = yylval.ival;
1201 if (op_type == OP_NULL) {
1202 yylval.opval = PL_lex_op;
1206 if (op_type == OP_CONST || op_type == OP_READLINE) {
1207 SV *sv = tokeq(PL_lex_stuff);
1209 if (SvTYPE(sv) == SVt_PVIV) {
1210 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1212 const char *p = SvPV_const(sv, len);
1213 SV * const nsv = newSVpvn(p, len);
1219 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1220 PL_lex_stuff = Nullsv;
1221 /* Allow <FH> // "foo" */
1222 if (op_type == OP_READLINE)
1223 PL_expect = XTERMORDORDOR;
1227 PL_sublex_info.super_state = PL_lex_state;
1228 PL_sublex_info.sub_inwhat = op_type;
1229 PL_sublex_info.sub_op = PL_lex_op;
1230 PL_lex_state = LEX_INTERPPUSH;
1234 yylval.opval = PL_lex_op;
1244 * Create a new scope to save the lexing state. The scope will be
1245 * ended in S_sublex_done. Returns a '(', starting the function arguments
1246 * to the uc, lc, etc. found before.
1247 * Sets PL_lex_state to LEX_INTERPCONCAT.
1256 PL_lex_state = PL_sublex_info.super_state;
1257 SAVEI32(PL_lex_dojoin);
1258 SAVEI32(PL_lex_brackets);
1259 SAVEI32(PL_lex_casemods);
1260 SAVEI32(PL_lex_starts);
1261 SAVEI32(PL_lex_state);
1262 SAVEVPTR(PL_lex_inpat);
1263 SAVEI32(PL_lex_inwhat);
1264 SAVECOPLINE(PL_curcop);
1265 SAVEPPTR(PL_bufptr);
1266 SAVEPPTR(PL_bufend);
1267 SAVEPPTR(PL_oldbufptr);
1268 SAVEPPTR(PL_oldoldbufptr);
1269 SAVEPPTR(PL_last_lop);
1270 SAVEPPTR(PL_last_uni);
1271 SAVEPPTR(PL_linestart);
1272 SAVESPTR(PL_linestr);
1273 SAVEGENERICPV(PL_lex_brackstack);
1274 SAVEGENERICPV(PL_lex_casestack);
1276 PL_linestr = PL_lex_stuff;
1277 PL_lex_stuff = Nullsv;
1279 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1280 = SvPVX(PL_linestr);
1281 PL_bufend += SvCUR(PL_linestr);
1282 PL_last_lop = PL_last_uni = Nullch;
1283 SAVEFREESV(PL_linestr);
1285 PL_lex_dojoin = FALSE;
1286 PL_lex_brackets = 0;
1287 Newx(PL_lex_brackstack, 120, char);
1288 Newx(PL_lex_casestack, 12, char);
1289 PL_lex_casemods = 0;
1290 *PL_lex_casestack = '\0';
1292 PL_lex_state = LEX_INTERPCONCAT;
1293 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1295 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1296 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1297 PL_lex_inpat = PL_sublex_info.sub_op;
1299 PL_lex_inpat = Nullop;
1306 * Restores lexer state after a S_sublex_push.
1313 if (!PL_lex_starts++) {
1314 SV * const sv = newSVpvs("");
1315 if (SvUTF8(PL_linestr))
1317 PL_expect = XOPERATOR;
1318 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1322 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1323 PL_lex_state = LEX_INTERPCASEMOD;
1327 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1328 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1329 PL_linestr = PL_lex_repl;
1331 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1332 PL_bufend += SvCUR(PL_linestr);
1333 PL_last_lop = PL_last_uni = Nullch;
1334 SAVEFREESV(PL_linestr);
1335 PL_lex_dojoin = FALSE;
1336 PL_lex_brackets = 0;
1337 PL_lex_casemods = 0;
1338 *PL_lex_casestack = '\0';
1340 if (SvEVALED(PL_lex_repl)) {
1341 PL_lex_state = LEX_INTERPNORMAL;
1343 /* we don't clear PL_lex_repl here, so that we can check later
1344 whether this is an evalled subst; that means we rely on the
1345 logic to ensure sublex_done() is called again only via the
1346 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1349 PL_lex_state = LEX_INTERPCONCAT;
1350 PL_lex_repl = Nullsv;
1356 PL_bufend = SvPVX(PL_linestr);
1357 PL_bufend += SvCUR(PL_linestr);
1358 PL_expect = XOPERATOR;
1359 PL_sublex_info.sub_inwhat = 0;
1367 Extracts a pattern, double-quoted string, or transliteration. This
1370 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1371 processing a pattern (PL_lex_inpat is true), a transliteration
1372 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1374 Returns a pointer to the character scanned up to. Iff this is
1375 advanced from the start pointer supplied (ie if anything was
1376 successfully parsed), will leave an OP for the substring scanned
1377 in yylval. Caller must intuit reason for not parsing further
1378 by looking at the next characters herself.
1382 double-quoted style: \r and \n
1383 regexp special ones: \D \s
1385 backrefs: \1 (deprecated in substitution replacements)
1386 case and quoting: \U \Q \E
1387 stops on @ and $, but not for $ as tail anchor
1389 In transliterations:
1390 characters are VERY literal, except for - not at the start or end
1391 of the string, which indicates a range. scan_const expands the
1392 range to the full set of intermediate characters.
1394 In double-quoted strings:
1396 double-quoted style: \r and \n
1398 backrefs: \1 (deprecated)
1399 case and quoting: \U \Q \E
1402 scan_const does *not* construct ops to handle interpolated strings.
1403 It stops processing as soon as it finds an embedded $ or @ variable
1404 and leaves it to the caller to work out what's going on.
1406 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1408 $ in pattern could be $foo or could be tail anchor. Assumption:
1409 it's a tail anchor if $ is the last thing in the string, or if it's
1410 followed by one of ")| \n\t"
1412 \1 (backreferences) are turned into $1
1414 The structure of the code is
1415 while (there's a character to process) {
1416 handle transliteration ranges
1417 skip regexp comments
1418 skip # initiated comments in //x patterns
1419 check for embedded @foo
1420 check for embedded scalars
1422 leave intact backslashes from leave (below)
1423 deprecate \1 in strings and sub replacements
1424 handle string-changing backslashes \l \U \Q \E, etc.
1425 switch (what was escaped) {
1426 handle - in a transliteration (becomes a literal -)
1427 handle \132 octal characters
1428 handle 0x15 hex characters
1429 handle \cV (control V)
1430 handle printf backslashes (\f, \r, \n, etc)
1432 } (end if backslash)
1433 } (end while character to read)
1438 S_scan_const(pTHX_ char *start)
1441 register char *send = PL_bufend; /* end of the constant */
1442 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1443 register char *s = start; /* start of the constant */
1444 register char *d = SvPVX(sv); /* destination for copies */
1445 bool dorange = FALSE; /* are we in a translit range? */
1446 bool didrange = FALSE; /* did we just finish a range? */
1447 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1448 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1451 UV literal_endpoint = 0;
1454 const char *leaveit = /* set of acceptably-backslashed characters */
1456 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1459 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1460 /* If we are doing a trans and we know we want UTF8 set expectation */
1461 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1462 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1466 while (s < send || dorange) {
1467 /* get transliterations out of the way (they're most literal) */
1468 if (PL_lex_inwhat == OP_TRANS) {
1469 /* expand a range A-Z to the full set of characters. AIE! */
1471 I32 i; /* current expanded character */
1472 I32 min; /* first character in range */
1473 I32 max; /* last character in range */
1476 char * const c = (char*)utf8_hop((U8*)d, -1);
1480 *c = (char)UTF_TO_NATIVE(0xff);
1481 /* mark the range as done, and continue */
1487 i = d - SvPVX_const(sv); /* remember current offset */
1488 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1489 d = SvPVX(sv) + i; /* refresh d after realloc */
1490 d -= 2; /* eat the first char and the - */
1492 min = (U8)*d; /* first char in range */
1493 max = (U8)d[1]; /* last char in range */
1497 "Invalid range \"%c-%c\" in transliteration operator",
1498 (char)min, (char)max);
1502 if (literal_endpoint == 2 &&
1503 ((isLOWER(min) && isLOWER(max)) ||
1504 (isUPPER(min) && isUPPER(max)))) {
1506 for (i = min; i <= max; i++)
1508 *d++ = NATIVE_TO_NEED(has_utf8,i);
1510 for (i = min; i <= max; i++)
1512 *d++ = NATIVE_TO_NEED(has_utf8,i);
1517 for (i = min; i <= max; i++)
1520 /* mark the range as done, and continue */
1524 literal_endpoint = 0;
1529 /* range begins (ignore - as first or last char) */
1530 else if (*s == '-' && s+1 < send && s != start) {
1532 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1535 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1545 literal_endpoint = 0;
1550 /* if we get here, we're not doing a transliteration */
1552 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1553 except for the last char, which will be done separately. */
1554 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1556 while (s+1 < send && *s != ')')
1557 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1559 else if (s[2] == '{' /* This should match regcomp.c */
1560 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1563 char *regparse = s + (s[2] == '{' ? 3 : 4);
1566 while (count && (c = *regparse)) {
1567 if (c == '\\' && regparse[1])
1575 if (*regparse != ')')
1576 regparse--; /* Leave one char for continuation. */
1577 while (s < regparse)
1578 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1582 /* likewise skip #-initiated comments in //x patterns */
1583 else if (*s == '#' && PL_lex_inpat &&
1584 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1585 while (s+1 < send && *s != '\n')
1586 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1589 /* check for embedded arrays
1590 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1592 else if (*s == '@' && s[1]
1593 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1596 /* check for embedded scalars. only stop if we're sure it's a
1599 else if (*s == '$') {
1600 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1602 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1603 break; /* in regexp, $ might be tail anchor */
1606 /* End of else if chain - OP_TRANS rejoin rest */
1609 if (*s == '\\' && s+1 < send) {
1612 /* some backslashes we leave behind */
1613 if (*leaveit && *s && strchr(leaveit, *s)) {
1614 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1615 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1619 /* deprecate \1 in strings and substitution replacements */
1620 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1621 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1623 if (ckWARN(WARN_SYNTAX))
1624 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1629 /* string-change backslash escapes */
1630 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1635 /* if we get here, it's either a quoted -, or a digit */
1638 /* quoted - in transliterations */
1640 if (PL_lex_inwhat == OP_TRANS) {
1650 Perl_warner(aTHX_ packWARN(WARN_MISC),
1651 "Unrecognized escape \\%c passed through",
1653 /* default action is to copy the quoted character */
1654 goto default_action;
1657 /* \132 indicates an octal constant */
1658 case '0': case '1': case '2': case '3':
1659 case '4': case '5': case '6': case '7':
1663 uv = grok_oct(s, &len, &flags, NULL);
1666 goto NUM_ESCAPE_INSERT;
1668 /* \x24 indicates a hex constant */
1672 char* const e = strchr(s, '}');
1673 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1674 PERL_SCAN_DISALLOW_PREFIX;
1679 yyerror("Missing right brace on \\x{}");
1683 uv = grok_hex(s, &len, &flags, NULL);
1689 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1690 uv = grok_hex(s, &len, &flags, NULL);
1696 /* Insert oct or hex escaped character.
1697 * There will always enough room in sv since such
1698 * escapes will be longer than any UTF-8 sequence
1699 * they can end up as. */
1701 /* We need to map to chars to ASCII before doing the tests
1704 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1705 if (!has_utf8 && uv > 255) {
1706 /* Might need to recode whatever we have
1707 * accumulated so far if it contains any
1710 * (Can't we keep track of that and avoid
1711 * this rescan? --jhi)
1715 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1716 if (!NATIVE_IS_INVARIANT(*c)) {
1721 const STRLEN offset = d - SvPVX_const(sv);
1723 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1727 while (src >= (const U8 *)SvPVX_const(sv)) {
1728 if (!NATIVE_IS_INVARIANT(*src)) {
1729 const U8 ch = NATIVE_TO_ASCII(*src);
1730 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1731 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1741 if (has_utf8 || uv > 255) {
1742 d = (char*)uvchr_to_utf8((U8*)d, uv);
1744 if (PL_lex_inwhat == OP_TRANS &&
1745 PL_sublex_info.sub_op) {
1746 PL_sublex_info.sub_op->op_private |=
1747 (PL_lex_repl ? OPpTRANS_FROM_UTF
1760 /* \N{LATIN SMALL LETTER A} is a named character */
1764 char* e = strchr(s, '}');
1770 yyerror("Missing right brace on \\N{}");
1774 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1776 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1777 PERL_SCAN_DISALLOW_PREFIX;
1780 uv = grok_hex(s, &len, &flags, NULL);
1782 goto NUM_ESCAPE_INSERT;
1784 res = newSVpvn(s + 1, e - s - 1);
1785 res = new_constant( Nullch, 0, "charnames",
1786 res, Nullsv, "\\N{...}" );
1788 sv_utf8_upgrade(res);
1789 str = SvPV_const(res,len);
1790 #ifdef EBCDIC_NEVER_MIND
1791 /* charnames uses pack U and that has been
1792 * recently changed to do the below uni->native
1793 * mapping, so this would be redundant (and wrong,
1794 * the code point would be doubly converted).
1795 * But leave this in just in case the pack U change
1796 * gets revoked, but the semantics is still
1797 * desireable for charnames. --jhi */
1799 UV uv = utf8_to_uvchr((const U8*)str, 0);
1802 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1804 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1805 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1806 str = SvPV_const(res, len);
1810 if (!has_utf8 && SvUTF8(res)) {
1811 const char * const ostart = SvPVX_const(sv);
1812 SvCUR_set(sv, d - ostart);
1815 sv_utf8_upgrade(sv);
1816 /* this just broke our allocation above... */
1817 SvGROW(sv, (STRLEN)(send - start));
1818 d = SvPVX(sv) + SvCUR(sv);
1821 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1822 const char * const odest = SvPVX_const(sv);
1824 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1825 d = SvPVX(sv) + (d - odest);
1827 Copy(str, d, len, char);
1834 yyerror("Missing braces on \\N{}");
1837 /* \c is a control character */
1846 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1849 yyerror("Missing control char name in \\c");
1853 /* printf-style backslashes, formfeeds, newlines, etc */
1855 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1858 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1861 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1864 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1867 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1870 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1873 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1879 } /* end if (backslash) */
1886 /* If we started with encoded form, or already know we want it
1887 and then encode the next character */
1888 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1890 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1891 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1894 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1895 const STRLEN off = d - SvPVX_const(sv);
1896 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1898 d = (char*)uvchr_to_utf8((U8*)d, uv);
1902 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1904 } /* while loop to process each character */
1906 /* terminate the string and set up the sv */
1908 SvCUR_set(sv, d - SvPVX_const(sv));
1909 if (SvCUR(sv) >= SvLEN(sv))
1910 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1913 if (PL_encoding && !has_utf8) {
1914 sv_recode_to_utf8(sv, PL_encoding);
1920 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1921 PL_sublex_info.sub_op->op_private |=
1922 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1926 /* shrink the sv if we allocated more than we used */
1927 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1928 SvPV_shrink_to_cur(sv);
1931 /* return the substring (via yylval) only if we parsed anything */
1932 if (s > PL_bufptr) {
1933 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1934 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1936 ( PL_lex_inwhat == OP_TRANS
1938 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1941 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1948 * Returns TRUE if there's more to the expression (e.g., a subscript),
1951 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1953 * ->[ and ->{ return TRUE
1954 * { and [ outside a pattern are always subscripts, so return TRUE
1955 * if we're outside a pattern and it's not { or [, then return FALSE
1956 * if we're in a pattern and the first char is a {
1957 * {4,5} (any digits around the comma) returns FALSE
1958 * if we're in a pattern and the first char is a [
1960 * [SOMETHING] has a funky algorithm to decide whether it's a
1961 * character class or not. It has to deal with things like
1962 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1963 * anything else returns TRUE
1966 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1969 S_intuit_more(pTHX_ register char *s)
1972 if (PL_lex_brackets)
1974 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1976 if (*s != '{' && *s != '[')
1981 /* In a pattern, so maybe we have {n,m}. */
1998 /* On the other hand, maybe we have a character class */
2001 if (*s == ']' || *s == '^')
2004 /* this is terrifying, and it works */
2005 int weight = 2; /* let's weigh the evidence */
2007 unsigned char un_char = 255, last_un_char;
2008 const char * const send = strchr(s,']');
2009 char tmpbuf[sizeof PL_tokenbuf * 4];
2011 if (!send) /* has to be an expression */
2014 Zero(seen,256,char);
2017 else if (isDIGIT(*s)) {
2019 if (isDIGIT(s[1]) && s[2] == ']')
2025 for (; s < send; s++) {
2026 last_un_char = un_char;
2027 un_char = (unsigned char)*s;
2032 weight -= seen[un_char] * 10;
2033 if (isALNUM_lazy_if(s+1,UTF)) {
2034 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2035 if ((int)strlen(tmpbuf) > 1
2036 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2041 else if (*s == '$' && s[1] &&
2042 strchr("[#!%*<>()-=",s[1])) {
2043 if (/*{*/ strchr("])} =",s[2]))
2052 if (strchr("wds]",s[1]))
2054 else if (seen['\''] || seen['"'])
2056 else if (strchr("rnftbxcav",s[1]))
2058 else if (isDIGIT(s[1])) {
2060 while (s[1] && isDIGIT(s[1]))
2070 if (strchr("aA01! ",last_un_char))
2072 if (strchr("zZ79~",s[1]))
2074 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2075 weight -= 5; /* cope with negative subscript */
2078 if (!isALNUM(last_un_char)
2079 && !(last_un_char == '$' || last_un_char == '@'
2080 || last_un_char == '&')
2081 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2086 if (keyword(tmpbuf, d - tmpbuf))
2089 if (un_char == last_un_char + 1)
2091 weight -= seen[un_char];
2096 if (weight >= 0) /* probably a character class */
2106 * Does all the checking to disambiguate
2108 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2109 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2111 * First argument is the stuff after the first token, e.g. "bar".
2113 * Not a method if bar is a filehandle.
2114 * Not a method if foo is a subroutine prototyped to take a filehandle.
2115 * Not a method if it's really "Foo $bar"
2116 * Method if it's "foo $bar"
2117 * Not a method if it's really "print foo $bar"
2118 * Method if it's really "foo package::" (interpreted as package->foo)
2119 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2120 * Not a method if bar is a filehandle or package, but is quoted with
2125 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2128 char *s = start + (*start == '$');
2129 char tmpbuf[sizeof PL_tokenbuf];
2134 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2138 const char *proto = SvPVX_const(cv);
2149 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2150 /* start is the beginning of the possible filehandle/object,
2151 * and s is the end of it
2152 * tmpbuf is a copy of it
2155 if (*start == '$') {
2156 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2161 return *s == '(' ? FUNCMETH : METHOD;
2163 if (!keyword(tmpbuf, len)) {
2164 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2169 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2170 if (indirgv && GvCVu(indirgv))
2172 /* filehandle or package name makes it a method */
2173 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2175 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2176 return 0; /* no assumptions -- "=>" quotes bearword */
2178 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2179 newSVpvn(tmpbuf,len));
2180 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2184 return *s == '(' ? FUNCMETH : METHOD;
2192 * Return a string of Perl code to load the debugger. If PERL5DB
2193 * is set, it will return the contents of that, otherwise a
2194 * compile-time require of perl5db.pl.
2202 const char * const pdb = PerlEnv_getenv("PERL5DB");
2206 SETERRNO(0,SS_NORMAL);
2207 return "BEGIN { require 'perl5db.pl' }";
2213 /* Encoded script support. filter_add() effectively inserts a
2214 * 'pre-processing' function into the current source input stream.
2215 * Note that the filter function only applies to the current source file
2216 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2218 * The datasv parameter (which may be NULL) can be used to pass
2219 * private data to this instance of the filter. The filter function
2220 * can recover the SV using the FILTER_DATA macro and use it to
2221 * store private buffers and state information.
2223 * The supplied datasv parameter is upgraded to a PVIO type
2224 * and the IoDIRP/IoANY field is used to store the function pointer,
2225 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2226 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2227 * private use must be set using malloc'd pointers.
2231 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2237 if (!PL_rsfp_filters)
2238 PL_rsfp_filters = newAV();
2240 datasv = NEWSV(255,0);
2241 SvUPGRADE(datasv, SVt_PVIO);
2242 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2243 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2244 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2245 IoANY(datasv), SvPV_nolen(datasv)));
2246 av_unshift(PL_rsfp_filters, 1);
2247 av_store(PL_rsfp_filters, 0, datasv) ;
2252 /* Delete most recently added instance of this filter function. */
2254 Perl_filter_del(pTHX_ filter_t funcp)
2260 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2262 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2264 /* if filter is on top of stack (usual case) just pop it off */
2265 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2266 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2267 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2268 IoANY(datasv) = (void *)NULL;
2269 sv_free(av_pop(PL_rsfp_filters));
2273 /* we need to search for the correct entry and clear it */
2274 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2278 /* Invoke the idxth filter function for the current rsfp. */
2279 /* maxlen 0 = read one text line */
2281 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2287 if (!PL_rsfp_filters)
2289 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2290 /* Provide a default input filter to make life easy. */
2291 /* Note that we append to the line. This is handy. */
2292 DEBUG_P(PerlIO_printf(Perl_debug_log,
2293 "filter_read %d: from rsfp\n", idx));
2297 const int old_len = SvCUR(buf_sv);
2299 /* ensure buf_sv is large enough */
2300 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2301 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2302 if (PerlIO_error(PL_rsfp))
2303 return -1; /* error */
2305 return 0 ; /* end of file */
2307 SvCUR_set(buf_sv, old_len + len) ;
2310 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2311 if (PerlIO_error(PL_rsfp))
2312 return -1; /* error */
2314 return 0 ; /* end of file */
2317 return SvCUR(buf_sv);
2319 /* Skip this filter slot if filter has been deleted */
2320 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2321 DEBUG_P(PerlIO_printf(Perl_debug_log,
2322 "filter_read %d: skipped (filter deleted)\n",
2324 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2326 /* Get function pointer hidden within datasv */
2327 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2328 DEBUG_P(PerlIO_printf(Perl_debug_log,
2329 "filter_read %d: via function %p (%s)\n",
2330 idx, datasv, SvPV_nolen_const(datasv)));
2331 /* Call function. The function is expected to */
2332 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2333 /* Return: <0:error, =0:eof, >0:not eof */
2334 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2338 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2341 #ifdef PERL_CR_FILTER
2342 if (!PL_rsfp_filters) {
2343 filter_add(S_cr_textfilter,NULL);
2346 if (PL_rsfp_filters) {
2348 SvCUR_set(sv, 0); /* start with empty line */
2349 if (FILTER_READ(0, sv, 0) > 0)
2350 return ( SvPVX(sv) ) ;
2355 return (sv_gets(sv, fp, append));
2359 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2364 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2368 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2369 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2371 return GvHV(gv); /* Foo:: */
2374 /* use constant CLASS => 'MyClass' */
2375 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2377 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2378 pkgname = SvPV_nolen_const(sv);
2382 return gv_stashpv(pkgname, FALSE);
2386 S_tokenize_use(pTHX_ int is_use, char *s) {
2388 if (PL_expect != XSTATE)
2389 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2390 is_use ? "use" : "no"));
2392 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2393 s = force_version(s, TRUE);
2394 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2395 PL_nextval[PL_nexttoke].opval = Nullop;
2398 else if (*s == 'v') {
2399 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2400 s = force_version(s, FALSE);
2404 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2405 s = force_version(s, FALSE);
2407 yylval.ival = is_use;
2411 static const char* const exp_name[] =
2412 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2413 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2420 Works out what to call the token just pulled out of the input
2421 stream. The yacc parser takes care of taking the ops we return and
2422 stitching them into a tree.
2428 if read an identifier
2429 if we're in a my declaration
2430 croak if they tried to say my($foo::bar)
2431 build the ops for a my() declaration
2432 if it's an access to a my() variable
2433 are we in a sort block?
2434 croak if my($a); $a <=> $b
2435 build ops for access to a my() variable
2436 if in a dq string, and they've said @foo and we can't find @foo
2438 build ops for a bareword
2439 if we already built the token before, use it.
2444 #pragma segment Perl_yylex
2450 register char *s = PL_bufptr;
2456 SV* tmp = newSVpvs("");
2457 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2458 (IV)CopLINE(PL_curcop),
2459 lex_state_names[PL_lex_state],
2460 exp_name[PL_expect],
2461 pv_display(tmp, s, strlen(s), 0, 60));
2464 /* check if there's an identifier for us to look at */
2465 if (PL_pending_ident)
2466 return REPORT(S_pending_ident(aTHX));
2468 /* no identifier pending identification */
2470 switch (PL_lex_state) {
2472 case LEX_NORMAL: /* Some compilers will produce faster */
2473 case LEX_INTERPNORMAL: /* code if we comment these out. */
2477 /* when we've already built the next token, just pull it out of the queue */
2480 yylval = PL_nextval[PL_nexttoke];
2482 PL_lex_state = PL_lex_defer;
2483 PL_expect = PL_lex_expect;
2484 PL_lex_defer = LEX_NORMAL;
2486 return REPORT(PL_nexttype[PL_nexttoke]);
2488 /* interpolated case modifiers like \L \U, including \Q and \E.
2489 when we get here, PL_bufptr is at the \
2491 case LEX_INTERPCASEMOD:
2493 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2494 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2496 /* handle \E or end of string */
2497 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2499 if (PL_lex_casemods) {
2500 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2501 PL_lex_casestack[PL_lex_casemods] = '\0';
2503 if (PL_bufptr != PL_bufend
2504 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2506 PL_lex_state = LEX_INTERPCONCAT;
2510 if (PL_bufptr != PL_bufend)
2512 PL_lex_state = LEX_INTERPCONCAT;
2516 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2517 "### Saw case modifier\n"); });
2519 if (s[1] == '\\' && s[2] == 'E') {
2521 PL_lex_state = LEX_INTERPCONCAT;
2526 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2527 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2528 if ((*s == 'L' || *s == 'U') &&
2529 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2530 PL_lex_casestack[--PL_lex_casemods] = '\0';
2533 if (PL_lex_casemods > 10)
2534 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2535 PL_lex_casestack[PL_lex_casemods++] = *s;
2536 PL_lex_casestack[PL_lex_casemods] = '\0';
2537 PL_lex_state = LEX_INTERPCONCAT;
2538 PL_nextval[PL_nexttoke].ival = 0;
2541 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2543 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2545 PL_nextval[PL_nexttoke].ival = OP_LC;
2547 PL_nextval[PL_nexttoke].ival = OP_UC;
2549 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2551 Perl_croak(aTHX_ "panic: yylex");
2555 if (PL_lex_starts) {
2558 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2559 if (PL_lex_casemods == 1 && PL_lex_inpat)
2568 case LEX_INTERPPUSH:
2569 return REPORT(sublex_push());
2571 case LEX_INTERPSTART:
2572 if (PL_bufptr == PL_bufend)
2573 return REPORT(sublex_done());
2574 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2575 "### Interpolated variable\n"); });
2577 PL_lex_dojoin = (*PL_bufptr == '@');
2578 PL_lex_state = LEX_INTERPNORMAL;
2579 if (PL_lex_dojoin) {
2580 PL_nextval[PL_nexttoke].ival = 0;
2582 force_ident("\"", '$');
2583 PL_nextval[PL_nexttoke].ival = 0;
2585 PL_nextval[PL_nexttoke].ival = 0;
2587 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2590 if (PL_lex_starts++) {
2592 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2593 if (!PL_lex_casemods && PL_lex_inpat)
2600 case LEX_INTERPENDMAYBE:
2601 if (intuit_more(PL_bufptr)) {
2602 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2608 if (PL_lex_dojoin) {
2609 PL_lex_dojoin = FALSE;
2610 PL_lex_state = LEX_INTERPCONCAT;
2613 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2614 && SvEVALED(PL_lex_repl))
2616 if (PL_bufptr != PL_bufend)
2617 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2618 PL_lex_repl = Nullsv;
2621 case LEX_INTERPCONCAT:
2623 if (PL_lex_brackets)
2624 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2626 if (PL_bufptr == PL_bufend)
2627 return REPORT(sublex_done());
2629 if (SvIVX(PL_linestr) == '\'') {
2630 SV *sv = newSVsv(PL_linestr);
2633 else if ( PL_hints & HINT_NEW_RE )
2634 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2635 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2639 s = scan_const(PL_bufptr);
2641 PL_lex_state = LEX_INTERPCASEMOD;
2643 PL_lex_state = LEX_INTERPSTART;
2646 if (s != PL_bufptr) {
2647 PL_nextval[PL_nexttoke] = yylval;
2650 if (PL_lex_starts++) {
2651 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2652 if (!PL_lex_casemods && PL_lex_inpat)
2665 PL_lex_state = LEX_NORMAL;
2666 s = scan_formline(PL_bufptr);
2667 if (!PL_lex_formbrack)
2673 PL_oldoldbufptr = PL_oldbufptr;
2679 if (isIDFIRST_lazy_if(s,UTF))
2681 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2684 goto fake_eof; /* emulate EOF on ^D or ^Z */
2689 if (PL_lex_brackets) {
2690 yyerror(PL_lex_formbrack
2691 ? "Format not terminated"
2692 : "Missing right curly or square bracket");
2694 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2695 "### Tokener got EOF\n");
2699 if (s++ < PL_bufend)
2700 goto retry; /* ignore stray nulls */
2703 if (!PL_in_eval && !PL_preambled) {
2704 PL_preambled = TRUE;
2705 sv_setpv(PL_linestr,incl_perldb());
2706 if (SvCUR(PL_linestr))
2707 sv_catpvs(PL_linestr,";");
2709 while(AvFILLp(PL_preambleav) >= 0) {
2710 SV *tmpsv = av_shift(PL_preambleav);
2711 sv_catsv(PL_linestr, tmpsv);
2712 sv_catpvs(PL_linestr, ";");
2715 sv_free((SV*)PL_preambleav);
2716 PL_preambleav = NULL;
2718 if (PL_minus_n || PL_minus_p) {
2719 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2721 sv_catpvs(PL_linestr,"chomp;");
2724 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2725 || *PL_splitstr == '"')
2726 && strchr(PL_splitstr + 1, *PL_splitstr))
2727 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2729 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2730 bytes can be used as quoting characters. :-) */
2731 const char *splits = PL_splitstr;
2732 sv_catpvs(PL_linestr, "our @F=split(q\0");
2735 if (*splits == '\\')
2736 sv_catpvn(PL_linestr, splits, 1);
2737 sv_catpvn(PL_linestr, splits, 1);
2738 } while (*splits++);
2739 /* This loop will embed the trailing NUL of
2740 PL_linestr as the last thing it does before
2742 sv_catpvs(PL_linestr, ");");
2746 sv_catpvs(PL_linestr,"our @F=split(' ');");
2750 sv_catpvs(PL_linestr,"use feature ':5.10';");
2751 sv_catpvs(PL_linestr, "\n");
2752 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2753 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2754 PL_last_lop = PL_last_uni = Nullch;
2755 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2756 SV * const sv = NEWSV(85,0);
2758 sv_upgrade(sv, SVt_PVMG);
2759 sv_setsv(sv,PL_linestr);
2762 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2767 bof = PL_rsfp ? TRUE : FALSE;
2768 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2771 if (PL_preprocess && !PL_in_eval)
2772 (void)PerlProc_pclose(PL_rsfp);
2773 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2774 PerlIO_clearerr(PL_rsfp);
2776 (void)PerlIO_close(PL_rsfp);
2778 PL_doextract = FALSE;
2780 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2781 sv_setpv(PL_linestr,PL_minus_p
2782 ? ";}continue{print;}" : ";}");
2783 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2784 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2785 PL_last_lop = PL_last_uni = Nullch;
2786 PL_minus_n = PL_minus_p = 0;
2789 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2790 PL_last_lop = PL_last_uni = Nullch;
2791 sv_setpvn(PL_linestr,"",0);
2792 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2794 /* If it looks like the start of a BOM or raw UTF-16,
2795 * check if it in fact is. */
2801 #ifdef PERLIO_IS_STDIO
2802 # ifdef __GNU_LIBRARY__
2803 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2804 # define FTELL_FOR_PIPE_IS_BROKEN
2808 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2809 # define FTELL_FOR_PIPE_IS_BROKEN
2814 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2815 /* This loses the possibility to detect the bof
2816 * situation on perl -P when the libc5 is being used.
2817 * Workaround? Maybe attach some extra state to PL_rsfp?
2820 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2822 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2826 s = swallow_bom((U8*)s);
2830 /* Incest with pod. */
2831 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2832 sv_setpvn(PL_linestr, "", 0);
2833 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2834 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2835 PL_last_lop = PL_last_uni = Nullch;
2836 PL_doextract = FALSE;
2840 } while (PL_doextract);
2841 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2842 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2843 SV * const sv = NEWSV(85,0);
2845 sv_upgrade(sv, SVt_PVMG);
2846 sv_setsv(sv,PL_linestr);
2849 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2851 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2852 PL_last_lop = PL_last_uni = Nullch;
2853 if (CopLINE(PL_curcop) == 1) {
2854 while (s < PL_bufend && isSPACE(*s))
2856 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2860 if (*s == '#' && *(s+1) == '!')
2862 #ifdef ALTERNATE_SHEBANG
2864 static char const as[] = ALTERNATE_SHEBANG;
2865 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2866 d = s + (sizeof(as) - 1);
2868 #endif /* ALTERNATE_SHEBANG */
2877 while (*d && !isSPACE(*d))
2881 #ifdef ARG_ZERO_IS_SCRIPT
2882 if (ipathend > ipath) {
2884 * HP-UX (at least) sets argv[0] to the script name,
2885 * which makes $^X incorrect. And Digital UNIX and Linux,
2886 * at least, set argv[0] to the basename of the Perl
2887 * interpreter. So, having found "#!", we'll set it right.
2890 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2891 assert(SvPOK(x) || SvGMAGICAL(x));
2892 if (sv_eq(x, CopFILESV(PL_curcop))) {
2893 sv_setpvn(x, ipath, ipathend - ipath);
2899 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2900 const char * const lstart = SvPV_const(x,llen);
2902 bstart += blen - llen;
2903 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2904 sv_setpvn(x, ipath, ipathend - ipath);
2909 TAINT_NOT; /* $^X is always tainted, but that's OK */
2911 #endif /* ARG_ZERO_IS_SCRIPT */
2916 d = instr(s,"perl -");
2918 d = instr(s,"perl");
2920 /* avoid getting into infinite loops when shebang
2921 * line contains "Perl" rather than "perl" */
2923 for (d = ipathend-4; d >= ipath; --d) {
2924 if ((*d == 'p' || *d == 'P')
2925 && !ibcmp(d, "perl", 4))
2935 #ifdef ALTERNATE_SHEBANG
2937 * If the ALTERNATE_SHEBANG on this system starts with a
2938 * character that can be part of a Perl expression, then if
2939 * we see it but not "perl", we're probably looking at the
2940 * start of Perl code, not a request to hand off to some
2941 * other interpreter. Similarly, if "perl" is there, but
2942 * not in the first 'word' of the line, we assume the line
2943 * contains the start of the Perl program.
2945 if (d && *s != '#') {
2946 const char *c = ipath;
2947 while (*c && !strchr("; \t\r\n\f\v#", *c))
2950 d = Nullch; /* "perl" not in first word; ignore */
2952 *s = '#'; /* Don't try to parse shebang line */
2954 #endif /* ALTERNATE_SHEBANG */
2955 #ifndef MACOS_TRADITIONAL
2960 !instr(s,"indir") &&
2961 instr(PL_origargv[0],"perl"))
2968 while (s < PL_bufend && isSPACE(*s))
2970 if (s < PL_bufend) {
2971 Newxz(newargv,PL_origargc+3,char*);
2973 while (s < PL_bufend && !isSPACE(*s))
2976 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2979 newargv = PL_origargv;
2982 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2984 Perl_croak(aTHX_ "Can't exec %s", ipath);
2988 const U32 oldpdb = PL_perldb;
2989 const bool oldn = PL_minus_n;
2990 const bool oldp = PL_minus_p;
2992 while (*d && !isSPACE(*d)) d++;
2993 while (SPACE_OR_TAB(*d)) d++;
2996 const bool switches_done = PL_doswitches;
2998 if (*d == 'M' || *d == 'm' || *d == 'C') {
2999 const char * const m = d;
3000 while (*d && !isSPACE(*d)) d++;
3001 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3004 d = moreswitches(d);
3006 if (PL_doswitches && !switches_done) {
3007 int argc = PL_origargc;
3008 char **argv = PL_origargv;
3011 } while (argc && argv[0][0] == '-' && argv[0][1]);
3012 init_argv_symbols(argc,argv);
3014 if ((PERLDB_LINE && !oldpdb) ||
3015 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3016 /* if we have already added "LINE: while (<>) {",
3017 we must not do it again */
3019 sv_setpvn(PL_linestr, "", 0);
3020 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3021 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3022 PL_last_lop = PL_last_uni = Nullch;
3023 PL_preambled = FALSE;
3025 (void)gv_fetchfile(PL_origfilename);
3032 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3034 PL_lex_state = LEX_FORMLINE;
3039 #ifdef PERL_STRICT_CR
3040 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3042 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3044 case ' ': case '\t': case '\f': case 013:
3045 #ifdef MACOS_TRADITIONAL
3052 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3053 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3054 /* handle eval qq[#line 1 "foo"\n ...] */
3055 CopLINE_dec(PL_curcop);
3059 while (s < d && *s != '\n')
3063 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3064 Perl_croak(aTHX_ "panic: input overflow");
3066 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3068 PL_lex_state = LEX_FORMLINE;
3078 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3086 while (s < PL_bufend && SPACE_OR_TAB(*s))
3089 if (strnEQ(s,"=>",2)) {
3090 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3091 DEBUG_T( { S_printbuf(aTHX_
3092 "### Saw unary minus before =>, forcing word %s\n", s);
3094 OPERATOR('-'); /* unary minus */
3096 PL_last_uni = PL_oldbufptr;
3098 case 'r': ftst = OP_FTEREAD; break;
3099 case 'w': ftst = OP_FTEWRITE; break;
3100 case 'x': ftst = OP_FTEEXEC; break;
3101 case 'o': ftst = OP_FTEOWNED; break;
3102 case 'R': ftst = OP_FTRREAD; break;
3103 case 'W': ftst = OP_FTRWRITE; break;
3104 case 'X': ftst = OP_FTREXEC; break;
3105 case 'O': ftst = OP_FTROWNED; break;
3106 case 'e': ftst = OP_FTIS; break;
3107 case 'z': ftst = OP_FTZERO; break;
3108 case 's': ftst = OP_FTSIZE; break;
3109 case 'f': ftst = OP_FTFILE; break;
3110 case 'd': ftst = OP_FTDIR; break;
3111 case 'l': ftst = OP_FTLINK; break;
3112 case 'p': ftst = OP_FTPIPE; break;
3113 case 'S': ftst = OP_FTSOCK; break;
3114 case 'u': ftst = OP_FTSUID; break;
3115 case 'g': ftst = OP_FTSGID; break;
3116 case 'k': ftst = OP_FTSVTX; break;
3117 case 'b': ftst = OP_FTBLK; break;
3118 case 'c': ftst = OP_FTCHR; break;
3119 case 't': ftst = OP_FTTTY; break;
3120 case 'T': ftst = OP_FTTEXT; break;
3121 case 'B': ftst = OP_FTBINARY; break;
3122 case 'M': case 'A': case 'C':
3123 gv_fetchpv("\024",GV_ADD, SVt_PV);
3125 case 'M': ftst = OP_FTMTIME; break;
3126 case 'A': ftst = OP_FTATIME; break;
3127 case 'C': ftst = OP_FTCTIME; break;
3135 PL_last_lop_op = (OPCODE)ftst;
3136 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3137 "### Saw file test %c\n", (int)tmp);
3142 /* Assume it was a minus followed by a one-letter named
3143 * subroutine call (or a -bareword), then. */
3144 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3145 "### '-%c' looked like a file test but was not\n",
3152 const char tmp = *s++;
3155 if (PL_expect == XOPERATOR)
3160 else if (*s == '>') {
3163 if (isIDFIRST_lazy_if(s,UTF)) {
3164 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3172 if (PL_expect == XOPERATOR)
3175 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3177 OPERATOR('-'); /* unary minus */
3183 const char tmp = *s++;
3186 if (PL_expect == XOPERATOR)
3191 if (PL_expect == XOPERATOR)
3194 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3201 if (PL_expect != XOPERATOR) {
3202 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3203 PL_expect = XOPERATOR;
3204 force_ident(PL_tokenbuf, '*');
3217 if (PL_expect == XOPERATOR) {
3221 PL_tokenbuf[0] = '%';
3222 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3223 if (!PL_tokenbuf[1]) {
3226 PL_pending_ident = '%';
3237 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3238 && FEATURE_IS_ENABLED("~~"))
3245 const char tmp = *s++;
3251 goto just_a_word_zero_gv;
3254 switch (PL_expect) {
3257 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3259 PL_bufptr = s; /* update in case we back off */
3265 PL_expect = XTERMBLOCK;
3269 while (isIDFIRST_lazy_if(s,UTF)) {
3271 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3272 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3273 if (tmp < 0) tmp = -tmp;
3289 d = scan_str(d,TRUE,TRUE);
3291 /* MUST advance bufptr here to avoid bogus
3292 "at end of line" context messages from yyerror().
3294 PL_bufptr = s + len;
3295 yyerror("Unterminated attribute parameter in attribute list");
3298 return REPORT(0); /* EOF indicator */
3302 SV *sv = newSVpvn(s, len);
3303 sv_catsv(sv, PL_lex_stuff);
3304 attrs = append_elem(OP_LIST, attrs,
3305 newSVOP(OP_CONST, 0, sv));
3306 SvREFCNT_dec(PL_lex_stuff);
3307 PL_lex_stuff = Nullsv;
3310 if (len == 6 && strnEQ(s, "unique", len)) {
3311 if (PL_in_my == KEY_our)
3313 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3315 ; /* skip to avoid loading attributes.pm */
3318 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3321 /* NOTE: any CV attrs applied here need to be part of
3322 the CVf_BUILTIN_ATTRS define in cv.h! */
3323 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3324 CvLVALUE_on(PL_compcv);
3325 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3326 CvLOCKED_on(PL_compcv);
3327 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3328 CvMETHOD_on(PL_compcv);
3329 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3330 CvASSERTION_on(PL_compcv);
3331 /* After we've set the flags, it could be argued that
3332 we don't need to do the attributes.pm-based setting
3333 process, and shouldn't bother appending recognized
3334 flags. To experiment with that, uncomment the
3335 following "else". (Note that's already been
3336 uncommented. That keeps the above-applied built-in
3337 attributes from being intercepted (and possibly
3338 rejected) by a package's attribute routines, but is
3339 justified by the performance win for the common case
3340 of applying only built-in attributes.) */
3342 attrs = append_elem(OP_LIST, attrs,
3343 newSVOP(OP_CONST, 0,
3347 if (*s == ':' && s[1] != ':')
3350 break; /* require real whitespace or :'s */
3354 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3355 if (*s != ';' && *s != '}' && *s != tmp
3356 && (tmp != '=' || *s != ')')) {
3357 const char q = ((*s == '\'') ? '"' : '\'');
3358 /* If here for an expression, and parsed no attrs, back
3360 if (tmp == '=' && !attrs) {
3364 /* MUST advance bufptr here to avoid bogus "at end of line"
3365 context messages from yyerror().
3369 ? Perl_form(aTHX_ "Invalid separator character "
3370 "%c%c%c in attribute list", q, *s, q)
3371 : "Unterminated attribute list" );
3379 PL_nextval[PL_nexttoke].opval = attrs;
3387 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3388 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3396 const char tmp = *s++;
3401 const char tmp = *s++;
3409 if (PL_lex_brackets <= 0)
3410 yyerror("Unmatched right square bracket");
3413 if (PL_lex_state == LEX_INTERPNORMAL) {
3414 if (PL_lex_brackets == 0) {
3415 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3416 PL_lex_state = LEX_INTERPEND;
3423 if (PL_lex_brackets > 100) {
3424 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3426 switch (PL_expect) {
3428 if (PL_lex_formbrack) {
3432 if (PL_oldoldbufptr == PL_last_lop)
3433 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3435 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3436 OPERATOR(HASHBRACK);
3438 while (s < PL_bufend && SPACE_OR_TAB(*s))
3441 PL_tokenbuf[0] = '\0';
3442 if (d < PL_bufend && *d == '-') {
3443 PL_tokenbuf[0] = '-';
3445 while (d < PL_bufend && SPACE_OR_TAB(*d))
3448 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3449 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3451 while (d < PL_bufend && SPACE_OR_TAB(*d))
3454 const char minus = (PL_tokenbuf[0] == '-');
3455 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3463 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3468 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3473 if (PL_oldoldbufptr == PL_last_lop)
3474 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3476 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3479 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3481 /* This hack is to get the ${} in the message. */
3483 yyerror("syntax error");
3486 OPERATOR(HASHBRACK);
3488 /* This hack serves to disambiguate a pair of curlies
3489 * as being a block or an anon hash. Normally, expectation
3490 * determines that, but in cases where we're not in a
3491 * position to expect anything in particular (like inside
3492 * eval"") we have to resolve the ambiguity. This code
3493 * covers the case where the first term in the curlies is a
3494 * quoted string. Most other cases need to be explicitly
3495 * disambiguated by prepending a "+" before the opening
3496 * curly in order to force resolution as an anon hash.
3498 * XXX should probably propagate the outer expectation
3499 * into eval"" to rely less on this hack, but that could
3500 * potentially break current behavior of eval"".
3504 if (*s == '\'' || *s == '"' || *s == '`') {
3505 /* common case: get past first string, handling escapes */
3506 for (t++; t < PL_bufend && *t != *s;)
3507 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3511 else if (*s == 'q') {
3514 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3517 /* skip q//-like construct */
3519 char open, close, term;
3522 while (t < PL_bufend && isSPACE(*t))
3524 /* check for q => */
3525 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3526 OPERATOR(HASHBRACK);
3530 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3534 for (t++; t < PL_bufend; t++) {
3535 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3537 else if (*t == open)
3541 for (t++; t < PL_bufend; t++) {
3542 if (*t == '\\' && t+1 < PL_bufend)
3544 else if (*t == close && --brackets <= 0)
3546 else if (*t == open)
3553 /* skip plain q word */
3554 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3557 else if (isALNUM_lazy_if(t,UTF)) {
3559 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3562 while (t < PL_bufend && isSPACE(*t))
3564 /* if comma follows first term, call it an anon hash */
3565 /* XXX it could be a comma expression with loop modifiers */
3566 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3567 || (*t == '=' && t[1] == '>')))
3568 OPERATOR(HASHBRACK);
3569 if (PL_expect == XREF)
3572 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3578 yylval.ival = CopLINE(PL_curcop);
3579 if (isSPACE(*s) || *s == '#')
3580 PL_copline = NOLINE; /* invalidate current command line number */
3585 if (PL_lex_brackets <= 0)
3586 yyerror("Unmatched right curly bracket");
3588 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3589 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3590 PL_lex_formbrack = 0;
3591 if (PL_lex_state == LEX_INTERPNORMAL) {
3592 if (PL_lex_brackets == 0) {
3593 if (PL_expect & XFAKEBRACK) {
3594 PL_expect &= XENUMMASK;
3595 PL_lex_state = LEX_INTERPEND;
3597 return yylex(); /* ignore fake brackets */
3599 if (*s == '-' && s[1] == '>')
3600 PL_lex_state = LEX_INTERPENDMAYBE;
3601 else if (*s != '[' && *s != '{')
3602 PL_lex_state = LEX_INTERPEND;
3605 if (PL_expect & XFAKEBRACK) {
3606 PL_expect &= XENUMMASK;
3608 return yylex(); /* ignore fake brackets */
3617 if (PL_expect == XOPERATOR) {
3618 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3619 && isIDFIRST_lazy_if(s,UTF))
3621 CopLINE_dec(PL_curcop);
3622 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3623 CopLINE_inc(PL_curcop);
3628 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3630 PL_expect = XOPERATOR;
3631 force_ident(PL_tokenbuf, '&');
3635 yylval.ival = (OPpENTERSUB_AMPER<<8);
3647 const char tmp = *s++;
3654 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3655 && strchr("+-*/%.^&|<",tmp))
3656 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3657 "Reversed %c= operator",(int)tmp);
3659 if (PL_expect == XSTATE && isALPHA(tmp) &&
3660 (s == PL_linestart+1 || s[-2] == '\n') )
3662 if (PL_in_eval && !PL_rsfp) {
3667 if (strnEQ(s,"=cut",4)) {
3681 PL_doextract = TRUE;
3685 if (PL_lex_brackets < PL_lex_formbrack) {
3687 #ifdef PERL_STRICT_CR
3688 for (t = s; SPACE_OR_TAB(*t); t++) ;
3690 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3692 if (*t == '\n' || *t == '#') {
3703 const char tmp = *s++;
3705 /* was this !=~ where !~ was meant?
3706 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3708 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3709 const char *t = s+1;
3711 while (t < PL_bufend && isSPACE(*t))
3714 if (*t == '/' || *t == '?' ||
3715 ((*t == 'm' || *t == 's' || *t == 'y')
3716 && !isALNUM(t[1])) ||
3717 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3718 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3719 "!=~ should be !~");
3729 if (PL_expect != XOPERATOR) {
3730 if (s[1] != '<' && !strchr(s,'>'))
3733 s = scan_heredoc(s);
3735 s = scan_inputsymbol(s);
3736 TERM(sublex_start());
3742 SHop(OP_LEFT_SHIFT);
3756 const char tmp = *s++;
3758 SHop(OP_RIGHT_SHIFT);
3768 if (PL_expect == XOPERATOR) {
3769 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3771 deprecate_old(commaless_variable_list);
3772 return REPORT(','); /* grandfather non-comma-format format */
3776 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3777 PL_tokenbuf[0] = '@';
3778 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3779 sizeof PL_tokenbuf - 1, FALSE);
3780 if (PL_expect == XOPERATOR)
3781 no_op("Array length", s);
3782 if (!PL_tokenbuf[1])
3784 PL_expect = XOPERATOR;
3785 PL_pending_ident = '#';
3789 PL_tokenbuf[0] = '$';
3790 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3791 sizeof PL_tokenbuf - 1, FALSE);
3792 if (PL_expect == XOPERATOR)
3794 if (!PL_tokenbuf[1]) {
3796 yyerror("Final $ should be \\$ or $name");
3800 /* This kludge not intended to be bulletproof. */
3801 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3802 yylval.opval = newSVOP(OP_CONST, 0,
3803 newSViv(PL_compiling.cop_arybase));
3804 yylval.opval->op_private = OPpCONST_ARYBASE;
3810 const char tmp = *s;
3811 if (PL_lex_state == LEX_NORMAL)
3814 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3815 && intuit_more(s)) {
3817 PL_tokenbuf[0] = '@';
3818 if (ckWARN(WARN_SYNTAX)) {
3821 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3824 PL_bufptr = skipspace(PL_bufptr);
3825 while (t < PL_bufend && *t != ']')
3827 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3828 "Multidimensional syntax %.*s not supported",
3829 (int)((t - PL_bufptr) + 1), PL_bufptr);
3833 else if (*s == '{') {
3835 PL_tokenbuf[0] = '%';
3836 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3837 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3839 char tmpbuf[sizeof PL_tokenbuf];
3840 for (t++; isSPACE(*t); t++) ;
3841 if (isIDFIRST_lazy_if(t,UTF)) {
3843 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3845 for (; isSPACE(*t); t++) ;
3846 if (*t == ';' && get_cv(tmpbuf, FALSE))
3847 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3848 "You need to quote \"%s\"",
3855 PL_expect = XOPERATOR;
3856 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3857 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3858 if (!islop || PL_last_lop_op == OP_GREPSTART)
3859 PL_expect = XOPERATOR;
3860 else if (strchr("$@\"'`q", *s))
3861 PL_expect = XTERM; /* e.g. print $fh "foo" */
3862 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3863 PL_expect = XTERM; /* e.g. print $fh &sub */
3864 else if (isIDFIRST_lazy_if(s,UTF)) {
3865 char tmpbuf[sizeof PL_tokenbuf];
3867 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3868 if ((t2 = keyword(tmpbuf, len))) {
3869 /* binary operators exclude handle interpretations */
3881 PL_expect = XTERM; /* e.g. print $fh length() */
3886 PL_expect = XTERM; /* e.g. print $fh subr() */
3889 else if (isDIGIT(*s))
3890 PL_expect = XTERM; /* e.g. print $fh 3 */
3891 else if (*s == '.' && isDIGIT(s[1]))
3892 PL_expect = XTERM; /* e.g. print $fh .3 */
3893 else if ((*s == '?' || *s == '-' || *s == '+')
3894 && !isSPACE(s[1]) && s[1] != '=')
3895 PL_expect = XTERM; /* e.g. print $fh -1 */
3896 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3898 PL_expect = XTERM; /* e.g. print $fh /.../
3899 XXX except DORDOR operator
3901 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3903 PL_expect = XTERM; /* print $fh <<"EOF" */
3906 PL_pending_ident = '$';
3910 if (PL_expect == XOPERATOR)
3912 PL_tokenbuf[0] = '@';
3913 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3914 if (!PL_tokenbuf[1]) {
3917 if (PL_lex_state == LEX_NORMAL)
3919 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3921 PL_tokenbuf[0] = '%';
3923 /* Warn about @ where they meant $. */
3924 if (*s == '[' || *s == '{') {
3925 if (ckWARN(WARN_SYNTAX)) {
3926 const char *t = s + 1;
3927 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3929 if (*t == '}' || *t == ']') {
3931 PL_bufptr = skipspace(PL_bufptr);
3932 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3933 "Scalar value %.*s better written as $%.*s",
3934 (int)(t-PL_bufptr), PL_bufptr,
3935 (int)(t-PL_bufptr-1), PL_bufptr+1);
3940 PL_pending_ident = '@';
3943 case '/': /* may be division, defined-or, or pattern */
3944 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3948 case '?': /* may either be conditional or pattern */
3949 if(PL_expect == XOPERATOR) {
3957 /* A // operator. */
3967 /* Disable warning on "study /blah/" */
3968 if (PL_oldoldbufptr == PL_last_uni
3969 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3970 || memNE(PL_last_uni, "study", 5)
3971 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3974 s = scan_pat(s,OP_MATCH);
3975 TERM(sublex_start());
3979 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3980 #ifdef PERL_STRICT_CR
3983 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3985 && (s == PL_linestart || s[-1] == '\n') )
3987 PL_lex_formbrack = 0;
3991 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3997 yylval.ival = OPf_SPECIAL;
4003 if (PL_expect != XOPERATOR)
4008 case '0': case '1': case '2': case '3': case '4':
4009 case '5': case '6': case '7': case '8': case '9':
4010 s = scan_num(s, &yylval);
4011 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4012 if (PL_expect == XOPERATOR)
4017 s = scan_str(s,FALSE,FALSE);
4018 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4019 if (PL_expect == XOPERATOR) {
4020 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4022 deprecate_old(commaless_variable_list);
4023 return REPORT(','); /* grandfather non-comma-format format */
4029 missingterm((char*)0);
4030 yylval.ival = OP_CONST;
4031 TERM(sublex_start());
4034 s = scan_str(s,FALSE,FALSE);
4035 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4036 if (PL_expect == XOPERATOR) {
4037 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4039 deprecate_old(commaless_variable_list);
4040 return REPORT(','); /* grandfather non-comma-format format */
4046 missingterm((char*)0);
4047 yylval.ival = OP_CONST;
4048 /* FIXME. I think that this can be const if char *d is replaced by
4049 more localised variables. */
4050 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4051 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4052 yylval.ival = OP_STRINGIFY;
4056 TERM(sublex_start());
4059 s = scan_str(s,FALSE,FALSE);
4060 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4061 if (PL_expect == XOPERATOR)
4062 no_op("Backticks",s);
4064 missingterm((char*)0);
4065 yylval.ival = OP_BACKTICK;
4067 TERM(sublex_start());
4071 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4072 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4074 if (PL_expect == XOPERATOR)
4075 no_op("Backslash",s);
4079 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4080 char *start = s + 2;
4081 while (isDIGIT(*start) || *start == '_')
4083 if (*start == '.' && isDIGIT(start[1])) {
4084 s = scan_num(s, &yylval);
4087 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4088 else if (!isALPHA(*start) && (PL_expect == XTERM
4089 || PL_expect == XREF || PL_expect == XSTATE
4090 || PL_expect == XTERMORDORDOR)) {
4091 const char c = *start;
4094 gv = gv_fetchpv(s, 0, SVt_PVCV);
4097 s = scan_num(s, &yylval);
4104 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4140 I32 orig_keyword = 0;
4145 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4147 /* Some keywords can be followed by any delimiter, including ':' */
4148 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4149 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4150 (PL_tokenbuf[0] == 'q' &&
4151 strchr("qwxr", PL_tokenbuf[1])))));
4153 /* x::* is just a word, unless x is "CORE" */
4154 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4158 while (d < PL_bufend && isSPACE(*d))
4159 d++; /* no comments skipped here, or s### is misparsed */
4161 /* Is this a label? */
4162 if (!tmp && PL_expect == XSTATE
4163 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4165 yylval.pval = savepv(PL_tokenbuf);
4170 /* Check for keywords */
4171 tmp = keyword(PL_tokenbuf, len);
4173 /* Is this a word before a => operator? */
4174 if (*d == '=' && d[1] == '>') {
4177 = (OP*)newSVOP(OP_CONST, 0,
4178 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4179 yylval.opval->op_private = OPpCONST_BARE;
4183 if (tmp < 0) { /* second-class keyword? */
4184 GV *ogv = NULL; /* override (winner) */
4185 GV *hgv = NULL; /* hidden (loser) */
4186 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4188 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4191 if (GvIMPORTED_CV(gv))
4193 else if (! CvMETHOD(cv))
4197 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4198 (gv = *gvp) != (GV*)&PL_sv_undef &&
4199 GvCVu(gv) && GvIMPORTED_CV(gv))
4206 tmp = 0; /* overridden by import or by GLOBAL */
4209 && -tmp==KEY_lock /* XXX generalizable kludge */
4211 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4213 tmp = 0; /* any sub overrides "weak" keyword */
4215 else { /* no override */
4217 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4218 Perl_warner(aTHX_ packWARN(WARN_MISC),
4219 "dump() better written as CORE::dump()");
4223 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4224 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4225 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4226 "Ambiguous call resolved as CORE::%s(), %s",
4227 GvENAME(hgv), "qualify as such or use &");
4234 default: /* not a keyword */
4235 /* Trade off - by using this evil construction we can pull the
4236 variable gv into the block labelled keylookup. If not, then
4237 we have to give it function scope so that the goto from the
4238 earlier ':' case doesn't bypass the initialisation. */
4240 just_a_word_zero_gv:
4247 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4250 /* Get the rest if it looks like a package qualifier */
4252 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4254 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4257 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4258 *s == '\'' ? "'" : "::");
4263 if (PL_expect == XOPERATOR) {
4264 if (PL_bufptr == PL_linestart) {
4265 CopLINE_dec(PL_curcop);
4266 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4267 CopLINE_inc(PL_curcop);
4270 no_op("Bareword",s);
4273 /* Look for a subroutine with this name in current package,
4274 unless name is "Foo::", in which case Foo is a bearword
4275 (and a package name). */
4278 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4280 if (ckWARN(WARN_BAREWORD)
4281 && ! gv_fetchpv(PL_tokenbuf, 0, SVt_PVHV))
4282 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4283 "Bareword \"%s\" refers to nonexistent package",
4286 PL_tokenbuf[len] = '\0';
4293 /* Mustn't actually add anything to a symbol table.
4294 But also don't want to "initialise" any placeholder
4295 constants that might already be there into full
4296 blown PVGVs with attached PVCV. */
4297 gv = gv_fetchpv(PL_tokenbuf, GV_NOADD_NOINIT,
4302 /* if we saw a global override before, get the right name */
4305 sv = newSVpvs("CORE::GLOBAL::");
4306 sv_catpv(sv,PL_tokenbuf);
4309 /* If len is 0, newSVpv does strlen(), which is correct.
4310 If len is non-zero, then it will be the true length,
4311 and so the scalar will be created correctly. */
4312 sv = newSVpv(PL_tokenbuf,len);
4315 /* Presume this is going to be a bareword of some sort. */
4318 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4319 yylval.opval->op_private = OPpCONST_BARE;
4320 /* UTF-8 package name? */
4321 if (UTF && !IN_BYTES &&
4322 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4325 /* And if "Foo::", then that's what it certainly is. */
4330 /* Do the explicit type check so that we don't need to force
4331 the initialisation of the symbol table to have a real GV.
4332 Beware - gv may not really be a PVGV, cv may not really be
4333 a PVCV, (because of the space optimisations that gv_init
4334 understands) But they're true if for this symbol there is
4335 respectively a typeglob and a subroutine.
4337 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4338 /* Real typeglob, so get the real subroutine: */
4340 /* A proxy for a subroutine in this package? */
4341 : SvOK(gv) ? (CV *) gv : NULL)
4344 /* See if it's the indirect object for a list operator. */
4346 if (PL_oldoldbufptr &&
4347 PL_oldoldbufptr < PL_bufptr &&
4348 (PL_oldoldbufptr == PL_last_lop
4349 || PL_oldoldbufptr == PL_last_uni) &&
4350 /* NO SKIPSPACE BEFORE HERE! */
4351 (PL_expect == XREF ||
4352 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4354 bool immediate_paren = *s == '(';
4356 /* (Now we can afford to cross potential line boundary.) */
4359 /* Two barewords in a row may indicate method call. */
4361 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4362 (tmp = intuit_method(s, gv, cv)))
4365 /* If not a declared subroutine, it's an indirect object. */
4366 /* (But it's an indir obj regardless for sort.) */
4367 /* Also, if "_" follows a filetest operator, it's a bareword */
4370 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4372 (PL_last_lop_op != OP_MAPSTART &&
4373 PL_last_lop_op != OP_GREPSTART))))
4374 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4375 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4378 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4383 PL_expect = XOPERATOR;
4386 /* Is this a word before a => operator? */
4387 if (*s == '=' && s[1] == '>' && !pkgname) {
4389 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4390 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4391 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4395 /* If followed by a paren, it's certainly a subroutine. */
4399 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4400 if (*d == ')' && (sv = gv_const_sv(gv))) {
4405 PL_nextval[PL_nexttoke].opval = yylval.opval;
4406 PL_expect = XOPERATOR;
4412 /* If followed by var or block, call it a method (unless sub) */
4414 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4415 PL_last_lop = PL_oldbufptr;
4416 PL_last_lop_op = OP_METHOD;
4420 /* If followed by a bareword, see if it looks like indir obj. */
4423 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4424 && (tmp = intuit_method(s, gv, cv)))
4427 /* Not a method, so call it a subroutine (if defined) */
4430 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4431 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4432 "Ambiguous use of -%s resolved as -&%s()",
4433 PL_tokenbuf, PL_tokenbuf);
4434 /* Check for a constant sub */
4435 if ((sv = gv_const_sv(gv))) {
4437 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4438 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4439 yylval.opval->op_private = 0;
4443 /* Resolve to GV now. */
4444 if (SvTYPE(gv) != SVt_PVGV) {
4445 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4446 assert (SvTYPE(gv) == SVt_PVGV);
4447 /* cv must have been some sort of placeholder, so
4448 now needs replacing with a real code reference. */
4452 op_free(yylval.opval);
4453 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4454 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4455 PL_last_lop = PL_oldbufptr;
4456 PL_last_lop_op = OP_ENTERSUB;
4457 /* Is there a prototype? */
4460 const char *proto = SvPV_const((SV*)cv, len);
4463 if (*proto == '$' && proto[1] == '\0')
4465 while (*proto == ';')
4467 if (*proto == '&' && *s == '{') {
4468 sv_setpv(PL_subname, PL_curstash ?
4469 "__ANON__" : "__ANON__::__ANON__");
4473 PL_nextval[PL_nexttoke].opval = yylval.opval;
4479 /* Call it a bare word */
4481 if (PL_hints & HINT_STRICT_SUBS)
4482 yylval.opval->op_private |= OPpCONST_STRICT;
4485 if (lastchar != '-') {
4486 if (ckWARN(WARN_RESERVED)) {
4487 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4488 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4489 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4496 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4497 && ckWARN_d(WARN_AMBIGUOUS)) {
4498 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4499 "Operator or semicolon missing before %c%s",
4500 lastchar, PL_tokenbuf);
4501 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4502 "Ambiguous use of %c resolved as operator %c",
4503 lastchar, lastchar);
4509 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4510 newSVpv(CopFILE(PL_curcop),0));
4514 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4515 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4518 case KEY___PACKAGE__:
4519 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4521 ? newSVhek(HvNAME_HEK(PL_curstash))
4528 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4529 const char *pname = "main";
4530 if (PL_tokenbuf[2] == 'D')
4531 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4532 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4536 GvIOp(gv) = newIO();
4537 IoIFP(GvIOp(gv)) = PL_rsfp;
4538 #if defined(HAS_FCNTL) && defined(F_SETFD)
4540 const int fd = PerlIO_fileno(PL_rsfp);
4541 fcntl(fd,F_SETFD,fd >= 3);
4544 /* Mark this internal pseudo-handle as clean */
4545 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4547 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4548 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4549 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4551 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4552 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4553 /* if the script was opened in binmode, we need to revert
4554 * it to text mode for compatibility; but only iff it has CRs
4555 * XXX this is a questionable hack at best. */
4556 if (PL_bufend-PL_bufptr > 2
4557 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4560 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4561 loc = PerlIO_tell(PL_rsfp);
4562 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4565 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4567 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4568 #endif /* NETWARE */
4569 #ifdef PERLIO_IS_STDIO /* really? */
4570 # if defined(__BORLANDC__)
4571 /* XXX see note in do_binmode() */
4572 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4576 PerlIO_seek(PL_rsfp, loc, 0);
4580 #ifdef PERLIO_LAYERS
4583 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4584 else if (PL_encoding) {
4591 XPUSHs(PL_encoding);
4593 call_method("name", G_SCALAR);
4597 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4598 Perl_form(aTHX_ ":encoding(%"SVf")",
4616 if (PL_expect == XSTATE) {
4623 if (*s == ':' && s[1] == ':') {
4626 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4627 if (!(tmp = keyword(PL_tokenbuf, len)))
4628 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4631 else if (tmp == KEY_require || tmp == KEY_do)
4632 /* that's a way to remember we saw "CORE::" */
4645 LOP(OP_ACCEPT,XTERM);
4651 LOP(OP_ATAN2,XTERM);
4657 LOP(OP_BINMODE,XTERM);
4660 LOP(OP_BLESS,XTERM);
4669 /* When 'use switch' is in effect, continue has a dual
4670 life as a control operator. */
4672 if (!FEATURE_IS_ENABLED("switch"))
4675 /* We have to disambiguate the two senses of
4676 "continue". If the next token is a '{' then
4677 treat it as the start of a continue block;
4678 otherwise treat it as a control operator.
4689 (void)gv_fetchpv("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4706 if (!PL_cryptseen) {
4707 PL_cryptseen = TRUE;
4711 LOP(OP_CRYPT,XTERM);
4714 LOP(OP_CHMOD,XTERM);
4717 LOP(OP_CHOWN,XTERM);
4720 LOP(OP_CONNECT,XTERM);
4739 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4740 if (orig_keyword == KEY_do) {
4749 PL_hints |= HINT_BLOCK_SCOPE;
4759 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4760 LOP(OP_DBMOPEN,XTERM);
4766 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4773 yylval.ival = CopLINE(PL_curcop);
4787 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4788 UNIBRACK(OP_ENTEREVAL);
4806 case KEY_endhostent:
4812 case KEY_endservent:
4815 case KEY_endprotoent:
4826 yylval.ival = CopLINE(PL_curcop);
4828 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4830 if ((PL_bufend - p) >= 3 &&
4831 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4833 else if ((PL_bufend - p) >= 4 &&
4834 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4837 if (isIDFIRST_lazy_if(p,UTF)) {
4838 p = scan_ident(p, PL_bufend,
4839 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4843 Perl_croak(aTHX_ "Missing $ on loop variable");
4848 LOP(OP_FORMLINE,XTERM);
4854 LOP(OP_FCNTL,XTERM);
4860 LOP(OP_FLOCK,XTERM);
4869 LOP(OP_GREPSTART, XREF);
4872 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4887 case KEY_getpriority:
4888 LOP(OP_GETPRIORITY,XTERM);
4890 case KEY_getprotobyname:
4893 case KEY_getprotobynumber:
4894 LOP(OP_GPBYNUMBER,XTERM);
4896 case KEY_getprotoent:
4908 case KEY_getpeername:
4909 UNI(OP_GETPEERNAME);
4911 case KEY_gethostbyname:
4914 case KEY_gethostbyaddr:
4915 LOP(OP_GHBYADDR,XTERM);
4917 case KEY_gethostent:
4920 case KEY_getnetbyname:
4923 case KEY_getnetbyaddr:
4924 LOP(OP_GNBYADDR,XTERM);
4929 case KEY_getservbyname:
4930 LOP(OP_GSBYNAME,XTERM);
4932 case KEY_getservbyport:
4933 LOP(OP_GSBYPORT,XTERM);
4935 case KEY_getservent:
4938 case KEY_getsockname:
4939 UNI(OP_GETSOCKNAME);
4941 case KEY_getsockopt:
4942 LOP(OP_GSOCKOPT,XTERM);
4957 yylval.ival = CopLINE(PL_curcop);
4968 yylval.ival = CopLINE(PL_curcop);
4972 LOP(OP_INDEX,XTERM);
4978 LOP(OP_IOCTL,XTERM);
4990 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5022 LOP(OP_LISTEN,XTERM);
5031 s = scan_pat(s,OP_MATCH);
5032 TERM(sublex_start());
5035 LOP(OP_MAPSTART, XREF);
5038 LOP(OP_MKDIR,XTERM);
5041 LOP(OP_MSGCTL,XTERM);
5044 LOP(OP_MSGGET,XTERM);
5047 LOP(OP_MSGRCV,XTERM);
5050 LOP(OP_MSGSND,XTERM);
5056 if (isIDFIRST_lazy_if(s,UTF)) {
5057 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5058 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5060 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5061 if (!PL_in_my_stash) {
5064 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5072 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5079 s = tokenize_use(0, s);
5083 if (*s == '(' || (s = skipspace(s), *s == '('))
5090 if (isIDFIRST_lazy_if(s,UTF)) {
5092 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5093 for (t=d; *t && isSPACE(*t); t++) ;
5094 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5096 && !(t[0] == '=' && t[1] == '>')
5098 int len = (int)(d-s);
5099 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5100 "Precedence problem: open %.*s should be open(%.*s)",
5107 yylval.ival = OP_OR;
5117 LOP(OP_OPEN_DIR,XTERM);
5120 checkcomma(s,PL_tokenbuf,"filehandle");
5124 checkcomma(s,PL_tokenbuf,"filehandle");
5143 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5147 LOP(OP_PIPE_OP,XTERM);
5150 s = scan_str(s,FALSE,FALSE);
5152 missingterm((char*)0);
5153 yylval.ival = OP_CONST;
5154 TERM(sublex_start());
5160 s = scan_str(s,FALSE,FALSE);
5162 missingterm((char*)0);
5163 PL_expect = XOPERATOR;
5165 if (SvCUR(PL_lex_stuff)) {
5168 d = SvPV_force(PL_lex_stuff, len);
5171 for (; isSPACE(*d) && len; --len, ++d) ;
5174 if (!warned && ckWARN(WARN_QW)) {
5175 for (; !isSPACE(*d) && len; --len, ++d) {
5177 Perl_warner(aTHX_ packWARN(WARN_QW),
5178 "Possible attempt to separate words with commas");
5181 else if (*d == '#') {
5182 Perl_warner(aTHX_ packWARN(WARN_QW),
5183 "Possible attempt to put comments in qw() list");
5189 for (; !isSPACE(*d) && len; --len, ++d) ;
5191 sv = newSVpvn(b, d-b);
5192 if (DO_UTF8(PL_lex_stuff))
5194 words = append_elem(OP_LIST, words,
5195 newSVOP(OP_CONST, 0, tokeq(sv)));
5199 PL_nextval[PL_nexttoke].opval = words;
5204 SvREFCNT_dec(PL_lex_stuff);
5205 PL_lex_stuff = Nullsv;
5211 s = scan_str(s,FALSE,FALSE);
5213 missingterm((char*)0);
5214 yylval.ival = OP_STRINGIFY;
5215 if (SvIVX(PL_lex_stuff) == '\'')
5216 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5217 TERM(sublex_start());
5220 s = scan_pat(s,OP_QR);
5221 TERM(sublex_start());
5224 s = scan_str(s,FALSE,FALSE);
5226 missingterm((char*)0);
5227 yylval.ival = OP_BACKTICK;
5229 TERM(sublex_start());
5237 s = force_version(s, FALSE);
5239 else if (*s != 'v' || !isDIGIT(s[1])
5240 || (s = force_version(s, TRUE), *s == 'v'))
5242 *PL_tokenbuf = '\0';
5243 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5244 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5245 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5247 yyerror("<> should be quotes");
5249 if (orig_keyword == KEY_require) {
5257 PL_last_uni = PL_oldbufptr;
5258 PL_last_lop_op = OP_REQUIRE;
5260 return REPORT( (int)REQUIRE );
5266 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5270 LOP(OP_RENAME,XTERM);
5279 LOP(OP_RINDEX,XTERM);
5289 UNIDOR(OP_READLINE);
5302 LOP(OP_REVERSE,XTERM);
5305 UNIDOR(OP_READLINK);
5313 TERM(sublex_start());
5315 TOKEN(1); /* force error */
5318 checkcomma(s,PL_tokenbuf,"filehandle");
5328 LOP(OP_SELECT,XTERM);
5334 LOP(OP_SEMCTL,XTERM);
5337 LOP(OP_SEMGET,XTERM);
5340 LOP(OP_SEMOP,XTERM);
5346 LOP(OP_SETPGRP,XTERM);
5348 case KEY_setpriority:
5349 LOP(OP_SETPRIORITY,XTERM);
5351 case KEY_sethostent:
5357 case KEY_setservent:
5360 case KEY_setprotoent:
5370 LOP(OP_SEEKDIR,XTERM);
5372 case KEY_setsockopt:
5373 LOP(OP_SSOCKOPT,XTERM);
5379 LOP(OP_SHMCTL,XTERM);
5382 LOP(OP_SHMGET,XTERM);
5385 LOP(OP_SHMREAD,XTERM);
5388 LOP(OP_SHMWRITE,XTERM);
5391 LOP(OP_SHUTDOWN,XTERM);
5400 LOP(OP_SOCKET,XTERM);
5402 case KEY_socketpair:
5403 LOP(OP_SOCKPAIR,XTERM);
5406 checkcomma(s,PL_tokenbuf,"subroutine name");
5408 if (*s == ';' || *s == ')') /* probably a close */
5409 Perl_croak(aTHX_ "sort is now a reserved word");
5411 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5415 LOP(OP_SPLIT,XTERM);
5418 LOP(OP_SPRINTF,XTERM);
5421 LOP(OP_SPLICE,XTERM);
5436 LOP(OP_SUBSTR,XTERM);
5442 char tmpbuf[sizeof PL_tokenbuf];
5443 SSize_t tboffset = 0;
5444 expectation attrful;
5445 bool have_name, have_proto, bad_proto;
5446 const int key = tmp;
5450 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5451 (*s == ':' && s[1] == ':'))
5454 attrful = XATTRBLOCK;
5455 /* remember buffer pos'n for later force_word */
5456 tboffset = s - PL_oldbufptr;
5457 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458 if (strchr(tmpbuf, ':'))
5459 sv_setpv(PL_subname, tmpbuf);
5461 sv_setsv(PL_subname,PL_curstname);
5462 sv_catpvs(PL_subname,"::");
5463 sv_catpvn(PL_subname,tmpbuf,len);
5470 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5471 PL_expect = XTERMBLOCK;
5472 attrful = XATTRTERM;
5473 sv_setpvn(PL_subname,"?",1);
5477 if (key == KEY_format) {
5479 PL_lex_formbrack = PL_lex_brackets + 1;
5481 (void) force_word(PL_oldbufptr + tboffset, WORD,
5486 /* Look for a prototype */
5490 s = scan_str(s,FALSE,FALSE);
5492 Perl_croak(aTHX_ "Prototype not terminated");
5493 /* strip spaces and check for bad characters */
5494 d = SvPVX(PL_lex_stuff);
5497 for (p = d; *p; ++p) {
5500 if (!strchr("$@%*;[]&\\", *p))
5505 if (bad_proto && ckWARN(WARN_SYNTAX))
5506 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5507 "Illegal character in prototype for %"SVf" : %s",
5509 SvCUR_set(PL_lex_stuff, tmp);
5517 if (*s == ':' && s[1] != ':')
5518 PL_expect = attrful;
5519 else if (*s != '{' && key == KEY_sub) {
5521 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5523 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5527 PL_nextval[PL_nexttoke].opval =
5528 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5529 PL_lex_stuff = Nullsv;
5533 sv_setpv(PL_subname,
5534 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5537 (void) force_word(PL_oldbufptr + tboffset, WORD,
5546 LOP(OP_SYSTEM,XREF);
5549 LOP(OP_SYMLINK,XTERM);
5552 LOP(OP_SYSCALL,XTERM);
5555 LOP(OP_SYSOPEN,XTERM);
5558 LOP(OP_SYSSEEK,XTERM);
5561 LOP(OP_SYSREAD,XTERM);
5564 LOP(OP_SYSWRITE,XTERM);
5568 TERM(sublex_start());
5589 LOP(OP_TRUNCATE,XTERM);
5601 yylval.ival = CopLINE(PL_curcop);
5605 yylval.ival = CopLINE(PL_curcop);
5609 LOP(OP_UNLINK,XTERM);
5615 LOP(OP_UNPACK,XTERM);
5618 LOP(OP_UTIME,XTERM);
5624 LOP(OP_UNSHIFT,XTERM);
5627 s = tokenize_use(1, s);
5637 yylval.ival = CopLINE(PL_curcop);
5641 yylval.ival = CopLINE(PL_curcop);
5645 PL_hints |= HINT_BLOCK_SCOPE;
5652 LOP(OP_WAITPID,XTERM);
5661 ctl_l[0] = toCTRL('L');
5663 gv_fetchpv(ctl_l, GV_ADD, SVt_PV);
5666 gv_fetchpv("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5671 if (PL_expect == XOPERATOR)
5677 yylval.ival = OP_XOR;
5682 TERM(sublex_start());
5687 #pragma segment Main
5691 S_pending_ident(pTHX)
5695 register I32 tmp = 0;
5696 /* pit holds the identifier we read and pending_ident is reset */
5697 char pit = PL_pending_ident;
5698 PL_pending_ident = 0;
5700 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5701 "### Pending identifier '%s'\n", PL_tokenbuf); });
5703 /* if we're in a my(), we can't allow dynamics here.
5704 $foo'bar has already been turned into $foo::bar, so
5705 just check for colons.
5707 if it's a legal name, the OP is a PADANY.
5710 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5711 if (strchr(PL_tokenbuf,':'))
5712 yyerror(Perl_form(aTHX_ "No package name allowed for "
5713 "variable %s in \"our\"",
5715 tmp = allocmy(PL_tokenbuf);
5718 if (strchr(PL_tokenbuf,':'))
5719 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5721 yylval.opval = newOP(OP_PADANY, 0);
5722 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5728 build the ops for accesses to a my() variable.
5730 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5731 then used in a comparison. This catches most, but not
5732 all cases. For instance, it catches
5733 sort { my($a); $a <=> $b }
5735 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5736 (although why you'd do that is anyone's guess).
5739 if (!strchr(PL_tokenbuf,':')) {
5741 tmp = pad_findmy(PL_tokenbuf);
5742 if (tmp != NOT_IN_PAD) {
5743 /* might be an "our" variable" */
5744 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5745 /* build ops for a bareword */
5746 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5747 HEK * const stashname = HvNAME_HEK(stash);
5748 SV * const sym = newSVhek(stashname);
5749 sv_catpvs(sym, "::");
5750 sv_catpv(sym, PL_tokenbuf+1);
5751 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5752 yylval.opval->op_private = OPpCONST_ENTERED;
5755 ? (GV_ADDMULTI | GV_ADDINEVAL)
5758 ((PL_tokenbuf[0] == '$') ? SVt_PV
5759 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5764 /* if it's a sort block and they're naming $a or $b */
5765 if (PL_last_lop_op == OP_SORT &&
5766 PL_tokenbuf[0] == '$' &&
5767 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5770 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5771 d < PL_bufend && *d != '\n';
5774 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5775 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5781 yylval.opval = newOP(OP_PADANY, 0);
5782 yylval.opval->op_targ = tmp;
5788 Whine if they've said @foo in a doublequoted string,
5789 and @foo isn't a variable we can find in the symbol
5792 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5793 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5794 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5795 && ckWARN(WARN_AMBIGUOUS))
5797 /* Downgraded from fatal to warning 20000522 mjd */
5798 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5799 "Possible unintended interpolation of %s in string",
5804 /* build ops for a bareword */
5805 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5806 yylval.opval->op_private = OPpCONST_ENTERED;
5810 ? (GV_ADDMULTI | GV_ADDINEVAL)
5811 /* If the identifier refers to a stash, don't autovivify it.
5812 * Change 24660 had the side effect of causing symbol table
5813 * hashes to always be defined, even if they were freshly
5814 * created and the only reference in the entire program was
5815 * the single statement with the defined %foo::bar:: test.
5816 * It appears that all code in the wild doing this actually
5817 * wants to know whether sub-packages have been loaded, so
5818 * by avoiding auto-vivifying symbol tables, we ensure that
5819 * defined %foo::bar:: continues to be false, and the existing
5820 * tests still give the expected answers, even though what
5821 * they're actually testing has now changed subtly.
5823 : !(*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'),
5824 ((PL_tokenbuf[0] == '$') ? SVt_PV
5825 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5831 * The following code was generated by perl_keyword.pl.
5835 Perl_keyword (pTHX_ const char *name, I32 len)
5840 case 1: /* 5 tokens of length 1 */
5872 case 2: /* 18 tokens of length 2 */
6018 case 3: /* 29 tokens of length 3 */
6022 if (name[1] == 'N' &&
6085 if (name[1] == 'i' &&
6107 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6125 if (name[1] == 'o' &&
6134 if (name[1] == 'e' &&
6143 if (name[1] == 'n' &&
6152 if (name[1] == 'o' &&
6161 if (name[1] == 'a' &&
6170 if (name[1] == 'o' &&
6232 if (name[1] == 'e' &&
6246 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6272 if (name[1] == 'i' &&
6281 if (name[1] == 's' &&
6290 if (name[1] == 'e' &&
6299 if (name[1] == 'o' &&
6311 case 4: /* 41 tokens of length 4 */
6315 if (name[1] == 'O' &&
6325 if (name[1] == 'N' &&
6335 if (name[1] == 'i' &&
6345 if (name[1] == 'h' &&
6355 if (name[1] == 'u' &&
6368 if (name[2] == 'c' &&
6377 if (name[2] == 's' &&
6386 if (name[2] == 'a' &&
6422 if (name[1] == 'o' &&
6435 if (name[2] == 't' &&
6444 if (name[2] == 'o' &&
6453 if (name[2] == 't' &&
6462 if (name[2] == 'e' &&
6475 if (name[1] == 'o' &&
6488 if (name[2] == 'y' &&
6497 if (name[2] == 'l' &&
6513 if (name[2] == 's' &&
6522 if (name[2] == 'n' &&
6531 if (name[2] == 'c' &&
6544 if (name[1] == 'e' &&
6554 if (name[1] == 'p' &&
6567 if (name[2] == 'c' &&
6576 if (name[2] == 'p' &&
6585 if (name[2] == 's' &&
6601 if (name[2] == 'n' &&
6671 if (name[2] == 'r' &&
6680 if (name[2] == 'r' &&
6689 if (name[2] == 'a' &&
6705 if (name[2] == 'l' &&
6767 if (name[2] == 'e' &&
6770 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6783 case 5: /* 38 tokens of length 5 */
6787 if (name[1] == 'E' &&
6798 if (name[1] == 'H' &&
6812 if (name[2] == 'a' &&
6822 if (name[2] == 'a' &&
6839 if (name[2] == 'e' &&
6849 if (name[2] == 'e' &&
6853 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6869 if (name[3] == 'i' &&
6878 if (name[3] == 'o' &&
6914 if (name[2] == 'o' &&
6924 if (name[2] == 'y' &&
6938 if (name[1] == 'l' &&
6952 if (name[2] == 'n' &&
6962 if (name[2] == 'o' &&
6976 if (name[1] == 'i' &&
6981 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
6990 if (name[2] == 'd' &&
7000 if (name[2] == 'c' &&
7017 if (name[2] == 'c' &&
7027 if (name[2] == 't' &&
7041 if (name[1] == 'k' &&
7052 if (name[1] == 'r' &&
7066 if (name[2] == 's' &&
7076 if (name[2] == 'd' &&
7093 if (name[2] == 'm' &&
7103 if (name[2] == 'i' &&
7113 if (name[2] == 'e' &&
7123 if (name[2] == 'l' &&
7133 if (name[2] == 'a' &&
7143 if (name[2] == 'u' &&
7157 if (name[1] == 'i' &&
7171 if (name[2] == 'a' &&
7184 if (name[3] == 'e' &&
7219 if (name[2] == 'i' &&
7236 if (name[2] == 'i' &&
7246 if (name[2] == 'i' &&
7263 case 6: /* 33 tokens of length 6 */
7267 if (name[1] == 'c' &&
7282 if (name[2] == 'l' &&
7293 if (name[2] == 'r' &&
7308 if (name[1] == 'e' &&
7323 if (name[2] == 's' &&
7328 if(ckWARN_d(WARN_SYNTAX))
7329 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7335 if (name[2] == 'i' &&
7353 if (name[2] == 'l' &&
7364 if (name[2] == 'r' &&
7379 if (name[1] == 'm' &&
7394 if (name[2] == 'n' &&
7405 if (name[2] == 's' &&
7420 if (name[1] == 's' &&
7426 if (name[4] == 't' &&
7435 if (name[4] == 'e' &&
7444 if (name[4] == 'c' &&
7453 if (name[4] == 'n' &&
7469 if (name[1] == 'r' &&
7487 if (name[3] == 'a' &&
7497 if (name[3] == 'u' &&
7511 if (name[2] == 'n' &&
7529 if (name[2] == 'a' &&
7543 if (name[3] == 'e' &&
7556 if (name[4] == 't' &&
7565 if (name[4] == 'e' &&
7587 if (name[4] == 't' &&
7596 if (name[4] == 'e' &&
7612 if (name[2] == 'c' &&
7623 if (name[2] == 'l' &&
7634 if (name[2] == 'b' &&
7645 if (name[2] == 's' &&
7668 if (name[4] == 's' &&
7677 if (name[4] == 'n' &&
7690 if (name[3] == 'a' &&
7707 if (name[1] == 'a' &&
7722 case 7: /* 29 tokens of length 7 */
7726 if (name[1] == 'E' &&
7739 if (name[1] == '_' &&
7752 if (name[1] == 'i' &&
7759 return -KEY_binmode;
7765 if (name[1] == 'o' &&
7772 return -KEY_connect;
7781 if (name[2] == 'm' &&
7787 return -KEY_dbmopen;
7798 if (name[4] == 'u' &&
7802 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7808 if (name[4] == 'n' &&
7829 if (name[1] == 'o' &&
7842 if (name[1] == 'e' &&
7849 if (name[5] == 'r' &&
7852 return -KEY_getpgrp;
7858 if (name[5] == 'i' &&
7861 return -KEY_getppid;
7874 if (name[1] == 'c' &&
7881 return -KEY_lcfirst;
7887 if (name[1] == 'p' &&
7894 return -KEY_opendir;
7900 if (name[1] == 'a' &&
7918 if (name[3] == 'd' &&
7923 return -KEY_readdir;
7929 if (name[3] == 'u' &&
7940 if (name[3] == 'e' &&
7945 return -KEY_reverse;
7964 if (name[3] == 'k' &&
7969 return -KEY_seekdir;
7975 if (name[3] == 'p' &&
7980 return -KEY_setpgrp;
7990 if (name[2] == 'm' &&
7996 return -KEY_shmread;
8002 if (name[2] == 'r' &&
8008 return -KEY_sprintf;
8017 if (name[3] == 'l' &&
8022 return -KEY_symlink;
8031 if (name[4] == 'a' &&
8035 return -KEY_syscall;
8041 if (name[4] == 'p' &&
8045 return -KEY_sysopen;
8051 if (name[4] == 'e' &&
8055 return -KEY_sysread;
8061 if (name[4] == 'e' &&
8065 return -KEY_sysseek;
8083 if (name[1] == 'e' &&
8090 return -KEY_telldir;
8099 if (name[2] == 'f' &&
8105 return -KEY_ucfirst;
8111 if (name[2] == 's' &&
8117 return -KEY_unshift;
8127 if (name[1] == 'a' &&
8134 return -KEY_waitpid;
8143 case 8: /* 26 tokens of length 8 */
8147 if (name[1] == 'U' &&
8155 return KEY_AUTOLOAD;
8166 if (name[3] == 'A' &&
8172 return KEY___DATA__;
8178 if (name[3] == 'I' &&
8184 return -KEY___FILE__;
8190 if (name[3] == 'I' &&
8196 return -KEY___LINE__;
8212 if (name[2] == 'o' &&
8219 return -KEY_closedir;
8225 if (name[2] == 'n' &&
8232 return -KEY_continue;
8242 if (name[1] == 'b' &&
8250 return -KEY_dbmclose;
8256 if (name[1] == 'n' &&
8262 if (name[4] == 'r' &&
8267 return -KEY_endgrent;
8273 if (name[4] == 'w' &&
8278 return -KEY_endpwent;
8291 if (name[1] == 'o' &&
8299 return -KEY_formline;
8305 if (name[1] == 'e' &&
8316 if (name[6] == 'n' &&
8319 return -KEY_getgrent;
8325 if (name[6] == 'i' &&
8328 return -KEY_getgrgid;
8334 if (name[6] == 'a' &&
8337 return -KEY_getgrnam;
8350 if (name[4] == 'o' &&
8355 return -KEY_getlogin;
8366 if (name[6] == 'n' &&
8369 return -KEY_getpwent;
8375 if (name[6] == 'a' &&
8378 return -KEY_getpwnam;
8384 if (name[6] == 'i' &&
8387 return -KEY_getpwuid;
8407 if (name[1] == 'e' &&
8414 if (name[5] == 'i' &&
8421 return -KEY_readline;
8426 return -KEY_readlink;
8437 if (name[5] == 'i' &&
8441 return -KEY_readpipe;
8462 if (name[4] == 'r' &&
8467 return -KEY_setgrent;
8473 if (name[4] == 'w' &&
8478 return -KEY_setpwent;
8494 if (name[3] == 'w' &&
8500 return -KEY_shmwrite;
8506 if (name[3] == 't' &&
8512 return -KEY_shutdown;
8522 if (name[2] == 's' &&
8529 return -KEY_syswrite;
8539 if (name[1] == 'r' &&
8547 return -KEY_truncate;
8556 case 9: /* 8 tokens of length 9 */
8560 if (name[1] == 'n' &&
8569 return -KEY_endnetent;
8575 if (name[1] == 'e' &&
8584 return -KEY_getnetent;
8590 if (name[1] == 'o' &&
8599 return -KEY_localtime;
8605 if (name[1] == 'r' &&
8614 return KEY_prototype;
8620 if (name[1] == 'u' &&
8629 return -KEY_quotemeta;
8635 if (name[1] == 'e' &&
8644 return -KEY_rewinddir;
8650 if (name[1] == 'e' &&
8659 return -KEY_setnetent;
8665 if (name[1] == 'a' &&
8674 return -KEY_wantarray;
8683 case 10: /* 9 tokens of length 10 */
8687 if (name[1] == 'n' &&
8693 if (name[4] == 'o' &&
8700 return -KEY_endhostent;
8706 if (name[4] == 'e' &&
8713 return -KEY_endservent;
8726 if (name[1] == 'e' &&
8732 if (name[4] == 'o' &&
8739 return -KEY_gethostent;
8748 if (name[5] == 'r' &&
8754 return -KEY_getservent;
8760 if (name[5] == 'c' &&
8766 return -KEY_getsockopt;
8791 if (name[4] == 'o' &&
8798 return -KEY_sethostent;
8807 if (name[5] == 'r' &&
8813 return -KEY_setservent;
8819 if (name[5] == 'c' &&
8825 return -KEY_setsockopt;
8842 if (name[2] == 'c' &&
8851 return -KEY_socketpair;
8864 case 11: /* 8 tokens of length 11 */
8868 if (name[1] == '_' &&
8879 return -KEY___PACKAGE__;
8885 if (name[1] == 'n' &&
8896 return -KEY_endprotoent;
8902 if (name[1] == 'e' &&
8911 if (name[5] == 'e' &&
8918 return -KEY_getpeername;
8927 if (name[6] == 'o' &&
8933 return -KEY_getpriority;
8939 if (name[6] == 't' &&
8945 return -KEY_getprotoent;
8959 if (name[4] == 'o' &&
8967 return -KEY_getsockname;
8980 if (name[1] == 'e' &&
8988 if (name[6] == 'o' &&
8994 return -KEY_setpriority;
9000 if (name[6] == 't' &&
9006 return -KEY_setprotoent;
9022 case 12: /* 2 tokens of length 12 */
9023 if (name[0] == 'g' &&
9035 if (name[9] == 'd' &&
9038 { /* getnetbyaddr */
9039 return -KEY_getnetbyaddr;
9045 if (name[9] == 'a' &&
9048 { /* getnetbyname */
9049 return -KEY_getnetbyname;
9061 case 13: /* 4 tokens of length 13 */
9062 if (name[0] == 'g' &&
9069 if (name[4] == 'o' &&
9078 if (name[10] == 'd' &&
9081 { /* gethostbyaddr */
9082 return -KEY_gethostbyaddr;
9088 if (name[10] == 'a' &&
9091 { /* gethostbyname */
9092 return -KEY_gethostbyname;
9105 if (name[4] == 'e' &&
9114 if (name[10] == 'a' &&
9117 { /* getservbyname */
9118 return -KEY_getservbyname;
9124 if (name[10] == 'o' &&
9127 { /* getservbyport */
9128 return -KEY_getservbyport;
9147 case 14: /* 1 tokens of length 14 */
9148 if (name[0] == 'g' &&
9162 { /* getprotobyname */
9163 return -KEY_getprotobyname;
9168 case 16: /* 1 tokens of length 16 */
9169 if (name[0] == 'g' &&
9185 { /* getprotobynumber */
9186 return -KEY_getprotobynumber;
9200 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9205 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9206 if (ckWARN(WARN_SYNTAX)) {
9208 for (w = s+2; *w && level; w++) {
9215 for (; *w && isSPACE(*w); w++) ;
9216 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9217 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9218 "%s (...) interpreted as function",name);
9221 while (s < PL_bufend && isSPACE(*s))
9225 while (s < PL_bufend && isSPACE(*s))
9227 if (isIDFIRST_lazy_if(s,UTF)) {
9229 while (isALNUM_lazy_if(s,UTF))
9231 while (s < PL_bufend && isSPACE(*s))
9235 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9236 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9240 Perl_croak(aTHX_ "No comma allowed after %s", what);
9245 /* Either returns sv, or mortalizes sv and returns a new SV*.
9246 Best used as sv=new_constant(..., sv, ...).
9247 If s, pv are NULL, calls subroutine with one argument,
9248 and type is used with error messages only. */
9251 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9255 HV * const table = GvHV(PL_hintgv); /* ^H */
9259 const char *why1 = "", *why2 = "", *why3 = "";
9261 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9264 why2 = strEQ(key,"charnames")
9265 ? "(possibly a missing \"use charnames ...\")"
9267 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9268 (type ? type: "undef"), why2);
9270 /* This is convoluted and evil ("goto considered harmful")
9271 * but I do not understand the intricacies of all the different
9272 * failure modes of %^H in here. The goal here is to make
9273 * the most probable error message user-friendly. --jhi */
9278 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9279 (type ? type: "undef"), why1, why2, why3);
9281 yyerror(SvPVX_const(msg));
9285 cvp = hv_fetch(table, key, strlen(key), FALSE);
9286 if (!cvp || !SvOK(*cvp)) {
9289 why3 = "} is not defined";
9292 sv_2mortal(sv); /* Parent created it permanently */
9295 pv = sv_2mortal(newSVpvn(s, len));
9297 typesv = sv_2mortal(newSVpv(type, 0));
9299 typesv = &PL_sv_undef;
9301 PUSHSTACKi(PERLSI_OVERLOAD);
9313 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9317 /* Check the eval first */
9318 if (!PL_in_eval && SvTRUE(ERRSV)) {
9319 sv_catpvs(ERRSV, "Propagated");
9320 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9322 res = SvREFCNT_inc(sv);
9326 (void)SvREFCNT_inc(res);
9335 why1 = "Call to &{$^H{";
9337 why3 = "}} did not return a defined value";
9345 /* Returns a NUL terminated string, with the length of the string written to
9349 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9352 register char *d = dest;
9353 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9356 Perl_croak(aTHX_ ident_too_long);
9357 if (isALNUM(*s)) /* UTF handled below */
9359 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9364 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9368 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9369 char *t = s + UTF8SKIP(s);
9370 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9372 if (d + (t - s) > e)
9373 Perl_croak(aTHX_ ident_too_long);
9374 Copy(s, d, t - s, char);
9387 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9390 char *bracket = NULL;
9392 register char *d = dest;
9393 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
9398 while (isDIGIT(*s)) {
9400 Perl_croak(aTHX_ ident_too_long);
9407 Perl_croak(aTHX_ ident_too_long);
9408 if (isALNUM(*s)) /* UTF handled below */
9410 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9415 else if (*s == ':' && s[1] == ':') {
9419 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9420 char *t = s + UTF8SKIP(s);
9421 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9423 if (d + (t - s) > e)
9424 Perl_croak(aTHX_ ident_too_long);
9425 Copy(s, d, t - s, char);
9436 if (PL_lex_state != LEX_NORMAL)
9437 PL_lex_state = LEX_INTERPENDMAYBE;
9440 if (*s == '$' && s[1] &&
9441 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9454 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9459 if (isSPACE(s[-1])) {
9461 const char ch = *s++;
9462 if (!SPACE_OR_TAB(ch)) {
9468 if (isIDFIRST_lazy_if(d,UTF)) {
9472 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9473 end += UTF8SKIP(end);
9474 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9475 end += UTF8SKIP(end);
9477 Copy(s, d, end - s, char);
9482 while ((isALNUM(*s) || *s == ':') && d < e)
9485 Perl_croak(aTHX_ ident_too_long);
9488 while (s < send && SPACE_OR_TAB(*s)) s++;
9489 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9490 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9491 const char *brack = *s == '[' ? "[...]" : "{...}";
9492 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9493 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9494 funny, dest, brack, funny, dest, brack);
9497 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9501 /* Handle extended ${^Foo} variables
9502 * 1999-02-27 mjd-perl-patch@plover.com */
9503 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9507 while (isALNUM(*s) && d < e) {
9511 Perl_croak(aTHX_ ident_too_long);
9516 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9517 PL_lex_state = LEX_INTERPEND;
9522 if (PL_lex_state == LEX_NORMAL) {
9523 if (ckWARN(WARN_AMBIGUOUS) &&
9524 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9526 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9527 "Ambiguous use of %c{%s} resolved to %c%s",
9528 funny, dest, funny, dest);
9533 s = bracket; /* let the parser handle it */
9537 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9538 PL_lex_state = LEX_INTERPEND;
9543 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9548 *pmfl |= PMf_GLOBAL;
9550 *pmfl |= PMf_CONTINUE;
9554 *pmfl |= PMf_MULTILINE;
9556 *pmfl |= PMf_SINGLELINE;
9558 *pmfl |= PMf_EXTENDED;
9562 S_scan_pat(pTHX_ char *start, I32 type)
9566 char *s = scan_str(start,FALSE,FALSE);
9567 const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
9570 const char * const delimiter = skipspace(start);
9571 Perl_croak(aTHX_ *delimiter == '?'
9572 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9573 : "Search pattern not terminated" );
9576 pm = (PMOP*)newPMOP(type, 0);
9577 if (PL_multi_open == '?')
9578 pm->op_pmflags |= PMf_ONCE;
9579 while (*s && strchr(valid_flags, *s))
9580 pmflag(&pm->op_pmflags,*s++);
9581 /* issue a warning if /c is specified,but /g is not */
9582 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9583 && ckWARN(WARN_REGEXP))
9585 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9588 pm->op_pmpermflags = pm->op_pmflags;
9590 PL_lex_op = (OP*)pm;
9591 yylval.ival = OP_MATCH;
9596 S_scan_subst(pTHX_ char *start)
9604 yylval.ival = OP_NULL;
9606 s = scan_str(start,FALSE,FALSE);
9609 Perl_croak(aTHX_ "Substitution pattern not terminated");
9611 if (s[-1] == PL_multi_open)
9614 first_start = PL_multi_start;
9615 s = scan_str(s,FALSE,FALSE);
9618 SvREFCNT_dec(PL_lex_stuff);
9619 PL_lex_stuff = Nullsv;
9621 Perl_croak(aTHX_ "Substitution replacement not terminated");
9623 PL_multi_start = first_start; /* so whole substitution is taken together */
9625 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9631 else if (strchr("iogcmsx", *s))
9632 pmflag(&pm->op_pmflags,*s++);
9637 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9638 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9642 SV * const repl = newSVpvs("");
9644 PL_sublex_info.super_bufptr = s;
9645 PL_sublex_info.super_bufend = PL_bufend;
9647 pm->op_pmflags |= PMf_EVAL;
9649 sv_catpv(repl, es ? "eval " : "do ");
9650 sv_catpvs(repl, "{ ");
9651 sv_catsv(repl, PL_lex_repl);
9652 sv_catpvs(repl, " }");
9654 SvREFCNT_dec(PL_lex_repl);
9658 pm->op_pmpermflags = pm->op_pmflags;
9659 PL_lex_op = (OP*)pm;
9660 yylval.ival = OP_SUBST;
9665 S_scan_trans(pTHX_ char *start)
9675 yylval.ival = OP_NULL;
9677 s = scan_str(start,FALSE,FALSE);
9679 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9680 if (s[-1] == PL_multi_open)
9683 s = scan_str(s,FALSE,FALSE);
9686 SvREFCNT_dec(PL_lex_stuff);
9687 PL_lex_stuff = Nullsv;
9689 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9692 complement = del = squash = 0;
9696 complement = OPpTRANS_COMPLEMENT;
9699 del = OPpTRANS_DELETE;
9702 squash = OPpTRANS_SQUASH;
9711 Newx(tbl, complement&&!del?258:256, short);
9712 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9713 o->op_private &= ~OPpTRANS_ALL;
9714 o->op_private |= del|squash|complement|
9715 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9716 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9719 yylval.ival = OP_TRANS;
9724 S_scan_heredoc(pTHX_ register char *s)
9728 I32 op_type = OP_SCALAR;
9732 const char *found_newline;
9736 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9740 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9743 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9744 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9747 s = delimcpy(d, e, s, PL_bufend, term, &len);
9757 if (!isALNUM_lazy_if(s,UTF))
9758 deprecate_old("bare << to mean <<\"\"");
9759 for (; isALNUM_lazy_if(s,UTF); s++) {
9764 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9765 Perl_croak(aTHX_ "Delimiter for here document is too long");
9768 len = d - PL_tokenbuf;
9769 #ifndef PERL_STRICT_CR
9770 d = strchr(s, '\r');
9772 char * const olds = s;
9774 while (s < PL_bufend) {
9780 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9789 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9793 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9794 herewas = newSVpvn(s,PL_bufend-s);
9798 herewas = newSVpvn(s,found_newline-s);
9800 s += SvCUR(herewas);
9802 tmpstr = NEWSV(87,79);
9803 sv_upgrade(tmpstr, SVt_PVIV);
9806 SvIV_set(tmpstr, -1);
9808 else if (term == '`') {
9809 op_type = OP_BACKTICK;
9810 SvIV_set(tmpstr, '\\');
9814 PL_multi_start = CopLINE(PL_curcop);
9815 PL_multi_open = PL_multi_close = '<';
9816 term = *PL_tokenbuf;
9817 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9818 char * const bufptr = PL_sublex_info.super_bufptr;
9819 char * const bufend = PL_sublex_info.super_bufend;
9820 char * const olds = s - SvCUR(herewas);
9821 s = strchr(bufptr, '\n');
9825 while (s < bufend &&
9826 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9828 CopLINE_inc(PL_curcop);
9831 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9832 missingterm(PL_tokenbuf);
9834 sv_setpvn(herewas,bufptr,d-bufptr+1);
9835 sv_setpvn(tmpstr,d+1,s-d);
9837 sv_catpvn(herewas,s,bufend-s);
9838 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9845 while (s < PL_bufend &&
9846 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9848 CopLINE_inc(PL_curcop);
9850 if (s >= PL_bufend) {
9851 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9852 missingterm(PL_tokenbuf);
9854 sv_setpvn(tmpstr,d+1,s-d);
9856 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9858 sv_catpvn(herewas,s,PL_bufend-s);
9859 sv_setsv(PL_linestr,herewas);
9860 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9861 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9862 PL_last_lop = PL_last_uni = Nullch;
9865 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9866 while (s >= PL_bufend) { /* multiple line string? */
9868 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9869 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9870 missingterm(PL_tokenbuf);
9872 CopLINE_inc(PL_curcop);
9873 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9874 PL_last_lop = PL_last_uni = Nullch;
9875 #ifndef PERL_STRICT_CR
9876 if (PL_bufend - PL_linestart >= 2) {
9877 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9878 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9880 PL_bufend[-2] = '\n';
9882 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9884 else if (PL_bufend[-1] == '\r')
9885 PL_bufend[-1] = '\n';
9887 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9888 PL_bufend[-1] = '\n';
9890 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9891 SV * const sv = NEWSV(88,0);
9893 sv_upgrade(sv, SVt_PVMG);
9894 sv_setsv(sv,PL_linestr);
9897 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9899 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9900 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9901 *(SvPVX(PL_linestr) + off ) = ' ';
9902 sv_catsv(PL_linestr,herewas);
9903 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9904 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9908 sv_catsv(tmpstr,PL_linestr);
9913 PL_multi_end = CopLINE(PL_curcop);
9914 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9915 SvPV_shrink_to_cur(tmpstr);
9917 SvREFCNT_dec(herewas);
9919 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9921 else if (PL_encoding)
9922 sv_recode_to_utf8(tmpstr, PL_encoding);
9924 PL_lex_stuff = tmpstr;
9925 yylval.ival = op_type;
9930 takes: current position in input buffer
9931 returns: new position in input buffer
9932 side-effects: yylval and lex_op are set.
9937 <FH> read from filehandle
9938 <pkg::FH> read from package qualified filehandle
9939 <pkg'FH> read from package qualified filehandle
9940 <$fh> read from filehandle in $fh
9946 S_scan_inputsymbol(pTHX_ char *start)
9949 register char *s = start; /* current position in buffer */
9953 char *d = PL_tokenbuf; /* start of temp holding space */
9954 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9956 end = strchr(s, '\n');
9959 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9961 /* die if we didn't have space for the contents of the <>,
9962 or if it didn't end, or if we see a newline
9965 if (len >= sizeof PL_tokenbuf)
9966 Perl_croak(aTHX_ "Excessively long <> operator");
9968 Perl_croak(aTHX_ "Unterminated <> operator");
9973 Remember, only scalar variables are interpreted as filehandles by
9974 this code. Anything more complex (e.g., <$fh{$num}>) will be
9975 treated as a glob() call.
9976 This code makes use of the fact that except for the $ at the front,
9977 a scalar variable and a filehandle look the same.
9979 if (*d == '$' && d[1]) d++;
9981 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9982 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9985 /* If we've tried to read what we allow filehandles to look like, and
9986 there's still text left, then it must be a glob() and not a getline.
9987 Use scan_str to pull out the stuff between the <> and treat it
9988 as nothing more than a string.
9991 if (d - PL_tokenbuf != len) {
9992 yylval.ival = OP_GLOB;
9994 s = scan_str(start,FALSE,FALSE);
9996 Perl_croak(aTHX_ "Glob not terminated");
10000 bool readline_overriden = FALSE;
10003 /* we're in a filehandle read situation */
10006 /* turn <> into <ARGV> */
10008 Copy("ARGV",d,5,char);
10010 /* Check whether readline() is overriden */
10011 gv_readline = gv_fetchpv("readline", 0, SVt_PVCV);
10013 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10015 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
10016 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
10017 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10018 readline_overriden = TRUE;
10020 /* if <$fh>, create the ops to turn the variable into a
10026 /* try to find it in the pad for this block, otherwise find
10027 add symbol table ops
10029 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10030 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
10031 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10032 HEK * const stashname = HvNAME_HEK(stash);
10033 SV * const sym = sv_2mortal(newSVhek(stashname));
10034 sv_catpvs(sym, "::");
10035 sv_catpv(sym, d+1);
10040 OP * const o = newOP(OP_PADSV, 0);
10042 PL_lex_op = readline_overriden
10043 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10044 append_elem(OP_LIST, o,
10045 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10046 : (OP*)newUNOP(OP_READLINE, 0, o);
10055 ? (GV_ADDMULTI | GV_ADDINEVAL)
10058 PL_lex_op = readline_overriden
10059 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10060 append_elem(OP_LIST,
10061 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10062 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10063 : (OP*)newUNOP(OP_READLINE, 0,
10064 newUNOP(OP_RV2SV, 0,
10065 newGVOP(OP_GV, 0, gv)));
10067 if (!readline_overriden)
10068 PL_lex_op->op_flags |= OPf_SPECIAL;
10069 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10070 yylval.ival = OP_NULL;
10073 /* If it's none of the above, it must be a literal filehandle
10074 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10076 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10077 PL_lex_op = readline_overriden
10078 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10079 append_elem(OP_LIST,
10080 newGVOP(OP_GV, 0, gv),
10081 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10082 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10083 yylval.ival = OP_NULL;
10092 takes: start position in buffer
10093 keep_quoted preserve \ on the embedded delimiter(s)
10094 keep_delims preserve the delimiters around the string
10095 returns: position to continue reading from buffer
10096 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10097 updates the read buffer.
10099 This subroutine pulls a string out of the input. It is called for:
10100 q single quotes q(literal text)
10101 ' single quotes 'literal text'
10102 qq double quotes qq(interpolate $here please)
10103 " double quotes "interpolate $here please"
10104 qx backticks qx(/bin/ls -l)
10105 ` backticks `/bin/ls -l`
10106 qw quote words @EXPORT_OK = qw( func() $spam )
10107 m// regexp match m/this/
10108 s/// regexp substitute s/this/that/
10109 tr/// string transliterate tr/this/that/
10110 y/// string transliterate y/this/that/
10111 ($*@) sub prototypes sub foo ($)
10112 (stuff) sub attr parameters sub foo : attr(stuff)
10113 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10115 In most of these cases (all but <>, patterns and transliterate)
10116 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10117 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10118 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10121 It skips whitespace before the string starts, and treats the first
10122 character as the delimiter. If the delimiter is one of ([{< then
10123 the corresponding "close" character )]}> is used as the closing
10124 delimiter. It allows quoting of delimiters, and if the string has
10125 balanced delimiters ([{<>}]) it allows nesting.
10127 On success, the SV with the resulting string is put into lex_stuff or,
10128 if that is already non-NULL, into lex_repl. The second case occurs only
10129 when parsing the RHS of the special constructs s/// and tr/// (y///).
10130 For convenience, the terminating delimiter character is stuffed into
10135 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10138 SV *sv; /* scalar value: string */
10139 char *tmps; /* temp string, used for delimiter matching */
10140 register char *s = start; /* current position in the buffer */
10141 register char term; /* terminating character */
10142 register char *to; /* current position in the sv's data */
10143 I32 brackets = 1; /* bracket nesting level */
10144 bool has_utf8 = FALSE; /* is there any utf8 content? */
10145 I32 termcode; /* terminating char. code */
10146 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10147 STRLEN termlen; /* length of terminating string */
10148 char *last = NULL; /* last position for nesting bracket */
10150 /* skip space before the delimiter */
10154 /* mark where we are, in case we need to report errors */
10157 /* after skipping whitespace, the next character is the terminator */
10160 termcode = termstr[0] = term;
10164 termcode = utf8_to_uvchr((U8*)s, &termlen);
10165 Copy(s, termstr, termlen, U8);
10166 if (!UTF8_IS_INVARIANT(term))
10170 /* mark where we are */
10171 PL_multi_start = CopLINE(PL_curcop);
10172 PL_multi_open = term;
10174 /* find corresponding closing delimiter */
10175 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10176 termcode = termstr[0] = term = tmps[5];
10178 PL_multi_close = term;
10180 /* create a new SV to hold the contents. 87 is leak category, I'm
10181 assuming. 79 is the SV's initial length. What a random number. */
10183 sv_upgrade(sv, SVt_PVIV);
10184 SvIV_set(sv, termcode);
10185 (void)SvPOK_only(sv); /* validate pointer */
10187 /* move past delimiter and try to read a complete string */
10189 sv_catpvn(sv, s, termlen);
10192 if (PL_encoding && !UTF) {
10196 int offset = s - SvPVX_const(PL_linestr);
10197 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10198 &offset, (char*)termstr, termlen);
10199 const char * const ns = SvPVX_const(PL_linestr) + offset;
10200 char * const svlast = SvEND(sv) - 1;
10202 for (; s < ns; s++) {
10203 if (*s == '\n' && !PL_rsfp)
10204 CopLINE_inc(PL_curcop);
10207 goto read_more_line;
10209 /* handle quoted delimiters */
10210 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10212 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10214 if ((svlast-1 - t) % 2) {
10215 if (!keep_quoted) {
10216 *(svlast-1) = term;
10218 SvCUR_set(sv, SvCUR(sv) - 1);
10223 if (PL_multi_open == PL_multi_close) {
10231 for (t = w = last; t < svlast; w++, t++) {
10232 /* At here, all closes are "was quoted" one,
10233 so we don't check PL_multi_close. */
10235 if (!keep_quoted && *(t+1) == PL_multi_open)
10240 else if (*t == PL_multi_open)
10248 SvCUR_set(sv, w - SvPVX_const(sv));
10251 if (--brackets <= 0)
10256 if (!keep_delims) {
10257 SvCUR_set(sv, SvCUR(sv) - 1);
10263 /* extend sv if need be */
10264 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10265 /* set 'to' to the next character in the sv's string */
10266 to = SvPVX(sv)+SvCUR(sv);
10268 /* if open delimiter is the close delimiter read unbridle */
10269 if (PL_multi_open == PL_multi_close) {
10270 for (; s < PL_bufend; s++,to++) {
10271 /* embedded newlines increment the current line number */
10272 if (*s == '\n' && !PL_rsfp)
10273 CopLINE_inc(PL_curcop);
10274 /* handle quoted delimiters */
10275 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10276 if (!keep_quoted && s[1] == term)
10278 /* any other quotes are simply copied straight through */
10282 /* terminate when run out of buffer (the for() condition), or
10283 have found the terminator */
10284 else if (*s == term) {
10287 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10290 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10296 /* if the terminator isn't the same as the start character (e.g.,
10297 matched brackets), we have to allow more in the quoting, and
10298 be prepared for nested brackets.
10301 /* read until we run out of string, or we find the terminator */
10302 for (; s < PL_bufend; s++,to++) {
10303 /* embedded newlines increment the line count */
10304 if (*s == '\n' && !PL_rsfp)
10305 CopLINE_inc(PL_curcop);
10306 /* backslashes can escape the open or closing characters */
10307 if (*s == '\\' && s+1 < PL_bufend) {
10308 if (!keep_quoted &&
10309 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10314 /* allow nested opens and closes */
10315 else if (*s == PL_multi_close && --brackets <= 0)
10317 else if (*s == PL_multi_open)
10319 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10324 /* terminate the copied string and update the sv's end-of-string */
10326 SvCUR_set(sv, to - SvPVX_const(sv));
10329 * this next chunk reads more into the buffer if we're not done yet
10333 break; /* handle case where we are done yet :-) */
10335 #ifndef PERL_STRICT_CR
10336 if (to - SvPVX_const(sv) >= 2) {
10337 if ((to[-2] == '\r' && to[-1] == '\n') ||
10338 (to[-2] == '\n' && to[-1] == '\r'))
10342 SvCUR_set(sv, to - SvPVX_const(sv));
10344 else if (to[-1] == '\r')
10347 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10352 /* if we're out of file, or a read fails, bail and reset the current
10353 line marker so we can report where the unterminated string began
10356 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10358 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10361 /* we read a line, so increment our line counter */
10362 CopLINE_inc(PL_curcop);
10364 /* update debugger info */
10365 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10366 SV * const sv = NEWSV(88,0);
10368 sv_upgrade(sv, SVt_PVMG);
10369 sv_setsv(sv,PL_linestr);
10370 (void)SvIOK_on(sv);
10372 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10375 /* having changed the buffer, we must update PL_bufend */
10376 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10377 PL_last_lop = PL_last_uni = Nullch;
10380 /* at this point, we have successfully read the delimited string */
10382 if (!PL_encoding || UTF) {
10384 sv_catpvn(sv, s, termlen);
10387 if (has_utf8 || PL_encoding)
10390 PL_multi_end = CopLINE(PL_curcop);
10392 /* if we allocated too much space, give some back */
10393 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10394 SvLEN_set(sv, SvCUR(sv) + 1);
10395 SvPV_renew(sv, SvLEN(sv));
10398 /* decide whether this is the first or second quoted string we've read
10411 takes: pointer to position in buffer
10412 returns: pointer to new position in buffer
10413 side-effects: builds ops for the constant in yylval.op
10415 Read a number in any of the formats that Perl accepts:
10417 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10418 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10421 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10423 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10426 If it reads a number without a decimal point or an exponent, it will
10427 try converting the number to an integer and see if it can do so
10428 without loss of precision.
10432 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10435 register const char *s = start; /* current position in buffer */
10436 register char *d; /* destination in temp buffer */
10437 register char *e; /* end of temp buffer */
10438 NV nv; /* number read, as a double */
10439 SV *sv = Nullsv; /* place to put the converted number */
10440 bool floatit; /* boolean: int or float? */
10441 const char *lastub = NULL; /* position of last underbar */
10442 static char const number_too_long[] = "Number too long";
10444 /* We use the first character to decide what type of number this is */
10448 Perl_croak(aTHX_ "panic: scan_num");
10450 /* if it starts with a 0, it could be an octal number, a decimal in
10451 0.13 disguise, or a hexadecimal number, or a binary number. */
10455 u holds the "number so far"
10456 shift the power of 2 of the base
10457 (hex == 4, octal == 3, binary == 1)
10458 overflowed was the number more than we can hold?
10460 Shift is used when we add a digit. It also serves as an "are
10461 we in octal/hex/binary?" indicator to disallow hex characters
10462 when in octal mode.
10467 bool overflowed = FALSE;
10468 bool just_zero = TRUE; /* just plain 0 or binary number? */
10469 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10470 static const char* const bases[5] =
10471 { "", "binary", "", "octal", "hexadecimal" };
10472 static const char* const Bases[5] =
10473 { "", "Binary", "", "Octal", "Hexadecimal" };
10474 static const char* const maxima[5] =
10476 "0b11111111111111111111111111111111",
10480 const char *base, *Base, *max;
10482 /* check for hex */
10487 } else if (s[1] == 'b') {
10492 /* check for a decimal in disguise */
10493 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10495 /* so it must be octal */
10502 if (ckWARN(WARN_SYNTAX))
10503 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10504 "Misplaced _ in number");
10508 base = bases[shift];
10509 Base = Bases[shift];
10510 max = maxima[shift];
10512 /* read the rest of the number */
10514 /* x is used in the overflow test,
10515 b is the digit we're adding on. */
10520 /* if we don't mention it, we're done */
10524 /* _ are ignored -- but warned about if consecutive */
10526 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10528 "Misplaced _ in number");
10532 /* 8 and 9 are not octal */
10533 case '8': case '9':
10535 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10539 case '2': case '3': case '4':
10540 case '5': case '6': case '7':
10542 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10545 case '0': case '1':
10546 b = *s++ & 15; /* ASCII digit -> value of digit */
10550 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10551 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10552 /* make sure they said 0x */
10555 b = (*s++ & 7) + 9;
10557 /* Prepare to put the digit we have onto the end
10558 of the number so far. We check for overflows.
10564 x = u << shift; /* make room for the digit */
10566 if ((x >> shift) != u
10567 && !(PL_hints & HINT_NEW_BINARY)) {
10570 if (ckWARN_d(WARN_OVERFLOW))
10571 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10572 "Integer overflow in %s number",
10575 u = x | b; /* add the digit to the end */
10578 n *= nvshift[shift];
10579 /* If an NV has not enough bits in its
10580 * mantissa to represent an UV this summing of
10581 * small low-order numbers is a waste of time
10582 * (because the NV cannot preserve the
10583 * low-order bits anyway): we could just
10584 * remember when did we overflow and in the
10585 * end just multiply n by the right
10593 /* if we get here, we had success: make a scalar value from
10598 /* final misplaced underbar check */
10599 if (s[-1] == '_') {
10600 if (ckWARN(WARN_SYNTAX))
10601 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10606 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10607 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10608 "%s number > %s non-portable",
10614 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10615 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10616 "%s number > %s non-portable",
10621 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10622 sv = new_constant(start, s - start, "integer",
10624 else if (PL_hints & HINT_NEW_BINARY)
10625 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10630 handle decimal numbers.
10631 we're also sent here when we read a 0 as the first digit
10633 case '1': case '2': case '3': case '4': case '5':
10634 case '6': case '7': case '8': case '9': case '.':
10637 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10640 /* read next group of digits and _ and copy into d */
10641 while (isDIGIT(*s) || *s == '_') {
10642 /* skip underscores, checking for misplaced ones
10646 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10647 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10648 "Misplaced _ in number");
10652 /* check for end of fixed-length buffer */
10654 Perl_croak(aTHX_ number_too_long);
10655 /* if we're ok, copy the character */
10660 /* final misplaced underbar check */
10661 if (lastub && s == lastub + 1) {
10662 if (ckWARN(WARN_SYNTAX))
10663 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10666 /* read a decimal portion if there is one. avoid
10667 3..5 being interpreted as the number 3. followed
10670 if (*s == '.' && s[1] != '.') {
10675 if (ckWARN(WARN_SYNTAX))
10676 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10677 "Misplaced _ in number");
10681 /* copy, ignoring underbars, until we run out of digits.
10683 for (; isDIGIT(*s) || *s == '_'; s++) {
10684 /* fixed length buffer check */
10686 Perl_croak(aTHX_ number_too_long);
10688 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10689 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10690 "Misplaced _ in number");
10696 /* fractional part ending in underbar? */
10697 if (s[-1] == '_') {
10698 if (ckWARN(WARN_SYNTAX))
10699 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10700 "Misplaced _ in number");
10702 if (*s == '.' && isDIGIT(s[1])) {
10703 /* oops, it's really a v-string, but without the "v" */
10709 /* read exponent part, if present */
10710 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10714 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10715 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10717 /* stray preinitial _ */
10719 if (ckWARN(WARN_SYNTAX))
10720 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10721 "Misplaced _ in number");
10725 /* allow positive or negative exponent */
10726 if (*s == '+' || *s == '-')
10729 /* stray initial _ */
10731 if (ckWARN(WARN_SYNTAX))
10732 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10733 "Misplaced _ in number");
10737 /* read digits of exponent */
10738 while (isDIGIT(*s) || *s == '_') {
10741 Perl_croak(aTHX_ number_too_long);
10745 if (((lastub && s == lastub + 1) ||
10746 (!isDIGIT(s[1]) && s[1] != '_'))
10747 && ckWARN(WARN_SYNTAX))
10748 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10749 "Misplaced _ in number");
10756 /* make an sv from the string */
10760 We try to do an integer conversion first if no characters
10761 indicating "float" have been found.
10766 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10768 if (flags == IS_NUMBER_IN_UV) {
10770 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10773 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10774 if (uv <= (UV) IV_MIN)
10775 sv_setiv(sv, -(IV)uv);
10782 /* terminate the string */
10784 nv = Atof(PL_tokenbuf);
10788 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10789 (PL_hints & HINT_NEW_INTEGER) )
10790 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10791 (floatit ? "float" : "integer"),
10795 /* if it starts with a v, it could be a v-string */
10798 sv = NEWSV(92,5); /* preallocate storage space */
10799 s = scan_vstring(s,sv);
10803 /* make the op for the constant and return */
10806 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10808 lvalp->opval = Nullop;
10814 S_scan_formline(pTHX_ register char *s)
10817 register char *eol;
10819 SV * const stuff = newSVpvs("");
10820 bool needargs = FALSE;
10821 bool eofmt = FALSE;
10823 while (!needargs) {
10825 #ifdef PERL_STRICT_CR
10826 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10828 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10830 if (*t == '\n' || t == PL_bufend) {
10835 if (PL_in_eval && !PL_rsfp) {
10836 eol = (char *) memchr(s,'\n',PL_bufend-s);
10841 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10843 for (t = s; t < eol; t++) {
10844 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10846 goto enough; /* ~~ must be first line in formline */
10848 if (*t == '@' || *t == '^')
10852 sv_catpvn(stuff, s, eol-s);
10853 #ifndef PERL_STRICT_CR
10854 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10855 char *end = SvPVX(stuff) + SvCUR(stuff);
10858 SvCUR_set(stuff, SvCUR(stuff) - 1);
10867 s = filter_gets(PL_linestr, PL_rsfp, 0);
10868 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10869 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10870 PL_last_lop = PL_last_uni = Nullch;
10879 if (SvCUR(stuff)) {
10882 PL_lex_state = LEX_NORMAL;
10883 PL_nextval[PL_nexttoke].ival = 0;
10887 PL_lex_state = LEX_FORMLINE;
10889 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10891 else if (PL_encoding)
10892 sv_recode_to_utf8(stuff, PL_encoding);
10894 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10896 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10900 SvREFCNT_dec(stuff);
10902 PL_lex_formbrack = 0;
10914 PL_cshlen = strlen(PL_cshname);
10919 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10922 const I32 oldsavestack_ix = PL_savestack_ix;
10923 CV* const outsidecv = PL_compcv;
10926 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10928 SAVEI32(PL_subline);
10929 save_item(PL_subname);
10930 SAVESPTR(PL_compcv);
10932 PL_compcv = (CV*)NEWSV(1104,0);
10933 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10934 CvFLAGS(PL_compcv) |= flags;
10936 PL_subline = CopLINE(PL_curcop);
10937 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10938 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10939 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10941 return oldsavestack_ix;
10945 #pragma segment Perl_yylex
10948 Perl_yywarn(pTHX_ const char *s)
10951 PL_in_eval |= EVAL_WARNONLY;
10953 PL_in_eval &= ~EVAL_WARNONLY;
10958 Perl_yyerror(pTHX_ const char *s)
10961 const char *where = NULL;
10962 const char *context = NULL;
10966 if (!yychar || (yychar == ';' && !PL_rsfp))
10968 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10969 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10970 PL_oldbufptr != PL_bufptr) {
10973 The code below is removed for NetWare because it abends/crashes on NetWare
10974 when the script has error such as not having the closing quotes like:
10975 if ($var eq "value)
10976 Checking of white spaces is anyway done in NetWare code.
10979 while (isSPACE(*PL_oldoldbufptr))
10982 context = PL_oldoldbufptr;
10983 contlen = PL_bufptr - PL_oldoldbufptr;
10985 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10986 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10989 The code below is removed for NetWare because it abends/crashes on NetWare
10990 when the script has error such as not having the closing quotes like:
10991 if ($var eq "value)
10992 Checking of white spaces is anyway done in NetWare code.
10995 while (isSPACE(*PL_oldbufptr))
10998 context = PL_oldbufptr;
10999 contlen = PL_bufptr - PL_oldbufptr;
11001 else if (yychar > 255)
11002 where = "next token ???";
11003 else if (yychar == -2) { /* YYEMPTY */
11004 if (PL_lex_state == LEX_NORMAL ||
11005 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11006 where = "at end of line";
11007 else if (PL_lex_inpat)
11008 where = "within pattern";
11010 where = "within string";
11013 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
11015 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11016 else if (isPRINT_LC(yychar))
11017 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
11019 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11020 where = SvPVX_const(where_sv);
11022 msg = sv_2mortal(newSVpv(s, 0));
11023 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11024 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11026 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
11028 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
11029 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11030 Perl_sv_catpvf(aTHX_ msg,
11031 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11032 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11035 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11036 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11039 if (PL_error_count >= 10) {
11040 if (PL_in_eval && SvCUR(ERRSV))
11041 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11042 ERRSV, OutCopFILE(PL_curcop));
11044 Perl_croak(aTHX_ "%s has too many errors.\n",
11045 OutCopFILE(PL_curcop));
11048 PL_in_my_stash = NULL;
11052 #pragma segment Main
11056 S_swallow_bom(pTHX_ U8 *s)
11059 const STRLEN slen = SvCUR(PL_linestr);
11062 if (s[1] == 0xFE) {
11063 /* UTF-16 little-endian? (or UTF32-LE?) */
11064 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11065 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11066 #ifndef PERL_NO_UTF16_FILTER
11067 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11070 if (PL_bufend > (char*)s) {
11074 filter_add(utf16rev_textfilter, NULL);
11075 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11076 utf16_to_utf8_reversed(s, news,
11077 PL_bufend - (char*)s - 1,
11079 sv_setpvn(PL_linestr, (const char*)news, newlen);
11081 SvUTF8_on(PL_linestr);
11082 s = (U8*)SvPVX(PL_linestr);
11083 PL_bufend = SvPVX(PL_linestr) + newlen;
11086 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11091 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11092 #ifndef PERL_NO_UTF16_FILTER
11093 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11096 if (PL_bufend > (char *)s) {
11100 filter_add(utf16_textfilter, NULL);
11101 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11102 utf16_to_utf8(s, news,
11103 PL_bufend - (char*)s,
11105 sv_setpvn(PL_linestr, (const char*)news, newlen);
11107 SvUTF8_on(PL_linestr);
11108 s = (U8*)SvPVX(PL_linestr);
11109 PL_bufend = SvPVX(PL_linestr) + newlen;
11112 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11117 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11118 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11119 s += 3; /* UTF-8 */
11125 if (s[2] == 0xFE && s[3] == 0xFF) {
11126 /* UTF-32 big-endian */
11127 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11130 else if (s[2] == 0 && s[3] != 0) {
11133 * are a good indicator of UTF-16BE. */
11134 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11139 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11142 * are a good indicator of UTF-16LE. */
11143 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11152 * Restore a source filter.
11156 restore_rsfp(pTHX_ void *f)
11159 PerlIO * const fp = (PerlIO*)f;
11161 if (PL_rsfp == PerlIO_stdin())
11162 PerlIO_clearerr(PL_rsfp);
11163 else if (PL_rsfp && (PL_rsfp != fp))
11164 PerlIO_close(PL_rsfp);
11168 #ifndef PERL_NO_UTF16_FILTER
11170 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11173 const STRLEN old = SvCUR(sv);
11174 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11175 DEBUG_P(PerlIO_printf(Perl_debug_log,
11176 "utf16_textfilter(%p): %d %d (%d)\n",
11177 utf16_textfilter, idx, maxlen, (int) count));
11181 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11182 Copy(SvPVX_const(sv), tmps, old, char);
11183 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11184 SvCUR(sv) - old, &newlen);
11185 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11187 DEBUG_P({sv_dump(sv);});
11192 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11195 const STRLEN old = SvCUR(sv);
11196 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11197 DEBUG_P(PerlIO_printf(Perl_debug_log,
11198 "utf16rev_textfilter(%p): %d %d (%d)\n",
11199 utf16rev_textfilter, idx, maxlen, (int) count));
11203 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11204 Copy(SvPVX_const(sv), tmps, old, char);
11205 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11206 SvCUR(sv) - old, &newlen);
11207 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11209 DEBUG_P({ sv_dump(sv); });
11215 Returns a pointer to the next character after the parsed
11216 vstring, as well as updating the passed in sv.
11218 Function must be called like
11221 s = scan_vstring(s,sv);
11223 The sv should already be large enough to store the vstring
11224 passed in, for performance reasons.
11229 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11232 const char *pos = s;
11233 const char *start = s;
11234 if (*pos == 'v') pos++; /* get past 'v' */
11235 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11237 if ( *pos != '.') {
11238 /* this may not be a v-string if followed by => */
11239 const char *next = pos;
11240 while (next < PL_bufend && isSPACE(*next))
11242 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11243 /* return string not v-string */
11244 sv_setpvn(sv,(char *)s,pos-s);
11245 return (char *)pos;
11249 if (!isALPHA(*pos)) {
11250 U8 tmpbuf[UTF8_MAXBYTES+1];
11252 if (*s == 'v') s++; /* get past 'v' */
11254 sv_setpvn(sv, "", 0);
11260 /* this is atoi() that tolerates underscores */
11261 const char *end = pos;
11263 while (--end >= s) {
11268 rev += (*end - '0') * mult;
11270 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11271 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11272 "Integer overflow in decimal number");
11276 if (rev > 0x7FFFFFFF)
11277 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11279 /* Append native character for the rev point */
11280 tmpend = uvchr_to_utf8(tmpbuf, rev);
11281 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11282 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11284 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11290 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11294 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11302 * c-indentation-style: bsd
11303 * c-basic-offset: 4
11304 * indent-tabs-mode: t
11307 * ex: set ts=8 sts=4 sw=4 noet: