3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
288 const char *name = Nullch;
289 enum token_type type = TOKENTYPE_NONE;
290 const struct debug_tokens *p;
291 SV* const report = newSVpvs("<== ");
293 for (p = debug_tokens; p->token; p++) {
294 if (p->token == (int)rv) {
301 Perl_sv_catpv(aTHX_ report, name);
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
305 sv_catpvs(report, "EOF");
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
322 case TOKENTYPE_OPVAL:
324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
325 PL_op_name[yylval.opval->op_type]);
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
333 sv_catpvs(report, "(opval=null)");
336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
342 /* print the buffer with suitable escapes */
345 S_printbuf(pTHX_ const char* fmt, const char* s)
347 SV* const tmp = newSVpvs("");
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
362 S_ao(pTHX_ int toketype)
365 if (*PL_bufptr == '=') {
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
392 S_no_op(pTHX_ const char *what, char *s)
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
403 if (ckWARN_d(WARN_SYNTAX)) {
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
434 S_missingterm(pTHX_ char *s)
440 char * const nl = strrchr(s,'\n');
446 iscntrl(PL_multi_close)
448 PL_multi_close < 32 || PL_multi_close == 127
452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
457 *tmpbuf = (char)PL_multi_close;
461 q = strchr(s,'"') ? '\'' : '"';
462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
465 #define FEATURE_IS_ENABLED(name) \
466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
473 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
476 HV * const hinthv = GvHV(PL_hintgv);
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
488 Perl_deprecate(pTHX_ const char *s)
490 if (ckWARN(WARN_DEPRECATED))
491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
495 Perl_deprecate_old(pTHX_ const char *s)
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
507 "Use of %s is deprecated", s);
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
515 #ifdef PERL_CR_FILTER
519 register const char *s = SvPVX_const(sv);
520 register const char * const e = s + SvCUR(sv);
521 /* outer loop optimized to do nothing if there are no CR-LFs */
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
528 if (*s == '\r' && s[1] == '\n')
539 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
542 if (count > 0 && !maxlen)
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
555 Perl_lex_start(pTHX_ SV *line)
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
566 SAVEVPTR(PL_lex_inpat);
567 SAVEI32(PL_lex_inwhat);
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
574 SAVEI32(PL_nexttoke);
576 SAVECOPLINE(PL_curcop);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
590 SAVEI32(PL_sublex_info.sub_inwhat);
591 SAVESPTR(PL_lex_repl);
593 SAVEINT(PL_lex_expect);
595 PL_lex_state = LEX_NORMAL;
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
602 *PL_lex_casestack = '\0';
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
610 PL_sublex_info.sub_inwhat = 0;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
614 s = SvPV_const(PL_linestr, len);
615 if (!len || s[len-1] != ';') {
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
618 sv_catpvs(PL_linestr, "\n;");
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
623 PL_last_lop = PL_last_uni = Nullch;
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
637 PL_doextract = FALSE;
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
644 * increments the current line number in CopLINE(PL_curcop) and checks
645 * to see whether the line starts with a comment of the form
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
651 S_incline(pTHX_ char *s)
659 CopLINE_inc(PL_curcop);
662 while (SPACE_OR_TAB(*s)) s++;
663 if (strnEQ(s, "line", 4))
667 if (SPACE_OR_TAB(*s))
671 while (SPACE_OR_TAB(*s)) s++;
677 while (SPACE_OR_TAB(*s))
679 if (*s == '"' && (t = strchr(s+1, '"'))) {
684 for (t = s; !isSPACE(*t); t++) ;
687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
696 const char * const cf = CopFILE(PL_curcop);
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
732 CopFILE_free(PL_curcop);
733 CopFILE_set(PL_curcop, s);
736 CopLINE_set(PL_curcop, atoi(n)-1);
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
746 S_skipspace(pTHX_ register char *s)
749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
750 while (s < PL_bufend && SPACE_OR_TAB(*s))
756 SSize_t oldprevlen, oldoldprevlen;
757 SSize_t oldloplen = 0, oldunilen = 0;
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
769 if (PL_in_eval && !PL_rsfp) {
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
784 /* try to recharge the buffer */
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
788 /* end of file. Add on the -p or -n magic */
791 ";}continue{print or die qq(-p destination: $!\\n);}");
792 PL_minus_n = PL_minus_p = 0;
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
799 sv_setpvn(PL_linestr,";", 1);
801 /* reset variables for next time we lex */
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
805 PL_last_lop = PL_last_uni = Nullch;
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
819 (void)PerlIO_close(PL_rsfp);
824 /* not at end of file, so we only read another line */
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
829 oldunilen = PL_last_uni - PL_bufend;
831 oldloplen = PL_last_lop - PL_bufend;
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
838 PL_last_uni = s + oldunilen;
840 PL_last_lop = s + oldloplen;
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
847 SV * const sv = NEWSV(85,0);
849 sv_upgrade(sv, SVt_PVMG);
850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
874 if (PL_oldoldbufptr != PL_last_uni)
876 while (isSPACE(*PL_last_uni))
878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
879 if ((t = strchr(s, '(')) && t < PL_bufptr)
882 /* XXX Things like this are just so nasty. We shouldn't be modifying
883 source code, even if we realquick set it back. */
884 if (ckWARN_d(WARN_AMBIGUOUS)){
887 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
888 "Warning: Use of \"%s\" without parentheses is ambiguous",
895 * LOP : macro to build a list operator. Its behaviour has been replaced
896 * with a subroutine, S_lop() for which LOP is just another name.
899 #define LOP(f,x) return lop(f,x,s)
903 * Build a list operator (or something that might be one). The rules:
904 * - if we have a next token, then it's a list operator [why?]
905 * - if the next thing is an opening paren, then it's a function
906 * - else it's a list operator
910 S_lop(pTHX_ I32 f, int x, char *s)
917 PL_last_lop = PL_oldbufptr;
918 PL_last_lop_op = (OPCODE)f;
920 return REPORT(LSTOP);
927 return REPORT(LSTOP);
932 * When the lexer realizes it knows the next token (for instance,
933 * it is reordering tokens for the parser) then it can call S_force_next
934 * to know what token to return the next time the lexer is called. Caller
935 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
936 * handles the token correctly.
940 S_force_next(pTHX_ I32 type)
943 PL_nexttype[PL_nexttoke] = type;
945 if (PL_lex_state != LEX_KNOWNEXT) {
946 PL_lex_defer = PL_lex_state;
947 PL_lex_expect = PL_expect;
948 PL_lex_state = LEX_KNOWNEXT;
953 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
956 SV * const sv = newSVpvn(start,len);
957 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
964 * When the lexer knows the next thing is a word (for instance, it has
965 * just seen -> and it knows that the next char is a word char, then
966 * it calls S_force_word to stick the next word into the PL_next lookahead.
969 * char *start : buffer position (must be within PL_linestr)
970 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
971 * int check_keyword : if true, Perl checks to make sure the word isn't
972 * a keyword (do this if the word is a label, e.g. goto FOO)
973 * int allow_pack : if true, : characters will also be allowed (require,
975 * int allow_initial_tick : used by the "sub" lexer only.
979 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
985 start = skipspace(start);
987 if (isIDFIRST_lazy_if(s,UTF) ||
988 (allow_pack && *s == ':') ||
989 (allow_initial_tick && *s == '\'') )
991 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
992 if (check_keyword && keyword(PL_tokenbuf, len))
994 if (token == METHOD) {
999 PL_expect = XOPERATOR;
1002 PL_nextval[PL_nexttoke].opval
1003 = (OP*)newSVOP(OP_CONST,0,
1004 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1005 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
1013 * Called when the lexer wants $foo *foo &foo etc, but the program
1014 * text only contains the "foo" portion. The first argument is a pointer
1015 * to the "foo", and the second argument is the type symbol to prefix.
1016 * Forces the next token to be a "WORD".
1017 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1021 S_force_ident(pTHX_ register const char *s, int kind)
1025 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1026 PL_nextval[PL_nexttoke].opval = o;
1029 o->op_private = OPpCONST_ENTERED;
1030 /* XXX see note in pp_entereval() for why we forgo typo
1031 warnings if the symbol must be introduced in an eval.
1033 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1034 kind == '$' ? SVt_PV :
1035 kind == '@' ? SVt_PVAV :
1036 kind == '%' ? SVt_PVHV :
1044 Perl_str_to_version(pTHX_ SV *sv)
1049 const char *start = SvPV_const(sv,len);
1050 const char * const end = start + len;
1051 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1052 while (start < end) {
1056 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1061 retval += ((NV)n)/nshift;
1070 * Forces the next token to be a version number.
1071 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1072 * and if "guessing" is TRUE, then no new token is created (and the caller
1073 * must use an alternative parsing method).
1077 S_force_version(pTHX_ char *s, int guessing)
1080 OP *version = Nullop;
1089 while (isDIGIT(*d) || *d == '_' || *d == '.')
1091 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1093 s = scan_num(s, &yylval);
1094 version = yylval.opval;
1095 ver = cSVOPx(version)->op_sv;
1096 if (SvPOK(ver) && !SvNIOK(ver)) {
1097 SvUPGRADE(ver, SVt_PVNV);
1098 SvNV_set(ver, str_to_version(ver));
1099 SvNOK_on(ver); /* hint that it is a version */
1106 /* NOTE: The parser sees the package name and the VERSION swapped */
1107 PL_nextval[PL_nexttoke].opval = version;
1115 * Tokenize a quoted string passed in as an SV. It finds the next
1116 * chunk, up to end of string or a backslash. It may make a new
1117 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1122 S_tokeq(pTHX_ SV *sv)
1126 register char *send;
1134 s = SvPV_force(sv, len);
1135 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1138 while (s < send && *s != '\\')
1143 if ( PL_hints & HINT_NEW_STRING ) {
1144 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1150 if (s + 1 < send && (s[1] == '\\'))
1151 s++; /* all that, just for this */
1156 SvCUR_set(sv, d - SvPVX_const(sv));
1158 if ( PL_hints & HINT_NEW_STRING )
1159 return new_constant(NULL, 0, "q", sv, pv, "q");
1164 * Now come three functions related to double-quote context,
1165 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1166 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1167 * interact with PL_lex_state, and create fake ( ... ) argument lists
1168 * to handle functions and concatenation.
1169 * They assume that whoever calls them will be setting up a fake
1170 * join call, because each subthing puts a ',' after it. This lets
1173 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1175 * (I'm not sure whether the spurious commas at the end of lcfirst's
1176 * arguments and join's arguments are created or not).
1181 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1183 * Pattern matching will set PL_lex_op to the pattern-matching op to
1184 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1186 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1188 * Everything else becomes a FUNC.
1190 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1191 * had an OP_CONST or OP_READLINE). This just sets us up for a
1192 * call to S_sublex_push().
1196 S_sublex_start(pTHX)
1199 register const I32 op_type = yylval.ival;
1201 if (op_type == OP_NULL) {
1202 yylval.opval = PL_lex_op;
1206 if (op_type == OP_CONST || op_type == OP_READLINE) {
1207 SV *sv = tokeq(PL_lex_stuff);
1209 if (SvTYPE(sv) == SVt_PVIV) {
1210 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1212 const char *p = SvPV_const(sv, len);
1213 SV * const nsv = newSVpvn(p, len);
1219 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1220 PL_lex_stuff = Nullsv;
1221 /* Allow <FH> // "foo" */
1222 if (op_type == OP_READLINE)
1223 PL_expect = XTERMORDORDOR;
1227 PL_sublex_info.super_state = PL_lex_state;
1228 PL_sublex_info.sub_inwhat = op_type;
1229 PL_sublex_info.sub_op = PL_lex_op;
1230 PL_lex_state = LEX_INTERPPUSH;
1234 yylval.opval = PL_lex_op;
1244 * Create a new scope to save the lexing state. The scope will be
1245 * ended in S_sublex_done. Returns a '(', starting the function arguments
1246 * to the uc, lc, etc. found before.
1247 * Sets PL_lex_state to LEX_INTERPCONCAT.
1256 PL_lex_state = PL_sublex_info.super_state;
1257 SAVEI32(PL_lex_dojoin);
1258 SAVEI32(PL_lex_brackets);
1259 SAVEI32(PL_lex_casemods);
1260 SAVEI32(PL_lex_starts);
1261 SAVEI32(PL_lex_state);
1262 SAVEVPTR(PL_lex_inpat);
1263 SAVEI32(PL_lex_inwhat);
1264 SAVECOPLINE(PL_curcop);
1265 SAVEPPTR(PL_bufptr);
1266 SAVEPPTR(PL_bufend);
1267 SAVEPPTR(PL_oldbufptr);
1268 SAVEPPTR(PL_oldoldbufptr);
1269 SAVEPPTR(PL_last_lop);
1270 SAVEPPTR(PL_last_uni);
1271 SAVEPPTR(PL_linestart);
1272 SAVESPTR(PL_linestr);
1273 SAVEGENERICPV(PL_lex_brackstack);
1274 SAVEGENERICPV(PL_lex_casestack);
1276 PL_linestr = PL_lex_stuff;
1277 PL_lex_stuff = Nullsv;
1279 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1280 = SvPVX(PL_linestr);
1281 PL_bufend += SvCUR(PL_linestr);
1282 PL_last_lop = PL_last_uni = Nullch;
1283 SAVEFREESV(PL_linestr);
1285 PL_lex_dojoin = FALSE;
1286 PL_lex_brackets = 0;
1287 Newx(PL_lex_brackstack, 120, char);
1288 Newx(PL_lex_casestack, 12, char);
1289 PL_lex_casemods = 0;
1290 *PL_lex_casestack = '\0';
1292 PL_lex_state = LEX_INTERPCONCAT;
1293 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1295 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1296 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1297 PL_lex_inpat = PL_sublex_info.sub_op;
1299 PL_lex_inpat = Nullop;
1306 * Restores lexer state after a S_sublex_push.
1313 if (!PL_lex_starts++) {
1314 SV * const sv = newSVpvs("");
1315 if (SvUTF8(PL_linestr))
1317 PL_expect = XOPERATOR;
1318 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1322 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1323 PL_lex_state = LEX_INTERPCASEMOD;
1327 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1328 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1329 PL_linestr = PL_lex_repl;
1331 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1332 PL_bufend += SvCUR(PL_linestr);
1333 PL_last_lop = PL_last_uni = Nullch;
1334 SAVEFREESV(PL_linestr);
1335 PL_lex_dojoin = FALSE;
1336 PL_lex_brackets = 0;
1337 PL_lex_casemods = 0;
1338 *PL_lex_casestack = '\0';
1340 if (SvEVALED(PL_lex_repl)) {
1341 PL_lex_state = LEX_INTERPNORMAL;
1343 /* we don't clear PL_lex_repl here, so that we can check later
1344 whether this is an evalled subst; that means we rely on the
1345 logic to ensure sublex_done() is called again only via the
1346 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1349 PL_lex_state = LEX_INTERPCONCAT;
1350 PL_lex_repl = Nullsv;
1356 PL_bufend = SvPVX(PL_linestr);
1357 PL_bufend += SvCUR(PL_linestr);
1358 PL_expect = XOPERATOR;
1359 PL_sublex_info.sub_inwhat = 0;
1367 Extracts a pattern, double-quoted string, or transliteration. This
1370 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1371 processing a pattern (PL_lex_inpat is true), a transliteration
1372 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1374 Returns a pointer to the character scanned up to. Iff this is
1375 advanced from the start pointer supplied (ie if anything was
1376 successfully parsed), will leave an OP for the substring scanned
1377 in yylval. Caller must intuit reason for not parsing further
1378 by looking at the next characters herself.
1382 double-quoted style: \r and \n
1383 regexp special ones: \D \s
1385 backrefs: \1 (deprecated in substitution replacements)
1386 case and quoting: \U \Q \E
1387 stops on @ and $, but not for $ as tail anchor
1389 In transliterations:
1390 characters are VERY literal, except for - not at the start or end
1391 of the string, which indicates a range. scan_const expands the
1392 range to the full set of intermediate characters.
1394 In double-quoted strings:
1396 double-quoted style: \r and \n
1398 backrefs: \1 (deprecated)
1399 case and quoting: \U \Q \E
1402 scan_const does *not* construct ops to handle interpolated strings.
1403 It stops processing as soon as it finds an embedded $ or @ variable
1404 and leaves it to the caller to work out what's going on.
1406 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1408 $ in pattern could be $foo or could be tail anchor. Assumption:
1409 it's a tail anchor if $ is the last thing in the string, or if it's
1410 followed by one of ")| \n\t"
1412 \1 (backreferences) are turned into $1
1414 The structure of the code is
1415 while (there's a character to process) {
1416 handle transliteration ranges
1417 skip regexp comments
1418 skip # initiated comments in //x patterns
1419 check for embedded @foo
1420 check for embedded scalars
1422 leave intact backslashes from leave (below)
1423 deprecate \1 in strings and sub replacements
1424 handle string-changing backslashes \l \U \Q \E, etc.
1425 switch (what was escaped) {
1426 handle - in a transliteration (becomes a literal -)
1427 handle \132 octal characters
1428 handle 0x15 hex characters
1429 handle \cV (control V)
1430 handle printf backslashes (\f, \r, \n, etc)
1432 } (end if backslash)
1433 } (end while character to read)
1438 S_scan_const(pTHX_ char *start)
1441 register char *send = PL_bufend; /* end of the constant */
1442 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1443 register char *s = start; /* start of the constant */
1444 register char *d = SvPVX(sv); /* destination for copies */
1445 bool dorange = FALSE; /* are we in a translit range? */
1446 bool didrange = FALSE; /* did we just finish a range? */
1447 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1448 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1451 UV literal_endpoint = 0;
1454 const char *leaveit = /* set of acceptably-backslashed characters */
1456 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1459 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1460 /* If we are doing a trans and we know we want UTF8 set expectation */
1461 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1462 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1466 while (s < send || dorange) {
1467 /* get transliterations out of the way (they're most literal) */
1468 if (PL_lex_inwhat == OP_TRANS) {
1469 /* expand a range A-Z to the full set of characters. AIE! */
1471 I32 i; /* current expanded character */
1472 I32 min; /* first character in range */
1473 I32 max; /* last character in range */
1476 char * const c = (char*)utf8_hop((U8*)d, -1);
1480 *c = (char)UTF_TO_NATIVE(0xff);
1481 /* mark the range as done, and continue */
1487 i = d - SvPVX_const(sv); /* remember current offset */
1488 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1489 d = SvPVX(sv) + i; /* refresh d after realloc */
1490 d -= 2; /* eat the first char and the - */
1492 min = (U8)*d; /* first char in range */
1493 max = (U8)d[1]; /* last char in range */
1497 "Invalid range \"%c-%c\" in transliteration operator",
1498 (char)min, (char)max);
1502 if (literal_endpoint == 2 &&
1503 ((isLOWER(min) && isLOWER(max)) ||
1504 (isUPPER(min) && isUPPER(max)))) {
1506 for (i = min; i <= max; i++)
1508 *d++ = NATIVE_TO_NEED(has_utf8,i);
1510 for (i = min; i <= max; i++)
1512 *d++ = NATIVE_TO_NEED(has_utf8,i);
1517 for (i = min; i <= max; i++)
1520 /* mark the range as done, and continue */
1524 literal_endpoint = 0;
1529 /* range begins (ignore - as first or last char) */
1530 else if (*s == '-' && s+1 < send && s != start) {
1532 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1535 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1545 literal_endpoint = 0;
1550 /* if we get here, we're not doing a transliteration */
1552 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1553 except for the last char, which will be done separately. */
1554 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1556 while (s+1 < send && *s != ')')
1557 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1559 else if (s[2] == '{' /* This should match regcomp.c */
1560 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1563 char *regparse = s + (s[2] == '{' ? 3 : 4);
1566 while (count && (c = *regparse)) {
1567 if (c == '\\' && regparse[1])
1575 if (*regparse != ')')
1576 regparse--; /* Leave one char for continuation. */
1577 while (s < regparse)
1578 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1582 /* likewise skip #-initiated comments in //x patterns */
1583 else if (*s == '#' && PL_lex_inpat &&
1584 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1585 while (s+1 < send && *s != '\n')
1586 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1589 /* check for embedded arrays
1590 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1592 else if (*s == '@' && s[1]
1593 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1596 /* check for embedded scalars. only stop if we're sure it's a
1599 else if (*s == '$') {
1600 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1602 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1603 break; /* in regexp, $ might be tail anchor */
1606 /* End of else if chain - OP_TRANS rejoin rest */
1609 if (*s == '\\' && s+1 < send) {
1612 /* some backslashes we leave behind */
1613 if (*leaveit && *s && strchr(leaveit, *s)) {
1614 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1615 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1619 /* deprecate \1 in strings and substitution replacements */
1620 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1621 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1623 if (ckWARN(WARN_SYNTAX))
1624 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1629 /* string-change backslash escapes */
1630 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1635 /* if we get here, it's either a quoted -, or a digit */
1638 /* quoted - in transliterations */
1640 if (PL_lex_inwhat == OP_TRANS) {
1650 Perl_warner(aTHX_ packWARN(WARN_MISC),
1651 "Unrecognized escape \\%c passed through",
1653 /* default action is to copy the quoted character */
1654 goto default_action;
1657 /* \132 indicates an octal constant */
1658 case '0': case '1': case '2': case '3':
1659 case '4': case '5': case '6': case '7':
1663 uv = grok_oct(s, &len, &flags, NULL);
1666 goto NUM_ESCAPE_INSERT;
1668 /* \x24 indicates a hex constant */
1672 char* const e = strchr(s, '}');
1673 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1674 PERL_SCAN_DISALLOW_PREFIX;
1679 yyerror("Missing right brace on \\x{}");
1683 uv = grok_hex(s, &len, &flags, NULL);
1689 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1690 uv = grok_hex(s, &len, &flags, NULL);
1696 /* Insert oct or hex escaped character.
1697 * There will always enough room in sv since such
1698 * escapes will be longer than any UTF-8 sequence
1699 * they can end up as. */
1701 /* We need to map to chars to ASCII before doing the tests
1704 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1705 if (!has_utf8 && uv > 255) {
1706 /* Might need to recode whatever we have
1707 * accumulated so far if it contains any
1710 * (Can't we keep track of that and avoid
1711 * this rescan? --jhi)
1715 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1716 if (!NATIVE_IS_INVARIANT(*c)) {
1721 const STRLEN offset = d - SvPVX_const(sv);
1723 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1727 while (src >= (const U8 *)SvPVX_const(sv)) {
1728 if (!NATIVE_IS_INVARIANT(*src)) {
1729 const U8 ch = NATIVE_TO_ASCII(*src);
1730 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1731 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1741 if (has_utf8 || uv > 255) {
1742 d = (char*)uvchr_to_utf8((U8*)d, uv);
1744 if (PL_lex_inwhat == OP_TRANS &&
1745 PL_sublex_info.sub_op) {
1746 PL_sublex_info.sub_op->op_private |=
1747 (PL_lex_repl ? OPpTRANS_FROM_UTF
1760 /* \N{LATIN SMALL LETTER A} is a named character */
1764 char* e = strchr(s, '}');
1770 yyerror("Missing right brace on \\N{}");
1774 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1776 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1777 PERL_SCAN_DISALLOW_PREFIX;
1780 uv = grok_hex(s, &len, &flags, NULL);
1782 goto NUM_ESCAPE_INSERT;
1784 res = newSVpvn(s + 1, e - s - 1);
1785 res = new_constant( Nullch, 0, "charnames",
1786 res, Nullsv, "\\N{...}" );
1788 sv_utf8_upgrade(res);
1789 str = SvPV_const(res,len);
1790 #ifdef EBCDIC_NEVER_MIND
1791 /* charnames uses pack U and that has been
1792 * recently changed to do the below uni->native
1793 * mapping, so this would be redundant (and wrong,
1794 * the code point would be doubly converted).
1795 * But leave this in just in case the pack U change
1796 * gets revoked, but the semantics is still
1797 * desireable for charnames. --jhi */
1799 UV uv = utf8_to_uvchr((const U8*)str, 0);
1802 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1804 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1805 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1806 str = SvPV_const(res, len);
1810 if (!has_utf8 && SvUTF8(res)) {
1811 const char * const ostart = SvPVX_const(sv);
1812 SvCUR_set(sv, d - ostart);
1815 sv_utf8_upgrade(sv);
1816 /* this just broke our allocation above... */
1817 SvGROW(sv, (STRLEN)(send - start));
1818 d = SvPVX(sv) + SvCUR(sv);
1821 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1822 const char * const odest = SvPVX_const(sv);
1824 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1825 d = SvPVX(sv) + (d - odest);
1827 Copy(str, d, len, char);
1834 yyerror("Missing braces on \\N{}");
1837 /* \c is a control character */
1846 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1849 yyerror("Missing control char name in \\c");
1853 /* printf-style backslashes, formfeeds, newlines, etc */
1855 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1858 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1861 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1864 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1867 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1870 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1873 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1879 } /* end if (backslash) */
1886 /* If we started with encoded form, or already know we want it
1887 and then encode the next character */
1888 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1890 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1891 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1894 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1895 const STRLEN off = d - SvPVX_const(sv);
1896 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1898 d = (char*)uvchr_to_utf8((U8*)d, uv);
1902 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1904 } /* while loop to process each character */
1906 /* terminate the string and set up the sv */
1908 SvCUR_set(sv, d - SvPVX_const(sv));
1909 if (SvCUR(sv) >= SvLEN(sv))
1910 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1913 if (PL_encoding && !has_utf8) {
1914 sv_recode_to_utf8(sv, PL_encoding);
1920 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1921 PL_sublex_info.sub_op->op_private |=
1922 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1926 /* shrink the sv if we allocated more than we used */
1927 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1928 SvPV_shrink_to_cur(sv);
1931 /* return the substring (via yylval) only if we parsed anything */
1932 if (s > PL_bufptr) {
1933 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1934 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1936 ( PL_lex_inwhat == OP_TRANS
1938 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1941 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1948 * Returns TRUE if there's more to the expression (e.g., a subscript),
1951 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1953 * ->[ and ->{ return TRUE
1954 * { and [ outside a pattern are always subscripts, so return TRUE
1955 * if we're outside a pattern and it's not { or [, then return FALSE
1956 * if we're in a pattern and the first char is a {
1957 * {4,5} (any digits around the comma) returns FALSE
1958 * if we're in a pattern and the first char is a [
1960 * [SOMETHING] has a funky algorithm to decide whether it's a
1961 * character class or not. It has to deal with things like
1962 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1963 * anything else returns TRUE
1966 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1969 S_intuit_more(pTHX_ register char *s)
1972 if (PL_lex_brackets)
1974 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1976 if (*s != '{' && *s != '[')
1981 /* In a pattern, so maybe we have {n,m}. */
1998 /* On the other hand, maybe we have a character class */
2001 if (*s == ']' || *s == '^')
2004 /* this is terrifying, and it works */
2005 int weight = 2; /* let's weigh the evidence */
2007 unsigned char un_char = 255, last_un_char;
2008 const char * const send = strchr(s,']');
2009 char tmpbuf[sizeof PL_tokenbuf * 4];
2011 if (!send) /* has to be an expression */
2014 Zero(seen,256,char);
2017 else if (isDIGIT(*s)) {
2019 if (isDIGIT(s[1]) && s[2] == ']')
2025 for (; s < send; s++) {
2026 last_un_char = un_char;
2027 un_char = (unsigned char)*s;
2032 weight -= seen[un_char] * 10;
2033 if (isALNUM_lazy_if(s+1,UTF)) {
2034 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2035 if ((int)strlen(tmpbuf) > 1
2036 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2041 else if (*s == '$' && s[1] &&
2042 strchr("[#!%*<>()-=",s[1])) {
2043 if (/*{*/ strchr("])} =",s[2]))
2052 if (strchr("wds]",s[1]))
2054 else if (seen['\''] || seen['"'])
2056 else if (strchr("rnftbxcav",s[1]))
2058 else if (isDIGIT(s[1])) {
2060 while (s[1] && isDIGIT(s[1]))
2070 if (strchr("aA01! ",last_un_char))
2072 if (strchr("zZ79~",s[1]))
2074 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2075 weight -= 5; /* cope with negative subscript */
2078 if (!isALNUM(last_un_char)
2079 && !(last_un_char == '$' || last_un_char == '@'
2080 || last_un_char == '&')
2081 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2086 if (keyword(tmpbuf, d - tmpbuf))
2089 if (un_char == last_un_char + 1)
2091 weight -= seen[un_char];
2096 if (weight >= 0) /* probably a character class */
2106 * Does all the checking to disambiguate
2108 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2109 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2111 * First argument is the stuff after the first token, e.g. "bar".
2113 * Not a method if bar is a filehandle.
2114 * Not a method if foo is a subroutine prototyped to take a filehandle.
2115 * Not a method if it's really "Foo $bar"
2116 * Method if it's "foo $bar"
2117 * Not a method if it's really "print foo $bar"
2118 * Method if it's really "foo package::" (interpreted as package->foo)
2119 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2120 * Not a method if bar is a filehandle or package, but is quoted with
2125 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2128 char *s = start + (*start == '$');
2129 char tmpbuf[sizeof PL_tokenbuf];
2134 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2138 const char *proto = SvPVX_const(cv);
2149 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2150 /* start is the beginning of the possible filehandle/object,
2151 * and s is the end of it
2152 * tmpbuf is a copy of it
2155 if (*start == '$') {
2156 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2161 return *s == '(' ? FUNCMETH : METHOD;
2163 if (!keyword(tmpbuf, len)) {
2164 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2169 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2170 if (indirgv && GvCVu(indirgv))
2172 /* filehandle or package name makes it a method */
2173 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2175 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2176 return 0; /* no assumptions -- "=>" quotes bearword */
2178 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2179 newSVpvn(tmpbuf,len));
2180 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2184 return *s == '(' ? FUNCMETH : METHOD;
2192 * Return a string of Perl code to load the debugger. If PERL5DB
2193 * is set, it will return the contents of that, otherwise a
2194 * compile-time require of perl5db.pl.
2202 const char * const pdb = PerlEnv_getenv("PERL5DB");
2206 SETERRNO(0,SS_NORMAL);
2207 return "BEGIN { require 'perl5db.pl' }";
2213 /* Encoded script support. filter_add() effectively inserts a
2214 * 'pre-processing' function into the current source input stream.
2215 * Note that the filter function only applies to the current source file
2216 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2218 * The datasv parameter (which may be NULL) can be used to pass
2219 * private data to this instance of the filter. The filter function
2220 * can recover the SV using the FILTER_DATA macro and use it to
2221 * store private buffers and state information.
2223 * The supplied datasv parameter is upgraded to a PVIO type
2224 * and the IoDIRP/IoANY field is used to store the function pointer,
2225 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2226 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2227 * private use must be set using malloc'd pointers.
2231 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2237 if (!PL_rsfp_filters)
2238 PL_rsfp_filters = newAV();
2240 datasv = NEWSV(255,0);
2241 SvUPGRADE(datasv, SVt_PVIO);
2242 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2243 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2244 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2245 IoANY(datasv), SvPV_nolen(datasv)));
2246 av_unshift(PL_rsfp_filters, 1);
2247 av_store(PL_rsfp_filters, 0, datasv) ;
2252 /* Delete most recently added instance of this filter function. */
2254 Perl_filter_del(pTHX_ filter_t funcp)
2260 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2262 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2264 /* if filter is on top of stack (usual case) just pop it off */
2265 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2266 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2267 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2268 IoANY(datasv) = (void *)NULL;
2269 sv_free(av_pop(PL_rsfp_filters));
2273 /* we need to search for the correct entry and clear it */
2274 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2278 /* Invoke the idxth filter function for the current rsfp. */
2279 /* maxlen 0 = read one text line */
2281 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2287 if (!PL_rsfp_filters)
2289 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2290 /* Provide a default input filter to make life easy. */
2291 /* Note that we append to the line. This is handy. */
2292 DEBUG_P(PerlIO_printf(Perl_debug_log,
2293 "filter_read %d: from rsfp\n", idx));
2297 const int old_len = SvCUR(buf_sv);
2299 /* ensure buf_sv is large enough */
2300 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2301 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2302 if (PerlIO_error(PL_rsfp))
2303 return -1; /* error */
2305 return 0 ; /* end of file */
2307 SvCUR_set(buf_sv, old_len + len) ;
2310 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2311 if (PerlIO_error(PL_rsfp))
2312 return -1; /* error */
2314 return 0 ; /* end of file */
2317 return SvCUR(buf_sv);
2319 /* Skip this filter slot if filter has been deleted */
2320 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2321 DEBUG_P(PerlIO_printf(Perl_debug_log,
2322 "filter_read %d: skipped (filter deleted)\n",
2324 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2326 /* Get function pointer hidden within datasv */
2327 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2328 DEBUG_P(PerlIO_printf(Perl_debug_log,
2329 "filter_read %d: via function %p (%s)\n",
2330 idx, datasv, SvPV_nolen_const(datasv)));
2331 /* Call function. The function is expected to */
2332 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2333 /* Return: <0:error, =0:eof, >0:not eof */
2334 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2338 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2341 #ifdef PERL_CR_FILTER
2342 if (!PL_rsfp_filters) {
2343 filter_add(S_cr_textfilter,NULL);
2346 if (PL_rsfp_filters) {
2348 SvCUR_set(sv, 0); /* start with empty line */
2349 if (FILTER_READ(0, sv, 0) > 0)
2350 return ( SvPVX(sv) ) ;
2355 return (sv_gets(sv, fp, append));
2359 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2364 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2368 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2369 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2371 return GvHV(gv); /* Foo:: */
2374 /* use constant CLASS => 'MyClass' */
2375 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2377 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2378 pkgname = SvPV_nolen_const(sv);
2382 return gv_stashpv(pkgname, FALSE);
2386 S_tokenize_use(pTHX_ int is_use, char *s) {
2388 if (PL_expect != XSTATE)
2389 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2390 is_use ? "use" : "no"));
2392 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2393 s = force_version(s, TRUE);
2394 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2395 PL_nextval[PL_nexttoke].opval = Nullop;
2398 else if (*s == 'v') {
2399 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2400 s = force_version(s, FALSE);
2404 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2405 s = force_version(s, FALSE);
2407 yylval.ival = is_use;
2411 static const char* const exp_name[] =
2412 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2413 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2420 Works out what to call the token just pulled out of the input
2421 stream. The yacc parser takes care of taking the ops we return and
2422 stitching them into a tree.
2428 if read an identifier
2429 if we're in a my declaration
2430 croak if they tried to say my($foo::bar)
2431 build the ops for a my() declaration
2432 if it's an access to a my() variable
2433 are we in a sort block?
2434 croak if my($a); $a <=> $b
2435 build ops for access to a my() variable
2436 if in a dq string, and they've said @foo and we can't find @foo
2438 build ops for a bareword
2439 if we already built the token before, use it.
2444 #pragma segment Perl_yylex
2450 register char *s = PL_bufptr;
2456 SV* tmp = newSVpvs("");
2457 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2458 (IV)CopLINE(PL_curcop),
2459 lex_state_names[PL_lex_state],
2460 exp_name[PL_expect],
2461 pv_display(tmp, s, strlen(s), 0, 60));
2464 /* check if there's an identifier for us to look at */
2465 if (PL_pending_ident)
2466 return REPORT(S_pending_ident(aTHX));
2468 /* no identifier pending identification */
2470 switch (PL_lex_state) {
2472 case LEX_NORMAL: /* Some compilers will produce faster */
2473 case LEX_INTERPNORMAL: /* code if we comment these out. */
2477 /* when we've already built the next token, just pull it out of the queue */
2480 yylval = PL_nextval[PL_nexttoke];
2482 PL_lex_state = PL_lex_defer;
2483 PL_expect = PL_lex_expect;
2484 PL_lex_defer = LEX_NORMAL;
2486 return REPORT(PL_nexttype[PL_nexttoke]);
2488 /* interpolated case modifiers like \L \U, including \Q and \E.
2489 when we get here, PL_bufptr is at the \
2491 case LEX_INTERPCASEMOD:
2493 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2494 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2496 /* handle \E or end of string */
2497 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2499 if (PL_lex_casemods) {
2500 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2501 PL_lex_casestack[PL_lex_casemods] = '\0';
2503 if (PL_bufptr != PL_bufend
2504 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2506 PL_lex_state = LEX_INTERPCONCAT;
2510 if (PL_bufptr != PL_bufend)
2512 PL_lex_state = LEX_INTERPCONCAT;
2516 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2517 "### Saw case modifier\n"); });
2519 if (s[1] == '\\' && s[2] == 'E') {
2521 PL_lex_state = LEX_INTERPCONCAT;
2526 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2527 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2528 if ((*s == 'L' || *s == 'U') &&
2529 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2530 PL_lex_casestack[--PL_lex_casemods] = '\0';
2533 if (PL_lex_casemods > 10)
2534 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2535 PL_lex_casestack[PL_lex_casemods++] = *s;
2536 PL_lex_casestack[PL_lex_casemods] = '\0';
2537 PL_lex_state = LEX_INTERPCONCAT;
2538 PL_nextval[PL_nexttoke].ival = 0;
2541 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2543 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2545 PL_nextval[PL_nexttoke].ival = OP_LC;
2547 PL_nextval[PL_nexttoke].ival = OP_UC;
2549 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2551 Perl_croak(aTHX_ "panic: yylex");
2555 if (PL_lex_starts) {
2558 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2559 if (PL_lex_casemods == 1 && PL_lex_inpat)
2568 case LEX_INTERPPUSH:
2569 return REPORT(sublex_push());
2571 case LEX_INTERPSTART:
2572 if (PL_bufptr == PL_bufend)
2573 return REPORT(sublex_done());
2574 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2575 "### Interpolated variable\n"); });
2577 PL_lex_dojoin = (*PL_bufptr == '@');
2578 PL_lex_state = LEX_INTERPNORMAL;
2579 if (PL_lex_dojoin) {
2580 PL_nextval[PL_nexttoke].ival = 0;
2582 force_ident("\"", '$');
2583 PL_nextval[PL_nexttoke].ival = 0;
2585 PL_nextval[PL_nexttoke].ival = 0;
2587 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2590 if (PL_lex_starts++) {
2592 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2593 if (!PL_lex_casemods && PL_lex_inpat)
2600 case LEX_INTERPENDMAYBE:
2601 if (intuit_more(PL_bufptr)) {
2602 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2608 if (PL_lex_dojoin) {
2609 PL_lex_dojoin = FALSE;
2610 PL_lex_state = LEX_INTERPCONCAT;
2613 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2614 && SvEVALED(PL_lex_repl))
2616 if (PL_bufptr != PL_bufend)
2617 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2618 PL_lex_repl = Nullsv;
2621 case LEX_INTERPCONCAT:
2623 if (PL_lex_brackets)
2624 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2626 if (PL_bufptr == PL_bufend)
2627 return REPORT(sublex_done());
2629 if (SvIVX(PL_linestr) == '\'') {
2630 SV *sv = newSVsv(PL_linestr);
2633 else if ( PL_hints & HINT_NEW_RE )
2634 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2635 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2639 s = scan_const(PL_bufptr);
2641 PL_lex_state = LEX_INTERPCASEMOD;
2643 PL_lex_state = LEX_INTERPSTART;
2646 if (s != PL_bufptr) {
2647 PL_nextval[PL_nexttoke] = yylval;
2650 if (PL_lex_starts++) {
2651 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2652 if (!PL_lex_casemods && PL_lex_inpat)
2665 PL_lex_state = LEX_NORMAL;
2666 s = scan_formline(PL_bufptr);
2667 if (!PL_lex_formbrack)
2673 PL_oldoldbufptr = PL_oldbufptr;
2679 if (isIDFIRST_lazy_if(s,UTF))
2681 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2684 goto fake_eof; /* emulate EOF on ^D or ^Z */
2689 if (PL_lex_brackets) {
2690 yyerror(PL_lex_formbrack
2691 ? "Format not terminated"
2692 : "Missing right curly or square bracket");
2694 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2695 "### Tokener got EOF\n");
2699 if (s++ < PL_bufend)
2700 goto retry; /* ignore stray nulls */
2703 if (!PL_in_eval && !PL_preambled) {
2704 PL_preambled = TRUE;
2705 sv_setpv(PL_linestr,incl_perldb());
2706 if (SvCUR(PL_linestr))
2707 sv_catpvs(PL_linestr,";");
2709 while(AvFILLp(PL_preambleav) >= 0) {
2710 SV *tmpsv = av_shift(PL_preambleav);
2711 sv_catsv(PL_linestr, tmpsv);
2712 sv_catpvs(PL_linestr, ";");
2715 sv_free((SV*)PL_preambleav);
2716 PL_preambleav = NULL;
2718 if (PL_minus_n || PL_minus_p) {
2719 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2721 sv_catpvs(PL_linestr,"chomp;");
2724 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2725 || *PL_splitstr == '"')
2726 && strchr(PL_splitstr + 1, *PL_splitstr))
2727 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2729 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2730 bytes can be used as quoting characters. :-) */
2731 const char *splits = PL_splitstr;
2732 sv_catpvs(PL_linestr, "our @F=split(q\0");
2735 if (*splits == '\\')
2736 sv_catpvn(PL_linestr, splits, 1);
2737 sv_catpvn(PL_linestr, splits, 1);
2738 } while (*splits++);
2739 /* This loop will embed the trailing NUL of
2740 PL_linestr as the last thing it does before
2742 sv_catpvs(PL_linestr, ");");
2746 sv_catpvs(PL_linestr,"our @F=split(' ');");
2750 sv_catpvs(PL_linestr,"use feature ':5.10';");
2751 sv_catpvs(PL_linestr, "\n");
2752 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2753 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2754 PL_last_lop = PL_last_uni = Nullch;
2755 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2756 SV * const sv = NEWSV(85,0);
2758 sv_upgrade(sv, SVt_PVMG);
2759 sv_setsv(sv,PL_linestr);
2762 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2767 bof = PL_rsfp ? TRUE : FALSE;
2768 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2771 if (PL_preprocess && !PL_in_eval)
2772 (void)PerlProc_pclose(PL_rsfp);
2773 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2774 PerlIO_clearerr(PL_rsfp);
2776 (void)PerlIO_close(PL_rsfp);
2778 PL_doextract = FALSE;
2780 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2781 sv_setpv(PL_linestr,PL_minus_p
2782 ? ";}continue{print;}" : ";}");
2783 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2784 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2785 PL_last_lop = PL_last_uni = Nullch;
2786 PL_minus_n = PL_minus_p = 0;
2789 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2790 PL_last_lop = PL_last_uni = Nullch;
2791 sv_setpvn(PL_linestr,"",0);
2792 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2794 /* If it looks like the start of a BOM or raw UTF-16,
2795 * check if it in fact is. */
2801 #ifdef PERLIO_IS_STDIO
2802 # ifdef __GNU_LIBRARY__
2803 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2804 # define FTELL_FOR_PIPE_IS_BROKEN
2808 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2809 # define FTELL_FOR_PIPE_IS_BROKEN
2814 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2815 /* This loses the possibility to detect the bof
2816 * situation on perl -P when the libc5 is being used.
2817 * Workaround? Maybe attach some extra state to PL_rsfp?
2820 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2822 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2826 s = swallow_bom((U8*)s);
2830 /* Incest with pod. */
2831 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2832 sv_setpvn(PL_linestr, "", 0);
2833 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2834 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2835 PL_last_lop = PL_last_uni = Nullch;
2836 PL_doextract = FALSE;
2840 } while (PL_doextract);
2841 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2842 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2843 SV * const sv = NEWSV(85,0);
2845 sv_upgrade(sv, SVt_PVMG);
2846 sv_setsv(sv,PL_linestr);
2849 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2851 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2852 PL_last_lop = PL_last_uni = Nullch;
2853 if (CopLINE(PL_curcop) == 1) {
2854 while (s < PL_bufend && isSPACE(*s))
2856 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2860 if (*s == '#' && *(s+1) == '!')
2862 #ifdef ALTERNATE_SHEBANG
2864 static char const as[] = ALTERNATE_SHEBANG;
2865 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2866 d = s + (sizeof(as) - 1);
2868 #endif /* ALTERNATE_SHEBANG */
2877 while (*d && !isSPACE(*d))
2881 #ifdef ARG_ZERO_IS_SCRIPT
2882 if (ipathend > ipath) {
2884 * HP-UX (at least) sets argv[0] to the script name,
2885 * which makes $^X incorrect. And Digital UNIX and Linux,
2886 * at least, set argv[0] to the basename of the Perl
2887 * interpreter. So, having found "#!", we'll set it right.
2890 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2891 assert(SvPOK(x) || SvGMAGICAL(x));
2892 if (sv_eq(x, CopFILESV(PL_curcop))) {
2893 sv_setpvn(x, ipath, ipathend - ipath);
2899 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2900 const char * const lstart = SvPV_const(x,llen);
2902 bstart += blen - llen;
2903 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2904 sv_setpvn(x, ipath, ipathend - ipath);
2909 TAINT_NOT; /* $^X is always tainted, but that's OK */
2911 #endif /* ARG_ZERO_IS_SCRIPT */
2916 d = instr(s,"perl -");
2918 d = instr(s,"perl");
2920 /* avoid getting into infinite loops when shebang
2921 * line contains "Perl" rather than "perl" */
2923 for (d = ipathend-4; d >= ipath; --d) {
2924 if ((*d == 'p' || *d == 'P')
2925 && !ibcmp(d, "perl", 4))
2935 #ifdef ALTERNATE_SHEBANG
2937 * If the ALTERNATE_SHEBANG on this system starts with a
2938 * character that can be part of a Perl expression, then if
2939 * we see it but not "perl", we're probably looking at the
2940 * start of Perl code, not a request to hand off to some
2941 * other interpreter. Similarly, if "perl" is there, but
2942 * not in the first 'word' of the line, we assume the line
2943 * contains the start of the Perl program.
2945 if (d && *s != '#') {
2946 const char *c = ipath;
2947 while (*c && !strchr("; \t\r\n\f\v#", *c))
2950 d = Nullch; /* "perl" not in first word; ignore */
2952 *s = '#'; /* Don't try to parse shebang line */
2954 #endif /* ALTERNATE_SHEBANG */
2955 #ifndef MACOS_TRADITIONAL
2960 !instr(s,"indir") &&
2961 instr(PL_origargv[0],"perl"))
2968 while (s < PL_bufend && isSPACE(*s))
2970 if (s < PL_bufend) {
2971 Newxz(newargv,PL_origargc+3,char*);
2973 while (s < PL_bufend && !isSPACE(*s))
2976 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2979 newargv = PL_origargv;
2982 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2984 Perl_croak(aTHX_ "Can't exec %s", ipath);
2988 while (*d && !isSPACE(*d)) d++;
2989 while (SPACE_OR_TAB(*d)) d++;
2992 const bool switches_done = PL_doswitches;
2993 const U32 oldpdb = PL_perldb;
2994 const bool oldn = PL_minus_n;
2995 const bool oldp = PL_minus_p;
2998 if (*d == 'M' || *d == 'm' || *d == 'C') {
2999 const char * const m = d;
3000 while (*d && !isSPACE(*d)) d++;
3001 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3004 d = moreswitches(d);
3006 if (PL_doswitches && !switches_done) {
3007 int argc = PL_origargc;
3008 char **argv = PL_origargv;
3011 } while (argc && argv[0][0] == '-' && argv[0][1]);
3012 init_argv_symbols(argc,argv);
3014 if ((PERLDB_LINE && !oldpdb) ||
3015 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3016 /* if we have already added "LINE: while (<>) {",
3017 we must not do it again */
3019 sv_setpvn(PL_linestr, "", 0);
3020 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3021 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3022 PL_last_lop = PL_last_uni = Nullch;
3023 PL_preambled = FALSE;
3025 (void)gv_fetchfile(PL_origfilename);
3032 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3034 PL_lex_state = LEX_FORMLINE;
3039 #ifdef PERL_STRICT_CR
3040 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3042 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3044 case ' ': case '\t': case '\f': case 013:
3045 #ifdef MACOS_TRADITIONAL
3052 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3053 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3054 /* handle eval qq[#line 1 "foo"\n ...] */
3055 CopLINE_dec(PL_curcop);
3059 while (s < d && *s != '\n')
3063 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3064 Perl_croak(aTHX_ "panic: input overflow");
3066 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3068 PL_lex_state = LEX_FORMLINE;
3078 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3086 while (s < PL_bufend && SPACE_OR_TAB(*s))
3089 if (strnEQ(s,"=>",2)) {
3090 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3091 DEBUG_T( { S_printbuf(aTHX_
3092 "### Saw unary minus before =>, forcing word %s\n", s);
3094 OPERATOR('-'); /* unary minus */
3096 PL_last_uni = PL_oldbufptr;
3098 case 'r': ftst = OP_FTEREAD; break;
3099 case 'w': ftst = OP_FTEWRITE; break;
3100 case 'x': ftst = OP_FTEEXEC; break;
3101 case 'o': ftst = OP_FTEOWNED; break;
3102 case 'R': ftst = OP_FTRREAD; break;
3103 case 'W': ftst = OP_FTRWRITE; break;
3104 case 'X': ftst = OP_FTREXEC; break;
3105 case 'O': ftst = OP_FTROWNED; break;
3106 case 'e': ftst = OP_FTIS; break;
3107 case 'z': ftst = OP_FTZERO; break;
3108 case 's': ftst = OP_FTSIZE; break;
3109 case 'f': ftst = OP_FTFILE; break;
3110 case 'd': ftst = OP_FTDIR; break;
3111 case 'l': ftst = OP_FTLINK; break;
3112 case 'p': ftst = OP_FTPIPE; break;
3113 case 'S': ftst = OP_FTSOCK; break;
3114 case 'u': ftst = OP_FTSUID; break;
3115 case 'g': ftst = OP_FTSGID; break;
3116 case 'k': ftst = OP_FTSVTX; break;
3117 case 'b': ftst = OP_FTBLK; break;
3118 case 'c': ftst = OP_FTCHR; break;
3119 case 't': ftst = OP_FTTTY; break;
3120 case 'T': ftst = OP_FTTEXT; break;
3121 case 'B': ftst = OP_FTBINARY; break;
3122 case 'M': case 'A': case 'C':
3123 gv_fetchpv("\024",GV_ADD, SVt_PV);
3125 case 'M': ftst = OP_FTMTIME; break;
3126 case 'A': ftst = OP_FTATIME; break;
3127 case 'C': ftst = OP_FTCTIME; break;
3135 PL_last_lop_op = (OPCODE)ftst;
3136 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3137 "### Saw file test %c\n", (int)tmp);
3142 /* Assume it was a minus followed by a one-letter named
3143 * subroutine call (or a -bareword), then. */
3144 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3145 "### '-%c' looked like a file test but was not\n",
3152 const char tmp = *s++;
3155 if (PL_expect == XOPERATOR)
3160 else if (*s == '>') {
3163 if (isIDFIRST_lazy_if(s,UTF)) {
3164 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3172 if (PL_expect == XOPERATOR)
3175 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3177 OPERATOR('-'); /* unary minus */
3183 const char tmp = *s++;
3186 if (PL_expect == XOPERATOR)
3191 if (PL_expect == XOPERATOR)
3194 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3201 if (PL_expect != XOPERATOR) {
3202 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3203 PL_expect = XOPERATOR;
3204 force_ident(PL_tokenbuf, '*');
3217 if (PL_expect == XOPERATOR) {
3221 PL_tokenbuf[0] = '%';
3222 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3223 if (!PL_tokenbuf[1]) {
3226 PL_pending_ident = '%';
3237 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3238 && FEATURE_IS_ENABLED("~~"))
3245 const char tmp = *s++;
3251 goto just_a_word_zero_gv;
3254 switch (PL_expect) {
3257 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3259 PL_bufptr = s; /* update in case we back off */
3265 PL_expect = XTERMBLOCK;
3269 while (isIDFIRST_lazy_if(s,UTF)) {
3271 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3272 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3273 if (tmp < 0) tmp = -tmp;
3289 d = scan_str(d,TRUE,TRUE);
3291 /* MUST advance bufptr here to avoid bogus
3292 "at end of line" context messages from yyerror().
3294 PL_bufptr = s + len;
3295 yyerror("Unterminated attribute parameter in attribute list");
3298 return REPORT(0); /* EOF indicator */
3302 SV *sv = newSVpvn(s, len);
3303 sv_catsv(sv, PL_lex_stuff);
3304 attrs = append_elem(OP_LIST, attrs,
3305 newSVOP(OP_CONST, 0, sv));
3306 SvREFCNT_dec(PL_lex_stuff);
3307 PL_lex_stuff = Nullsv;
3310 if (len == 6 && strnEQ(s, "unique", len)) {
3311 if (PL_in_my == KEY_our)
3313 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3315 ; /* skip to avoid loading attributes.pm */
3318 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3321 /* NOTE: any CV attrs applied here need to be part of
3322 the CVf_BUILTIN_ATTRS define in cv.h! */
3323 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3324 CvLVALUE_on(PL_compcv);
3325 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3326 CvLOCKED_on(PL_compcv);
3327 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3328 CvMETHOD_on(PL_compcv);
3329 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3330 CvASSERTION_on(PL_compcv);
3331 /* After we've set the flags, it could be argued that
3332 we don't need to do the attributes.pm-based setting
3333 process, and shouldn't bother appending recognized
3334 flags. To experiment with that, uncomment the
3335 following "else". (Note that's already been
3336 uncommented. That keeps the above-applied built-in
3337 attributes from being intercepted (and possibly
3338 rejected) by a package's attribute routines, but is
3339 justified by the performance win for the common case
3340 of applying only built-in attributes.) */
3342 attrs = append_elem(OP_LIST, attrs,
3343 newSVOP(OP_CONST, 0,
3347 if (*s == ':' && s[1] != ':')
3350 break; /* require real whitespace or :'s */
3354 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3355 if (*s != ';' && *s != '}' && *s != tmp
3356 && (tmp != '=' || *s != ')')) {
3357 const char q = ((*s == '\'') ? '"' : '\'');
3358 /* If here for an expression, and parsed no attrs, back
3360 if (tmp == '=' && !attrs) {
3364 /* MUST advance bufptr here to avoid bogus "at end of line"
3365 context messages from yyerror().
3369 ? Perl_form(aTHX_ "Invalid separator character "
3370 "%c%c%c in attribute list", q, *s, q)
3371 : "Unterminated attribute list" );
3379 PL_nextval[PL_nexttoke].opval = attrs;
3387 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3388 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3396 const char tmp = *s++;
3401 const char tmp = *s++;
3409 if (PL_lex_brackets <= 0)
3410 yyerror("Unmatched right square bracket");
3413 if (PL_lex_state == LEX_INTERPNORMAL) {
3414 if (PL_lex_brackets == 0) {
3415 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3416 PL_lex_state = LEX_INTERPEND;
3423 if (PL_lex_brackets > 100) {
3424 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3426 switch (PL_expect) {
3428 if (PL_lex_formbrack) {
3432 if (PL_oldoldbufptr == PL_last_lop)
3433 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3435 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3436 OPERATOR(HASHBRACK);
3438 while (s < PL_bufend && SPACE_OR_TAB(*s))
3441 PL_tokenbuf[0] = '\0';
3442 if (d < PL_bufend && *d == '-') {
3443 PL_tokenbuf[0] = '-';
3445 while (d < PL_bufend && SPACE_OR_TAB(*d))
3448 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3449 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3451 while (d < PL_bufend && SPACE_OR_TAB(*d))
3454 const char minus = (PL_tokenbuf[0] == '-');
3455 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3463 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3468 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3473 if (PL_oldoldbufptr == PL_last_lop)
3474 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3476 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3479 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3481 /* This hack is to get the ${} in the message. */
3483 yyerror("syntax error");
3486 OPERATOR(HASHBRACK);
3488 /* This hack serves to disambiguate a pair of curlies
3489 * as being a block or an anon hash. Normally, expectation
3490 * determines that, but in cases where we're not in a
3491 * position to expect anything in particular (like inside
3492 * eval"") we have to resolve the ambiguity. This code
3493 * covers the case where the first term in the curlies is a
3494 * quoted string. Most other cases need to be explicitly
3495 * disambiguated by prepending a "+" before the opening
3496 * curly in order to force resolution as an anon hash.
3498 * XXX should probably propagate the outer expectation
3499 * into eval"" to rely less on this hack, but that could
3500 * potentially break current behavior of eval"".
3504 if (*s == '\'' || *s == '"' || *s == '`') {
3505 /* common case: get past first string, handling escapes */
3506 for (t++; t < PL_bufend && *t != *s;)
3507 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3511 else if (*s == 'q') {
3514 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3517 /* skip q//-like construct */
3519 char open, close, term;
3522 while (t < PL_bufend && isSPACE(*t))
3524 /* check for q => */
3525 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3526 OPERATOR(HASHBRACK);
3530 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3534 for (t++; t < PL_bufend; t++) {
3535 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3537 else if (*t == open)
3541 for (t++; t < PL_bufend; t++) {
3542 if (*t == '\\' && t+1 < PL_bufend)
3544 else if (*t == close && --brackets <= 0)
3546 else if (*t == open)
3553 /* skip plain q word */
3554 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3557 else if (isALNUM_lazy_if(t,UTF)) {
3559 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3562 while (t < PL_bufend && isSPACE(*t))
3564 /* if comma follows first term, call it an anon hash */
3565 /* XXX it could be a comma expression with loop modifiers */
3566 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3567 || (*t == '=' && t[1] == '>')))
3568 OPERATOR(HASHBRACK);
3569 if (PL_expect == XREF)
3572 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3578 yylval.ival = CopLINE(PL_curcop);
3579 if (isSPACE(*s) || *s == '#')
3580 PL_copline = NOLINE; /* invalidate current command line number */
3585 if (PL_lex_brackets <= 0)
3586 yyerror("Unmatched right curly bracket");
3588 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3589 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3590 PL_lex_formbrack = 0;
3591 if (PL_lex_state == LEX_INTERPNORMAL) {
3592 if (PL_lex_brackets == 0) {
3593 if (PL_expect & XFAKEBRACK) {
3594 PL_expect &= XENUMMASK;
3595 PL_lex_state = LEX_INTERPEND;
3597 return yylex(); /* ignore fake brackets */
3599 if (*s == '-' && s[1] == '>')
3600 PL_lex_state = LEX_INTERPENDMAYBE;
3601 else if (*s != '[' && *s != '{')
3602 PL_lex_state = LEX_INTERPEND;
3605 if (PL_expect & XFAKEBRACK) {
3606 PL_expect &= XENUMMASK;
3608 return yylex(); /* ignore fake brackets */
3617 if (PL_expect == XOPERATOR) {
3618 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3619 && isIDFIRST_lazy_if(s,UTF))
3621 CopLINE_dec(PL_curcop);
3622 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3623 CopLINE_inc(PL_curcop);
3628 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3630 PL_expect = XOPERATOR;
3631 force_ident(PL_tokenbuf, '&');
3635 yylval.ival = (OPpENTERSUB_AMPER<<8);
3647 const char tmp = *s++;
3654 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3655 && strchr("+-*/%.^&|<",tmp))
3656 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3657 "Reversed %c= operator",(int)tmp);
3659 if (PL_expect == XSTATE && isALPHA(tmp) &&
3660 (s == PL_linestart+1 || s[-2] == '\n') )
3662 if (PL_in_eval && !PL_rsfp) {
3667 if (strnEQ(s,"=cut",4)) {
3681 PL_doextract = TRUE;
3685 if (PL_lex_brackets < PL_lex_formbrack) {
3687 #ifdef PERL_STRICT_CR
3688 for (t = s; SPACE_OR_TAB(*t); t++) ;
3690 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3692 if (*t == '\n' || *t == '#') {
3703 const char tmp = *s++;
3705 /* was this !=~ where !~ was meant?
3706 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3708 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3709 const char *t = s+1;
3711 while (t < PL_bufend && isSPACE(*t))
3714 if (*t == '/' || *t == '?' ||
3715 ((*t == 'm' || *t == 's' || *t == 'y')
3716 && !isALNUM(t[1])) ||
3717 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3718 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3719 "!=~ should be !~");
3729 if (PL_expect != XOPERATOR) {
3730 if (s[1] != '<' && !strchr(s,'>'))
3733 s = scan_heredoc(s);
3735 s = scan_inputsymbol(s);
3736 TERM(sublex_start());
3742 SHop(OP_LEFT_SHIFT);
3756 const char tmp = *s++;
3758 SHop(OP_RIGHT_SHIFT);
3768 if (PL_expect == XOPERATOR) {
3769 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3771 deprecate_old(commaless_variable_list);
3772 return REPORT(','); /* grandfather non-comma-format format */
3776 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3777 PL_tokenbuf[0] = '@';
3778 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3779 sizeof PL_tokenbuf - 1, FALSE);
3780 if (PL_expect == XOPERATOR)
3781 no_op("Array length", s);
3782 if (!PL_tokenbuf[1])
3784 PL_expect = XOPERATOR;
3785 PL_pending_ident = '#';
3789 PL_tokenbuf[0] = '$';
3790 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3791 sizeof PL_tokenbuf - 1, FALSE);
3792 if (PL_expect == XOPERATOR)
3794 if (!PL_tokenbuf[1]) {
3796 yyerror("Final $ should be \\$ or $name");
3800 /* This kludge not intended to be bulletproof. */
3801 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3802 yylval.opval = newSVOP(OP_CONST, 0,
3803 newSViv(PL_compiling.cop_arybase));
3804 yylval.opval->op_private = OPpCONST_ARYBASE;
3810 const char tmp = *s;
3811 if (PL_lex_state == LEX_NORMAL)
3814 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3815 && intuit_more(s)) {
3817 PL_tokenbuf[0] = '@';
3818 if (ckWARN(WARN_SYNTAX)) {
3821 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3824 PL_bufptr = skipspace(PL_bufptr);
3825 while (t < PL_bufend && *t != ']')
3827 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3828 "Multidimensional syntax %.*s not supported",
3829 (int)((t - PL_bufptr) + 1), PL_bufptr);
3833 else if (*s == '{') {
3835 PL_tokenbuf[0] = '%';
3836 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3837 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3839 char tmpbuf[sizeof PL_tokenbuf];
3840 for (t++; isSPACE(*t); t++) ;
3841 if (isIDFIRST_lazy_if(t,UTF)) {
3843 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3845 for (; isSPACE(*t); t++) ;
3846 if (*t == ';' && get_cv(tmpbuf, FALSE))
3847 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3848 "You need to quote \"%s\"",
3855 PL_expect = XOPERATOR;
3856 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3857 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3858 if (!islop || PL_last_lop_op == OP_GREPSTART)
3859 PL_expect = XOPERATOR;
3860 else if (strchr("$@\"'`q", *s))
3861 PL_expect = XTERM; /* e.g. print $fh "foo" */
3862 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3863 PL_expect = XTERM; /* e.g. print $fh &sub */
3864 else if (isIDFIRST_lazy_if(s,UTF)) {
3865 char tmpbuf[sizeof PL_tokenbuf];
3867 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3868 if ((t2 = keyword(tmpbuf, len))) {
3869 /* binary operators exclude handle interpretations */
3881 PL_expect = XTERM; /* e.g. print $fh length() */
3886 PL_expect = XTERM; /* e.g. print $fh subr() */
3889 else if (isDIGIT(*s))
3890 PL_expect = XTERM; /* e.g. print $fh 3 */
3891 else if (*s == '.' && isDIGIT(s[1]))
3892 PL_expect = XTERM; /* e.g. print $fh .3 */
3893 else if ((*s == '?' || *s == '-' || *s == '+')
3894 && !isSPACE(s[1]) && s[1] != '=')
3895 PL_expect = XTERM; /* e.g. print $fh -1 */
3896 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3898 PL_expect = XTERM; /* e.g. print $fh /.../
3899 XXX except DORDOR operator
3901 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3903 PL_expect = XTERM; /* print $fh <<"EOF" */
3906 PL_pending_ident = '$';
3910 if (PL_expect == XOPERATOR)
3912 PL_tokenbuf[0] = '@';
3913 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3914 if (!PL_tokenbuf[1]) {
3917 if (PL_lex_state == LEX_NORMAL)
3919 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3921 PL_tokenbuf[0] = '%';
3923 /* Warn about @ where they meant $. */
3924 if (*s == '[' || *s == '{') {
3925 if (ckWARN(WARN_SYNTAX)) {
3926 const char *t = s + 1;
3927 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3929 if (*t == '}' || *t == ']') {
3931 PL_bufptr = skipspace(PL_bufptr);
3932 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3933 "Scalar value %.*s better written as $%.*s",
3934 (int)(t-PL_bufptr), PL_bufptr,
3935 (int)(t-PL_bufptr-1), PL_bufptr+1);
3940 PL_pending_ident = '@';
3943 case '/': /* may be division, defined-or, or pattern */
3944 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3948 case '?': /* may either be conditional or pattern */
3949 if(PL_expect == XOPERATOR) {
3957 /* A // operator. */
3967 /* Disable warning on "study /blah/" */
3968 if (PL_oldoldbufptr == PL_last_uni
3969 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3970 || memNE(PL_last_uni, "study", 5)
3971 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3974 s = scan_pat(s,OP_MATCH);
3975 TERM(sublex_start());
3979 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3980 #ifdef PERL_STRICT_CR
3983 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3985 && (s == PL_linestart || s[-1] == '\n') )
3987 PL_lex_formbrack = 0;
3991 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3997 yylval.ival = OPf_SPECIAL;
4003 if (PL_expect != XOPERATOR)
4008 case '0': case '1': case '2': case '3': case '4':
4009 case '5': case '6': case '7': case '8': case '9':
4010 s = scan_num(s, &yylval);
4011 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4012 if (PL_expect == XOPERATOR)
4017 s = scan_str(s,FALSE,FALSE);
4018 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4019 if (PL_expect == XOPERATOR) {
4020 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4022 deprecate_old(commaless_variable_list);
4023 return REPORT(','); /* grandfather non-comma-format format */
4029 missingterm((char*)0);
4030 yylval.ival = OP_CONST;
4031 TERM(sublex_start());
4034 s = scan_str(s,FALSE,FALSE);
4035 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4036 if (PL_expect == XOPERATOR) {
4037 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4039 deprecate_old(commaless_variable_list);
4040 return REPORT(','); /* grandfather non-comma-format format */
4046 missingterm((char*)0);
4047 yylval.ival = OP_CONST;
4048 /* FIXME. I think that this can be const if char *d is replaced by
4049 more localised variables. */
4050 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4051 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4052 yylval.ival = OP_STRINGIFY;
4056 TERM(sublex_start());
4059 s = scan_str(s,FALSE,FALSE);
4060 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4061 if (PL_expect == XOPERATOR)
4062 no_op("Backticks",s);
4064 missingterm((char*)0);
4065 yylval.ival = OP_BACKTICK;
4067 TERM(sublex_start());
4071 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4072 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4074 if (PL_expect == XOPERATOR)
4075 no_op("Backslash",s);
4079 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4080 char *start = s + 2;
4081 while (isDIGIT(*start) || *start == '_')
4083 if (*start == '.' && isDIGIT(start[1])) {
4084 s = scan_num(s, &yylval);
4087 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4088 else if (!isALPHA(*start) && (PL_expect == XTERM
4089 || PL_expect == XREF || PL_expect == XSTATE
4090 || PL_expect == XTERMORDORDOR)) {
4091 const char c = *start;
4094 gv = gv_fetchpv(s, 0, SVt_PVCV);
4097 s = scan_num(s, &yylval);
4104 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4140 I32 orig_keyword = 0;
4145 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4147 /* Some keywords can be followed by any delimiter, including ':' */
4148 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4149 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4150 (PL_tokenbuf[0] == 'q' &&
4151 strchr("qwxr", PL_tokenbuf[1])))));
4153 /* x::* is just a word, unless x is "CORE" */
4154 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4158 while (d < PL_bufend && isSPACE(*d))
4159 d++; /* no comments skipped here, or s### is misparsed */
4161 /* Is this a label? */
4162 if (!tmp && PL_expect == XSTATE
4163 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4165 yylval.pval = savepv(PL_tokenbuf);
4170 /* Check for keywords */
4171 tmp = keyword(PL_tokenbuf, len);
4173 /* Is this a word before a => operator? */
4174 if (*d == '=' && d[1] == '>') {
4177 = (OP*)newSVOP(OP_CONST, 0,
4178 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4179 yylval.opval->op_private = OPpCONST_BARE;
4183 if (tmp < 0) { /* second-class keyword? */
4184 GV *ogv = NULL; /* override (winner) */
4185 GV *hgv = NULL; /* hidden (loser) */
4186 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4188 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4191 if (GvIMPORTED_CV(gv))
4193 else if (! CvMETHOD(cv))
4197 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4198 (gv = *gvp) != (GV*)&PL_sv_undef &&
4199 GvCVu(gv) && GvIMPORTED_CV(gv))
4206 tmp = 0; /* overridden by import or by GLOBAL */
4209 && -tmp==KEY_lock /* XXX generalizable kludge */
4211 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4213 tmp = 0; /* any sub overrides "weak" keyword */
4215 else { /* no override */
4217 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4218 Perl_warner(aTHX_ packWARN(WARN_MISC),
4219 "dump() better written as CORE::dump()");
4223 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4224 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4225 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4226 "Ambiguous call resolved as CORE::%s(), %s",
4227 GvENAME(hgv), "qualify as such or use &");
4234 default: /* not a keyword */
4235 /* Trade off - by using this evil construction we can pull the
4236 variable gv into the block labelled keylookup. If not, then
4237 we have to give it function scope so that the goto from the
4238 earlier ':' case doesn't bypass the initialisation. */
4240 just_a_word_zero_gv:
4248 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4251 /* Get the rest if it looks like a package qualifier */
4253 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4255 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4258 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4259 *s == '\'' ? "'" : "::");
4264 if (PL_expect == XOPERATOR) {
4265 if (PL_bufptr == PL_linestart) {
4266 CopLINE_dec(PL_curcop);
4267 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4268 CopLINE_inc(PL_curcop);
4271 no_op("Bareword",s);
4274 /* Look for a subroutine with this name in current package,
4275 unless name is "Foo::", in which case Foo is a bearword
4276 (and a package name). */
4279 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4281 if (ckWARN(WARN_BAREWORD)
4282 && ! gv_fetchpv(PL_tokenbuf, 0, SVt_PVHV))
4283 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4284 "Bareword \"%s\" refers to nonexistent package",
4287 PL_tokenbuf[len] = '\0';
4294 /* Mustn't actually add anything to a symbol table.
4295 But also don't want to "initialise" any placeholder
4296 constants that might already be there into full
4297 blown PVGVs with attached PVCV. */
4298 gv = gv_fetchpv(PL_tokenbuf, GV_NOADD_NOINIT,
4303 /* if we saw a global override before, get the right name */
4306 sv = newSVpvs("CORE::GLOBAL::");
4307 sv_catpv(sv,PL_tokenbuf);
4310 /* If len is 0, newSVpv does strlen(), which is correct.
4311 If len is non-zero, then it will be the true length,
4312 and so the scalar will be created correctly. */
4313 sv = newSVpv(PL_tokenbuf,len);
4316 /* Presume this is going to be a bareword of some sort. */
4319 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4320 yylval.opval->op_private = OPpCONST_BARE;
4321 /* UTF-8 package name? */
4322 if (UTF && !IN_BYTES &&
4323 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4326 /* And if "Foo::", then that's what it certainly is. */
4331 /* Do the explicit type check so that we don't need to force
4332 the initialisation of the symbol table to have a real GV.
4333 Beware - gv may not really be a PVGV, cv may not really be
4334 a PVCV, (because of the space optimisations that gv_init
4335 understands) But they're true if for this symbol there is
4336 respectively a typeglob and a subroutine.
4338 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4339 /* Real typeglob, so get the real subroutine: */
4341 /* A proxy for a subroutine in this package? */
4342 : SvOK(gv) ? (CV *) gv : NULL)
4345 /* See if it's the indirect object for a list operator. */
4347 if (PL_oldoldbufptr &&
4348 PL_oldoldbufptr < PL_bufptr &&
4349 (PL_oldoldbufptr == PL_last_lop
4350 || PL_oldoldbufptr == PL_last_uni) &&
4351 /* NO SKIPSPACE BEFORE HERE! */
4352 (PL_expect == XREF ||
4353 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4355 bool immediate_paren = *s == '(';
4357 /* (Now we can afford to cross potential line boundary.) */
4360 /* Two barewords in a row may indicate method call. */
4362 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4363 (tmp = intuit_method(s, gv, cv)))
4366 /* If not a declared subroutine, it's an indirect object. */
4367 /* (But it's an indir obj regardless for sort.) */
4368 /* Also, if "_" follows a filetest operator, it's a bareword */
4371 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4373 (PL_last_lop_op != OP_MAPSTART &&
4374 PL_last_lop_op != OP_GREPSTART))))
4375 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4376 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4379 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4384 PL_expect = XOPERATOR;
4387 /* Is this a word before a => operator? */
4388 if (*s == '=' && s[1] == '>' && !pkgname) {
4390 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4391 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4392 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4396 /* If followed by a paren, it's certainly a subroutine. */
4400 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4401 if (*d == ')' && (sv = gv_const_sv(gv))) {
4406 PL_nextval[PL_nexttoke].opval = yylval.opval;
4407 PL_expect = XOPERATOR;
4413 /* If followed by var or block, call it a method (unless sub) */
4415 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4416 PL_last_lop = PL_oldbufptr;
4417 PL_last_lop_op = OP_METHOD;
4421 /* If followed by a bareword, see if it looks like indir obj. */
4424 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4425 && (tmp = intuit_method(s, gv, cv)))
4428 /* Not a method, so call it a subroutine (if defined) */
4431 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4432 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4433 "Ambiguous use of -%s resolved as -&%s()",
4434 PL_tokenbuf, PL_tokenbuf);
4435 /* Check for a constant sub */
4436 if ((sv = gv_const_sv(gv))) {
4438 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4439 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4440 yylval.opval->op_private = 0;
4444 /* Resolve to GV now. */
4445 if (SvTYPE(gv) != SVt_PVGV) {
4446 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4447 assert (SvTYPE(gv) == SVt_PVGV);
4448 /* cv must have been some sort of placeholder, so
4449 now needs replacing with a real code reference. */
4453 op_free(yylval.opval);
4454 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4455 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4456 PL_last_lop = PL_oldbufptr;
4457 PL_last_lop_op = OP_ENTERSUB;
4458 /* Is there a prototype? */
4461 const char *proto = SvPV_const((SV*)cv, len);
4464 if (*proto == '$' && proto[1] == '\0')
4466 while (*proto == ';')
4468 if (*proto == '&' && *s == '{') {
4469 sv_setpv(PL_subname, PL_curstash ?
4470 "__ANON__" : "__ANON__::__ANON__");
4474 PL_nextval[PL_nexttoke].opval = yylval.opval;
4480 /* Call it a bare word */
4482 if (PL_hints & HINT_STRICT_SUBS)
4483 yylval.opval->op_private |= OPpCONST_STRICT;
4486 if (lastchar != '-') {
4487 if (ckWARN(WARN_RESERVED)) {
4488 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4489 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4490 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4497 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4498 && ckWARN_d(WARN_AMBIGUOUS)) {
4499 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4500 "Operator or semicolon missing before %c%s",
4501 lastchar, PL_tokenbuf);
4502 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4503 "Ambiguous use of %c resolved as operator %c",
4504 lastchar, lastchar);
4510 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4511 newSVpv(CopFILE(PL_curcop),0));
4515 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4516 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4519 case KEY___PACKAGE__:
4520 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4522 ? newSVhek(HvNAME_HEK(PL_curstash))
4529 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4530 const char *pname = "main";
4531 if (PL_tokenbuf[2] == 'D')
4532 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4533 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4537 GvIOp(gv) = newIO();
4538 IoIFP(GvIOp(gv)) = PL_rsfp;
4539 #if defined(HAS_FCNTL) && defined(F_SETFD)
4541 const int fd = PerlIO_fileno(PL_rsfp);
4542 fcntl(fd,F_SETFD,fd >= 3);
4545 /* Mark this internal pseudo-handle as clean */
4546 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4548 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4549 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4550 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4552 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4553 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4554 /* if the script was opened in binmode, we need to revert
4555 * it to text mode for compatibility; but only iff it has CRs
4556 * XXX this is a questionable hack at best. */
4557 if (PL_bufend-PL_bufptr > 2
4558 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4561 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4562 loc = PerlIO_tell(PL_rsfp);
4563 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4566 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4568 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4569 #endif /* NETWARE */
4570 #ifdef PERLIO_IS_STDIO /* really? */
4571 # if defined(__BORLANDC__)
4572 /* XXX see note in do_binmode() */
4573 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4577 PerlIO_seek(PL_rsfp, loc, 0);
4581 #ifdef PERLIO_LAYERS
4584 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4585 else if (PL_encoding) {
4592 XPUSHs(PL_encoding);
4594 call_method("name", G_SCALAR);
4598 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4599 Perl_form(aTHX_ ":encoding(%"SVf")",
4617 if (PL_expect == XSTATE) {
4624 if (*s == ':' && s[1] == ':') {
4627 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4628 if (!(tmp = keyword(PL_tokenbuf, len)))
4629 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4632 else if (tmp == KEY_require || tmp == KEY_do)
4633 /* that's a way to remember we saw "CORE::" */
4646 LOP(OP_ACCEPT,XTERM);
4652 LOP(OP_ATAN2,XTERM);
4658 LOP(OP_BINMODE,XTERM);
4661 LOP(OP_BLESS,XTERM);
4670 /* When 'use switch' is in effect, continue has a dual
4671 life as a control operator. */
4673 if (!FEATURE_IS_ENABLED("switch"))
4676 /* We have to disambiguate the two senses of
4677 "continue". If the next token is a '{' then
4678 treat it as the start of a continue block;
4679 otherwise treat it as a control operator.
4690 (void)gv_fetchpv("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4707 if (!PL_cryptseen) {
4708 PL_cryptseen = TRUE;
4712 LOP(OP_CRYPT,XTERM);
4715 LOP(OP_CHMOD,XTERM);
4718 LOP(OP_CHOWN,XTERM);
4721 LOP(OP_CONNECT,XTERM);
4740 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4741 if (orig_keyword == KEY_do) {
4750 PL_hints |= HINT_BLOCK_SCOPE;
4760 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4761 LOP(OP_DBMOPEN,XTERM);
4767 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4774 yylval.ival = CopLINE(PL_curcop);
4788 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4789 UNIBRACK(OP_ENTEREVAL);
4807 case KEY_endhostent:
4813 case KEY_endservent:
4816 case KEY_endprotoent:
4827 yylval.ival = CopLINE(PL_curcop);
4829 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4831 if ((PL_bufend - p) >= 3 &&
4832 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4834 else if ((PL_bufend - p) >= 4 &&
4835 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4838 if (isIDFIRST_lazy_if(p,UTF)) {
4839 p = scan_ident(p, PL_bufend,
4840 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4844 Perl_croak(aTHX_ "Missing $ on loop variable");
4849 LOP(OP_FORMLINE,XTERM);
4855 LOP(OP_FCNTL,XTERM);
4861 LOP(OP_FLOCK,XTERM);
4870 LOP(OP_GREPSTART, XREF);
4873 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4888 case KEY_getpriority:
4889 LOP(OP_GETPRIORITY,XTERM);
4891 case KEY_getprotobyname:
4894 case KEY_getprotobynumber:
4895 LOP(OP_GPBYNUMBER,XTERM);
4897 case KEY_getprotoent:
4909 case KEY_getpeername:
4910 UNI(OP_GETPEERNAME);
4912 case KEY_gethostbyname:
4915 case KEY_gethostbyaddr:
4916 LOP(OP_GHBYADDR,XTERM);
4918 case KEY_gethostent:
4921 case KEY_getnetbyname:
4924 case KEY_getnetbyaddr:
4925 LOP(OP_GNBYADDR,XTERM);
4930 case KEY_getservbyname:
4931 LOP(OP_GSBYNAME,XTERM);
4933 case KEY_getservbyport:
4934 LOP(OP_GSBYPORT,XTERM);
4936 case KEY_getservent:
4939 case KEY_getsockname:
4940 UNI(OP_GETSOCKNAME);
4942 case KEY_getsockopt:
4943 LOP(OP_GSOCKOPT,XTERM);
4958 yylval.ival = CopLINE(PL_curcop);
4969 yylval.ival = CopLINE(PL_curcop);
4973 LOP(OP_INDEX,XTERM);
4979 LOP(OP_IOCTL,XTERM);
4991 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5023 LOP(OP_LISTEN,XTERM);
5032 s = scan_pat(s,OP_MATCH);
5033 TERM(sublex_start());
5036 LOP(OP_MAPSTART, XREF);
5039 LOP(OP_MKDIR,XTERM);
5042 LOP(OP_MSGCTL,XTERM);
5045 LOP(OP_MSGGET,XTERM);
5048 LOP(OP_MSGRCV,XTERM);
5051 LOP(OP_MSGSND,XTERM);
5057 if (isIDFIRST_lazy_if(s,UTF)) {
5058 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5059 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5061 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5062 if (!PL_in_my_stash) {
5065 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5073 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5080 s = tokenize_use(0, s);
5084 if (*s == '(' || (s = skipspace(s), *s == '('))
5091 if (isIDFIRST_lazy_if(s,UTF)) {
5093 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5094 for (t=d; *t && isSPACE(*t); t++) ;
5095 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5097 && !(t[0] == '=' && t[1] == '>')
5099 int len = (int)(d-s);
5100 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5101 "Precedence problem: open %.*s should be open(%.*s)",
5108 yylval.ival = OP_OR;
5118 LOP(OP_OPEN_DIR,XTERM);
5121 checkcomma(s,PL_tokenbuf,"filehandle");
5125 checkcomma(s,PL_tokenbuf,"filehandle");
5144 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5148 LOP(OP_PIPE_OP,XTERM);
5151 s = scan_str(s,FALSE,FALSE);
5153 missingterm((char*)0);
5154 yylval.ival = OP_CONST;
5155 TERM(sublex_start());
5161 s = scan_str(s,FALSE,FALSE);
5163 missingterm((char*)0);
5164 PL_expect = XOPERATOR;
5166 if (SvCUR(PL_lex_stuff)) {
5169 d = SvPV_force(PL_lex_stuff, len);
5172 for (; isSPACE(*d) && len; --len, ++d) ;
5175 if (!warned && ckWARN(WARN_QW)) {
5176 for (; !isSPACE(*d) && len; --len, ++d) {
5178 Perl_warner(aTHX_ packWARN(WARN_QW),
5179 "Possible attempt to separate words with commas");
5182 else if (*d == '#') {
5183 Perl_warner(aTHX_ packWARN(WARN_QW),
5184 "Possible attempt to put comments in qw() list");
5190 for (; !isSPACE(*d) && len; --len, ++d) ;
5192 sv = newSVpvn(b, d-b);
5193 if (DO_UTF8(PL_lex_stuff))
5195 words = append_elem(OP_LIST, words,
5196 newSVOP(OP_CONST, 0, tokeq(sv)));
5200 PL_nextval[PL_nexttoke].opval = words;
5205 SvREFCNT_dec(PL_lex_stuff);
5206 PL_lex_stuff = Nullsv;
5212 s = scan_str(s,FALSE,FALSE);
5214 missingterm((char*)0);
5215 yylval.ival = OP_STRINGIFY;
5216 if (SvIVX(PL_lex_stuff) == '\'')
5217 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5218 TERM(sublex_start());
5221 s = scan_pat(s,OP_QR);
5222 TERM(sublex_start());
5225 s = scan_str(s,FALSE,FALSE);
5227 missingterm((char*)0);
5228 yylval.ival = OP_BACKTICK;
5230 TERM(sublex_start());
5238 s = force_version(s, FALSE);
5240 else if (*s != 'v' || !isDIGIT(s[1])
5241 || (s = force_version(s, TRUE), *s == 'v'))
5243 *PL_tokenbuf = '\0';
5244 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5245 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5246 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5248 yyerror("<> should be quotes");
5250 if (orig_keyword == KEY_require) {
5258 PL_last_uni = PL_oldbufptr;
5259 PL_last_lop_op = OP_REQUIRE;
5261 return REPORT( (int)REQUIRE );
5267 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5271 LOP(OP_RENAME,XTERM);
5280 LOP(OP_RINDEX,XTERM);
5290 UNIDOR(OP_READLINE);
5303 LOP(OP_REVERSE,XTERM);
5306 UNIDOR(OP_READLINK);
5314 TERM(sublex_start());
5316 TOKEN(1); /* force error */
5319 checkcomma(s,PL_tokenbuf,"filehandle");
5329 LOP(OP_SELECT,XTERM);
5335 LOP(OP_SEMCTL,XTERM);
5338 LOP(OP_SEMGET,XTERM);
5341 LOP(OP_SEMOP,XTERM);
5347 LOP(OP_SETPGRP,XTERM);
5349 case KEY_setpriority:
5350 LOP(OP_SETPRIORITY,XTERM);
5352 case KEY_sethostent:
5358 case KEY_setservent:
5361 case KEY_setprotoent:
5371 LOP(OP_SEEKDIR,XTERM);
5373 case KEY_setsockopt:
5374 LOP(OP_SSOCKOPT,XTERM);
5380 LOP(OP_SHMCTL,XTERM);
5383 LOP(OP_SHMGET,XTERM);
5386 LOP(OP_SHMREAD,XTERM);
5389 LOP(OP_SHMWRITE,XTERM);
5392 LOP(OP_SHUTDOWN,XTERM);
5401 LOP(OP_SOCKET,XTERM);
5403 case KEY_socketpair:
5404 LOP(OP_SOCKPAIR,XTERM);
5407 checkcomma(s,PL_tokenbuf,"subroutine name");
5409 if (*s == ';' || *s == ')') /* probably a close */
5410 Perl_croak(aTHX_ "sort is now a reserved word");
5412 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5416 LOP(OP_SPLIT,XTERM);
5419 LOP(OP_SPRINTF,XTERM);
5422 LOP(OP_SPLICE,XTERM);
5437 LOP(OP_SUBSTR,XTERM);
5443 char tmpbuf[sizeof PL_tokenbuf];
5444 SSize_t tboffset = 0;
5445 expectation attrful;
5446 bool have_name, have_proto, bad_proto;
5447 const int key = tmp;
5451 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5452 (*s == ':' && s[1] == ':'))
5455 attrful = XATTRBLOCK;
5456 /* remember buffer pos'n for later force_word */
5457 tboffset = s - PL_oldbufptr;
5458 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5459 if (strchr(tmpbuf, ':'))
5460 sv_setpv(PL_subname, tmpbuf);
5462 sv_setsv(PL_subname,PL_curstname);
5463 sv_catpvs(PL_subname,"::");
5464 sv_catpvn(PL_subname,tmpbuf,len);
5471 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5472 PL_expect = XTERMBLOCK;
5473 attrful = XATTRTERM;
5474 sv_setpvn(PL_subname,"?",1);
5478 if (key == KEY_format) {
5480 PL_lex_formbrack = PL_lex_brackets + 1;
5482 (void) force_word(PL_oldbufptr + tboffset, WORD,
5487 /* Look for a prototype */
5491 s = scan_str(s,FALSE,FALSE);
5493 Perl_croak(aTHX_ "Prototype not terminated");
5494 /* strip spaces and check for bad characters */
5495 d = SvPVX(PL_lex_stuff);
5498 for (p = d; *p; ++p) {
5501 if (!strchr("$@%*;[]&\\", *p))
5506 if (bad_proto && ckWARN(WARN_SYNTAX))
5507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5508 "Illegal character in prototype for %"SVf" : %s",
5510 SvCUR_set(PL_lex_stuff, tmp);
5518 if (*s == ':' && s[1] != ':')
5519 PL_expect = attrful;
5520 else if (*s != '{' && key == KEY_sub) {
5522 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5524 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5528 PL_nextval[PL_nexttoke].opval =
5529 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5530 PL_lex_stuff = Nullsv;
5534 sv_setpv(PL_subname,
5535 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5538 (void) force_word(PL_oldbufptr + tboffset, WORD,
5547 LOP(OP_SYSTEM,XREF);
5550 LOP(OP_SYMLINK,XTERM);
5553 LOP(OP_SYSCALL,XTERM);
5556 LOP(OP_SYSOPEN,XTERM);
5559 LOP(OP_SYSSEEK,XTERM);
5562 LOP(OP_SYSREAD,XTERM);
5565 LOP(OP_SYSWRITE,XTERM);
5569 TERM(sublex_start());
5590 LOP(OP_TRUNCATE,XTERM);
5602 yylval.ival = CopLINE(PL_curcop);
5606 yylval.ival = CopLINE(PL_curcop);
5610 LOP(OP_UNLINK,XTERM);
5616 LOP(OP_UNPACK,XTERM);
5619 LOP(OP_UTIME,XTERM);
5625 LOP(OP_UNSHIFT,XTERM);
5628 s = tokenize_use(1, s);
5638 yylval.ival = CopLINE(PL_curcop);
5642 yylval.ival = CopLINE(PL_curcop);
5646 PL_hints |= HINT_BLOCK_SCOPE;
5653 LOP(OP_WAITPID,XTERM);
5662 ctl_l[0] = toCTRL('L');
5664 gv_fetchpv(ctl_l, GV_ADD, SVt_PV);
5667 gv_fetchpv("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5672 if (PL_expect == XOPERATOR)
5678 yylval.ival = OP_XOR;
5683 TERM(sublex_start());
5688 #pragma segment Main
5692 S_pending_ident(pTHX)
5696 register I32 tmp = 0;
5697 /* pit holds the identifier we read and pending_ident is reset */
5698 char pit = PL_pending_ident;
5699 PL_pending_ident = 0;
5701 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5702 "### Pending identifier '%s'\n", PL_tokenbuf); });
5704 /* if we're in a my(), we can't allow dynamics here.
5705 $foo'bar has already been turned into $foo::bar, so
5706 just check for colons.
5708 if it's a legal name, the OP is a PADANY.
5711 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5712 if (strchr(PL_tokenbuf,':'))
5713 yyerror(Perl_form(aTHX_ "No package name allowed for "
5714 "variable %s in \"our\"",
5716 tmp = allocmy(PL_tokenbuf);
5719 if (strchr(PL_tokenbuf,':'))
5720 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5722 yylval.opval = newOP(OP_PADANY, 0);
5723 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5729 build the ops for accesses to a my() variable.
5731 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5732 then used in a comparison. This catches most, but not
5733 all cases. For instance, it catches
5734 sort { my($a); $a <=> $b }
5736 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5737 (although why you'd do that is anyone's guess).
5740 if (!strchr(PL_tokenbuf,':')) {
5742 tmp = pad_findmy(PL_tokenbuf);
5743 if (tmp != NOT_IN_PAD) {
5744 /* might be an "our" variable" */
5745 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5746 /* build ops for a bareword */
5747 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5748 HEK * const stashname = HvNAME_HEK(stash);
5749 SV * const sym = newSVhek(stashname);
5750 sv_catpvs(sym, "::");
5751 sv_catpv(sym, PL_tokenbuf+1);
5752 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5753 yylval.opval->op_private = OPpCONST_ENTERED;
5756 ? (GV_ADDMULTI | GV_ADDINEVAL)
5759 ((PL_tokenbuf[0] == '$') ? SVt_PV
5760 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5765 /* if it's a sort block and they're naming $a or $b */
5766 if (PL_last_lop_op == OP_SORT &&
5767 PL_tokenbuf[0] == '$' &&
5768 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5771 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5772 d < PL_bufend && *d != '\n';
5775 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5776 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5782 yylval.opval = newOP(OP_PADANY, 0);
5783 yylval.opval->op_targ = tmp;
5789 Whine if they've said @foo in a doublequoted string,
5790 and @foo isn't a variable we can find in the symbol
5793 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5794 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5795 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5796 && ckWARN(WARN_AMBIGUOUS))
5798 /* Downgraded from fatal to warning 20000522 mjd */
5799 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5800 "Possible unintended interpolation of %s in string",
5805 /* build ops for a bareword */
5806 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5807 yylval.opval->op_private = OPpCONST_ENTERED;
5811 ? (GV_ADDMULTI | GV_ADDINEVAL)
5812 /* If the identifier refers to a stash, don't autovivify it.
5813 * Change 24660 had the side effect of causing symbol table
5814 * hashes to always be defined, even if they were freshly
5815 * created and the only reference in the entire program was
5816 * the single statement with the defined %foo::bar:: test.
5817 * It appears that all code in the wild doing this actually
5818 * wants to know whether sub-packages have been loaded, so
5819 * by avoiding auto-vivifying symbol tables, we ensure that
5820 * defined %foo::bar:: continues to be false, and the existing
5821 * tests still give the expected answers, even though what
5822 * they're actually testing has now changed subtly.
5824 : !(*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'),
5825 ((PL_tokenbuf[0] == '$') ? SVt_PV
5826 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5832 * The following code was generated by perl_keyword.pl.
5836 Perl_keyword (pTHX_ const char *name, I32 len)
5841 case 1: /* 5 tokens of length 1 */
5873 case 2: /* 18 tokens of length 2 */
6019 case 3: /* 29 tokens of length 3 */
6023 if (name[1] == 'N' &&
6086 if (name[1] == 'i' &&
6108 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6126 if (name[1] == 'o' &&
6135 if (name[1] == 'e' &&
6144 if (name[1] == 'n' &&
6153 if (name[1] == 'o' &&
6162 if (name[1] == 'a' &&
6171 if (name[1] == 'o' &&
6233 if (name[1] == 'e' &&
6247 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6273 if (name[1] == 'i' &&
6282 if (name[1] == 's' &&
6291 if (name[1] == 'e' &&
6300 if (name[1] == 'o' &&
6312 case 4: /* 41 tokens of length 4 */
6316 if (name[1] == 'O' &&
6326 if (name[1] == 'N' &&
6336 if (name[1] == 'i' &&
6346 if (name[1] == 'h' &&
6356 if (name[1] == 'u' &&
6369 if (name[2] == 'c' &&
6378 if (name[2] == 's' &&
6387 if (name[2] == 'a' &&
6423 if (name[1] == 'o' &&
6436 if (name[2] == 't' &&
6445 if (name[2] == 'o' &&
6454 if (name[2] == 't' &&
6463 if (name[2] == 'e' &&
6476 if (name[1] == 'o' &&
6489 if (name[2] == 'y' &&
6498 if (name[2] == 'l' &&
6514 if (name[2] == 's' &&
6523 if (name[2] == 'n' &&
6532 if (name[2] == 'c' &&
6545 if (name[1] == 'e' &&
6555 if (name[1] == 'p' &&
6568 if (name[2] == 'c' &&
6577 if (name[2] == 'p' &&
6586 if (name[2] == 's' &&
6602 if (name[2] == 'n' &&
6672 if (name[2] == 'r' &&
6681 if (name[2] == 'r' &&
6690 if (name[2] == 'a' &&
6706 if (name[2] == 'l' &&
6768 if (name[2] == 'e' &&
6771 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6784 case 5: /* 38 tokens of length 5 */
6788 if (name[1] == 'E' &&
6799 if (name[1] == 'H' &&
6813 if (name[2] == 'a' &&
6823 if (name[2] == 'a' &&
6840 if (name[2] == 'e' &&
6850 if (name[2] == 'e' &&
6854 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6870 if (name[3] == 'i' &&
6879 if (name[3] == 'o' &&
6915 if (name[2] == 'o' &&
6925 if (name[2] == 'y' &&
6939 if (name[1] == 'l' &&
6953 if (name[2] == 'n' &&
6963 if (name[2] == 'o' &&
6977 if (name[1] == 'i' &&
6982 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
6991 if (name[2] == 'd' &&
7001 if (name[2] == 'c' &&
7018 if (name[2] == 'c' &&
7028 if (name[2] == 't' &&
7042 if (name[1] == 'k' &&
7053 if (name[1] == 'r' &&
7067 if (name[2] == 's' &&
7077 if (name[2] == 'd' &&
7094 if (name[2] == 'm' &&
7104 if (name[2] == 'i' &&
7114 if (name[2] == 'e' &&
7124 if (name[2] == 'l' &&
7134 if (name[2] == 'a' &&
7144 if (name[2] == 'u' &&
7158 if (name[1] == 'i' &&
7172 if (name[2] == 'a' &&
7185 if (name[3] == 'e' &&
7220 if (name[2] == 'i' &&
7237 if (name[2] == 'i' &&
7247 if (name[2] == 'i' &&
7264 case 6: /* 33 tokens of length 6 */
7268 if (name[1] == 'c' &&
7283 if (name[2] == 'l' &&
7294 if (name[2] == 'r' &&
7309 if (name[1] == 'e' &&
7324 if (name[2] == 's' &&
7329 if(ckWARN_d(WARN_SYNTAX))
7330 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7336 if (name[2] == 'i' &&
7354 if (name[2] == 'l' &&
7365 if (name[2] == 'r' &&
7380 if (name[1] == 'm' &&
7395 if (name[2] == 'n' &&
7406 if (name[2] == 's' &&
7421 if (name[1] == 's' &&
7427 if (name[4] == 't' &&
7436 if (name[4] == 'e' &&
7445 if (name[4] == 'c' &&
7454 if (name[4] == 'n' &&
7470 if (name[1] == 'r' &&
7488 if (name[3] == 'a' &&
7498 if (name[3] == 'u' &&
7512 if (name[2] == 'n' &&
7530 if (name[2] == 'a' &&
7544 if (name[3] == 'e' &&
7557 if (name[4] == 't' &&
7566 if (name[4] == 'e' &&
7588 if (name[4] == 't' &&
7597 if (name[4] == 'e' &&
7613 if (name[2] == 'c' &&
7624 if (name[2] == 'l' &&
7635 if (name[2] == 'b' &&
7646 if (name[2] == 's' &&
7669 if (name[4] == 's' &&
7678 if (name[4] == 'n' &&
7691 if (name[3] == 'a' &&
7708 if (name[1] == 'a' &&
7723 case 7: /* 29 tokens of length 7 */
7727 if (name[1] == 'E' &&
7740 if (name[1] == '_' &&
7753 if (name[1] == 'i' &&
7760 return -KEY_binmode;
7766 if (name[1] == 'o' &&
7773 return -KEY_connect;
7782 if (name[2] == 'm' &&
7788 return -KEY_dbmopen;
7799 if (name[4] == 'u' &&
7803 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7809 if (name[4] == 'n' &&
7830 if (name[1] == 'o' &&
7843 if (name[1] == 'e' &&
7850 if (name[5] == 'r' &&
7853 return -KEY_getpgrp;
7859 if (name[5] == 'i' &&
7862 return -KEY_getppid;
7875 if (name[1] == 'c' &&
7882 return -KEY_lcfirst;
7888 if (name[1] == 'p' &&
7895 return -KEY_opendir;
7901 if (name[1] == 'a' &&
7919 if (name[3] == 'd' &&
7924 return -KEY_readdir;
7930 if (name[3] == 'u' &&
7941 if (name[3] == 'e' &&
7946 return -KEY_reverse;
7965 if (name[3] == 'k' &&
7970 return -KEY_seekdir;
7976 if (name[3] == 'p' &&
7981 return -KEY_setpgrp;
7991 if (name[2] == 'm' &&
7997 return -KEY_shmread;
8003 if (name[2] == 'r' &&
8009 return -KEY_sprintf;
8018 if (name[3] == 'l' &&
8023 return -KEY_symlink;
8032 if (name[4] == 'a' &&
8036 return -KEY_syscall;
8042 if (name[4] == 'p' &&
8046 return -KEY_sysopen;
8052 if (name[4] == 'e' &&
8056 return -KEY_sysread;
8062 if (name[4] == 'e' &&
8066 return -KEY_sysseek;
8084 if (name[1] == 'e' &&
8091 return -KEY_telldir;
8100 if (name[2] == 'f' &&
8106 return -KEY_ucfirst;
8112 if (name[2] == 's' &&
8118 return -KEY_unshift;
8128 if (name[1] == 'a' &&
8135 return -KEY_waitpid;
8144 case 8: /* 26 tokens of length 8 */
8148 if (name[1] == 'U' &&
8156 return KEY_AUTOLOAD;
8167 if (name[3] == 'A' &&
8173 return KEY___DATA__;
8179 if (name[3] == 'I' &&
8185 return -KEY___FILE__;
8191 if (name[3] == 'I' &&
8197 return -KEY___LINE__;
8213 if (name[2] == 'o' &&
8220 return -KEY_closedir;
8226 if (name[2] == 'n' &&
8233 return -KEY_continue;
8243 if (name[1] == 'b' &&
8251 return -KEY_dbmclose;
8257 if (name[1] == 'n' &&
8263 if (name[4] == 'r' &&
8268 return -KEY_endgrent;
8274 if (name[4] == 'w' &&
8279 return -KEY_endpwent;
8292 if (name[1] == 'o' &&
8300 return -KEY_formline;
8306 if (name[1] == 'e' &&
8317 if (name[6] == 'n' &&
8320 return -KEY_getgrent;
8326 if (name[6] == 'i' &&
8329 return -KEY_getgrgid;
8335 if (name[6] == 'a' &&
8338 return -KEY_getgrnam;
8351 if (name[4] == 'o' &&
8356 return -KEY_getlogin;
8367 if (name[6] == 'n' &&
8370 return -KEY_getpwent;
8376 if (name[6] == 'a' &&
8379 return -KEY_getpwnam;
8385 if (name[6] == 'i' &&
8388 return -KEY_getpwuid;
8408 if (name[1] == 'e' &&
8415 if (name[5] == 'i' &&
8422 return -KEY_readline;
8427 return -KEY_readlink;
8438 if (name[5] == 'i' &&
8442 return -KEY_readpipe;
8463 if (name[4] == 'r' &&
8468 return -KEY_setgrent;
8474 if (name[4] == 'w' &&
8479 return -KEY_setpwent;
8495 if (name[3] == 'w' &&
8501 return -KEY_shmwrite;
8507 if (name[3] == 't' &&
8513 return -KEY_shutdown;
8523 if (name[2] == 's' &&
8530 return -KEY_syswrite;
8540 if (name[1] == 'r' &&
8548 return -KEY_truncate;
8557 case 9: /* 8 tokens of length 9 */
8561 if (name[1] == 'n' &&
8570 return -KEY_endnetent;
8576 if (name[1] == 'e' &&
8585 return -KEY_getnetent;
8591 if (name[1] == 'o' &&
8600 return -KEY_localtime;
8606 if (name[1] == 'r' &&
8615 return KEY_prototype;
8621 if (name[1] == 'u' &&
8630 return -KEY_quotemeta;
8636 if (name[1] == 'e' &&
8645 return -KEY_rewinddir;
8651 if (name[1] == 'e' &&
8660 return -KEY_setnetent;
8666 if (name[1] == 'a' &&
8675 return -KEY_wantarray;
8684 case 10: /* 9 tokens of length 10 */
8688 if (name[1] == 'n' &&
8694 if (name[4] == 'o' &&
8701 return -KEY_endhostent;
8707 if (name[4] == 'e' &&
8714 return -KEY_endservent;
8727 if (name[1] == 'e' &&
8733 if (name[4] == 'o' &&
8740 return -KEY_gethostent;
8749 if (name[5] == 'r' &&
8755 return -KEY_getservent;
8761 if (name[5] == 'c' &&
8767 return -KEY_getsockopt;
8792 if (name[4] == 'o' &&
8799 return -KEY_sethostent;
8808 if (name[5] == 'r' &&
8814 return -KEY_setservent;
8820 if (name[5] == 'c' &&
8826 return -KEY_setsockopt;
8843 if (name[2] == 'c' &&
8852 return -KEY_socketpair;
8865 case 11: /* 8 tokens of length 11 */
8869 if (name[1] == '_' &&
8880 return -KEY___PACKAGE__;
8886 if (name[1] == 'n' &&
8897 return -KEY_endprotoent;
8903 if (name[1] == 'e' &&
8912 if (name[5] == 'e' &&
8919 return -KEY_getpeername;
8928 if (name[6] == 'o' &&
8934 return -KEY_getpriority;
8940 if (name[6] == 't' &&
8946 return -KEY_getprotoent;
8960 if (name[4] == 'o' &&
8968 return -KEY_getsockname;
8981 if (name[1] == 'e' &&
8989 if (name[6] == 'o' &&
8995 return -KEY_setpriority;
9001 if (name[6] == 't' &&
9007 return -KEY_setprotoent;
9023 case 12: /* 2 tokens of length 12 */
9024 if (name[0] == 'g' &&
9036 if (name[9] == 'd' &&
9039 { /* getnetbyaddr */
9040 return -KEY_getnetbyaddr;
9046 if (name[9] == 'a' &&
9049 { /* getnetbyname */
9050 return -KEY_getnetbyname;
9062 case 13: /* 4 tokens of length 13 */
9063 if (name[0] == 'g' &&
9070 if (name[4] == 'o' &&
9079 if (name[10] == 'd' &&
9082 { /* gethostbyaddr */
9083 return -KEY_gethostbyaddr;
9089 if (name[10] == 'a' &&
9092 { /* gethostbyname */
9093 return -KEY_gethostbyname;
9106 if (name[4] == 'e' &&
9115 if (name[10] == 'a' &&
9118 { /* getservbyname */
9119 return -KEY_getservbyname;
9125 if (name[10] == 'o' &&
9128 { /* getservbyport */
9129 return -KEY_getservbyport;
9148 case 14: /* 1 tokens of length 14 */
9149 if (name[0] == 'g' &&
9163 { /* getprotobyname */
9164 return -KEY_getprotobyname;
9169 case 16: /* 1 tokens of length 16 */
9170 if (name[0] == 'g' &&
9186 { /* getprotobynumber */
9187 return -KEY_getprotobynumber;
9201 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9206 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9207 if (ckWARN(WARN_SYNTAX)) {
9209 for (w = s+2; *w && level; w++) {
9216 for (; *w && isSPACE(*w); w++) ;
9217 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9218 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9219 "%s (...) interpreted as function",name);
9222 while (s < PL_bufend && isSPACE(*s))
9226 while (s < PL_bufend && isSPACE(*s))
9228 if (isIDFIRST_lazy_if(s,UTF)) {
9230 while (isALNUM_lazy_if(s,UTF))
9232 while (s < PL_bufend && isSPACE(*s))
9236 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9237 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9241 Perl_croak(aTHX_ "No comma allowed after %s", what);
9246 /* Either returns sv, or mortalizes sv and returns a new SV*.
9247 Best used as sv=new_constant(..., sv, ...).
9248 If s, pv are NULL, calls subroutine with one argument,
9249 and type is used with error messages only. */
9252 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9256 HV * const table = GvHV(PL_hintgv); /* ^H */
9260 const char *why1 = "", *why2 = "", *why3 = "";
9262 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9265 why2 = strEQ(key,"charnames")
9266 ? "(possibly a missing \"use charnames ...\")"
9268 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9269 (type ? type: "undef"), why2);
9271 /* This is convoluted and evil ("goto considered harmful")
9272 * but I do not understand the intricacies of all the different
9273 * failure modes of %^H in here. The goal here is to make
9274 * the most probable error message user-friendly. --jhi */
9279 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9280 (type ? type: "undef"), why1, why2, why3);
9282 yyerror(SvPVX_const(msg));
9286 cvp = hv_fetch(table, key, strlen(key), FALSE);
9287 if (!cvp || !SvOK(*cvp)) {
9290 why3 = "} is not defined";
9293 sv_2mortal(sv); /* Parent created it permanently */
9296 pv = sv_2mortal(newSVpvn(s, len));
9298 typesv = sv_2mortal(newSVpv(type, 0));
9300 typesv = &PL_sv_undef;
9302 PUSHSTACKi(PERLSI_OVERLOAD);
9314 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9318 /* Check the eval first */
9319 if (!PL_in_eval && SvTRUE(ERRSV)) {
9320 sv_catpvs(ERRSV, "Propagated");
9321 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9323 res = SvREFCNT_inc(sv);
9327 (void)SvREFCNT_inc(res);
9336 why1 = "Call to &{$^H{";
9338 why3 = "}} did not return a defined value";
9346 /* Returns a NUL terminated string, with the length of the string written to
9350 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9353 register char *d = dest;
9354 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9357 Perl_croak(aTHX_ ident_too_long);
9358 if (isALNUM(*s)) /* UTF handled below */
9360 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9365 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9369 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9370 char *t = s + UTF8SKIP(s);
9371 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9373 if (d + (t - s) > e)
9374 Perl_croak(aTHX_ ident_too_long);
9375 Copy(s, d, t - s, char);
9388 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9391 char *bracket = NULL;
9393 register char *d = dest;
9394 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
9399 while (isDIGIT(*s)) {
9401 Perl_croak(aTHX_ ident_too_long);
9408 Perl_croak(aTHX_ ident_too_long);
9409 if (isALNUM(*s)) /* UTF handled below */
9411 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9416 else if (*s == ':' && s[1] == ':') {
9420 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9421 char *t = s + UTF8SKIP(s);
9422 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9424 if (d + (t - s) > e)
9425 Perl_croak(aTHX_ ident_too_long);
9426 Copy(s, d, t - s, char);
9437 if (PL_lex_state != LEX_NORMAL)
9438 PL_lex_state = LEX_INTERPENDMAYBE;
9441 if (*s == '$' && s[1] &&
9442 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9455 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9460 if (isSPACE(s[-1])) {
9462 const char ch = *s++;
9463 if (!SPACE_OR_TAB(ch)) {
9469 if (isIDFIRST_lazy_if(d,UTF)) {
9473 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9474 end += UTF8SKIP(end);
9475 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9476 end += UTF8SKIP(end);
9478 Copy(s, d, end - s, char);
9483 while ((isALNUM(*s) || *s == ':') && d < e)
9486 Perl_croak(aTHX_ ident_too_long);
9489 while (s < send && SPACE_OR_TAB(*s)) s++;
9490 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9491 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9492 const char *brack = *s == '[' ? "[...]" : "{...}";
9493 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9494 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9495 funny, dest, brack, funny, dest, brack);
9498 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9502 /* Handle extended ${^Foo} variables
9503 * 1999-02-27 mjd-perl-patch@plover.com */
9504 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9508 while (isALNUM(*s) && d < e) {
9512 Perl_croak(aTHX_ ident_too_long);
9517 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9518 PL_lex_state = LEX_INTERPEND;
9523 if (PL_lex_state == LEX_NORMAL) {
9524 if (ckWARN(WARN_AMBIGUOUS) &&
9525 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9527 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9528 "Ambiguous use of %c{%s} resolved to %c%s",
9529 funny, dest, funny, dest);
9534 s = bracket; /* let the parser handle it */
9538 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9539 PL_lex_state = LEX_INTERPEND;
9544 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9549 *pmfl |= PMf_GLOBAL;
9551 *pmfl |= PMf_CONTINUE;
9555 *pmfl |= PMf_MULTILINE;
9557 *pmfl |= PMf_SINGLELINE;
9559 *pmfl |= PMf_EXTENDED;
9563 S_scan_pat(pTHX_ char *start, I32 type)
9567 char *s = scan_str(start,FALSE,FALSE);
9568 const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
9571 const char * const delimiter = skipspace(start);
9572 Perl_croak(aTHX_ *delimiter == '?'
9573 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9574 : "Search pattern not terminated" );
9577 pm = (PMOP*)newPMOP(type, 0);
9578 if (PL_multi_open == '?')
9579 pm->op_pmflags |= PMf_ONCE;
9580 while (*s && strchr(valid_flags, *s))
9581 pmflag(&pm->op_pmflags,*s++);
9582 /* issue a warning if /c is specified,but /g is not */
9583 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9584 && ckWARN(WARN_REGEXP))
9586 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9589 pm->op_pmpermflags = pm->op_pmflags;
9591 PL_lex_op = (OP*)pm;
9592 yylval.ival = OP_MATCH;
9597 S_scan_subst(pTHX_ char *start)
9605 yylval.ival = OP_NULL;
9607 s = scan_str(start,FALSE,FALSE);
9610 Perl_croak(aTHX_ "Substitution pattern not terminated");
9612 if (s[-1] == PL_multi_open)
9615 first_start = PL_multi_start;
9616 s = scan_str(s,FALSE,FALSE);
9619 SvREFCNT_dec(PL_lex_stuff);
9620 PL_lex_stuff = Nullsv;
9622 Perl_croak(aTHX_ "Substitution replacement not terminated");
9624 PL_multi_start = first_start; /* so whole substitution is taken together */
9626 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9632 else if (strchr("iogcmsx", *s))
9633 pmflag(&pm->op_pmflags,*s++);
9638 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9639 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9643 SV * const repl = newSVpvs("");
9645 PL_sublex_info.super_bufptr = s;
9646 PL_sublex_info.super_bufend = PL_bufend;
9648 pm->op_pmflags |= PMf_EVAL;
9650 sv_catpv(repl, es ? "eval " : "do ");
9651 sv_catpvs(repl, "{ ");
9652 sv_catsv(repl, PL_lex_repl);
9653 sv_catpvs(repl, " }");
9655 SvREFCNT_dec(PL_lex_repl);
9659 pm->op_pmpermflags = pm->op_pmflags;
9660 PL_lex_op = (OP*)pm;
9661 yylval.ival = OP_SUBST;
9666 S_scan_trans(pTHX_ char *start)
9676 yylval.ival = OP_NULL;
9678 s = scan_str(start,FALSE,FALSE);
9680 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9681 if (s[-1] == PL_multi_open)
9684 s = scan_str(s,FALSE,FALSE);
9687 SvREFCNT_dec(PL_lex_stuff);
9688 PL_lex_stuff = Nullsv;
9690 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9693 complement = del = squash = 0;
9697 complement = OPpTRANS_COMPLEMENT;
9700 del = OPpTRANS_DELETE;
9703 squash = OPpTRANS_SQUASH;
9712 Newx(tbl, complement&&!del?258:256, short);
9713 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9714 o->op_private &= ~OPpTRANS_ALL;
9715 o->op_private |= del|squash|complement|
9716 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9717 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9720 yylval.ival = OP_TRANS;
9725 S_scan_heredoc(pTHX_ register char *s)
9729 I32 op_type = OP_SCALAR;
9733 const char *found_newline;
9737 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9741 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9744 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9745 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9748 s = delimcpy(d, e, s, PL_bufend, term, &len);
9758 if (!isALNUM_lazy_if(s,UTF))
9759 deprecate_old("bare << to mean <<\"\"");
9760 for (; isALNUM_lazy_if(s,UTF); s++) {
9765 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9766 Perl_croak(aTHX_ "Delimiter for here document is too long");
9769 len = d - PL_tokenbuf;
9770 #ifndef PERL_STRICT_CR
9771 d = strchr(s, '\r');
9773 char * const olds = s;
9775 while (s < PL_bufend) {
9781 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9790 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9794 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9795 herewas = newSVpvn(s,PL_bufend-s);
9799 herewas = newSVpvn(s,found_newline-s);
9801 s += SvCUR(herewas);
9803 tmpstr = NEWSV(87,79);
9804 sv_upgrade(tmpstr, SVt_PVIV);
9807 SvIV_set(tmpstr, -1);
9809 else if (term == '`') {
9810 op_type = OP_BACKTICK;
9811 SvIV_set(tmpstr, '\\');
9815 PL_multi_start = CopLINE(PL_curcop);
9816 PL_multi_open = PL_multi_close = '<';
9817 term = *PL_tokenbuf;
9818 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9819 char * const bufptr = PL_sublex_info.super_bufptr;
9820 char * const bufend = PL_sublex_info.super_bufend;
9821 char * const olds = s - SvCUR(herewas);
9822 s = strchr(bufptr, '\n');
9826 while (s < bufend &&
9827 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9829 CopLINE_inc(PL_curcop);
9832 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9833 missingterm(PL_tokenbuf);
9835 sv_setpvn(herewas,bufptr,d-bufptr+1);
9836 sv_setpvn(tmpstr,d+1,s-d);
9838 sv_catpvn(herewas,s,bufend-s);
9839 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9846 while (s < PL_bufend &&
9847 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9849 CopLINE_inc(PL_curcop);
9851 if (s >= PL_bufend) {
9852 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9853 missingterm(PL_tokenbuf);
9855 sv_setpvn(tmpstr,d+1,s-d);
9857 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9859 sv_catpvn(herewas,s,PL_bufend-s);
9860 sv_setsv(PL_linestr,herewas);
9861 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9862 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9863 PL_last_lop = PL_last_uni = Nullch;
9866 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9867 while (s >= PL_bufend) { /* multiple line string? */
9869 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9870 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9871 missingterm(PL_tokenbuf);
9873 CopLINE_inc(PL_curcop);
9874 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9875 PL_last_lop = PL_last_uni = Nullch;
9876 #ifndef PERL_STRICT_CR
9877 if (PL_bufend - PL_linestart >= 2) {
9878 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9879 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9881 PL_bufend[-2] = '\n';
9883 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9885 else if (PL_bufend[-1] == '\r')
9886 PL_bufend[-1] = '\n';
9888 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9889 PL_bufend[-1] = '\n';
9891 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9892 SV * const sv = NEWSV(88,0);
9894 sv_upgrade(sv, SVt_PVMG);
9895 sv_setsv(sv,PL_linestr);
9898 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9900 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9901 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9902 *(SvPVX(PL_linestr) + off ) = ' ';
9903 sv_catsv(PL_linestr,herewas);
9904 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9905 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9909 sv_catsv(tmpstr,PL_linestr);
9914 PL_multi_end = CopLINE(PL_curcop);
9915 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9916 SvPV_shrink_to_cur(tmpstr);
9918 SvREFCNT_dec(herewas);
9920 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9922 else if (PL_encoding)
9923 sv_recode_to_utf8(tmpstr, PL_encoding);
9925 PL_lex_stuff = tmpstr;
9926 yylval.ival = op_type;
9931 takes: current position in input buffer
9932 returns: new position in input buffer
9933 side-effects: yylval and lex_op are set.
9938 <FH> read from filehandle
9939 <pkg::FH> read from package qualified filehandle
9940 <pkg'FH> read from package qualified filehandle
9941 <$fh> read from filehandle in $fh
9947 S_scan_inputsymbol(pTHX_ char *start)
9950 register char *s = start; /* current position in buffer */
9954 char *d = PL_tokenbuf; /* start of temp holding space */
9955 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9957 end = strchr(s, '\n');
9960 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9962 /* die if we didn't have space for the contents of the <>,
9963 or if it didn't end, or if we see a newline
9966 if (len >= sizeof PL_tokenbuf)
9967 Perl_croak(aTHX_ "Excessively long <> operator");
9969 Perl_croak(aTHX_ "Unterminated <> operator");
9974 Remember, only scalar variables are interpreted as filehandles by
9975 this code. Anything more complex (e.g., <$fh{$num}>) will be
9976 treated as a glob() call.
9977 This code makes use of the fact that except for the $ at the front,
9978 a scalar variable and a filehandle look the same.
9980 if (*d == '$' && d[1]) d++;
9982 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9983 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9986 /* If we've tried to read what we allow filehandles to look like, and
9987 there's still text left, then it must be a glob() and not a getline.
9988 Use scan_str to pull out the stuff between the <> and treat it
9989 as nothing more than a string.
9992 if (d - PL_tokenbuf != len) {
9993 yylval.ival = OP_GLOB;
9995 s = scan_str(start,FALSE,FALSE);
9997 Perl_croak(aTHX_ "Glob not terminated");
10001 bool readline_overriden = FALSE;
10004 /* we're in a filehandle read situation */
10007 /* turn <> into <ARGV> */
10009 Copy("ARGV",d,5,char);
10011 /* Check whether readline() is overriden */
10012 gv_readline = gv_fetchpv("readline", 0, SVt_PVCV);
10014 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10016 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
10017 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
10018 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10019 readline_overriden = TRUE;
10021 /* if <$fh>, create the ops to turn the variable into a
10027 /* try to find it in the pad for this block, otherwise find
10028 add symbol table ops
10030 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10031 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
10032 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10033 HEK * const stashname = HvNAME_HEK(stash);
10034 SV * const sym = sv_2mortal(newSVhek(stashname));
10035 sv_catpvs(sym, "::");
10036 sv_catpv(sym, d+1);
10041 OP * const o = newOP(OP_PADSV, 0);
10043 PL_lex_op = readline_overriden
10044 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10045 append_elem(OP_LIST, o,
10046 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10047 : (OP*)newUNOP(OP_READLINE, 0, o);
10056 ? (GV_ADDMULTI | GV_ADDINEVAL)
10059 PL_lex_op = readline_overriden
10060 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10061 append_elem(OP_LIST,
10062 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10063 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10064 : (OP*)newUNOP(OP_READLINE, 0,
10065 newUNOP(OP_RV2SV, 0,
10066 newGVOP(OP_GV, 0, gv)));
10068 if (!readline_overriden)
10069 PL_lex_op->op_flags |= OPf_SPECIAL;
10070 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10071 yylval.ival = OP_NULL;
10074 /* If it's none of the above, it must be a literal filehandle
10075 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10077 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10078 PL_lex_op = readline_overriden
10079 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10080 append_elem(OP_LIST,
10081 newGVOP(OP_GV, 0, gv),
10082 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10083 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10084 yylval.ival = OP_NULL;
10093 takes: start position in buffer
10094 keep_quoted preserve \ on the embedded delimiter(s)
10095 keep_delims preserve the delimiters around the string
10096 returns: position to continue reading from buffer
10097 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10098 updates the read buffer.
10100 This subroutine pulls a string out of the input. It is called for:
10101 q single quotes q(literal text)
10102 ' single quotes 'literal text'
10103 qq double quotes qq(interpolate $here please)
10104 " double quotes "interpolate $here please"
10105 qx backticks qx(/bin/ls -l)
10106 ` backticks `/bin/ls -l`
10107 qw quote words @EXPORT_OK = qw( func() $spam )
10108 m// regexp match m/this/
10109 s/// regexp substitute s/this/that/
10110 tr/// string transliterate tr/this/that/
10111 y/// string transliterate y/this/that/
10112 ($*@) sub prototypes sub foo ($)
10113 (stuff) sub attr parameters sub foo : attr(stuff)
10114 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10116 In most of these cases (all but <>, patterns and transliterate)
10117 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10118 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10119 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10122 It skips whitespace before the string starts, and treats the first
10123 character as the delimiter. If the delimiter is one of ([{< then
10124 the corresponding "close" character )]}> is used as the closing
10125 delimiter. It allows quoting of delimiters, and if the string has
10126 balanced delimiters ([{<>}]) it allows nesting.
10128 On success, the SV with the resulting string is put into lex_stuff or,
10129 if that is already non-NULL, into lex_repl. The second case occurs only
10130 when parsing the RHS of the special constructs s/// and tr/// (y///).
10131 For convenience, the terminating delimiter character is stuffed into
10136 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10139 SV *sv; /* scalar value: string */
10140 char *tmps; /* temp string, used for delimiter matching */
10141 register char *s = start; /* current position in the buffer */
10142 register char term; /* terminating character */
10143 register char *to; /* current position in the sv's data */
10144 I32 brackets = 1; /* bracket nesting level */
10145 bool has_utf8 = FALSE; /* is there any utf8 content? */
10146 I32 termcode; /* terminating char. code */
10147 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10148 STRLEN termlen; /* length of terminating string */
10149 char *last = NULL; /* last position for nesting bracket */
10151 /* skip space before the delimiter */
10155 /* mark where we are, in case we need to report errors */
10158 /* after skipping whitespace, the next character is the terminator */
10161 termcode = termstr[0] = term;
10165 termcode = utf8_to_uvchr((U8*)s, &termlen);
10166 Copy(s, termstr, termlen, U8);
10167 if (!UTF8_IS_INVARIANT(term))
10171 /* mark where we are */
10172 PL_multi_start = CopLINE(PL_curcop);
10173 PL_multi_open = term;
10175 /* find corresponding closing delimiter */
10176 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10177 termcode = termstr[0] = term = tmps[5];
10179 PL_multi_close = term;
10181 /* create a new SV to hold the contents. 87 is leak category, I'm
10182 assuming. 79 is the SV's initial length. What a random number. */
10184 sv_upgrade(sv, SVt_PVIV);
10185 SvIV_set(sv, termcode);
10186 (void)SvPOK_only(sv); /* validate pointer */
10188 /* move past delimiter and try to read a complete string */
10190 sv_catpvn(sv, s, termlen);
10193 if (PL_encoding && !UTF) {
10197 int offset = s - SvPVX_const(PL_linestr);
10198 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10199 &offset, (char*)termstr, termlen);
10200 const char * const ns = SvPVX_const(PL_linestr) + offset;
10201 char * const svlast = SvEND(sv) - 1;
10203 for (; s < ns; s++) {
10204 if (*s == '\n' && !PL_rsfp)
10205 CopLINE_inc(PL_curcop);
10208 goto read_more_line;
10210 /* handle quoted delimiters */
10211 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10213 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10215 if ((svlast-1 - t) % 2) {
10216 if (!keep_quoted) {
10217 *(svlast-1) = term;
10219 SvCUR_set(sv, SvCUR(sv) - 1);
10224 if (PL_multi_open == PL_multi_close) {
10232 for (t = w = last; t < svlast; w++, t++) {
10233 /* At here, all closes are "was quoted" one,
10234 so we don't check PL_multi_close. */
10236 if (!keep_quoted && *(t+1) == PL_multi_open)
10241 else if (*t == PL_multi_open)
10249 SvCUR_set(sv, w - SvPVX_const(sv));
10252 if (--brackets <= 0)
10257 if (!keep_delims) {
10258 SvCUR_set(sv, SvCUR(sv) - 1);
10264 /* extend sv if need be */
10265 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10266 /* set 'to' to the next character in the sv's string */
10267 to = SvPVX(sv)+SvCUR(sv);
10269 /* if open delimiter is the close delimiter read unbridle */
10270 if (PL_multi_open == PL_multi_close) {
10271 for (; s < PL_bufend; s++,to++) {
10272 /* embedded newlines increment the current line number */
10273 if (*s == '\n' && !PL_rsfp)
10274 CopLINE_inc(PL_curcop);
10275 /* handle quoted delimiters */
10276 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10277 if (!keep_quoted && s[1] == term)
10279 /* any other quotes are simply copied straight through */
10283 /* terminate when run out of buffer (the for() condition), or
10284 have found the terminator */
10285 else if (*s == term) {
10288 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10291 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10297 /* if the terminator isn't the same as the start character (e.g.,
10298 matched brackets), we have to allow more in the quoting, and
10299 be prepared for nested brackets.
10302 /* read until we run out of string, or we find the terminator */
10303 for (; s < PL_bufend; s++,to++) {
10304 /* embedded newlines increment the line count */
10305 if (*s == '\n' && !PL_rsfp)
10306 CopLINE_inc(PL_curcop);
10307 /* backslashes can escape the open or closing characters */
10308 if (*s == '\\' && s+1 < PL_bufend) {
10309 if (!keep_quoted &&
10310 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10315 /* allow nested opens and closes */
10316 else if (*s == PL_multi_close && --brackets <= 0)
10318 else if (*s == PL_multi_open)
10320 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10325 /* terminate the copied string and update the sv's end-of-string */
10327 SvCUR_set(sv, to - SvPVX_const(sv));
10330 * this next chunk reads more into the buffer if we're not done yet
10334 break; /* handle case where we are done yet :-) */
10336 #ifndef PERL_STRICT_CR
10337 if (to - SvPVX_const(sv) >= 2) {
10338 if ((to[-2] == '\r' && to[-1] == '\n') ||
10339 (to[-2] == '\n' && to[-1] == '\r'))
10343 SvCUR_set(sv, to - SvPVX_const(sv));
10345 else if (to[-1] == '\r')
10348 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10353 /* if we're out of file, or a read fails, bail and reset the current
10354 line marker so we can report where the unterminated string began
10357 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10359 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10362 /* we read a line, so increment our line counter */
10363 CopLINE_inc(PL_curcop);
10365 /* update debugger info */
10366 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10367 SV * const sv = NEWSV(88,0);
10369 sv_upgrade(sv, SVt_PVMG);
10370 sv_setsv(sv,PL_linestr);
10371 (void)SvIOK_on(sv);
10373 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10376 /* having changed the buffer, we must update PL_bufend */
10377 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10378 PL_last_lop = PL_last_uni = Nullch;
10381 /* at this point, we have successfully read the delimited string */
10383 if (!PL_encoding || UTF) {
10385 sv_catpvn(sv, s, termlen);
10388 if (has_utf8 || PL_encoding)
10391 PL_multi_end = CopLINE(PL_curcop);
10393 /* if we allocated too much space, give some back */
10394 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10395 SvLEN_set(sv, SvCUR(sv) + 1);
10396 SvPV_renew(sv, SvLEN(sv));
10399 /* decide whether this is the first or second quoted string we've read
10412 takes: pointer to position in buffer
10413 returns: pointer to new position in buffer
10414 side-effects: builds ops for the constant in yylval.op
10416 Read a number in any of the formats that Perl accepts:
10418 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10419 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10422 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10424 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10427 If it reads a number without a decimal point or an exponent, it will
10428 try converting the number to an integer and see if it can do so
10429 without loss of precision.
10433 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10436 register const char *s = start; /* current position in buffer */
10437 register char *d; /* destination in temp buffer */
10438 register char *e; /* end of temp buffer */
10439 NV nv; /* number read, as a double */
10440 SV *sv = Nullsv; /* place to put the converted number */
10441 bool floatit; /* boolean: int or float? */
10442 const char *lastub = NULL; /* position of last underbar */
10443 static char const number_too_long[] = "Number too long";
10445 /* We use the first character to decide what type of number this is */
10449 Perl_croak(aTHX_ "panic: scan_num");
10451 /* if it starts with a 0, it could be an octal number, a decimal in
10452 0.13 disguise, or a hexadecimal number, or a binary number. */
10456 u holds the "number so far"
10457 shift the power of 2 of the base
10458 (hex == 4, octal == 3, binary == 1)
10459 overflowed was the number more than we can hold?
10461 Shift is used when we add a digit. It also serves as an "are
10462 we in octal/hex/binary?" indicator to disallow hex characters
10463 when in octal mode.
10468 bool overflowed = FALSE;
10469 bool just_zero = TRUE; /* just plain 0 or binary number? */
10470 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10471 static const char* const bases[5] =
10472 { "", "binary", "", "octal", "hexadecimal" };
10473 static const char* const Bases[5] =
10474 { "", "Binary", "", "Octal", "Hexadecimal" };
10475 static const char* const maxima[5] =
10477 "0b11111111111111111111111111111111",
10481 const char *base, *Base, *max;
10483 /* check for hex */
10488 } else if (s[1] == 'b') {
10493 /* check for a decimal in disguise */
10494 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10496 /* so it must be octal */
10503 if (ckWARN(WARN_SYNTAX))
10504 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10505 "Misplaced _ in number");
10509 base = bases[shift];
10510 Base = Bases[shift];
10511 max = maxima[shift];
10513 /* read the rest of the number */
10515 /* x is used in the overflow test,
10516 b is the digit we're adding on. */
10521 /* if we don't mention it, we're done */
10525 /* _ are ignored -- but warned about if consecutive */
10527 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10528 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10529 "Misplaced _ in number");
10533 /* 8 and 9 are not octal */
10534 case '8': case '9':
10536 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10540 case '2': case '3': case '4':
10541 case '5': case '6': case '7':
10543 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10546 case '0': case '1':
10547 b = *s++ & 15; /* ASCII digit -> value of digit */
10551 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10552 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10553 /* make sure they said 0x */
10556 b = (*s++ & 7) + 9;
10558 /* Prepare to put the digit we have onto the end
10559 of the number so far. We check for overflows.
10565 x = u << shift; /* make room for the digit */
10567 if ((x >> shift) != u
10568 && !(PL_hints & HINT_NEW_BINARY)) {
10571 if (ckWARN_d(WARN_OVERFLOW))
10572 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10573 "Integer overflow in %s number",
10576 u = x | b; /* add the digit to the end */
10579 n *= nvshift[shift];
10580 /* If an NV has not enough bits in its
10581 * mantissa to represent an UV this summing of
10582 * small low-order numbers is a waste of time
10583 * (because the NV cannot preserve the
10584 * low-order bits anyway): we could just
10585 * remember when did we overflow and in the
10586 * end just multiply n by the right
10594 /* if we get here, we had success: make a scalar value from
10599 /* final misplaced underbar check */
10600 if (s[-1] == '_') {
10601 if (ckWARN(WARN_SYNTAX))
10602 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10607 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10608 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10609 "%s number > %s non-portable",
10615 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10616 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10617 "%s number > %s non-portable",
10622 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10623 sv = new_constant(start, s - start, "integer",
10625 else if (PL_hints & HINT_NEW_BINARY)
10626 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10631 handle decimal numbers.
10632 we're also sent here when we read a 0 as the first digit
10634 case '1': case '2': case '3': case '4': case '5':
10635 case '6': case '7': case '8': case '9': case '.':
10638 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10641 /* read next group of digits and _ and copy into d */
10642 while (isDIGIT(*s) || *s == '_') {
10643 /* skip underscores, checking for misplaced ones
10647 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10648 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10649 "Misplaced _ in number");
10653 /* check for end of fixed-length buffer */
10655 Perl_croak(aTHX_ number_too_long);
10656 /* if we're ok, copy the character */
10661 /* final misplaced underbar check */
10662 if (lastub && s == lastub + 1) {
10663 if (ckWARN(WARN_SYNTAX))
10664 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10667 /* read a decimal portion if there is one. avoid
10668 3..5 being interpreted as the number 3. followed
10671 if (*s == '.' && s[1] != '.') {
10676 if (ckWARN(WARN_SYNTAX))
10677 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10678 "Misplaced _ in number");
10682 /* copy, ignoring underbars, until we run out of digits.
10684 for (; isDIGIT(*s) || *s == '_'; s++) {
10685 /* fixed length buffer check */
10687 Perl_croak(aTHX_ number_too_long);
10689 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10690 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10691 "Misplaced _ in number");
10697 /* fractional part ending in underbar? */
10698 if (s[-1] == '_') {
10699 if (ckWARN(WARN_SYNTAX))
10700 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10701 "Misplaced _ in number");
10703 if (*s == '.' && isDIGIT(s[1])) {
10704 /* oops, it's really a v-string, but without the "v" */
10710 /* read exponent part, if present */
10711 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10715 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10716 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10718 /* stray preinitial _ */
10720 if (ckWARN(WARN_SYNTAX))
10721 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10722 "Misplaced _ in number");
10726 /* allow positive or negative exponent */
10727 if (*s == '+' || *s == '-')
10730 /* stray initial _ */
10732 if (ckWARN(WARN_SYNTAX))
10733 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10734 "Misplaced _ in number");
10738 /* read digits of exponent */
10739 while (isDIGIT(*s) || *s == '_') {
10742 Perl_croak(aTHX_ number_too_long);
10746 if (((lastub && s == lastub + 1) ||
10747 (!isDIGIT(s[1]) && s[1] != '_'))
10748 && ckWARN(WARN_SYNTAX))
10749 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10750 "Misplaced _ in number");
10757 /* make an sv from the string */
10761 We try to do an integer conversion first if no characters
10762 indicating "float" have been found.
10767 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10769 if (flags == IS_NUMBER_IN_UV) {
10771 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10774 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10775 if (uv <= (UV) IV_MIN)
10776 sv_setiv(sv, -(IV)uv);
10783 /* terminate the string */
10785 nv = Atof(PL_tokenbuf);
10789 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10790 (PL_hints & HINT_NEW_INTEGER) )
10791 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10792 (floatit ? "float" : "integer"),
10796 /* if it starts with a v, it could be a v-string */
10799 sv = NEWSV(92,5); /* preallocate storage space */
10800 s = scan_vstring(s,sv);
10804 /* make the op for the constant and return */
10807 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10809 lvalp->opval = Nullop;
10815 S_scan_formline(pTHX_ register char *s)
10818 register char *eol;
10820 SV * const stuff = newSVpvs("");
10821 bool needargs = FALSE;
10822 bool eofmt = FALSE;
10824 while (!needargs) {
10826 #ifdef PERL_STRICT_CR
10827 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10829 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10831 if (*t == '\n' || t == PL_bufend) {
10836 if (PL_in_eval && !PL_rsfp) {
10837 eol = (char *) memchr(s,'\n',PL_bufend-s);
10842 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10844 for (t = s; t < eol; t++) {
10845 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10847 goto enough; /* ~~ must be first line in formline */
10849 if (*t == '@' || *t == '^')
10853 sv_catpvn(stuff, s, eol-s);
10854 #ifndef PERL_STRICT_CR
10855 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10856 char *end = SvPVX(stuff) + SvCUR(stuff);
10859 SvCUR_set(stuff, SvCUR(stuff) - 1);
10868 s = filter_gets(PL_linestr, PL_rsfp, 0);
10869 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10870 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10871 PL_last_lop = PL_last_uni = Nullch;
10880 if (SvCUR(stuff)) {
10883 PL_lex_state = LEX_NORMAL;
10884 PL_nextval[PL_nexttoke].ival = 0;
10888 PL_lex_state = LEX_FORMLINE;
10890 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10892 else if (PL_encoding)
10893 sv_recode_to_utf8(stuff, PL_encoding);
10895 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10897 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10901 SvREFCNT_dec(stuff);
10903 PL_lex_formbrack = 0;
10915 PL_cshlen = strlen(PL_cshname);
10920 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10923 const I32 oldsavestack_ix = PL_savestack_ix;
10924 CV* const outsidecv = PL_compcv;
10927 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10929 SAVEI32(PL_subline);
10930 save_item(PL_subname);
10931 SAVESPTR(PL_compcv);
10933 PL_compcv = (CV*)NEWSV(1104,0);
10934 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10935 CvFLAGS(PL_compcv) |= flags;
10937 PL_subline = CopLINE(PL_curcop);
10938 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10939 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10940 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10942 return oldsavestack_ix;
10946 #pragma segment Perl_yylex
10949 Perl_yywarn(pTHX_ const char *s)
10952 PL_in_eval |= EVAL_WARNONLY;
10954 PL_in_eval &= ~EVAL_WARNONLY;
10959 Perl_yyerror(pTHX_ const char *s)
10962 const char *where = NULL;
10963 const char *context = NULL;
10967 if (!yychar || (yychar == ';' && !PL_rsfp))
10969 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10970 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10971 PL_oldbufptr != PL_bufptr) {
10974 The code below is removed for NetWare because it abends/crashes on NetWare
10975 when the script has error such as not having the closing quotes like:
10976 if ($var eq "value)
10977 Checking of white spaces is anyway done in NetWare code.
10980 while (isSPACE(*PL_oldoldbufptr))
10983 context = PL_oldoldbufptr;
10984 contlen = PL_bufptr - PL_oldoldbufptr;
10986 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10987 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10990 The code below is removed for NetWare because it abends/crashes on NetWare
10991 when the script has error such as not having the closing quotes like:
10992 if ($var eq "value)
10993 Checking of white spaces is anyway done in NetWare code.
10996 while (isSPACE(*PL_oldbufptr))
10999 context = PL_oldbufptr;
11000 contlen = PL_bufptr - PL_oldbufptr;
11002 else if (yychar > 255)
11003 where = "next token ???";
11004 else if (yychar == -2) { /* YYEMPTY */
11005 if (PL_lex_state == LEX_NORMAL ||
11006 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11007 where = "at end of line";
11008 else if (PL_lex_inpat)
11009 where = "within pattern";
11011 where = "within string";
11014 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
11016 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11017 else if (isPRINT_LC(yychar))
11018 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
11020 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11021 where = SvPVX_const(where_sv);
11023 msg = sv_2mortal(newSVpv(s, 0));
11024 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11025 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11027 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
11029 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
11030 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11031 Perl_sv_catpvf(aTHX_ msg,
11032 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11033 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11036 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11037 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11040 if (PL_error_count >= 10) {
11041 if (PL_in_eval && SvCUR(ERRSV))
11042 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11043 ERRSV, OutCopFILE(PL_curcop));
11045 Perl_croak(aTHX_ "%s has too many errors.\n",
11046 OutCopFILE(PL_curcop));
11049 PL_in_my_stash = NULL;
11053 #pragma segment Main
11057 S_swallow_bom(pTHX_ U8 *s)
11060 const STRLEN slen = SvCUR(PL_linestr);
11063 if (s[1] == 0xFE) {
11064 /* UTF-16 little-endian? (or UTF32-LE?) */
11065 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11066 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11067 #ifndef PERL_NO_UTF16_FILTER
11068 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11071 if (PL_bufend > (char*)s) {
11075 filter_add(utf16rev_textfilter, NULL);
11076 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11077 utf16_to_utf8_reversed(s, news,
11078 PL_bufend - (char*)s - 1,
11080 sv_setpvn(PL_linestr, (const char*)news, newlen);
11082 SvUTF8_on(PL_linestr);
11083 s = (U8*)SvPVX(PL_linestr);
11084 PL_bufend = SvPVX(PL_linestr) + newlen;
11087 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11092 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11093 #ifndef PERL_NO_UTF16_FILTER
11094 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11097 if (PL_bufend > (char *)s) {
11101 filter_add(utf16_textfilter, NULL);
11102 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11103 utf16_to_utf8(s, news,
11104 PL_bufend - (char*)s,
11106 sv_setpvn(PL_linestr, (const char*)news, newlen);
11108 SvUTF8_on(PL_linestr);
11109 s = (U8*)SvPVX(PL_linestr);
11110 PL_bufend = SvPVX(PL_linestr) + newlen;
11113 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11118 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11119 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11120 s += 3; /* UTF-8 */
11126 if (s[2] == 0xFE && s[3] == 0xFF) {
11127 /* UTF-32 big-endian */
11128 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11131 else if (s[2] == 0 && s[3] != 0) {
11134 * are a good indicator of UTF-16BE. */
11135 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11140 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11143 * are a good indicator of UTF-16LE. */
11144 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11153 * Restore a source filter.
11157 restore_rsfp(pTHX_ void *f)
11160 PerlIO * const fp = (PerlIO*)f;
11162 if (PL_rsfp == PerlIO_stdin())
11163 PerlIO_clearerr(PL_rsfp);
11164 else if (PL_rsfp && (PL_rsfp != fp))
11165 PerlIO_close(PL_rsfp);
11169 #ifndef PERL_NO_UTF16_FILTER
11171 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11174 const STRLEN old = SvCUR(sv);
11175 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11176 DEBUG_P(PerlIO_printf(Perl_debug_log,
11177 "utf16_textfilter(%p): %d %d (%d)\n",
11178 utf16_textfilter, idx, maxlen, (int) count));
11182 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11183 Copy(SvPVX_const(sv), tmps, old, char);
11184 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11185 SvCUR(sv) - old, &newlen);
11186 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11188 DEBUG_P({sv_dump(sv);});
11193 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11196 const STRLEN old = SvCUR(sv);
11197 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11198 DEBUG_P(PerlIO_printf(Perl_debug_log,
11199 "utf16rev_textfilter(%p): %d %d (%d)\n",
11200 utf16rev_textfilter, idx, maxlen, (int) count));
11204 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11205 Copy(SvPVX_const(sv), tmps, old, char);
11206 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11207 SvCUR(sv) - old, &newlen);
11208 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11210 DEBUG_P({ sv_dump(sv); });
11216 Returns a pointer to the next character after the parsed
11217 vstring, as well as updating the passed in sv.
11219 Function must be called like
11222 s = scan_vstring(s,sv);
11224 The sv should already be large enough to store the vstring
11225 passed in, for performance reasons.
11230 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11233 const char *pos = s;
11234 const char *start = s;
11235 if (*pos == 'v') pos++; /* get past 'v' */
11236 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11238 if ( *pos != '.') {
11239 /* this may not be a v-string if followed by => */
11240 const char *next = pos;
11241 while (next < PL_bufend && isSPACE(*next))
11243 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11244 /* return string not v-string */
11245 sv_setpvn(sv,(char *)s,pos-s);
11246 return (char *)pos;
11250 if (!isALPHA(*pos)) {
11251 U8 tmpbuf[UTF8_MAXBYTES+1];
11253 if (*s == 'v') s++; /* get past 'v' */
11255 sv_setpvn(sv, "", 0);
11261 /* this is atoi() that tolerates underscores */
11262 const char *end = pos;
11264 while (--end >= s) {
11269 rev += (*end - '0') * mult;
11271 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11272 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11273 "Integer overflow in decimal number");
11277 if (rev > 0x7FFFFFFF)
11278 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11280 /* Append native character for the rev point */
11281 tmpend = uvchr_to_utf8(tmpbuf, rev);
11282 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11283 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11285 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11291 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11295 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11303 * c-indentation-style: bsd
11304 * c-basic-offset: 4
11305 * indent-tabs-mode: t
11308 * ex: set ts=8 sts=4 sw=4 noet: