3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
288 const char *name = Nullch;
289 enum token_type type = TOKENTYPE_NONE;
290 const struct debug_tokens *p;
291 SV* const report = newSVpvs("<== ");
293 for (p = debug_tokens; p->token; p++) {
294 if (p->token == (int)rv) {
301 Perl_sv_catpv(aTHX_ report, name);
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
305 sv_catpvs(report, "EOF");
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
322 case TOKENTYPE_OPVAL:
324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
325 PL_op_name[yylval.opval->op_type]);
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
333 sv_catpvs(report, "(opval=null)");
336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
342 /* print the buffer with suitable escapes */
345 S_printbuf(pTHX_ const char* fmt, const char* s)
347 SV* const tmp = newSVpvs("");
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
362 S_ao(pTHX_ int toketype)
365 if (*PL_bufptr == '=') {
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
392 S_no_op(pTHX_ const char *what, char *s)
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
403 if (ckWARN_d(WARN_SYNTAX)) {
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
434 S_missingterm(pTHX_ char *s)
440 char * const nl = strrchr(s,'\n');
446 iscntrl(PL_multi_close)
448 PL_multi_close < 32 || PL_multi_close == 127
452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
457 *tmpbuf = (char)PL_multi_close;
461 q = strchr(s,'"') ? '\'' : '"';
462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
465 #define FEATURE_IS_ENABLED(name) \
466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
473 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
476 HV * const hinthv = GvHV(PL_hintgv);
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
488 Perl_deprecate(pTHX_ const char *s)
490 if (ckWARN(WARN_DEPRECATED))
491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
495 Perl_deprecate_old(pTHX_ const char *s)
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
507 "Use of %s is deprecated", s);
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
515 #ifdef PERL_CR_FILTER
519 register const char *s = SvPVX_const(sv);
520 register const char * const e = s + SvCUR(sv);
521 /* outer loop optimized to do nothing if there are no CR-LFs */
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
528 if (*s == '\r' && s[1] == '\n')
539 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
542 if (count > 0 && !maxlen)
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
555 Perl_lex_start(pTHX_ SV *line)
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
566 SAVEVPTR(PL_lex_inpat);
567 SAVEI32(PL_lex_inwhat);
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
574 SAVEI32(PL_nexttoke);
576 SAVECOPLINE(PL_curcop);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
590 SAVEI32(PL_sublex_info.sub_inwhat);
591 SAVESPTR(PL_lex_repl);
593 SAVEINT(PL_lex_expect);
595 PL_lex_state = LEX_NORMAL;
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
602 *PL_lex_casestack = '\0';
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
610 PL_sublex_info.sub_inwhat = 0;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
614 s = SvPV_const(PL_linestr, len);
615 if (!len || s[len-1] != ';') {
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
618 sv_catpvs(PL_linestr, "\n;");
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
623 PL_last_lop = PL_last_uni = Nullch;
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
637 PL_doextract = FALSE;
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
644 * increments the current line number in CopLINE(PL_curcop) and checks
645 * to see whether the line starts with a comment of the form
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
651 S_incline(pTHX_ char *s)
659 CopLINE_inc(PL_curcop);
662 while (SPACE_OR_TAB(*s)) s++;
663 if (strnEQ(s, "line", 4))
667 if (SPACE_OR_TAB(*s))
671 while (SPACE_OR_TAB(*s)) s++;
677 while (SPACE_OR_TAB(*s))
679 if (*s == '"' && (t = strchr(s+1, '"'))) {
684 for (t = s; !isSPACE(*t); t++) ;
687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
696 const char * const cf = CopFILE(PL_curcop);
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
732 CopFILE_free(PL_curcop);
733 CopFILE_set(PL_curcop, s);
736 CopLINE_set(PL_curcop, atoi(n)-1);
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
746 S_skipspace(pTHX_ register char *s)
749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
750 while (s < PL_bufend && SPACE_OR_TAB(*s))
756 SSize_t oldprevlen, oldoldprevlen;
757 SSize_t oldloplen = 0, oldunilen = 0;
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
769 if (PL_in_eval && !PL_rsfp) {
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
784 /* try to recharge the buffer */
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
788 /* end of file. Add on the -p or -n magic */
791 ";}continue{print or die qq(-p destination: $!\\n);}");
792 PL_minus_n = PL_minus_p = 0;
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
799 sv_setpvn(PL_linestr,";", 1);
801 /* reset variables for next time we lex */
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
805 PL_last_lop = PL_last_uni = Nullch;
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
819 (void)PerlIO_close(PL_rsfp);
824 /* not at end of file, so we only read another line */
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
829 oldunilen = PL_last_uni - PL_bufend;
831 oldloplen = PL_last_lop - PL_bufend;
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
838 PL_last_uni = s + oldunilen;
840 PL_last_lop = s + oldloplen;
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
847 SV * const sv = newSV(0);
849 sv_upgrade(sv, SVt_PVMG);
850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
874 if (PL_oldoldbufptr != PL_last_uni)
876 while (isSPACE(*PL_last_uni))
878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
879 if ((t = strchr(s, '(')) && t < PL_bufptr)
882 /* XXX Things like this are just so nasty. We shouldn't be modifying
883 source code, even if we realquick set it back. */
884 if (ckWARN_d(WARN_AMBIGUOUS)){
887 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
888 "Warning: Use of \"%s\" without parentheses is ambiguous",
895 * LOP : macro to build a list operator. Its behaviour has been replaced
896 * with a subroutine, S_lop() for which LOP is just another name.
899 #define LOP(f,x) return lop(f,x,s)
903 * Build a list operator (or something that might be one). The rules:
904 * - if we have a next token, then it's a list operator [why?]
905 * - if the next thing is an opening paren, then it's a function
906 * - else it's a list operator
910 S_lop(pTHX_ I32 f, int x, char *s)
917 PL_last_lop = PL_oldbufptr;
918 PL_last_lop_op = (OPCODE)f;
920 return REPORT(LSTOP);
927 return REPORT(LSTOP);
932 * When the lexer realizes it knows the next token (for instance,
933 * it is reordering tokens for the parser) then it can call S_force_next
934 * to know what token to return the next time the lexer is called. Caller
935 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
936 * handles the token correctly.
940 S_force_next(pTHX_ I32 type)
943 PL_nexttype[PL_nexttoke] = type;
945 if (PL_lex_state != LEX_KNOWNEXT) {
946 PL_lex_defer = PL_lex_state;
947 PL_lex_expect = PL_expect;
948 PL_lex_state = LEX_KNOWNEXT;
953 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
956 SV * const sv = newSVpvn(start,len);
957 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
964 * When the lexer knows the next thing is a word (for instance, it has
965 * just seen -> and it knows that the next char is a word char, then
966 * it calls S_force_word to stick the next word into the PL_next lookahead.
969 * char *start : buffer position (must be within PL_linestr)
970 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
971 * int check_keyword : if true, Perl checks to make sure the word isn't
972 * a keyword (do this if the word is a label, e.g. goto FOO)
973 * int allow_pack : if true, : characters will also be allowed (require,
975 * int allow_initial_tick : used by the "sub" lexer only.
979 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
985 start = skipspace(start);
987 if (isIDFIRST_lazy_if(s,UTF) ||
988 (allow_pack && *s == ':') ||
989 (allow_initial_tick && *s == '\'') )
991 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
992 if (check_keyword && keyword(PL_tokenbuf, len))
994 if (token == METHOD) {
999 PL_expect = XOPERATOR;
1002 PL_nextval[PL_nexttoke].opval
1003 = (OP*)newSVOP(OP_CONST,0,
1004 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1005 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
1013 * Called when the lexer wants $foo *foo &foo etc, but the program
1014 * text only contains the "foo" portion. The first argument is a pointer
1015 * to the "foo", and the second argument is the type symbol to prefix.
1016 * Forces the next token to be a "WORD".
1017 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1021 S_force_ident(pTHX_ register const char *s, int kind)
1025 const STRLEN len = strlen(s);
1026 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1027 PL_nextval[PL_nexttoke].opval = o;
1030 o->op_private = OPpCONST_ENTERED;
1031 /* XXX see note in pp_entereval() for why we forgo typo
1032 warnings if the symbol must be introduced in an eval.
1034 gv_fetchpvn_flags(s, len,
1035 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1037 kind == '$' ? SVt_PV :
1038 kind == '@' ? SVt_PVAV :
1039 kind == '%' ? SVt_PVHV :
1047 Perl_str_to_version(pTHX_ SV *sv)
1052 const char *start = SvPV_const(sv,len);
1053 const char * const end = start + len;
1054 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1055 while (start < end) {
1059 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1064 retval += ((NV)n)/nshift;
1073 * Forces the next token to be a version number.
1074 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1075 * and if "guessing" is TRUE, then no new token is created (and the caller
1076 * must use an alternative parsing method).
1080 S_force_version(pTHX_ char *s, int guessing)
1083 OP *version = Nullop;
1092 while (isDIGIT(*d) || *d == '_' || *d == '.')
1094 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1096 s = scan_num(s, &yylval);
1097 version = yylval.opval;
1098 ver = cSVOPx(version)->op_sv;
1099 if (SvPOK(ver) && !SvNIOK(ver)) {
1100 SvUPGRADE(ver, SVt_PVNV);
1101 SvNV_set(ver, str_to_version(ver));
1102 SvNOK_on(ver); /* hint that it is a version */
1109 /* NOTE: The parser sees the package name and the VERSION swapped */
1110 PL_nextval[PL_nexttoke].opval = version;
1118 * Tokenize a quoted string passed in as an SV. It finds the next
1119 * chunk, up to end of string or a backslash. It may make a new
1120 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1125 S_tokeq(pTHX_ SV *sv)
1129 register char *send;
1137 s = SvPV_force(sv, len);
1138 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1141 while (s < send && *s != '\\')
1146 if ( PL_hints & HINT_NEW_STRING ) {
1147 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1153 if (s + 1 < send && (s[1] == '\\'))
1154 s++; /* all that, just for this */
1159 SvCUR_set(sv, d - SvPVX_const(sv));
1161 if ( PL_hints & HINT_NEW_STRING )
1162 return new_constant(NULL, 0, "q", sv, pv, "q");
1167 * Now come three functions related to double-quote context,
1168 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1169 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1170 * interact with PL_lex_state, and create fake ( ... ) argument lists
1171 * to handle functions and concatenation.
1172 * They assume that whoever calls them will be setting up a fake
1173 * join call, because each subthing puts a ',' after it. This lets
1176 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1178 * (I'm not sure whether the spurious commas at the end of lcfirst's
1179 * arguments and join's arguments are created or not).
1184 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1186 * Pattern matching will set PL_lex_op to the pattern-matching op to
1187 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1189 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1191 * Everything else becomes a FUNC.
1193 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1194 * had an OP_CONST or OP_READLINE). This just sets us up for a
1195 * call to S_sublex_push().
1199 S_sublex_start(pTHX)
1202 register const I32 op_type = yylval.ival;
1204 if (op_type == OP_NULL) {
1205 yylval.opval = PL_lex_op;
1209 if (op_type == OP_CONST || op_type == OP_READLINE) {
1210 SV *sv = tokeq(PL_lex_stuff);
1212 if (SvTYPE(sv) == SVt_PVIV) {
1213 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1215 const char *p = SvPV_const(sv, len);
1216 SV * const nsv = newSVpvn(p, len);
1222 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1223 PL_lex_stuff = Nullsv;
1224 /* Allow <FH> // "foo" */
1225 if (op_type == OP_READLINE)
1226 PL_expect = XTERMORDORDOR;
1230 PL_sublex_info.super_state = PL_lex_state;
1231 PL_sublex_info.sub_inwhat = op_type;
1232 PL_sublex_info.sub_op = PL_lex_op;
1233 PL_lex_state = LEX_INTERPPUSH;
1237 yylval.opval = PL_lex_op;
1247 * Create a new scope to save the lexing state. The scope will be
1248 * ended in S_sublex_done. Returns a '(', starting the function arguments
1249 * to the uc, lc, etc. found before.
1250 * Sets PL_lex_state to LEX_INTERPCONCAT.
1259 PL_lex_state = PL_sublex_info.super_state;
1260 SAVEI32(PL_lex_dojoin);
1261 SAVEI32(PL_lex_brackets);
1262 SAVEI32(PL_lex_casemods);
1263 SAVEI32(PL_lex_starts);
1264 SAVEI32(PL_lex_state);
1265 SAVEVPTR(PL_lex_inpat);
1266 SAVEI32(PL_lex_inwhat);
1267 SAVECOPLINE(PL_curcop);
1268 SAVEPPTR(PL_bufptr);
1269 SAVEPPTR(PL_bufend);
1270 SAVEPPTR(PL_oldbufptr);
1271 SAVEPPTR(PL_oldoldbufptr);
1272 SAVEPPTR(PL_last_lop);
1273 SAVEPPTR(PL_last_uni);
1274 SAVEPPTR(PL_linestart);
1275 SAVESPTR(PL_linestr);
1276 SAVEGENERICPV(PL_lex_brackstack);
1277 SAVEGENERICPV(PL_lex_casestack);
1279 PL_linestr = PL_lex_stuff;
1280 PL_lex_stuff = Nullsv;
1282 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1283 = SvPVX(PL_linestr);
1284 PL_bufend += SvCUR(PL_linestr);
1285 PL_last_lop = PL_last_uni = Nullch;
1286 SAVEFREESV(PL_linestr);
1288 PL_lex_dojoin = FALSE;
1289 PL_lex_brackets = 0;
1290 Newx(PL_lex_brackstack, 120, char);
1291 Newx(PL_lex_casestack, 12, char);
1292 PL_lex_casemods = 0;
1293 *PL_lex_casestack = '\0';
1295 PL_lex_state = LEX_INTERPCONCAT;
1296 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1298 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1299 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1300 PL_lex_inpat = PL_sublex_info.sub_op;
1302 PL_lex_inpat = Nullop;
1309 * Restores lexer state after a S_sublex_push.
1316 if (!PL_lex_starts++) {
1317 SV * const sv = newSVpvs("");
1318 if (SvUTF8(PL_linestr))
1320 PL_expect = XOPERATOR;
1321 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1325 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1326 PL_lex_state = LEX_INTERPCASEMOD;
1330 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1331 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1332 PL_linestr = PL_lex_repl;
1334 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1335 PL_bufend += SvCUR(PL_linestr);
1336 PL_last_lop = PL_last_uni = Nullch;
1337 SAVEFREESV(PL_linestr);
1338 PL_lex_dojoin = FALSE;
1339 PL_lex_brackets = 0;
1340 PL_lex_casemods = 0;
1341 *PL_lex_casestack = '\0';
1343 if (SvEVALED(PL_lex_repl)) {
1344 PL_lex_state = LEX_INTERPNORMAL;
1346 /* we don't clear PL_lex_repl here, so that we can check later
1347 whether this is an evalled subst; that means we rely on the
1348 logic to ensure sublex_done() is called again only via the
1349 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1352 PL_lex_state = LEX_INTERPCONCAT;
1353 PL_lex_repl = Nullsv;
1359 PL_bufend = SvPVX(PL_linestr);
1360 PL_bufend += SvCUR(PL_linestr);
1361 PL_expect = XOPERATOR;
1362 PL_sublex_info.sub_inwhat = 0;
1370 Extracts a pattern, double-quoted string, or transliteration. This
1373 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1374 processing a pattern (PL_lex_inpat is true), a transliteration
1375 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1377 Returns a pointer to the character scanned up to. Iff this is
1378 advanced from the start pointer supplied (ie if anything was
1379 successfully parsed), will leave an OP for the substring scanned
1380 in yylval. Caller must intuit reason for not parsing further
1381 by looking at the next characters herself.
1385 double-quoted style: \r and \n
1386 regexp special ones: \D \s
1388 backrefs: \1 (deprecated in substitution replacements)
1389 case and quoting: \U \Q \E
1390 stops on @ and $, but not for $ as tail anchor
1392 In transliterations:
1393 characters are VERY literal, except for - not at the start or end
1394 of the string, which indicates a range. scan_const expands the
1395 range to the full set of intermediate characters.
1397 In double-quoted strings:
1399 double-quoted style: \r and \n
1401 backrefs: \1 (deprecated)
1402 case and quoting: \U \Q \E
1405 scan_const does *not* construct ops to handle interpolated strings.
1406 It stops processing as soon as it finds an embedded $ or @ variable
1407 and leaves it to the caller to work out what's going on.
1409 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1411 $ in pattern could be $foo or could be tail anchor. Assumption:
1412 it's a tail anchor if $ is the last thing in the string, or if it's
1413 followed by one of ")| \n\t"
1415 \1 (backreferences) are turned into $1
1417 The structure of the code is
1418 while (there's a character to process) {
1419 handle transliteration ranges
1420 skip regexp comments
1421 skip # initiated comments in //x patterns
1422 check for embedded @foo
1423 check for embedded scalars
1425 leave intact backslashes from leave (below)
1426 deprecate \1 in strings and sub replacements
1427 handle string-changing backslashes \l \U \Q \E, etc.
1428 switch (what was escaped) {
1429 handle - in a transliteration (becomes a literal -)
1430 handle \132 octal characters
1431 handle 0x15 hex characters
1432 handle \cV (control V)
1433 handle printf backslashes (\f, \r, \n, etc)
1435 } (end if backslash)
1436 } (end while character to read)
1441 S_scan_const(pTHX_ char *start)
1444 register char *send = PL_bufend; /* end of the constant */
1445 SV *sv = newSV(send - start); /* sv for the constant */
1446 register char *s = start; /* start of the constant */
1447 register char *d = SvPVX(sv); /* destination for copies */
1448 bool dorange = FALSE; /* are we in a translit range? */
1449 bool didrange = FALSE; /* did we just finish a range? */
1450 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1451 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1454 UV literal_endpoint = 0;
1457 const char *leaveit = /* set of acceptably-backslashed characters */
1459 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1462 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1463 /* If we are doing a trans and we know we want UTF8 set expectation */
1464 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1465 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1469 while (s < send || dorange) {
1470 /* get transliterations out of the way (they're most literal) */
1471 if (PL_lex_inwhat == OP_TRANS) {
1472 /* expand a range A-Z to the full set of characters. AIE! */
1474 I32 i; /* current expanded character */
1475 I32 min; /* first character in range */
1476 I32 max; /* last character in range */
1479 char * const c = (char*)utf8_hop((U8*)d, -1);
1483 *c = (char)UTF_TO_NATIVE(0xff);
1484 /* mark the range as done, and continue */
1490 i = d - SvPVX_const(sv); /* remember current offset */
1491 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1492 d = SvPVX(sv) + i; /* refresh d after realloc */
1493 d -= 2; /* eat the first char and the - */
1495 min = (U8)*d; /* first char in range */
1496 max = (U8)d[1]; /* last char in range */
1500 "Invalid range \"%c-%c\" in transliteration operator",
1501 (char)min, (char)max);
1505 if (literal_endpoint == 2 &&
1506 ((isLOWER(min) && isLOWER(max)) ||
1507 (isUPPER(min) && isUPPER(max)))) {
1509 for (i = min; i <= max; i++)
1511 *d++ = NATIVE_TO_NEED(has_utf8,i);
1513 for (i = min; i <= max; i++)
1515 *d++ = NATIVE_TO_NEED(has_utf8,i);
1520 for (i = min; i <= max; i++)
1523 /* mark the range as done, and continue */
1527 literal_endpoint = 0;
1532 /* range begins (ignore - as first or last char) */
1533 else if (*s == '-' && s+1 < send && s != start) {
1535 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1538 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1548 literal_endpoint = 0;
1553 /* if we get here, we're not doing a transliteration */
1555 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1556 except for the last char, which will be done separately. */
1557 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1559 while (s+1 < send && *s != ')')
1560 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1562 else if (s[2] == '{' /* This should match regcomp.c */
1563 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1566 char *regparse = s + (s[2] == '{' ? 3 : 4);
1569 while (count && (c = *regparse)) {
1570 if (c == '\\' && regparse[1])
1578 if (*regparse != ')')
1579 regparse--; /* Leave one char for continuation. */
1580 while (s < regparse)
1581 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1585 /* likewise skip #-initiated comments in //x patterns */
1586 else if (*s == '#' && PL_lex_inpat &&
1587 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1588 while (s+1 < send && *s != '\n')
1589 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1592 /* check for embedded arrays
1593 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1595 else if (*s == '@' && s[1]
1596 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1599 /* check for embedded scalars. only stop if we're sure it's a
1602 else if (*s == '$') {
1603 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1605 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1606 break; /* in regexp, $ might be tail anchor */
1609 /* End of else if chain - OP_TRANS rejoin rest */
1612 if (*s == '\\' && s+1 < send) {
1615 /* some backslashes we leave behind */
1616 if (*leaveit && *s && strchr(leaveit, *s)) {
1617 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1618 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1622 /* deprecate \1 in strings and substitution replacements */
1623 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1624 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1626 if (ckWARN(WARN_SYNTAX))
1627 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1632 /* string-change backslash escapes */
1633 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1638 /* if we get here, it's either a quoted -, or a digit */
1641 /* quoted - in transliterations */
1643 if (PL_lex_inwhat == OP_TRANS) {
1653 Perl_warner(aTHX_ packWARN(WARN_MISC),
1654 "Unrecognized escape \\%c passed through",
1656 /* default action is to copy the quoted character */
1657 goto default_action;
1660 /* \132 indicates an octal constant */
1661 case '0': case '1': case '2': case '3':
1662 case '4': case '5': case '6': case '7':
1666 uv = grok_oct(s, &len, &flags, NULL);
1669 goto NUM_ESCAPE_INSERT;
1671 /* \x24 indicates a hex constant */
1675 char* const e = strchr(s, '}');
1676 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1677 PERL_SCAN_DISALLOW_PREFIX;
1682 yyerror("Missing right brace on \\x{}");
1686 uv = grok_hex(s, &len, &flags, NULL);
1692 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1693 uv = grok_hex(s, &len, &flags, NULL);
1699 /* Insert oct or hex escaped character.
1700 * There will always enough room in sv since such
1701 * escapes will be longer than any UTF-8 sequence
1702 * they can end up as. */
1704 /* We need to map to chars to ASCII before doing the tests
1707 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1708 if (!has_utf8 && uv > 255) {
1709 /* Might need to recode whatever we have
1710 * accumulated so far if it contains any
1713 * (Can't we keep track of that and avoid
1714 * this rescan? --jhi)
1718 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1719 if (!NATIVE_IS_INVARIANT(*c)) {
1724 const STRLEN offset = d - SvPVX_const(sv);
1726 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1730 while (src >= (const U8 *)SvPVX_const(sv)) {
1731 if (!NATIVE_IS_INVARIANT(*src)) {
1732 const U8 ch = NATIVE_TO_ASCII(*src);
1733 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1734 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1744 if (has_utf8 || uv > 255) {
1745 d = (char*)uvchr_to_utf8((U8*)d, uv);
1747 if (PL_lex_inwhat == OP_TRANS &&
1748 PL_sublex_info.sub_op) {
1749 PL_sublex_info.sub_op->op_private |=
1750 (PL_lex_repl ? OPpTRANS_FROM_UTF
1763 /* \N{LATIN SMALL LETTER A} is a named character */
1767 char* e = strchr(s, '}');
1773 yyerror("Missing right brace on \\N{}");
1777 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1779 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1780 PERL_SCAN_DISALLOW_PREFIX;
1783 uv = grok_hex(s, &len, &flags, NULL);
1785 goto NUM_ESCAPE_INSERT;
1787 res = newSVpvn(s + 1, e - s - 1);
1788 res = new_constant( Nullch, 0, "charnames",
1789 res, Nullsv, "\\N{...}" );
1791 sv_utf8_upgrade(res);
1792 str = SvPV_const(res,len);
1793 #ifdef EBCDIC_NEVER_MIND
1794 /* charnames uses pack U and that has been
1795 * recently changed to do the below uni->native
1796 * mapping, so this would be redundant (and wrong,
1797 * the code point would be doubly converted).
1798 * But leave this in just in case the pack U change
1799 * gets revoked, but the semantics is still
1800 * desireable for charnames. --jhi */
1802 UV uv = utf8_to_uvchr((const U8*)str, 0);
1805 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1807 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1808 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1809 str = SvPV_const(res, len);
1813 if (!has_utf8 && SvUTF8(res)) {
1814 const char * const ostart = SvPVX_const(sv);
1815 SvCUR_set(sv, d - ostart);
1818 sv_utf8_upgrade(sv);
1819 /* this just broke our allocation above... */
1820 SvGROW(sv, (STRLEN)(send - start));
1821 d = SvPVX(sv) + SvCUR(sv);
1824 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1825 const char * const odest = SvPVX_const(sv);
1827 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1828 d = SvPVX(sv) + (d - odest);
1830 Copy(str, d, len, char);
1837 yyerror("Missing braces on \\N{}");
1840 /* \c is a control character */
1849 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1852 yyerror("Missing control char name in \\c");
1856 /* printf-style backslashes, formfeeds, newlines, etc */
1858 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1861 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1864 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1867 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1870 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1873 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1876 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1882 } /* end if (backslash) */
1889 /* If we started with encoded form, or already know we want it
1890 and then encode the next character */
1891 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1893 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1894 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1897 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1898 const STRLEN off = d - SvPVX_const(sv);
1899 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1901 d = (char*)uvchr_to_utf8((U8*)d, uv);
1905 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1907 } /* while loop to process each character */
1909 /* terminate the string and set up the sv */
1911 SvCUR_set(sv, d - SvPVX_const(sv));
1912 if (SvCUR(sv) >= SvLEN(sv))
1913 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1916 if (PL_encoding && !has_utf8) {
1917 sv_recode_to_utf8(sv, PL_encoding);
1923 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1924 PL_sublex_info.sub_op->op_private |=
1925 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1929 /* shrink the sv if we allocated more than we used */
1930 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1931 SvPV_shrink_to_cur(sv);
1934 /* return the substring (via yylval) only if we parsed anything */
1935 if (s > PL_bufptr) {
1936 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1937 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1939 ( PL_lex_inwhat == OP_TRANS
1941 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1944 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1951 * Returns TRUE if there's more to the expression (e.g., a subscript),
1954 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1956 * ->[ and ->{ return TRUE
1957 * { and [ outside a pattern are always subscripts, so return TRUE
1958 * if we're outside a pattern and it's not { or [, then return FALSE
1959 * if we're in a pattern and the first char is a {
1960 * {4,5} (any digits around the comma) returns FALSE
1961 * if we're in a pattern and the first char is a [
1963 * [SOMETHING] has a funky algorithm to decide whether it's a
1964 * character class or not. It has to deal with things like
1965 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1966 * anything else returns TRUE
1969 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1972 S_intuit_more(pTHX_ register char *s)
1975 if (PL_lex_brackets)
1977 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1979 if (*s != '{' && *s != '[')
1984 /* In a pattern, so maybe we have {n,m}. */
2001 /* On the other hand, maybe we have a character class */
2004 if (*s == ']' || *s == '^')
2007 /* this is terrifying, and it works */
2008 int weight = 2; /* let's weigh the evidence */
2010 unsigned char un_char = 255, last_un_char;
2011 const char * const send = strchr(s,']');
2012 char tmpbuf[sizeof PL_tokenbuf * 4];
2014 if (!send) /* has to be an expression */
2017 Zero(seen,256,char);
2020 else if (isDIGIT(*s)) {
2022 if (isDIGIT(s[1]) && s[2] == ']')
2028 for (; s < send; s++) {
2029 last_un_char = un_char;
2030 un_char = (unsigned char)*s;
2035 weight -= seen[un_char] * 10;
2036 if (isALNUM_lazy_if(s+1,UTF)) {
2038 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2039 len = (int)strlen(tmpbuf);
2040 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2045 else if (*s == '$' && s[1] &&
2046 strchr("[#!%*<>()-=",s[1])) {
2047 if (/*{*/ strchr("])} =",s[2]))
2056 if (strchr("wds]",s[1]))
2058 else if (seen['\''] || seen['"'])
2060 else if (strchr("rnftbxcav",s[1]))
2062 else if (isDIGIT(s[1])) {
2064 while (s[1] && isDIGIT(s[1]))
2074 if (strchr("aA01! ",last_un_char))
2076 if (strchr("zZ79~",s[1]))
2078 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2079 weight -= 5; /* cope with negative subscript */
2082 if (!isALNUM(last_un_char)
2083 && !(last_un_char == '$' || last_un_char == '@'
2084 || last_un_char == '&')
2085 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2090 if (keyword(tmpbuf, d - tmpbuf))
2093 if (un_char == last_un_char + 1)
2095 weight -= seen[un_char];
2100 if (weight >= 0) /* probably a character class */
2110 * Does all the checking to disambiguate
2112 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2113 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2115 * First argument is the stuff after the first token, e.g. "bar".
2117 * Not a method if bar is a filehandle.
2118 * Not a method if foo is a subroutine prototyped to take a filehandle.
2119 * Not a method if it's really "Foo $bar"
2120 * Method if it's "foo $bar"
2121 * Not a method if it's really "print foo $bar"
2122 * Method if it's really "foo package::" (interpreted as package->foo)
2123 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2124 * Not a method if bar is a filehandle or package, but is quoted with
2129 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2132 char *s = start + (*start == '$');
2133 char tmpbuf[sizeof PL_tokenbuf];
2138 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2142 const char *proto = SvPVX_const(cv);
2153 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2154 /* start is the beginning of the possible filehandle/object,
2155 * and s is the end of it
2156 * tmpbuf is a copy of it
2159 if (*start == '$') {
2160 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2165 return *s == '(' ? FUNCMETH : METHOD;
2167 if (!keyword(tmpbuf, len)) {
2168 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2173 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2174 if (indirgv && GvCVu(indirgv))
2176 /* filehandle or package name makes it a method */
2177 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2179 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2180 return 0; /* no assumptions -- "=>" quotes bearword */
2182 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2183 newSVpvn(tmpbuf,len));
2184 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2188 return *s == '(' ? FUNCMETH : METHOD;
2196 * Return a string of Perl code to load the debugger. If PERL5DB
2197 * is set, it will return the contents of that, otherwise a
2198 * compile-time require of perl5db.pl.
2206 const char * const pdb = PerlEnv_getenv("PERL5DB");
2210 SETERRNO(0,SS_NORMAL);
2211 return "BEGIN { require 'perl5db.pl' }";
2217 /* Encoded script support. filter_add() effectively inserts a
2218 * 'pre-processing' function into the current source input stream.
2219 * Note that the filter function only applies to the current source file
2220 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2222 * The datasv parameter (which may be NULL) can be used to pass
2223 * private data to this instance of the filter. The filter function
2224 * can recover the SV using the FILTER_DATA macro and use it to
2225 * store private buffers and state information.
2227 * The supplied datasv parameter is upgraded to a PVIO type
2228 * and the IoDIRP/IoANY field is used to store the function pointer,
2229 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2230 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2231 * private use must be set using malloc'd pointers.
2235 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2241 if (!PL_rsfp_filters)
2242 PL_rsfp_filters = newAV();
2245 SvUPGRADE(datasv, SVt_PVIO);
2246 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2247 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2248 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2249 IoANY(datasv), SvPV_nolen(datasv)));
2250 av_unshift(PL_rsfp_filters, 1);
2251 av_store(PL_rsfp_filters, 0, datasv) ;
2256 /* Delete most recently added instance of this filter function. */
2258 Perl_filter_del(pTHX_ filter_t funcp)
2264 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2266 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2268 /* if filter is on top of stack (usual case) just pop it off */
2269 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2270 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2271 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2272 IoANY(datasv) = (void *)NULL;
2273 sv_free(av_pop(PL_rsfp_filters));
2277 /* we need to search for the correct entry and clear it */
2278 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2282 /* Invoke the idxth filter function for the current rsfp. */
2283 /* maxlen 0 = read one text line */
2285 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2291 if (!PL_rsfp_filters)
2293 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2294 /* Provide a default input filter to make life easy. */
2295 /* Note that we append to the line. This is handy. */
2296 DEBUG_P(PerlIO_printf(Perl_debug_log,
2297 "filter_read %d: from rsfp\n", idx));
2301 const int old_len = SvCUR(buf_sv);
2303 /* ensure buf_sv is large enough */
2304 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2305 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2306 if (PerlIO_error(PL_rsfp))
2307 return -1; /* error */
2309 return 0 ; /* end of file */
2311 SvCUR_set(buf_sv, old_len + len) ;
2314 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2315 if (PerlIO_error(PL_rsfp))
2316 return -1; /* error */
2318 return 0 ; /* end of file */
2321 return SvCUR(buf_sv);
2323 /* Skip this filter slot if filter has been deleted */
2324 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2325 DEBUG_P(PerlIO_printf(Perl_debug_log,
2326 "filter_read %d: skipped (filter deleted)\n",
2328 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2330 /* Get function pointer hidden within datasv */
2331 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2332 DEBUG_P(PerlIO_printf(Perl_debug_log,
2333 "filter_read %d: via function %p (%s)\n",
2334 idx, datasv, SvPV_nolen_const(datasv)));
2335 /* Call function. The function is expected to */
2336 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2337 /* Return: <0:error, =0:eof, >0:not eof */
2338 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2342 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2345 #ifdef PERL_CR_FILTER
2346 if (!PL_rsfp_filters) {
2347 filter_add(S_cr_textfilter,NULL);
2350 if (PL_rsfp_filters) {
2352 SvCUR_set(sv, 0); /* start with empty line */
2353 if (FILTER_READ(0, sv, 0) > 0)
2354 return ( SvPVX(sv) ) ;
2359 return (sv_gets(sv, fp, append));
2363 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2368 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2372 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2373 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2375 return GvHV(gv); /* Foo:: */
2378 /* use constant CLASS => 'MyClass' */
2379 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
2381 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2382 pkgname = SvPV_nolen_const(sv);
2386 return gv_stashpv(pkgname, FALSE);
2390 S_tokenize_use(pTHX_ int is_use, char *s) {
2392 if (PL_expect != XSTATE)
2393 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2394 is_use ? "use" : "no"));
2396 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2397 s = force_version(s, TRUE);
2398 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2399 PL_nextval[PL_nexttoke].opval = Nullop;
2402 else if (*s == 'v') {
2403 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2404 s = force_version(s, FALSE);
2408 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2409 s = force_version(s, FALSE);
2411 yylval.ival = is_use;
2415 static const char* const exp_name[] =
2416 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2417 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2424 Works out what to call the token just pulled out of the input
2425 stream. The yacc parser takes care of taking the ops we return and
2426 stitching them into a tree.
2432 if read an identifier
2433 if we're in a my declaration
2434 croak if they tried to say my($foo::bar)
2435 build the ops for a my() declaration
2436 if it's an access to a my() variable
2437 are we in a sort block?
2438 croak if my($a); $a <=> $b
2439 build ops for access to a my() variable
2440 if in a dq string, and they've said @foo and we can't find @foo
2442 build ops for a bareword
2443 if we already built the token before, use it.
2448 #pragma segment Perl_yylex
2454 register char *s = PL_bufptr;
2460 SV* tmp = newSVpvs("");
2461 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2462 (IV)CopLINE(PL_curcop),
2463 lex_state_names[PL_lex_state],
2464 exp_name[PL_expect],
2465 pv_display(tmp, s, strlen(s), 0, 60));
2468 /* check if there's an identifier for us to look at */
2469 if (PL_pending_ident)
2470 return REPORT(S_pending_ident(aTHX));
2472 /* no identifier pending identification */
2474 switch (PL_lex_state) {
2476 case LEX_NORMAL: /* Some compilers will produce faster */
2477 case LEX_INTERPNORMAL: /* code if we comment these out. */
2481 /* when we've already built the next token, just pull it out of the queue */
2484 yylval = PL_nextval[PL_nexttoke];
2486 PL_lex_state = PL_lex_defer;
2487 PL_expect = PL_lex_expect;
2488 PL_lex_defer = LEX_NORMAL;
2490 return REPORT(PL_nexttype[PL_nexttoke]);
2492 /* interpolated case modifiers like \L \U, including \Q and \E.
2493 when we get here, PL_bufptr is at the \
2495 case LEX_INTERPCASEMOD:
2497 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2498 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2500 /* handle \E or end of string */
2501 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2503 if (PL_lex_casemods) {
2504 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2505 PL_lex_casestack[PL_lex_casemods] = '\0';
2507 if (PL_bufptr != PL_bufend
2508 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2510 PL_lex_state = LEX_INTERPCONCAT;
2514 if (PL_bufptr != PL_bufend)
2516 PL_lex_state = LEX_INTERPCONCAT;
2520 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2521 "### Saw case modifier\n"); });
2523 if (s[1] == '\\' && s[2] == 'E') {
2525 PL_lex_state = LEX_INTERPCONCAT;
2530 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2531 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2532 if ((*s == 'L' || *s == 'U') &&
2533 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2534 PL_lex_casestack[--PL_lex_casemods] = '\0';
2537 if (PL_lex_casemods > 10)
2538 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2539 PL_lex_casestack[PL_lex_casemods++] = *s;
2540 PL_lex_casestack[PL_lex_casemods] = '\0';
2541 PL_lex_state = LEX_INTERPCONCAT;
2542 PL_nextval[PL_nexttoke].ival = 0;
2545 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2547 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2549 PL_nextval[PL_nexttoke].ival = OP_LC;
2551 PL_nextval[PL_nexttoke].ival = OP_UC;
2553 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2555 Perl_croak(aTHX_ "panic: yylex");
2559 if (PL_lex_starts) {
2562 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2563 if (PL_lex_casemods == 1 && PL_lex_inpat)
2572 case LEX_INTERPPUSH:
2573 return REPORT(sublex_push());
2575 case LEX_INTERPSTART:
2576 if (PL_bufptr == PL_bufend)
2577 return REPORT(sublex_done());
2578 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2579 "### Interpolated variable\n"); });
2581 PL_lex_dojoin = (*PL_bufptr == '@');
2582 PL_lex_state = LEX_INTERPNORMAL;
2583 if (PL_lex_dojoin) {
2584 PL_nextval[PL_nexttoke].ival = 0;
2586 force_ident("\"", '$');
2587 PL_nextval[PL_nexttoke].ival = 0;
2589 PL_nextval[PL_nexttoke].ival = 0;
2591 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2594 if (PL_lex_starts++) {
2596 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2597 if (!PL_lex_casemods && PL_lex_inpat)
2604 case LEX_INTERPENDMAYBE:
2605 if (intuit_more(PL_bufptr)) {
2606 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2612 if (PL_lex_dojoin) {
2613 PL_lex_dojoin = FALSE;
2614 PL_lex_state = LEX_INTERPCONCAT;
2617 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2618 && SvEVALED(PL_lex_repl))
2620 if (PL_bufptr != PL_bufend)
2621 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2622 PL_lex_repl = Nullsv;
2625 case LEX_INTERPCONCAT:
2627 if (PL_lex_brackets)
2628 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2630 if (PL_bufptr == PL_bufend)
2631 return REPORT(sublex_done());
2633 if (SvIVX(PL_linestr) == '\'') {
2634 SV *sv = newSVsv(PL_linestr);
2637 else if ( PL_hints & HINT_NEW_RE )
2638 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2639 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2643 s = scan_const(PL_bufptr);
2645 PL_lex_state = LEX_INTERPCASEMOD;
2647 PL_lex_state = LEX_INTERPSTART;
2650 if (s != PL_bufptr) {
2651 PL_nextval[PL_nexttoke] = yylval;
2654 if (PL_lex_starts++) {
2655 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2656 if (!PL_lex_casemods && PL_lex_inpat)
2669 PL_lex_state = LEX_NORMAL;
2670 s = scan_formline(PL_bufptr);
2671 if (!PL_lex_formbrack)
2677 PL_oldoldbufptr = PL_oldbufptr;
2683 if (isIDFIRST_lazy_if(s,UTF))
2685 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2688 goto fake_eof; /* emulate EOF on ^D or ^Z */
2693 if (PL_lex_brackets) {
2694 yyerror(PL_lex_formbrack
2695 ? "Format not terminated"
2696 : "Missing right curly or square bracket");
2698 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2699 "### Tokener got EOF\n");
2703 if (s++ < PL_bufend)
2704 goto retry; /* ignore stray nulls */
2707 if (!PL_in_eval && !PL_preambled) {
2708 PL_preambled = TRUE;
2709 sv_setpv(PL_linestr,incl_perldb());
2710 if (SvCUR(PL_linestr))
2711 sv_catpvs(PL_linestr,";");
2713 while(AvFILLp(PL_preambleav) >= 0) {
2714 SV *tmpsv = av_shift(PL_preambleav);
2715 sv_catsv(PL_linestr, tmpsv);
2716 sv_catpvs(PL_linestr, ";");
2719 sv_free((SV*)PL_preambleav);
2720 PL_preambleav = NULL;
2722 if (PL_minus_n || PL_minus_p) {
2723 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2725 sv_catpvs(PL_linestr,"chomp;");
2728 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2729 || *PL_splitstr == '"')
2730 && strchr(PL_splitstr + 1, *PL_splitstr))
2731 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2733 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2734 bytes can be used as quoting characters. :-) */
2735 const char *splits = PL_splitstr;
2736 sv_catpvs(PL_linestr, "our @F=split(q\0");
2739 if (*splits == '\\')
2740 sv_catpvn(PL_linestr, splits, 1);
2741 sv_catpvn(PL_linestr, splits, 1);
2742 } while (*splits++);
2743 /* This loop will embed the trailing NUL of
2744 PL_linestr as the last thing it does before
2746 sv_catpvs(PL_linestr, ");");
2750 sv_catpvs(PL_linestr,"our @F=split(' ');");
2754 sv_catpvs(PL_linestr,"use feature ':5.10';");
2755 sv_catpvs(PL_linestr, "\n");
2756 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2757 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2758 PL_last_lop = PL_last_uni = Nullch;
2759 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2760 SV * const sv = newSV(0);
2762 sv_upgrade(sv, SVt_PVMG);
2763 sv_setsv(sv,PL_linestr);
2766 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2771 bof = PL_rsfp ? TRUE : FALSE;
2772 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2775 if (PL_preprocess && !PL_in_eval)
2776 (void)PerlProc_pclose(PL_rsfp);
2777 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2778 PerlIO_clearerr(PL_rsfp);
2780 (void)PerlIO_close(PL_rsfp);
2782 PL_doextract = FALSE;
2784 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2785 sv_setpv(PL_linestr,PL_minus_p
2786 ? ";}continue{print;}" : ";}");
2787 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2788 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2789 PL_last_lop = PL_last_uni = Nullch;
2790 PL_minus_n = PL_minus_p = 0;
2793 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2794 PL_last_lop = PL_last_uni = Nullch;
2795 sv_setpvn(PL_linestr,"",0);
2796 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2798 /* If it looks like the start of a BOM or raw UTF-16,
2799 * check if it in fact is. */
2805 #ifdef PERLIO_IS_STDIO
2806 # ifdef __GNU_LIBRARY__
2807 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2808 # define FTELL_FOR_PIPE_IS_BROKEN
2812 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2813 # define FTELL_FOR_PIPE_IS_BROKEN
2818 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2819 /* This loses the possibility to detect the bof
2820 * situation on perl -P when the libc5 is being used.
2821 * Workaround? Maybe attach some extra state to PL_rsfp?
2824 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2826 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2829 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2830 s = swallow_bom((U8*)s);
2834 /* Incest with pod. */
2835 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2836 sv_setpvn(PL_linestr, "", 0);
2837 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2838 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2839 PL_last_lop = PL_last_uni = Nullch;
2840 PL_doextract = FALSE;
2844 } while (PL_doextract);
2845 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2847 SV * const sv = newSV(0);
2849 sv_upgrade(sv, SVt_PVMG);
2850 sv_setsv(sv,PL_linestr);
2853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2855 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2856 PL_last_lop = PL_last_uni = Nullch;
2857 if (CopLINE(PL_curcop) == 1) {
2858 while (s < PL_bufend && isSPACE(*s))
2860 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2864 if (*s == '#' && *(s+1) == '!')
2866 #ifdef ALTERNATE_SHEBANG
2868 static char const as[] = ALTERNATE_SHEBANG;
2869 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2870 d = s + (sizeof(as) - 1);
2872 #endif /* ALTERNATE_SHEBANG */
2881 while (*d && !isSPACE(*d))
2885 #ifdef ARG_ZERO_IS_SCRIPT
2886 if (ipathend > ipath) {
2888 * HP-UX (at least) sets argv[0] to the script name,
2889 * which makes $^X incorrect. And Digital UNIX and Linux,
2890 * at least, set argv[0] to the basename of the Perl
2891 * interpreter. So, having found "#!", we'll set it right.
2894 = GvSV(gv_fetchpvs("\030", GV_ADD, SVt_PV)); /* $^X */
2895 assert(SvPOK(x) || SvGMAGICAL(x));
2896 if (sv_eq(x, CopFILESV(PL_curcop))) {
2897 sv_setpvn(x, ipath, ipathend - ipath);
2903 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2904 const char * const lstart = SvPV_const(x,llen);
2906 bstart += blen - llen;
2907 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2908 sv_setpvn(x, ipath, ipathend - ipath);
2913 TAINT_NOT; /* $^X is always tainted, but that's OK */
2915 #endif /* ARG_ZERO_IS_SCRIPT */
2920 d = instr(s,"perl -");
2922 d = instr(s,"perl");
2924 /* avoid getting into infinite loops when shebang
2925 * line contains "Perl" rather than "perl" */
2927 for (d = ipathend-4; d >= ipath; --d) {
2928 if ((*d == 'p' || *d == 'P')
2929 && !ibcmp(d, "perl", 4))
2939 #ifdef ALTERNATE_SHEBANG
2941 * If the ALTERNATE_SHEBANG on this system starts with a
2942 * character that can be part of a Perl expression, then if
2943 * we see it but not "perl", we're probably looking at the
2944 * start of Perl code, not a request to hand off to some
2945 * other interpreter. Similarly, if "perl" is there, but
2946 * not in the first 'word' of the line, we assume the line
2947 * contains the start of the Perl program.
2949 if (d && *s != '#') {
2950 const char *c = ipath;
2951 while (*c && !strchr("; \t\r\n\f\v#", *c))
2954 d = Nullch; /* "perl" not in first word; ignore */
2956 *s = '#'; /* Don't try to parse shebang line */
2958 #endif /* ALTERNATE_SHEBANG */
2959 #ifndef MACOS_TRADITIONAL
2964 !instr(s,"indir") &&
2965 instr(PL_origargv[0],"perl"))
2972 while (s < PL_bufend && isSPACE(*s))
2974 if (s < PL_bufend) {
2975 Newxz(newargv,PL_origargc+3,char*);
2977 while (s < PL_bufend && !isSPACE(*s))
2980 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2983 newargv = PL_origargv;
2986 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2988 Perl_croak(aTHX_ "Can't exec %s", ipath);
2992 while (*d && !isSPACE(*d)) d++;
2993 while (SPACE_OR_TAB(*d)) d++;
2996 const bool switches_done = PL_doswitches;
2997 const U32 oldpdb = PL_perldb;
2998 const bool oldn = PL_minus_n;
2999 const bool oldp = PL_minus_p;
3002 if (*d == 'M' || *d == 'm' || *d == 'C') {
3003 const char * const m = d;
3004 while (*d && !isSPACE(*d)) d++;
3005 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3008 d = moreswitches(d);
3010 if (PL_doswitches && !switches_done) {
3011 int argc = PL_origargc;
3012 char **argv = PL_origargv;
3015 } while (argc && argv[0][0] == '-' && argv[0][1]);
3016 init_argv_symbols(argc,argv);
3018 if ((PERLDB_LINE && !oldpdb) ||
3019 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3020 /* if we have already added "LINE: while (<>) {",
3021 we must not do it again */
3023 sv_setpvn(PL_linestr, "", 0);
3024 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3025 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3026 PL_last_lop = PL_last_uni = Nullch;
3027 PL_preambled = FALSE;
3029 (void)gv_fetchfile(PL_origfilename);
3036 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3038 PL_lex_state = LEX_FORMLINE;
3043 #ifdef PERL_STRICT_CR
3044 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3046 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3048 case ' ': case '\t': case '\f': case 013:
3049 #ifdef MACOS_TRADITIONAL
3056 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3057 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3058 /* handle eval qq[#line 1 "foo"\n ...] */
3059 CopLINE_dec(PL_curcop);
3063 while (s < d && *s != '\n')
3067 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3068 Perl_croak(aTHX_ "panic: input overflow");
3070 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3072 PL_lex_state = LEX_FORMLINE;
3082 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3090 while (s < PL_bufend && SPACE_OR_TAB(*s))
3093 if (strnEQ(s,"=>",2)) {
3094 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3095 DEBUG_T( { S_printbuf(aTHX_
3096 "### Saw unary minus before =>, forcing word %s\n", s);
3098 OPERATOR('-'); /* unary minus */
3100 PL_last_uni = PL_oldbufptr;
3102 case 'r': ftst = OP_FTEREAD; break;
3103 case 'w': ftst = OP_FTEWRITE; break;
3104 case 'x': ftst = OP_FTEEXEC; break;
3105 case 'o': ftst = OP_FTEOWNED; break;
3106 case 'R': ftst = OP_FTRREAD; break;
3107 case 'W': ftst = OP_FTRWRITE; break;
3108 case 'X': ftst = OP_FTREXEC; break;
3109 case 'O': ftst = OP_FTROWNED; break;
3110 case 'e': ftst = OP_FTIS; break;
3111 case 'z': ftst = OP_FTZERO; break;
3112 case 's': ftst = OP_FTSIZE; break;
3113 case 'f': ftst = OP_FTFILE; break;
3114 case 'd': ftst = OP_FTDIR; break;
3115 case 'l': ftst = OP_FTLINK; break;
3116 case 'p': ftst = OP_FTPIPE; break;
3117 case 'S': ftst = OP_FTSOCK; break;
3118 case 'u': ftst = OP_FTSUID; break;
3119 case 'g': ftst = OP_FTSGID; break;
3120 case 'k': ftst = OP_FTSVTX; break;
3121 case 'b': ftst = OP_FTBLK; break;
3122 case 'c': ftst = OP_FTCHR; break;
3123 case 't': ftst = OP_FTTTY; break;
3124 case 'T': ftst = OP_FTTEXT; break;
3125 case 'B': ftst = OP_FTBINARY; break;
3126 case 'M': case 'A': case 'C':
3127 gv_fetchpvs("\024",GV_ADD, SVt_PV);
3129 case 'M': ftst = OP_FTMTIME; break;
3130 case 'A': ftst = OP_FTATIME; break;
3131 case 'C': ftst = OP_FTCTIME; break;
3139 PL_last_lop_op = (OPCODE)ftst;
3140 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3141 "### Saw file test %c\n", (int)tmp);
3146 /* Assume it was a minus followed by a one-letter named
3147 * subroutine call (or a -bareword), then. */
3148 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3149 "### '-%c' looked like a file test but was not\n",
3156 const char tmp = *s++;
3159 if (PL_expect == XOPERATOR)
3164 else if (*s == '>') {
3167 if (isIDFIRST_lazy_if(s,UTF)) {
3168 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3176 if (PL_expect == XOPERATOR)
3179 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3181 OPERATOR('-'); /* unary minus */
3187 const char tmp = *s++;
3190 if (PL_expect == XOPERATOR)
3195 if (PL_expect == XOPERATOR)
3198 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3205 if (PL_expect != XOPERATOR) {
3206 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3207 PL_expect = XOPERATOR;
3208 force_ident(PL_tokenbuf, '*');
3221 if (PL_expect == XOPERATOR) {
3225 PL_tokenbuf[0] = '%';
3226 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3227 if (!PL_tokenbuf[1]) {
3230 PL_pending_ident = '%';
3241 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3242 && FEATURE_IS_ENABLED("~~"))
3249 const char tmp = *s++;
3255 goto just_a_word_zero_gv;
3258 switch (PL_expect) {
3261 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3263 PL_bufptr = s; /* update in case we back off */
3269 PL_expect = XTERMBLOCK;
3273 while (isIDFIRST_lazy_if(s,UTF)) {
3275 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3276 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3277 if (tmp < 0) tmp = -tmp;
3293 d = scan_str(d,TRUE,TRUE);
3295 /* MUST advance bufptr here to avoid bogus
3296 "at end of line" context messages from yyerror().
3298 PL_bufptr = s + len;
3299 yyerror("Unterminated attribute parameter in attribute list");
3302 return REPORT(0); /* EOF indicator */
3306 SV *sv = newSVpvn(s, len);
3307 sv_catsv(sv, PL_lex_stuff);
3308 attrs = append_elem(OP_LIST, attrs,
3309 newSVOP(OP_CONST, 0, sv));
3310 SvREFCNT_dec(PL_lex_stuff);
3311 PL_lex_stuff = Nullsv;
3314 if (len == 6 && strnEQ(s, "unique", len)) {
3315 if (PL_in_my == KEY_our)
3317 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3319 ; /* skip to avoid loading attributes.pm */
3322 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3325 /* NOTE: any CV attrs applied here need to be part of
3326 the CVf_BUILTIN_ATTRS define in cv.h! */
3327 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3328 CvLVALUE_on(PL_compcv);
3329 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3330 CvLOCKED_on(PL_compcv);
3331 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3332 CvMETHOD_on(PL_compcv);
3333 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3334 CvASSERTION_on(PL_compcv);
3335 /* After we've set the flags, it could be argued that
3336 we don't need to do the attributes.pm-based setting
3337 process, and shouldn't bother appending recognized
3338 flags. To experiment with that, uncomment the
3339 following "else". (Note that's already been
3340 uncommented. That keeps the above-applied built-in
3341 attributes from being intercepted (and possibly
3342 rejected) by a package's attribute routines, but is
3343 justified by the performance win for the common case
3344 of applying only built-in attributes.) */
3346 attrs = append_elem(OP_LIST, attrs,
3347 newSVOP(OP_CONST, 0,
3351 if (*s == ':' && s[1] != ':')
3354 break; /* require real whitespace or :'s */
3358 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3359 if (*s != ';' && *s != '}' && *s != tmp
3360 && (tmp != '=' || *s != ')')) {
3361 const char q = ((*s == '\'') ? '"' : '\'');
3362 /* If here for an expression, and parsed no attrs, back
3364 if (tmp == '=' && !attrs) {
3368 /* MUST advance bufptr here to avoid bogus "at end of line"
3369 context messages from yyerror().
3373 ? Perl_form(aTHX_ "Invalid separator character "
3374 "%c%c%c in attribute list", q, *s, q)
3375 : "Unterminated attribute list" );
3383 PL_nextval[PL_nexttoke].opval = attrs;
3391 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3392 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3400 const char tmp = *s++;
3405 const char tmp = *s++;
3413 if (PL_lex_brackets <= 0)
3414 yyerror("Unmatched right square bracket");
3417 if (PL_lex_state == LEX_INTERPNORMAL) {
3418 if (PL_lex_brackets == 0) {
3419 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3420 PL_lex_state = LEX_INTERPEND;
3427 if (PL_lex_brackets > 100) {
3428 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3430 switch (PL_expect) {
3432 if (PL_lex_formbrack) {
3436 if (PL_oldoldbufptr == PL_last_lop)
3437 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3439 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3440 OPERATOR(HASHBRACK);
3442 while (s < PL_bufend && SPACE_OR_TAB(*s))
3445 PL_tokenbuf[0] = '\0';
3446 if (d < PL_bufend && *d == '-') {
3447 PL_tokenbuf[0] = '-';
3449 while (d < PL_bufend && SPACE_OR_TAB(*d))
3452 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3453 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3455 while (d < PL_bufend && SPACE_OR_TAB(*d))
3458 const char minus = (PL_tokenbuf[0] == '-');
3459 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3467 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3472 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3477 if (PL_oldoldbufptr == PL_last_lop)
3478 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3480 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3483 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3485 /* This hack is to get the ${} in the message. */
3487 yyerror("syntax error");
3490 OPERATOR(HASHBRACK);
3492 /* This hack serves to disambiguate a pair of curlies
3493 * as being a block or an anon hash. Normally, expectation
3494 * determines that, but in cases where we're not in a
3495 * position to expect anything in particular (like inside
3496 * eval"") we have to resolve the ambiguity. This code
3497 * covers the case where the first term in the curlies is a
3498 * quoted string. Most other cases need to be explicitly
3499 * disambiguated by prepending a "+" before the opening
3500 * curly in order to force resolution as an anon hash.
3502 * XXX should probably propagate the outer expectation
3503 * into eval"" to rely less on this hack, but that could
3504 * potentially break current behavior of eval"".
3508 if (*s == '\'' || *s == '"' || *s == '`') {
3509 /* common case: get past first string, handling escapes */
3510 for (t++; t < PL_bufend && *t != *s;)
3511 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3515 else if (*s == 'q') {
3518 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3521 /* skip q//-like construct */
3523 char open, close, term;
3526 while (t < PL_bufend && isSPACE(*t))
3528 /* check for q => */
3529 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3530 OPERATOR(HASHBRACK);
3534 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3538 for (t++; t < PL_bufend; t++) {
3539 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3541 else if (*t == open)
3545 for (t++; t < PL_bufend; t++) {
3546 if (*t == '\\' && t+1 < PL_bufend)
3548 else if (*t == close && --brackets <= 0)
3550 else if (*t == open)
3557 /* skip plain q word */
3558 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3561 else if (isALNUM_lazy_if(t,UTF)) {
3563 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3566 while (t < PL_bufend && isSPACE(*t))
3568 /* if comma follows first term, call it an anon hash */
3569 /* XXX it could be a comma expression with loop modifiers */
3570 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3571 || (*t == '=' && t[1] == '>')))
3572 OPERATOR(HASHBRACK);
3573 if (PL_expect == XREF)
3576 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3582 yylval.ival = CopLINE(PL_curcop);
3583 if (isSPACE(*s) || *s == '#')
3584 PL_copline = NOLINE; /* invalidate current command line number */
3589 if (PL_lex_brackets <= 0)
3590 yyerror("Unmatched right curly bracket");
3592 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3593 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3594 PL_lex_formbrack = 0;
3595 if (PL_lex_state == LEX_INTERPNORMAL) {
3596 if (PL_lex_brackets == 0) {
3597 if (PL_expect & XFAKEBRACK) {
3598 PL_expect &= XENUMMASK;
3599 PL_lex_state = LEX_INTERPEND;
3601 return yylex(); /* ignore fake brackets */
3603 if (*s == '-' && s[1] == '>')
3604 PL_lex_state = LEX_INTERPENDMAYBE;
3605 else if (*s != '[' && *s != '{')
3606 PL_lex_state = LEX_INTERPEND;
3609 if (PL_expect & XFAKEBRACK) {
3610 PL_expect &= XENUMMASK;
3612 return yylex(); /* ignore fake brackets */
3621 if (PL_expect == XOPERATOR) {
3622 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3623 && isIDFIRST_lazy_if(s,UTF))
3625 CopLINE_dec(PL_curcop);
3626 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3627 CopLINE_inc(PL_curcop);
3632 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3634 PL_expect = XOPERATOR;
3635 force_ident(PL_tokenbuf, '&');
3639 yylval.ival = (OPpENTERSUB_AMPER<<8);
3651 const char tmp = *s++;
3658 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3659 && strchr("+-*/%.^&|<",tmp))
3660 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3661 "Reversed %c= operator",(int)tmp);
3663 if (PL_expect == XSTATE && isALPHA(tmp) &&
3664 (s == PL_linestart+1 || s[-2] == '\n') )
3666 if (PL_in_eval && !PL_rsfp) {
3671 if (strnEQ(s,"=cut",4)) {
3685 PL_doextract = TRUE;
3689 if (PL_lex_brackets < PL_lex_formbrack) {
3691 #ifdef PERL_STRICT_CR
3692 for (t = s; SPACE_OR_TAB(*t); t++) ;
3694 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3696 if (*t == '\n' || *t == '#') {
3707 const char tmp = *s++;
3709 /* was this !=~ where !~ was meant?
3710 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3712 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3713 const char *t = s+1;
3715 while (t < PL_bufend && isSPACE(*t))
3718 if (*t == '/' || *t == '?' ||
3719 ((*t == 'm' || *t == 's' || *t == 'y')
3720 && !isALNUM(t[1])) ||
3721 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3722 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3723 "!=~ should be !~");
3733 if (PL_expect != XOPERATOR) {
3734 if (s[1] != '<' && !strchr(s,'>'))
3737 s = scan_heredoc(s);
3739 s = scan_inputsymbol(s);
3740 TERM(sublex_start());
3746 SHop(OP_LEFT_SHIFT);
3760 const char tmp = *s++;
3762 SHop(OP_RIGHT_SHIFT);
3772 if (PL_expect == XOPERATOR) {
3773 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3775 deprecate_old(commaless_variable_list);
3776 return REPORT(','); /* grandfather non-comma-format format */
3780 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3781 PL_tokenbuf[0] = '@';
3782 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3783 sizeof PL_tokenbuf - 1, FALSE);
3784 if (PL_expect == XOPERATOR)
3785 no_op("Array length", s);
3786 if (!PL_tokenbuf[1])
3788 PL_expect = XOPERATOR;
3789 PL_pending_ident = '#';
3793 PL_tokenbuf[0] = '$';
3794 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3795 sizeof PL_tokenbuf - 1, FALSE);
3796 if (PL_expect == XOPERATOR)
3798 if (!PL_tokenbuf[1]) {
3800 yyerror("Final $ should be \\$ or $name");
3804 /* This kludge not intended to be bulletproof. */
3805 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3806 yylval.opval = newSVOP(OP_CONST, 0,
3807 newSViv(PL_compiling.cop_arybase));
3808 yylval.opval->op_private = OPpCONST_ARYBASE;
3814 const char tmp = *s;
3815 if (PL_lex_state == LEX_NORMAL)
3818 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3819 && intuit_more(s)) {
3821 PL_tokenbuf[0] = '@';
3822 if (ckWARN(WARN_SYNTAX)) {
3825 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3828 PL_bufptr = skipspace(PL_bufptr);
3829 while (t < PL_bufend && *t != ']')
3831 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3832 "Multidimensional syntax %.*s not supported",
3833 (int)((t - PL_bufptr) + 1), PL_bufptr);
3837 else if (*s == '{') {
3839 PL_tokenbuf[0] = '%';
3840 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3841 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3843 char tmpbuf[sizeof PL_tokenbuf];
3844 for (t++; isSPACE(*t); t++) ;
3845 if (isIDFIRST_lazy_if(t,UTF)) {
3847 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3849 for (; isSPACE(*t); t++) ;
3850 if (*t == ';' && get_cv(tmpbuf, FALSE))
3851 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3852 "You need to quote \"%s\"",
3859 PL_expect = XOPERATOR;
3860 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3861 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3862 if (!islop || PL_last_lop_op == OP_GREPSTART)
3863 PL_expect = XOPERATOR;
3864 else if (strchr("$@\"'`q", *s))
3865 PL_expect = XTERM; /* e.g. print $fh "foo" */
3866 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3867 PL_expect = XTERM; /* e.g. print $fh &sub */
3868 else if (isIDFIRST_lazy_if(s,UTF)) {
3869 char tmpbuf[sizeof PL_tokenbuf];
3871 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3872 if ((t2 = keyword(tmpbuf, len))) {
3873 /* binary operators exclude handle interpretations */
3885 PL_expect = XTERM; /* e.g. print $fh length() */
3890 PL_expect = XTERM; /* e.g. print $fh subr() */
3893 else if (isDIGIT(*s))
3894 PL_expect = XTERM; /* e.g. print $fh 3 */
3895 else if (*s == '.' && isDIGIT(s[1]))
3896 PL_expect = XTERM; /* e.g. print $fh .3 */
3897 else if ((*s == '?' || *s == '-' || *s == '+')
3898 && !isSPACE(s[1]) && s[1] != '=')
3899 PL_expect = XTERM; /* e.g. print $fh -1 */
3900 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3902 PL_expect = XTERM; /* e.g. print $fh /.../
3903 XXX except DORDOR operator
3905 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3907 PL_expect = XTERM; /* print $fh <<"EOF" */
3910 PL_pending_ident = '$';
3914 if (PL_expect == XOPERATOR)
3916 PL_tokenbuf[0] = '@';
3917 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3918 if (!PL_tokenbuf[1]) {
3921 if (PL_lex_state == LEX_NORMAL)
3923 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3925 PL_tokenbuf[0] = '%';
3927 /* Warn about @ where they meant $. */
3928 if (*s == '[' || *s == '{') {
3929 if (ckWARN(WARN_SYNTAX)) {
3930 const char *t = s + 1;
3931 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3933 if (*t == '}' || *t == ']') {
3935 PL_bufptr = skipspace(PL_bufptr);
3936 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3937 "Scalar value %.*s better written as $%.*s",
3938 (int)(t-PL_bufptr), PL_bufptr,
3939 (int)(t-PL_bufptr-1), PL_bufptr+1);
3944 PL_pending_ident = '@';
3947 case '/': /* may be division, defined-or, or pattern */
3948 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3952 case '?': /* may either be conditional or pattern */
3953 if(PL_expect == XOPERATOR) {
3961 /* A // operator. */
3971 /* Disable warning on "study /blah/" */
3972 if (PL_oldoldbufptr == PL_last_uni
3973 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3974 || memNE(PL_last_uni, "study", 5)
3975 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3978 s = scan_pat(s,OP_MATCH);
3979 TERM(sublex_start());
3983 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3984 #ifdef PERL_STRICT_CR
3987 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3989 && (s == PL_linestart || s[-1] == '\n') )
3991 PL_lex_formbrack = 0;
3995 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4001 yylval.ival = OPf_SPECIAL;
4007 if (PL_expect != XOPERATOR)
4012 case '0': case '1': case '2': case '3': case '4':
4013 case '5': case '6': case '7': case '8': case '9':
4014 s = scan_num(s, &yylval);
4015 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4016 if (PL_expect == XOPERATOR)
4021 s = scan_str(s,FALSE,FALSE);
4022 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4023 if (PL_expect == XOPERATOR) {
4024 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4026 deprecate_old(commaless_variable_list);
4027 return REPORT(','); /* grandfather non-comma-format format */
4033 missingterm((char*)0);
4034 yylval.ival = OP_CONST;
4035 TERM(sublex_start());
4038 s = scan_str(s,FALSE,FALSE);
4039 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4040 if (PL_expect == XOPERATOR) {
4041 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4043 deprecate_old(commaless_variable_list);
4044 return REPORT(','); /* grandfather non-comma-format format */
4050 missingterm((char*)0);
4051 yylval.ival = OP_CONST;
4052 /* FIXME. I think that this can be const if char *d is replaced by
4053 more localised variables. */
4054 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4055 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4056 yylval.ival = OP_STRINGIFY;
4060 TERM(sublex_start());
4063 s = scan_str(s,FALSE,FALSE);
4064 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4065 if (PL_expect == XOPERATOR)
4066 no_op("Backticks",s);
4068 missingterm((char*)0);
4069 yylval.ival = OP_BACKTICK;
4071 TERM(sublex_start());
4075 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4076 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4078 if (PL_expect == XOPERATOR)
4079 no_op("Backslash",s);
4083 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4084 char *start = s + 2;
4085 while (isDIGIT(*start) || *start == '_')
4087 if (*start == '.' && isDIGIT(start[1])) {
4088 s = scan_num(s, &yylval);
4091 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4092 else if (!isALPHA(*start) && (PL_expect == XTERM
4093 || PL_expect == XREF || PL_expect == XSTATE
4094 || PL_expect == XTERMORDORDOR)) {
4095 const char c = *start;
4098 gv = gv_fetchpv(s, 0, SVt_PVCV);
4101 s = scan_num(s, &yylval);
4108 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4144 I32 orig_keyword = 0;
4149 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4151 /* Some keywords can be followed by any delimiter, including ':' */
4152 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4153 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4154 (PL_tokenbuf[0] == 'q' &&
4155 strchr("qwxr", PL_tokenbuf[1])))));
4157 /* x::* is just a word, unless x is "CORE" */
4158 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4162 while (d < PL_bufend && isSPACE(*d))
4163 d++; /* no comments skipped here, or s### is misparsed */
4165 /* Is this a label? */
4166 if (!tmp && PL_expect == XSTATE
4167 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4169 yylval.pval = savepv(PL_tokenbuf);
4174 /* Check for keywords */
4175 tmp = keyword(PL_tokenbuf, len);
4177 /* Is this a word before a => operator? */
4178 if (*d == '=' && d[1] == '>') {
4181 = (OP*)newSVOP(OP_CONST, 0,
4182 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4183 yylval.opval->op_private = OPpCONST_BARE;
4187 if (tmp < 0) { /* second-class keyword? */
4188 GV *ogv = NULL; /* override (winner) */
4189 GV *hgv = NULL; /* hidden (loser) */
4190 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4192 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
4195 if (GvIMPORTED_CV(gv))
4197 else if (! CvMETHOD(cv))
4201 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4202 (gv = *gvp) != (GV*)&PL_sv_undef &&
4203 GvCVu(gv) && GvIMPORTED_CV(gv))
4210 tmp = 0; /* overridden by import or by GLOBAL */
4213 && -tmp==KEY_lock /* XXX generalizable kludge */
4215 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
4217 tmp = 0; /* any sub overrides "weak" keyword */
4219 else { /* no override */
4221 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4222 Perl_warner(aTHX_ packWARN(WARN_MISC),
4223 "dump() better written as CORE::dump()");
4227 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4228 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4229 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4230 "Ambiguous call resolved as CORE::%s(), %s",
4231 GvENAME(hgv), "qualify as such or use &");
4238 default: /* not a keyword */
4239 /* Trade off - by using this evil construction we can pull the
4240 variable gv into the block labelled keylookup. If not, then
4241 we have to give it function scope so that the goto from the
4242 earlier ':' case doesn't bypass the initialisation. */
4244 just_a_word_zero_gv:
4252 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4255 /* Get the rest if it looks like a package qualifier */
4257 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4259 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4262 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4263 *s == '\'' ? "'" : "::");
4268 if (PL_expect == XOPERATOR) {
4269 if (PL_bufptr == PL_linestart) {
4270 CopLINE_dec(PL_curcop);
4271 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4272 CopLINE_inc(PL_curcop);
4275 no_op("Bareword",s);
4278 /* Look for a subroutine with this name in current package,
4279 unless name is "Foo::", in which case Foo is a bearword
4280 (and a package name). */
4283 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4285 if (ckWARN(WARN_BAREWORD)
4286 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
4287 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4288 "Bareword \"%s\" refers to nonexistent package",
4291 PL_tokenbuf[len] = '\0';
4298 /* Mustn't actually add anything to a symbol table.
4299 But also don't want to "initialise" any placeholder
4300 constants that might already be there into full
4301 blown PVGVs with attached PVCV. */
4302 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
4303 GV_NOADD_NOINIT, SVt_PVCV);
4307 /* if we saw a global override before, get the right name */
4310 sv = newSVpvs("CORE::GLOBAL::");
4311 sv_catpv(sv,PL_tokenbuf);
4314 /* If len is 0, newSVpv does strlen(), which is correct.
4315 If len is non-zero, then it will be the true length,
4316 and so the scalar will be created correctly. */
4317 sv = newSVpv(PL_tokenbuf,len);
4320 /* Presume this is going to be a bareword of some sort. */
4323 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4324 yylval.opval->op_private = OPpCONST_BARE;
4325 /* UTF-8 package name? */
4326 if (UTF && !IN_BYTES &&
4327 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4330 /* And if "Foo::", then that's what it certainly is. */
4335 /* Do the explicit type check so that we don't need to force
4336 the initialisation of the symbol table to have a real GV.
4337 Beware - gv may not really be a PVGV, cv may not really be
4338 a PVCV, (because of the space optimisations that gv_init
4339 understands) But they're true if for this symbol there is
4340 respectively a typeglob and a subroutine.
4342 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4343 /* Real typeglob, so get the real subroutine: */
4345 /* A proxy for a subroutine in this package? */
4346 : SvOK(gv) ? (CV *) gv : NULL)
4349 /* See if it's the indirect object for a list operator. */
4351 if (PL_oldoldbufptr &&
4352 PL_oldoldbufptr < PL_bufptr &&
4353 (PL_oldoldbufptr == PL_last_lop
4354 || PL_oldoldbufptr == PL_last_uni) &&
4355 /* NO SKIPSPACE BEFORE HERE! */
4356 (PL_expect == XREF ||
4357 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4359 bool immediate_paren = *s == '(';
4361 /* (Now we can afford to cross potential line boundary.) */
4364 /* Two barewords in a row may indicate method call. */
4366 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4367 (tmp = intuit_method(s, gv, cv)))
4370 /* If not a declared subroutine, it's an indirect object. */
4371 /* (But it's an indir obj regardless for sort.) */
4372 /* Also, if "_" follows a filetest operator, it's a bareword */
4375 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4377 (PL_last_lop_op != OP_MAPSTART &&
4378 PL_last_lop_op != OP_GREPSTART))))
4379 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4380 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4383 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4388 PL_expect = XOPERATOR;
4391 /* Is this a word before a => operator? */
4392 if (*s == '=' && s[1] == '>' && !pkgname) {
4394 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4395 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4396 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4400 /* If followed by a paren, it's certainly a subroutine. */
4404 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4405 if (*d == ')' && (sv = gv_const_sv(gv))) {
4410 PL_nextval[PL_nexttoke].opval = yylval.opval;
4411 PL_expect = XOPERATOR;
4417 /* If followed by var or block, call it a method (unless sub) */
4419 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4420 PL_last_lop = PL_oldbufptr;
4421 PL_last_lop_op = OP_METHOD;
4425 /* If followed by a bareword, see if it looks like indir obj. */
4428 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4429 && (tmp = intuit_method(s, gv, cv)))
4432 /* Not a method, so call it a subroutine (if defined) */
4435 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4436 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4437 "Ambiguous use of -%s resolved as -&%s()",
4438 PL_tokenbuf, PL_tokenbuf);
4439 /* Check for a constant sub */
4440 if ((sv = gv_const_sv(gv))) {
4442 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4443 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4444 yylval.opval->op_private = 0;
4448 /* Resolve to GV now. */
4449 if (SvTYPE(gv) != SVt_PVGV) {
4450 gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV);
4451 assert (SvTYPE(gv) == SVt_PVGV);
4452 /* cv must have been some sort of placeholder, so
4453 now needs replacing with a real code reference. */
4457 op_free(yylval.opval);
4458 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4459 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4460 PL_last_lop = PL_oldbufptr;
4461 PL_last_lop_op = OP_ENTERSUB;
4462 /* Is there a prototype? */
4465 const char *proto = SvPV_const((SV*)cv, len);
4468 if (*proto == '$' && proto[1] == '\0')
4470 while (*proto == ';')
4472 if (*proto == '&' && *s == '{') {
4473 sv_setpv(PL_subname, PL_curstash ?
4474 "__ANON__" : "__ANON__::__ANON__");
4478 PL_nextval[PL_nexttoke].opval = yylval.opval;
4484 /* Call it a bare word */
4486 if (PL_hints & HINT_STRICT_SUBS)
4487 yylval.opval->op_private |= OPpCONST_STRICT;
4490 if (lastchar != '-') {
4491 if (ckWARN(WARN_RESERVED)) {
4492 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4493 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4494 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4501 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4502 && ckWARN_d(WARN_AMBIGUOUS)) {
4503 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4504 "Operator or semicolon missing before %c%s",
4505 lastchar, PL_tokenbuf);
4506 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4507 "Ambiguous use of %c resolved as operator %c",
4508 lastchar, lastchar);
4514 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4515 newSVpv(CopFILE(PL_curcop),0));
4519 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4520 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4523 case KEY___PACKAGE__:
4524 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4526 ? newSVhek(HvNAME_HEK(PL_curstash))
4533 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4534 const char *pname = "main";
4535 if (PL_tokenbuf[2] == 'D')
4536 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4537 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4541 GvIOp(gv) = newIO();
4542 IoIFP(GvIOp(gv)) = PL_rsfp;
4543 #if defined(HAS_FCNTL) && defined(F_SETFD)
4545 const int fd = PerlIO_fileno(PL_rsfp);
4546 fcntl(fd,F_SETFD,fd >= 3);
4549 /* Mark this internal pseudo-handle as clean */
4550 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4552 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4553 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4554 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4556 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4557 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4558 /* if the script was opened in binmode, we need to revert
4559 * it to text mode for compatibility; but only iff it has CRs
4560 * XXX this is a questionable hack at best. */
4561 if (PL_bufend-PL_bufptr > 2
4562 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4565 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4566 loc = PerlIO_tell(PL_rsfp);
4567 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4570 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4572 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4573 #endif /* NETWARE */
4574 #ifdef PERLIO_IS_STDIO /* really? */
4575 # if defined(__BORLANDC__)
4576 /* XXX see note in do_binmode() */
4577 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4581 PerlIO_seek(PL_rsfp, loc, 0);
4585 #ifdef PERLIO_LAYERS
4588 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4589 else if (PL_encoding) {
4596 XPUSHs(PL_encoding);
4598 call_method("name", G_SCALAR);
4602 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4603 Perl_form(aTHX_ ":encoding(%"SVf")",
4621 if (PL_expect == XSTATE) {
4628 if (*s == ':' && s[1] == ':') {
4631 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4632 if (!(tmp = keyword(PL_tokenbuf, len)))
4633 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4636 else if (tmp == KEY_require || tmp == KEY_do)
4637 /* that's a way to remember we saw "CORE::" */
4650 LOP(OP_ACCEPT,XTERM);
4656 LOP(OP_ATAN2,XTERM);
4662 LOP(OP_BINMODE,XTERM);
4665 LOP(OP_BLESS,XTERM);
4674 /* When 'use switch' is in effect, continue has a dual
4675 life as a control operator. */
4677 if (!FEATURE_IS_ENABLED("switch"))
4680 /* We have to disambiguate the two senses of
4681 "continue". If the next token is a '{' then
4682 treat it as the start of a continue block;
4683 otherwise treat it as a control operator.
4694 (void)gv_fetchpvs("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4711 if (!PL_cryptseen) {
4712 PL_cryptseen = TRUE;
4716 LOP(OP_CRYPT,XTERM);
4719 LOP(OP_CHMOD,XTERM);
4722 LOP(OP_CHOWN,XTERM);
4725 LOP(OP_CONNECT,XTERM);
4744 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4745 if (orig_keyword == KEY_do) {
4754 PL_hints |= HINT_BLOCK_SCOPE;
4764 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4765 LOP(OP_DBMOPEN,XTERM);
4771 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4778 yylval.ival = CopLINE(PL_curcop);
4792 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4793 UNIBRACK(OP_ENTEREVAL);
4811 case KEY_endhostent:
4817 case KEY_endservent:
4820 case KEY_endprotoent:
4831 yylval.ival = CopLINE(PL_curcop);
4833 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4835 if ((PL_bufend - p) >= 3 &&
4836 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4838 else if ((PL_bufend - p) >= 4 &&
4839 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4842 if (isIDFIRST_lazy_if(p,UTF)) {
4843 p = scan_ident(p, PL_bufend,
4844 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4848 Perl_croak(aTHX_ "Missing $ on loop variable");
4853 LOP(OP_FORMLINE,XTERM);
4859 LOP(OP_FCNTL,XTERM);
4865 LOP(OP_FLOCK,XTERM);
4874 LOP(OP_GREPSTART, XREF);
4877 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4892 case KEY_getpriority:
4893 LOP(OP_GETPRIORITY,XTERM);
4895 case KEY_getprotobyname:
4898 case KEY_getprotobynumber:
4899 LOP(OP_GPBYNUMBER,XTERM);
4901 case KEY_getprotoent:
4913 case KEY_getpeername:
4914 UNI(OP_GETPEERNAME);
4916 case KEY_gethostbyname:
4919 case KEY_gethostbyaddr:
4920 LOP(OP_GHBYADDR,XTERM);
4922 case KEY_gethostent:
4925 case KEY_getnetbyname:
4928 case KEY_getnetbyaddr:
4929 LOP(OP_GNBYADDR,XTERM);
4934 case KEY_getservbyname:
4935 LOP(OP_GSBYNAME,XTERM);
4937 case KEY_getservbyport:
4938 LOP(OP_GSBYPORT,XTERM);
4940 case KEY_getservent:
4943 case KEY_getsockname:
4944 UNI(OP_GETSOCKNAME);
4946 case KEY_getsockopt:
4947 LOP(OP_GSOCKOPT,XTERM);
4962 yylval.ival = CopLINE(PL_curcop);
4973 yylval.ival = CopLINE(PL_curcop);
4977 LOP(OP_INDEX,XTERM);
4983 LOP(OP_IOCTL,XTERM);
4995 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5027 LOP(OP_LISTEN,XTERM);
5036 s = scan_pat(s,OP_MATCH);
5037 TERM(sublex_start());
5040 LOP(OP_MAPSTART, XREF);
5043 LOP(OP_MKDIR,XTERM);
5046 LOP(OP_MSGCTL,XTERM);
5049 LOP(OP_MSGGET,XTERM);
5052 LOP(OP_MSGRCV,XTERM);
5055 LOP(OP_MSGSND,XTERM);
5061 if (isIDFIRST_lazy_if(s,UTF)) {
5062 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5063 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5065 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5066 if (!PL_in_my_stash) {
5069 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5077 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5084 s = tokenize_use(0, s);
5088 if (*s == '(' || (s = skipspace(s), *s == '('))
5095 if (isIDFIRST_lazy_if(s,UTF)) {
5097 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5098 for (t=d; *t && isSPACE(*t); t++) ;
5099 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5101 && !(t[0] == '=' && t[1] == '>')
5103 int len = (int)(d-s);
5104 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5105 "Precedence problem: open %.*s should be open(%.*s)",
5112 yylval.ival = OP_OR;
5122 LOP(OP_OPEN_DIR,XTERM);
5125 checkcomma(s,PL_tokenbuf,"filehandle");
5129 checkcomma(s,PL_tokenbuf,"filehandle");
5148 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5152 LOP(OP_PIPE_OP,XTERM);
5155 s = scan_str(s,FALSE,FALSE);
5157 missingterm((char*)0);
5158 yylval.ival = OP_CONST;
5159 TERM(sublex_start());
5165 s = scan_str(s,FALSE,FALSE);
5167 missingterm((char*)0);
5168 PL_expect = XOPERATOR;
5170 if (SvCUR(PL_lex_stuff)) {
5173 d = SvPV_force(PL_lex_stuff, len);
5176 for (; isSPACE(*d) && len; --len, ++d) ;
5179 if (!warned && ckWARN(WARN_QW)) {
5180 for (; !isSPACE(*d) && len; --len, ++d) {
5182 Perl_warner(aTHX_ packWARN(WARN_QW),
5183 "Possible attempt to separate words with commas");
5186 else if (*d == '#') {
5187 Perl_warner(aTHX_ packWARN(WARN_QW),
5188 "Possible attempt to put comments in qw() list");
5194 for (; !isSPACE(*d) && len; --len, ++d) ;
5196 sv = newSVpvn(b, d-b);
5197 if (DO_UTF8(PL_lex_stuff))
5199 words = append_elem(OP_LIST, words,
5200 newSVOP(OP_CONST, 0, tokeq(sv)));
5204 PL_nextval[PL_nexttoke].opval = words;
5209 SvREFCNT_dec(PL_lex_stuff);
5210 PL_lex_stuff = Nullsv;
5216 s = scan_str(s,FALSE,FALSE);
5218 missingterm((char*)0);
5219 yylval.ival = OP_STRINGIFY;
5220 if (SvIVX(PL_lex_stuff) == '\'')
5221 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5222 TERM(sublex_start());
5225 s = scan_pat(s,OP_QR);
5226 TERM(sublex_start());
5229 s = scan_str(s,FALSE,FALSE);
5231 missingterm((char*)0);
5232 yylval.ival = OP_BACKTICK;
5234 TERM(sublex_start());
5242 s = force_version(s, FALSE);
5244 else if (*s != 'v' || !isDIGIT(s[1])
5245 || (s = force_version(s, TRUE), *s == 'v'))
5247 *PL_tokenbuf = '\0';
5248 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5249 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5250 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5252 yyerror("<> should be quotes");
5254 if (orig_keyword == KEY_require) {
5262 PL_last_uni = PL_oldbufptr;
5263 PL_last_lop_op = OP_REQUIRE;
5265 return REPORT( (int)REQUIRE );
5271 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5275 LOP(OP_RENAME,XTERM);
5284 LOP(OP_RINDEX,XTERM);
5294 UNIDOR(OP_READLINE);
5307 LOP(OP_REVERSE,XTERM);
5310 UNIDOR(OP_READLINK);
5318 TERM(sublex_start());
5320 TOKEN(1); /* force error */
5323 checkcomma(s,PL_tokenbuf,"filehandle");
5333 LOP(OP_SELECT,XTERM);
5339 LOP(OP_SEMCTL,XTERM);
5342 LOP(OP_SEMGET,XTERM);
5345 LOP(OP_SEMOP,XTERM);
5351 LOP(OP_SETPGRP,XTERM);
5353 case KEY_setpriority:
5354 LOP(OP_SETPRIORITY,XTERM);
5356 case KEY_sethostent:
5362 case KEY_setservent:
5365 case KEY_setprotoent:
5375 LOP(OP_SEEKDIR,XTERM);
5377 case KEY_setsockopt:
5378 LOP(OP_SSOCKOPT,XTERM);
5384 LOP(OP_SHMCTL,XTERM);
5387 LOP(OP_SHMGET,XTERM);
5390 LOP(OP_SHMREAD,XTERM);
5393 LOP(OP_SHMWRITE,XTERM);
5396 LOP(OP_SHUTDOWN,XTERM);
5405 LOP(OP_SOCKET,XTERM);
5407 case KEY_socketpair:
5408 LOP(OP_SOCKPAIR,XTERM);
5411 checkcomma(s,PL_tokenbuf,"subroutine name");
5413 if (*s == ';' || *s == ')') /* probably a close */
5414 Perl_croak(aTHX_ "sort is now a reserved word");
5416 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5420 LOP(OP_SPLIT,XTERM);
5423 LOP(OP_SPRINTF,XTERM);
5426 LOP(OP_SPLICE,XTERM);
5441 LOP(OP_SUBSTR,XTERM);
5447 char tmpbuf[sizeof PL_tokenbuf];
5448 SSize_t tboffset = 0;
5449 expectation attrful;
5450 bool have_name, have_proto, bad_proto;
5451 const int key = tmp;
5455 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5456 (*s == ':' && s[1] == ':'))
5459 attrful = XATTRBLOCK;
5460 /* remember buffer pos'n for later force_word */
5461 tboffset = s - PL_oldbufptr;
5462 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5463 if (strchr(tmpbuf, ':'))
5464 sv_setpv(PL_subname, tmpbuf);
5466 sv_setsv(PL_subname,PL_curstname);
5467 sv_catpvs(PL_subname,"::");
5468 sv_catpvn(PL_subname,tmpbuf,len);
5475 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5476 PL_expect = XTERMBLOCK;
5477 attrful = XATTRTERM;
5478 sv_setpvn(PL_subname,"?",1);
5482 if (key == KEY_format) {
5484 PL_lex_formbrack = PL_lex_brackets + 1;
5486 (void) force_word(PL_oldbufptr + tboffset, WORD,
5491 /* Look for a prototype */
5495 s = scan_str(s,FALSE,FALSE);
5497 Perl_croak(aTHX_ "Prototype not terminated");
5498 /* strip spaces and check for bad characters */
5499 d = SvPVX(PL_lex_stuff);
5502 for (p = d; *p; ++p) {
5505 if (!strchr("$@%*;[]&\\", *p))
5510 if (bad_proto && ckWARN(WARN_SYNTAX))
5511 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5512 "Illegal character in prototype for %"SVf" : %s",
5514 SvCUR_set(PL_lex_stuff, tmp);
5522 if (*s == ':' && s[1] != ':')
5523 PL_expect = attrful;
5524 else if (*s != '{' && key == KEY_sub) {
5526 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5528 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5532 PL_nextval[PL_nexttoke].opval =
5533 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5534 PL_lex_stuff = Nullsv;
5538 sv_setpv(PL_subname,
5539 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5542 (void) force_word(PL_oldbufptr + tboffset, WORD,
5551 LOP(OP_SYSTEM,XREF);
5554 LOP(OP_SYMLINK,XTERM);
5557 LOP(OP_SYSCALL,XTERM);
5560 LOP(OP_SYSOPEN,XTERM);
5563 LOP(OP_SYSSEEK,XTERM);
5566 LOP(OP_SYSREAD,XTERM);
5569 LOP(OP_SYSWRITE,XTERM);
5573 TERM(sublex_start());
5594 LOP(OP_TRUNCATE,XTERM);
5606 yylval.ival = CopLINE(PL_curcop);
5610 yylval.ival = CopLINE(PL_curcop);
5614 LOP(OP_UNLINK,XTERM);
5620 LOP(OP_UNPACK,XTERM);
5623 LOP(OP_UTIME,XTERM);
5629 LOP(OP_UNSHIFT,XTERM);
5632 s = tokenize_use(1, s);
5642 yylval.ival = CopLINE(PL_curcop);
5646 yylval.ival = CopLINE(PL_curcop);
5650 PL_hints |= HINT_BLOCK_SCOPE;
5657 LOP(OP_WAITPID,XTERM);
5666 ctl_l[0] = toCTRL('L');
5668 gv_fetchpvn_flags(ctl_l, 1, GV_ADD, SVt_PV);
5671 gv_fetchpvs("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5676 if (PL_expect == XOPERATOR)
5682 yylval.ival = OP_XOR;
5687 TERM(sublex_start());
5692 #pragma segment Main
5696 S_pending_ident(pTHX)
5700 register I32 tmp = 0;
5701 /* pit holds the identifier we read and pending_ident is reset */
5702 char pit = PL_pending_ident;
5703 PL_pending_ident = 0;
5705 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5706 "### Pending identifier '%s'\n", PL_tokenbuf); });
5708 /* if we're in a my(), we can't allow dynamics here.
5709 $foo'bar has already been turned into $foo::bar, so
5710 just check for colons.
5712 if it's a legal name, the OP is a PADANY.
5715 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5716 if (strchr(PL_tokenbuf,':'))
5717 yyerror(Perl_form(aTHX_ "No package name allowed for "
5718 "variable %s in \"our\"",
5720 tmp = allocmy(PL_tokenbuf);
5723 if (strchr(PL_tokenbuf,':'))
5724 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5726 yylval.opval = newOP(OP_PADANY, 0);
5727 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5733 build the ops for accesses to a my() variable.
5735 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5736 then used in a comparison. This catches most, but not
5737 all cases. For instance, it catches
5738 sort { my($a); $a <=> $b }
5740 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5741 (although why you'd do that is anyone's guess).
5744 if (!strchr(PL_tokenbuf,':')) {
5746 tmp = pad_findmy(PL_tokenbuf);
5747 if (tmp != NOT_IN_PAD) {
5748 /* might be an "our" variable" */
5749 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5750 /* build ops for a bareword */
5751 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5752 HEK * const stashname = HvNAME_HEK(stash);
5753 SV * const sym = newSVhek(stashname);
5754 sv_catpvs(sym, "::");
5755 sv_catpv(sym, PL_tokenbuf+1);
5756 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5757 yylval.opval->op_private = OPpCONST_ENTERED;
5760 ? (GV_ADDMULTI | GV_ADDINEVAL)
5763 ((PL_tokenbuf[0] == '$') ? SVt_PV
5764 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5769 /* if it's a sort block and they're naming $a or $b */
5770 if (PL_last_lop_op == OP_SORT &&
5771 PL_tokenbuf[0] == '$' &&
5772 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5775 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5776 d < PL_bufend && *d != '\n';
5779 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5780 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5786 yylval.opval = newOP(OP_PADANY, 0);
5787 yylval.opval->op_targ = tmp;
5793 Whine if they've said @foo in a doublequoted string,
5794 and @foo isn't a variable we can find in the symbol
5797 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5798 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5799 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5800 && ckWARN(WARN_AMBIGUOUS))
5802 /* Downgraded from fatal to warning 20000522 mjd */
5803 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5804 "Possible unintended interpolation of %s in string",
5809 /* build ops for a bareword */
5810 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5811 yylval.opval->op_private = OPpCONST_ENTERED;
5814 /* If the identifier refers to a stash, don't autovivify it.
5815 * Change 24660 had the side effect of causing symbol table
5816 * hashes to always be defined, even if they were freshly
5817 * created and the only reference in the entire program was
5818 * the single statement with the defined %foo::bar:: test.
5819 * It appears that all code in the wild doing this actually
5820 * wants to know whether sub-packages have been loaded, so
5821 * by avoiding auto-vivifying symbol tables, we ensure that
5822 * defined %foo::bar:: continues to be false, and the existing
5823 * tests still give the expected answers, even though what
5824 * they're actually testing has now changed subtly.
5826 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
5828 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
5829 ((PL_tokenbuf[0] == '$') ? SVt_PV
5830 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5836 * The following code was generated by perl_keyword.pl.
5840 Perl_keyword (pTHX_ const char *name, I32 len)
5845 case 1: /* 5 tokens of length 1 */
5877 case 2: /* 18 tokens of length 2 */
6023 case 3: /* 29 tokens of length 3 */
6027 if (name[1] == 'N' &&
6090 if (name[1] == 'i' &&
6112 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6130 if (name[1] == 'o' &&
6139 if (name[1] == 'e' &&
6148 if (name[1] == 'n' &&
6157 if (name[1] == 'o' &&
6166 if (name[1] == 'a' &&
6175 if (name[1] == 'o' &&
6237 if (name[1] == 'e' &&
6251 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6277 if (name[1] == 'i' &&
6286 if (name[1] == 's' &&
6295 if (name[1] == 'e' &&
6304 if (name[1] == 'o' &&
6316 case 4: /* 41 tokens of length 4 */
6320 if (name[1] == 'O' &&
6330 if (name[1] == 'N' &&
6340 if (name[1] == 'i' &&
6350 if (name[1] == 'h' &&
6360 if (name[1] == 'u' &&
6373 if (name[2] == 'c' &&
6382 if (name[2] == 's' &&
6391 if (name[2] == 'a' &&
6427 if (name[1] == 'o' &&
6440 if (name[2] == 't' &&
6449 if (name[2] == 'o' &&
6458 if (name[2] == 't' &&
6467 if (name[2] == 'e' &&
6480 if (name[1] == 'o' &&
6493 if (name[2] == 'y' &&
6502 if (name[2] == 'l' &&
6518 if (name[2] == 's' &&
6527 if (name[2] == 'n' &&
6536 if (name[2] == 'c' &&
6549 if (name[1] == 'e' &&
6559 if (name[1] == 'p' &&
6572 if (name[2] == 'c' &&
6581 if (name[2] == 'p' &&
6590 if (name[2] == 's' &&
6606 if (name[2] == 'n' &&
6676 if (name[2] == 'r' &&
6685 if (name[2] == 'r' &&
6694 if (name[2] == 'a' &&
6710 if (name[2] == 'l' &&
6772 if (name[2] == 'e' &&
6775 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6788 case 5: /* 38 tokens of length 5 */
6792 if (name[1] == 'E' &&
6803 if (name[1] == 'H' &&
6817 if (name[2] == 'a' &&
6827 if (name[2] == 'a' &&
6844 if (name[2] == 'e' &&
6854 if (name[2] == 'e' &&
6858 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6874 if (name[3] == 'i' &&
6883 if (name[3] == 'o' &&
6919 if (name[2] == 'o' &&
6929 if (name[2] == 'y' &&
6943 if (name[1] == 'l' &&
6957 if (name[2] == 'n' &&
6967 if (name[2] == 'o' &&
6981 if (name[1] == 'i' &&
6986 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
6995 if (name[2] == 'd' &&
7005 if (name[2] == 'c' &&
7022 if (name[2] == 'c' &&
7032 if (name[2] == 't' &&
7046 if (name[1] == 'k' &&
7057 if (name[1] == 'r' &&
7071 if (name[2] == 's' &&
7081 if (name[2] == 'd' &&
7098 if (name[2] == 'm' &&
7108 if (name[2] == 'i' &&
7118 if (name[2] == 'e' &&
7128 if (name[2] == 'l' &&
7138 if (name[2] == 'a' &&
7148 if (name[2] == 'u' &&
7162 if (name[1] == 'i' &&
7176 if (name[2] == 'a' &&
7189 if (name[3] == 'e' &&
7224 if (name[2] == 'i' &&
7241 if (name[2] == 'i' &&
7251 if (name[2] == 'i' &&
7268 case 6: /* 33 tokens of length 6 */
7272 if (name[1] == 'c' &&
7287 if (name[2] == 'l' &&
7298 if (name[2] == 'r' &&
7313 if (name[1] == 'e' &&
7328 if (name[2] == 's' &&
7333 if(ckWARN_d(WARN_SYNTAX))
7334 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7340 if (name[2] == 'i' &&
7358 if (name[2] == 'l' &&
7369 if (name[2] == 'r' &&
7384 if (name[1] == 'm' &&
7399 if (name[2] == 'n' &&
7410 if (name[2] == 's' &&
7425 if (name[1] == 's' &&
7431 if (name[4] == 't' &&
7440 if (name[4] == 'e' &&
7449 if (name[4] == 'c' &&
7458 if (name[4] == 'n' &&
7474 if (name[1] == 'r' &&
7492 if (name[3] == 'a' &&
7502 if (name[3] == 'u' &&
7516 if (name[2] == 'n' &&
7534 if (name[2] == 'a' &&
7548 if (name[3] == 'e' &&
7561 if (name[4] == 't' &&
7570 if (name[4] == 'e' &&
7592 if (name[4] == 't' &&
7601 if (name[4] == 'e' &&
7617 if (name[2] == 'c' &&
7628 if (name[2] == 'l' &&
7639 if (name[2] == 'b' &&
7650 if (name[2] == 's' &&
7673 if (name[4] == 's' &&
7682 if (name[4] == 'n' &&
7695 if (name[3] == 'a' &&
7712 if (name[1] == 'a' &&
7727 case 7: /* 29 tokens of length 7 */
7731 if (name[1] == 'E' &&
7744 if (name[1] == '_' &&
7757 if (name[1] == 'i' &&
7764 return -KEY_binmode;
7770 if (name[1] == 'o' &&
7777 return -KEY_connect;
7786 if (name[2] == 'm' &&
7792 return -KEY_dbmopen;
7803 if (name[4] == 'u' &&
7807 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7813 if (name[4] == 'n' &&
7834 if (name[1] == 'o' &&
7847 if (name[1] == 'e' &&
7854 if (name[5] == 'r' &&
7857 return -KEY_getpgrp;
7863 if (name[5] == 'i' &&
7866 return -KEY_getppid;
7879 if (name[1] == 'c' &&
7886 return -KEY_lcfirst;
7892 if (name[1] == 'p' &&
7899 return -KEY_opendir;
7905 if (name[1] == 'a' &&
7923 if (name[3] == 'd' &&
7928 return -KEY_readdir;
7934 if (name[3] == 'u' &&
7945 if (name[3] == 'e' &&
7950 return -KEY_reverse;
7969 if (name[3] == 'k' &&
7974 return -KEY_seekdir;
7980 if (name[3] == 'p' &&
7985 return -KEY_setpgrp;
7995 if (name[2] == 'm' &&
8001 return -KEY_shmread;
8007 if (name[2] == 'r' &&
8013 return -KEY_sprintf;
8022 if (name[3] == 'l' &&
8027 return -KEY_symlink;
8036 if (name[4] == 'a' &&
8040 return -KEY_syscall;
8046 if (name[4] == 'p' &&
8050 return -KEY_sysopen;
8056 if (name[4] == 'e' &&
8060 return -KEY_sysread;
8066 if (name[4] == 'e' &&
8070 return -KEY_sysseek;
8088 if (name[1] == 'e' &&
8095 return -KEY_telldir;
8104 if (name[2] == 'f' &&
8110 return -KEY_ucfirst;
8116 if (name[2] == 's' &&
8122 return -KEY_unshift;
8132 if (name[1] == 'a' &&
8139 return -KEY_waitpid;
8148 case 8: /* 26 tokens of length 8 */
8152 if (name[1] == 'U' &&
8160 return KEY_AUTOLOAD;
8171 if (name[3] == 'A' &&
8177 return KEY___DATA__;
8183 if (name[3] == 'I' &&
8189 return -KEY___FILE__;
8195 if (name[3] == 'I' &&
8201 return -KEY___LINE__;
8217 if (name[2] == 'o' &&
8224 return -KEY_closedir;
8230 if (name[2] == 'n' &&
8237 return -KEY_continue;
8247 if (name[1] == 'b' &&
8255 return -KEY_dbmclose;
8261 if (name[1] == 'n' &&
8267 if (name[4] == 'r' &&
8272 return -KEY_endgrent;
8278 if (name[4] == 'w' &&
8283 return -KEY_endpwent;
8296 if (name[1] == 'o' &&
8304 return -KEY_formline;
8310 if (name[1] == 'e' &&
8321 if (name[6] == 'n' &&
8324 return -KEY_getgrent;
8330 if (name[6] == 'i' &&
8333 return -KEY_getgrgid;
8339 if (name[6] == 'a' &&
8342 return -KEY_getgrnam;
8355 if (name[4] == 'o' &&
8360 return -KEY_getlogin;
8371 if (name[6] == 'n' &&
8374 return -KEY_getpwent;
8380 if (name[6] == 'a' &&
8383 return -KEY_getpwnam;
8389 if (name[6] == 'i' &&
8392 return -KEY_getpwuid;
8412 if (name[1] == 'e' &&
8419 if (name[5] == 'i' &&
8426 return -KEY_readline;
8431 return -KEY_readlink;
8442 if (name[5] == 'i' &&
8446 return -KEY_readpipe;
8467 if (name[4] == 'r' &&
8472 return -KEY_setgrent;
8478 if (name[4] == 'w' &&
8483 return -KEY_setpwent;
8499 if (name[3] == 'w' &&
8505 return -KEY_shmwrite;
8511 if (name[3] == 't' &&
8517 return -KEY_shutdown;
8527 if (name[2] == 's' &&
8534 return -KEY_syswrite;
8544 if (name[1] == 'r' &&
8552 return -KEY_truncate;
8561 case 9: /* 8 tokens of length 9 */
8565 if (name[1] == 'n' &&
8574 return -KEY_endnetent;
8580 if (name[1] == 'e' &&
8589 return -KEY_getnetent;
8595 if (name[1] == 'o' &&
8604 return -KEY_localtime;
8610 if (name[1] == 'r' &&
8619 return KEY_prototype;
8625 if (name[1] == 'u' &&
8634 return -KEY_quotemeta;
8640 if (name[1] == 'e' &&
8649 return -KEY_rewinddir;
8655 if (name[1] == 'e' &&
8664 return -KEY_setnetent;
8670 if (name[1] == 'a' &&
8679 return -KEY_wantarray;
8688 case 10: /* 9 tokens of length 10 */
8692 if (name[1] == 'n' &&
8698 if (name[4] == 'o' &&
8705 return -KEY_endhostent;
8711 if (name[4] == 'e' &&
8718 return -KEY_endservent;
8731 if (name[1] == 'e' &&
8737 if (name[4] == 'o' &&
8744 return -KEY_gethostent;
8753 if (name[5] == 'r' &&
8759 return -KEY_getservent;
8765 if (name[5] == 'c' &&
8771 return -KEY_getsockopt;
8796 if (name[4] == 'o' &&
8803 return -KEY_sethostent;
8812 if (name[5] == 'r' &&
8818 return -KEY_setservent;
8824 if (name[5] == 'c' &&
8830 return -KEY_setsockopt;
8847 if (name[2] == 'c' &&
8856 return -KEY_socketpair;
8869 case 11: /* 8 tokens of length 11 */
8873 if (name[1] == '_' &&
8884 return -KEY___PACKAGE__;
8890 if (name[1] == 'n' &&
8901 return -KEY_endprotoent;
8907 if (name[1] == 'e' &&
8916 if (name[5] == 'e' &&
8923 return -KEY_getpeername;
8932 if (name[6] == 'o' &&
8938 return -KEY_getpriority;
8944 if (name[6] == 't' &&
8950 return -KEY_getprotoent;
8964 if (name[4] == 'o' &&
8972 return -KEY_getsockname;
8985 if (name[1] == 'e' &&
8993 if (name[6] == 'o' &&
8999 return -KEY_setpriority;
9005 if (name[6] == 't' &&
9011 return -KEY_setprotoent;
9027 case 12: /* 2 tokens of length 12 */
9028 if (name[0] == 'g' &&
9040 if (name[9] == 'd' &&
9043 { /* getnetbyaddr */
9044 return -KEY_getnetbyaddr;
9050 if (name[9] == 'a' &&
9053 { /* getnetbyname */
9054 return -KEY_getnetbyname;
9066 case 13: /* 4 tokens of length 13 */
9067 if (name[0] == 'g' &&
9074 if (name[4] == 'o' &&
9083 if (name[10] == 'd' &&
9086 { /* gethostbyaddr */
9087 return -KEY_gethostbyaddr;
9093 if (name[10] == 'a' &&
9096 { /* gethostbyname */
9097 return -KEY_gethostbyname;
9110 if (name[4] == 'e' &&
9119 if (name[10] == 'a' &&
9122 { /* getservbyname */
9123 return -KEY_getservbyname;
9129 if (name[10] == 'o' &&
9132 { /* getservbyport */
9133 return -KEY_getservbyport;
9152 case 14: /* 1 tokens of length 14 */
9153 if (name[0] == 'g' &&
9167 { /* getprotobyname */
9168 return -KEY_getprotobyname;
9173 case 16: /* 1 tokens of length 16 */
9174 if (name[0] == 'g' &&
9190 { /* getprotobynumber */
9191 return -KEY_getprotobynumber;
9205 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9210 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9211 if (ckWARN(WARN_SYNTAX)) {
9213 for (w = s+2; *w && level; w++) {
9220 for (; *w && isSPACE(*w); w++) ;
9221 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9222 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9223 "%s (...) interpreted as function",name);
9226 while (s < PL_bufend && isSPACE(*s))
9230 while (s < PL_bufend && isSPACE(*s))
9232 if (isIDFIRST_lazy_if(s,UTF)) {
9234 while (isALNUM_lazy_if(s,UTF))
9236 while (s < PL_bufend && isSPACE(*s))
9240 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9241 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9245 Perl_croak(aTHX_ "No comma allowed after %s", what);
9250 /* Either returns sv, or mortalizes sv and returns a new SV*.
9251 Best used as sv=new_constant(..., sv, ...).
9252 If s, pv are NULL, calls subroutine with one argument,
9253 and type is used with error messages only. */
9256 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9260 HV * const table = GvHV(PL_hintgv); /* ^H */
9264 const char *why1 = "", *why2 = "", *why3 = "";
9266 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9269 why2 = strEQ(key,"charnames")
9270 ? "(possibly a missing \"use charnames ...\")"
9272 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9273 (type ? type: "undef"), why2);
9275 /* This is convoluted and evil ("goto considered harmful")
9276 * but I do not understand the intricacies of all the different
9277 * failure modes of %^H in here. The goal here is to make
9278 * the most probable error message user-friendly. --jhi */
9283 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9284 (type ? type: "undef"), why1, why2, why3);
9286 yyerror(SvPVX_const(msg));
9290 cvp = hv_fetch(table, key, strlen(key), FALSE);
9291 if (!cvp || !SvOK(*cvp)) {
9294 why3 = "} is not defined";
9297 sv_2mortal(sv); /* Parent created it permanently */
9300 pv = sv_2mortal(newSVpvn(s, len));
9302 typesv = sv_2mortal(newSVpv(type, 0));
9304 typesv = &PL_sv_undef;
9306 PUSHSTACKi(PERLSI_OVERLOAD);
9318 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9322 /* Check the eval first */
9323 if (!PL_in_eval && SvTRUE(ERRSV)) {
9324 sv_catpvs(ERRSV, "Propagated");
9325 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9327 res = SvREFCNT_inc(sv);
9331 (void)SvREFCNT_inc(res);
9340 why1 = "Call to &{$^H{";
9342 why3 = "}} did not return a defined value";
9350 /* Returns a NUL terminated string, with the length of the string written to
9354 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9357 register char *d = dest;
9358 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9361 Perl_croak(aTHX_ ident_too_long);
9362 if (isALNUM(*s)) /* UTF handled below */
9364 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9369 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9373 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9374 char *t = s + UTF8SKIP(s);
9375 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9377 if (d + (t - s) > e)
9378 Perl_croak(aTHX_ ident_too_long);
9379 Copy(s, d, t - s, char);
9392 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9395 char *bracket = NULL;
9397 register char *d = dest;
9398 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
9403 while (isDIGIT(*s)) {
9405 Perl_croak(aTHX_ ident_too_long);
9412 Perl_croak(aTHX_ ident_too_long);
9413 if (isALNUM(*s)) /* UTF handled below */
9415 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9420 else if (*s == ':' && s[1] == ':') {
9424 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9425 char *t = s + UTF8SKIP(s);
9426 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9428 if (d + (t - s) > e)
9429 Perl_croak(aTHX_ ident_too_long);
9430 Copy(s, d, t - s, char);
9441 if (PL_lex_state != LEX_NORMAL)
9442 PL_lex_state = LEX_INTERPENDMAYBE;
9445 if (*s == '$' && s[1] &&
9446 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9459 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9464 if (isSPACE(s[-1])) {
9466 const char ch = *s++;
9467 if (!SPACE_OR_TAB(ch)) {
9473 if (isIDFIRST_lazy_if(d,UTF)) {
9477 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9478 end += UTF8SKIP(end);
9479 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9480 end += UTF8SKIP(end);
9482 Copy(s, d, end - s, char);
9487 while ((isALNUM(*s) || *s == ':') && d < e)
9490 Perl_croak(aTHX_ ident_too_long);
9493 while (s < send && SPACE_OR_TAB(*s)) s++;
9494 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9495 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9496 const char *brack = *s == '[' ? "[...]" : "{...}";
9497 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9498 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9499 funny, dest, brack, funny, dest, brack);
9502 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9506 /* Handle extended ${^Foo} variables
9507 * 1999-02-27 mjd-perl-patch@plover.com */
9508 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9512 while (isALNUM(*s) && d < e) {
9516 Perl_croak(aTHX_ ident_too_long);
9521 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9522 PL_lex_state = LEX_INTERPEND;
9527 if (PL_lex_state == LEX_NORMAL) {
9528 if (ckWARN(WARN_AMBIGUOUS) &&
9529 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9531 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9532 "Ambiguous use of %c{%s} resolved to %c%s",
9533 funny, dest, funny, dest);
9538 s = bracket; /* let the parser handle it */
9542 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9543 PL_lex_state = LEX_INTERPEND;
9548 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9553 *pmfl |= PMf_GLOBAL;
9555 *pmfl |= PMf_CONTINUE;
9559 *pmfl |= PMf_MULTILINE;
9561 *pmfl |= PMf_SINGLELINE;
9563 *pmfl |= PMf_EXTENDED;
9567 S_scan_pat(pTHX_ char *start, I32 type)
9571 char *s = scan_str(start,FALSE,FALSE);
9572 const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
9575 const char * const delimiter = skipspace(start);
9576 Perl_croak(aTHX_ *delimiter == '?'
9577 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9578 : "Search pattern not terminated" );
9581 pm = (PMOP*)newPMOP(type, 0);
9582 if (PL_multi_open == '?')
9583 pm->op_pmflags |= PMf_ONCE;
9584 while (*s && strchr(valid_flags, *s))
9585 pmflag(&pm->op_pmflags,*s++);
9586 /* issue a warning if /c is specified,but /g is not */
9587 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9588 && ckWARN(WARN_REGEXP))
9590 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9593 pm->op_pmpermflags = pm->op_pmflags;
9595 PL_lex_op = (OP*)pm;
9596 yylval.ival = OP_MATCH;
9601 S_scan_subst(pTHX_ char *start)
9609 yylval.ival = OP_NULL;
9611 s = scan_str(start,FALSE,FALSE);
9614 Perl_croak(aTHX_ "Substitution pattern not terminated");
9616 if (s[-1] == PL_multi_open)
9619 first_start = PL_multi_start;
9620 s = scan_str(s,FALSE,FALSE);
9623 SvREFCNT_dec(PL_lex_stuff);
9624 PL_lex_stuff = Nullsv;
9626 Perl_croak(aTHX_ "Substitution replacement not terminated");
9628 PL_multi_start = first_start; /* so whole substitution is taken together */
9630 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9636 else if (strchr("iogcmsx", *s))
9637 pmflag(&pm->op_pmflags,*s++);
9642 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9643 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9647 SV * const repl = newSVpvs("");
9649 PL_sublex_info.super_bufptr = s;
9650 PL_sublex_info.super_bufend = PL_bufend;
9652 pm->op_pmflags |= PMf_EVAL;
9654 sv_catpv(repl, es ? "eval " : "do ");
9655 sv_catpvs(repl, "{ ");
9656 sv_catsv(repl, PL_lex_repl);
9657 sv_catpvs(repl, " }");
9659 SvREFCNT_dec(PL_lex_repl);
9663 pm->op_pmpermflags = pm->op_pmflags;
9664 PL_lex_op = (OP*)pm;
9665 yylval.ival = OP_SUBST;
9670 S_scan_trans(pTHX_ char *start)
9680 yylval.ival = OP_NULL;
9682 s = scan_str(start,FALSE,FALSE);
9684 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9685 if (s[-1] == PL_multi_open)
9688 s = scan_str(s,FALSE,FALSE);
9691 SvREFCNT_dec(PL_lex_stuff);
9692 PL_lex_stuff = Nullsv;
9694 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9697 complement = del = squash = 0;
9701 complement = OPpTRANS_COMPLEMENT;
9704 del = OPpTRANS_DELETE;
9707 squash = OPpTRANS_SQUASH;
9716 Newx(tbl, complement&&!del?258:256, short);
9717 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9718 o->op_private &= ~OPpTRANS_ALL;
9719 o->op_private |= del|squash|complement|
9720 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9721 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9724 yylval.ival = OP_TRANS;
9729 S_scan_heredoc(pTHX_ register char *s)
9733 I32 op_type = OP_SCALAR;
9737 const char *found_newline;
9741 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9745 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9748 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9749 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9752 s = delimcpy(d, e, s, PL_bufend, term, &len);
9762 if (!isALNUM_lazy_if(s,UTF))
9763 deprecate_old("bare << to mean <<\"\"");
9764 for (; isALNUM_lazy_if(s,UTF); s++) {
9769 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9770 Perl_croak(aTHX_ "Delimiter for here document is too long");
9773 len = d - PL_tokenbuf;
9774 #ifndef PERL_STRICT_CR
9775 d = strchr(s, '\r');
9777 char * const olds = s;
9779 while (s < PL_bufend) {
9785 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9794 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9798 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9799 herewas = newSVpvn(s,PL_bufend-s);
9803 herewas = newSVpvn(s,found_newline-s);
9805 s += SvCUR(herewas);
9808 sv_upgrade(tmpstr, SVt_PVIV);
9811 SvIV_set(tmpstr, -1);
9813 else if (term == '`') {
9814 op_type = OP_BACKTICK;
9815 SvIV_set(tmpstr, '\\');
9819 PL_multi_start = CopLINE(PL_curcop);
9820 PL_multi_open = PL_multi_close = '<';
9821 term = *PL_tokenbuf;
9822 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9823 char * const bufptr = PL_sublex_info.super_bufptr;
9824 char * const bufend = PL_sublex_info.super_bufend;
9825 char * const olds = s - SvCUR(herewas);
9826 s = strchr(bufptr, '\n');
9830 while (s < bufend &&
9831 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9833 CopLINE_inc(PL_curcop);
9836 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9837 missingterm(PL_tokenbuf);
9839 sv_setpvn(herewas,bufptr,d-bufptr+1);
9840 sv_setpvn(tmpstr,d+1,s-d);
9842 sv_catpvn(herewas,s,bufend-s);
9843 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9850 while (s < PL_bufend &&
9851 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9853 CopLINE_inc(PL_curcop);
9855 if (s >= PL_bufend) {
9856 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9857 missingterm(PL_tokenbuf);
9859 sv_setpvn(tmpstr,d+1,s-d);
9861 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9863 sv_catpvn(herewas,s,PL_bufend-s);
9864 sv_setsv(PL_linestr,herewas);
9865 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9866 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9867 PL_last_lop = PL_last_uni = Nullch;
9870 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9871 while (s >= PL_bufend) { /* multiple line string? */
9873 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9874 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9875 missingterm(PL_tokenbuf);
9877 CopLINE_inc(PL_curcop);
9878 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9879 PL_last_lop = PL_last_uni = Nullch;
9880 #ifndef PERL_STRICT_CR
9881 if (PL_bufend - PL_linestart >= 2) {
9882 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9883 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9885 PL_bufend[-2] = '\n';
9887 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9889 else if (PL_bufend[-1] == '\r')
9890 PL_bufend[-1] = '\n';
9892 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9893 PL_bufend[-1] = '\n';
9895 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9896 SV * const sv = newSV(0);
9898 sv_upgrade(sv, SVt_PVMG);
9899 sv_setsv(sv,PL_linestr);
9902 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9904 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9905 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9906 *(SvPVX(PL_linestr) + off ) = ' ';
9907 sv_catsv(PL_linestr,herewas);
9908 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9909 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9913 sv_catsv(tmpstr,PL_linestr);
9918 PL_multi_end = CopLINE(PL_curcop);
9919 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9920 SvPV_shrink_to_cur(tmpstr);
9922 SvREFCNT_dec(herewas);
9924 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9926 else if (PL_encoding)
9927 sv_recode_to_utf8(tmpstr, PL_encoding);
9929 PL_lex_stuff = tmpstr;
9930 yylval.ival = op_type;
9935 takes: current position in input buffer
9936 returns: new position in input buffer
9937 side-effects: yylval and lex_op are set.
9942 <FH> read from filehandle
9943 <pkg::FH> read from package qualified filehandle
9944 <pkg'FH> read from package qualified filehandle
9945 <$fh> read from filehandle in $fh
9951 S_scan_inputsymbol(pTHX_ char *start)
9954 register char *s = start; /* current position in buffer */
9958 char *d = PL_tokenbuf; /* start of temp holding space */
9959 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9961 end = strchr(s, '\n');
9964 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9966 /* die if we didn't have space for the contents of the <>,
9967 or if it didn't end, or if we see a newline
9970 if (len >= sizeof PL_tokenbuf)
9971 Perl_croak(aTHX_ "Excessively long <> operator");
9973 Perl_croak(aTHX_ "Unterminated <> operator");
9978 Remember, only scalar variables are interpreted as filehandles by
9979 this code. Anything more complex (e.g., <$fh{$num}>) will be
9980 treated as a glob() call.
9981 This code makes use of the fact that except for the $ at the front,
9982 a scalar variable and a filehandle look the same.
9984 if (*d == '$' && d[1]) d++;
9986 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9987 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9990 /* If we've tried to read what we allow filehandles to look like, and
9991 there's still text left, then it must be a glob() and not a getline.
9992 Use scan_str to pull out the stuff between the <> and treat it
9993 as nothing more than a string.
9996 if (d - PL_tokenbuf != len) {
9997 yylval.ival = OP_GLOB;
9999 s = scan_str(start,FALSE,FALSE);
10001 Perl_croak(aTHX_ "Glob not terminated");
10005 bool readline_overriden = FALSE;
10008 /* we're in a filehandle read situation */
10011 /* turn <> into <ARGV> */
10013 Copy("ARGV",d,5,char);
10015 /* Check whether readline() is overriden */
10016 gv_readline = gv_fetchpvs("readline", 0, SVt_PVCV);
10018 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10020 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10021 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
10022 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10023 readline_overriden = TRUE;
10025 /* if <$fh>, create the ops to turn the variable into a
10031 /* try to find it in the pad for this block, otherwise find
10032 add symbol table ops
10034 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10035 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
10036 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10037 HEK * const stashname = HvNAME_HEK(stash);
10038 SV * const sym = sv_2mortal(newSVhek(stashname));
10039 sv_catpvs(sym, "::");
10040 sv_catpv(sym, d+1);
10045 OP * const o = newOP(OP_PADSV, 0);
10047 PL_lex_op = readline_overriden
10048 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10049 append_elem(OP_LIST, o,
10050 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10051 : (OP*)newUNOP(OP_READLINE, 0, o);
10060 ? (GV_ADDMULTI | GV_ADDINEVAL)
10063 PL_lex_op = readline_overriden
10064 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10065 append_elem(OP_LIST,
10066 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10067 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10068 : (OP*)newUNOP(OP_READLINE, 0,
10069 newUNOP(OP_RV2SV, 0,
10070 newGVOP(OP_GV, 0, gv)));
10072 if (!readline_overriden)
10073 PL_lex_op->op_flags |= OPf_SPECIAL;
10074 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10075 yylval.ival = OP_NULL;
10078 /* If it's none of the above, it must be a literal filehandle
10079 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10081 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10082 PL_lex_op = readline_overriden
10083 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10084 append_elem(OP_LIST,
10085 newGVOP(OP_GV, 0, gv),
10086 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10087 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10088 yylval.ival = OP_NULL;
10097 takes: start position in buffer
10098 keep_quoted preserve \ on the embedded delimiter(s)
10099 keep_delims preserve the delimiters around the string
10100 returns: position to continue reading from buffer
10101 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10102 updates the read buffer.
10104 This subroutine pulls a string out of the input. It is called for:
10105 q single quotes q(literal text)
10106 ' single quotes 'literal text'
10107 qq double quotes qq(interpolate $here please)
10108 " double quotes "interpolate $here please"
10109 qx backticks qx(/bin/ls -l)
10110 ` backticks `/bin/ls -l`
10111 qw quote words @EXPORT_OK = qw( func() $spam )
10112 m// regexp match m/this/
10113 s/// regexp substitute s/this/that/
10114 tr/// string transliterate tr/this/that/
10115 y/// string transliterate y/this/that/
10116 ($*@) sub prototypes sub foo ($)
10117 (stuff) sub attr parameters sub foo : attr(stuff)
10118 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10120 In most of these cases (all but <>, patterns and transliterate)
10121 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10122 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10123 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10126 It skips whitespace before the string starts, and treats the first
10127 character as the delimiter. If the delimiter is one of ([{< then
10128 the corresponding "close" character )]}> is used as the closing
10129 delimiter. It allows quoting of delimiters, and if the string has
10130 balanced delimiters ([{<>}]) it allows nesting.
10132 On success, the SV with the resulting string is put into lex_stuff or,
10133 if that is already non-NULL, into lex_repl. The second case occurs only
10134 when parsing the RHS of the special constructs s/// and tr/// (y///).
10135 For convenience, the terminating delimiter character is stuffed into
10140 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10143 SV *sv; /* scalar value: string */
10144 char *tmps; /* temp string, used for delimiter matching */
10145 register char *s = start; /* current position in the buffer */
10146 register char term; /* terminating character */
10147 register char *to; /* current position in the sv's data */
10148 I32 brackets = 1; /* bracket nesting level */
10149 bool has_utf8 = FALSE; /* is there any utf8 content? */
10150 I32 termcode; /* terminating char. code */
10151 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10152 STRLEN termlen; /* length of terminating string */
10153 char *last = NULL; /* last position for nesting bracket */
10155 /* skip space before the delimiter */
10159 /* mark where we are, in case we need to report errors */
10162 /* after skipping whitespace, the next character is the terminator */
10165 termcode = termstr[0] = term;
10169 termcode = utf8_to_uvchr((U8*)s, &termlen);
10170 Copy(s, termstr, termlen, U8);
10171 if (!UTF8_IS_INVARIANT(term))
10175 /* mark where we are */
10176 PL_multi_start = CopLINE(PL_curcop);
10177 PL_multi_open = term;
10179 /* find corresponding closing delimiter */
10180 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10181 termcode = termstr[0] = term = tmps[5];
10183 PL_multi_close = term;
10185 /* create a new SV to hold the contents. 79 is the SV's initial length.
10186 What a random number. */
10188 sv_upgrade(sv, SVt_PVIV);
10189 SvIV_set(sv, termcode);
10190 (void)SvPOK_only(sv); /* validate pointer */
10192 /* move past delimiter and try to read a complete string */
10194 sv_catpvn(sv, s, termlen);
10197 if (PL_encoding && !UTF) {
10201 int offset = s - SvPVX_const(PL_linestr);
10202 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10203 &offset, (char*)termstr, termlen);
10204 const char * const ns = SvPVX_const(PL_linestr) + offset;
10205 char * const svlast = SvEND(sv) - 1;
10207 for (; s < ns; s++) {
10208 if (*s == '\n' && !PL_rsfp)
10209 CopLINE_inc(PL_curcop);
10212 goto read_more_line;
10214 /* handle quoted delimiters */
10215 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10217 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10219 if ((svlast-1 - t) % 2) {
10220 if (!keep_quoted) {
10221 *(svlast-1) = term;
10223 SvCUR_set(sv, SvCUR(sv) - 1);
10228 if (PL_multi_open == PL_multi_close) {
10236 for (t = w = last; t < svlast; w++, t++) {
10237 /* At here, all closes are "was quoted" one,
10238 so we don't check PL_multi_close. */
10240 if (!keep_quoted && *(t+1) == PL_multi_open)
10245 else if (*t == PL_multi_open)
10253 SvCUR_set(sv, w - SvPVX_const(sv));
10256 if (--brackets <= 0)
10261 if (!keep_delims) {
10262 SvCUR_set(sv, SvCUR(sv) - 1);
10268 /* extend sv if need be */
10269 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10270 /* set 'to' to the next character in the sv's string */
10271 to = SvPVX(sv)+SvCUR(sv);
10273 /* if open delimiter is the close delimiter read unbridle */
10274 if (PL_multi_open == PL_multi_close) {
10275 for (; s < PL_bufend; s++,to++) {
10276 /* embedded newlines increment the current line number */
10277 if (*s == '\n' && !PL_rsfp)
10278 CopLINE_inc(PL_curcop);
10279 /* handle quoted delimiters */
10280 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10281 if (!keep_quoted && s[1] == term)
10283 /* any other quotes are simply copied straight through */
10287 /* terminate when run out of buffer (the for() condition), or
10288 have found the terminator */
10289 else if (*s == term) {
10292 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10295 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10301 /* if the terminator isn't the same as the start character (e.g.,
10302 matched brackets), we have to allow more in the quoting, and
10303 be prepared for nested brackets.
10306 /* read until we run out of string, or we find the terminator */
10307 for (; s < PL_bufend; s++,to++) {
10308 /* embedded newlines increment the line count */
10309 if (*s == '\n' && !PL_rsfp)
10310 CopLINE_inc(PL_curcop);
10311 /* backslashes can escape the open or closing characters */
10312 if (*s == '\\' && s+1 < PL_bufend) {
10313 if (!keep_quoted &&
10314 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10319 /* allow nested opens and closes */
10320 else if (*s == PL_multi_close && --brackets <= 0)
10322 else if (*s == PL_multi_open)
10324 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10329 /* terminate the copied string and update the sv's end-of-string */
10331 SvCUR_set(sv, to - SvPVX_const(sv));
10334 * this next chunk reads more into the buffer if we're not done yet
10338 break; /* handle case where we are done yet :-) */
10340 #ifndef PERL_STRICT_CR
10341 if (to - SvPVX_const(sv) >= 2) {
10342 if ((to[-2] == '\r' && to[-1] == '\n') ||
10343 (to[-2] == '\n' && to[-1] == '\r'))
10347 SvCUR_set(sv, to - SvPVX_const(sv));
10349 else if (to[-1] == '\r')
10352 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10357 /* if we're out of file, or a read fails, bail and reset the current
10358 line marker so we can report where the unterminated string began
10361 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10363 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10366 /* we read a line, so increment our line counter */
10367 CopLINE_inc(PL_curcop);
10369 /* update debugger info */
10370 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10371 SV * const sv = newSV(0);
10373 sv_upgrade(sv, SVt_PVMG);
10374 sv_setsv(sv,PL_linestr);
10375 (void)SvIOK_on(sv);
10377 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10380 /* having changed the buffer, we must update PL_bufend */
10381 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10382 PL_last_lop = PL_last_uni = Nullch;
10385 /* at this point, we have successfully read the delimited string */
10387 if (!PL_encoding || UTF) {
10389 sv_catpvn(sv, s, termlen);
10392 if (has_utf8 || PL_encoding)
10395 PL_multi_end = CopLINE(PL_curcop);
10397 /* if we allocated too much space, give some back */
10398 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10399 SvLEN_set(sv, SvCUR(sv) + 1);
10400 SvPV_renew(sv, SvLEN(sv));
10403 /* decide whether this is the first or second quoted string we've read
10416 takes: pointer to position in buffer
10417 returns: pointer to new position in buffer
10418 side-effects: builds ops for the constant in yylval.op
10420 Read a number in any of the formats that Perl accepts:
10422 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10423 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10426 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10428 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10431 If it reads a number without a decimal point or an exponent, it will
10432 try converting the number to an integer and see if it can do so
10433 without loss of precision.
10437 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10440 register const char *s = start; /* current position in buffer */
10441 register char *d; /* destination in temp buffer */
10442 register char *e; /* end of temp buffer */
10443 NV nv; /* number read, as a double */
10444 SV *sv = Nullsv; /* place to put the converted number */
10445 bool floatit; /* boolean: int or float? */
10446 const char *lastub = NULL; /* position of last underbar */
10447 static char const number_too_long[] = "Number too long";
10449 /* We use the first character to decide what type of number this is */
10453 Perl_croak(aTHX_ "panic: scan_num");
10455 /* if it starts with a 0, it could be an octal number, a decimal in
10456 0.13 disguise, or a hexadecimal number, or a binary number. */
10460 u holds the "number so far"
10461 shift the power of 2 of the base
10462 (hex == 4, octal == 3, binary == 1)
10463 overflowed was the number more than we can hold?
10465 Shift is used when we add a digit. It also serves as an "are
10466 we in octal/hex/binary?" indicator to disallow hex characters
10467 when in octal mode.
10472 bool overflowed = FALSE;
10473 bool just_zero = TRUE; /* just plain 0 or binary number? */
10474 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10475 static const char* const bases[5] =
10476 { "", "binary", "", "octal", "hexadecimal" };
10477 static const char* const Bases[5] =
10478 { "", "Binary", "", "Octal", "Hexadecimal" };
10479 static const char* const maxima[5] =
10481 "0b11111111111111111111111111111111",
10485 const char *base, *Base, *max;
10487 /* check for hex */
10492 } else if (s[1] == 'b') {
10497 /* check for a decimal in disguise */
10498 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10500 /* so it must be octal */
10507 if (ckWARN(WARN_SYNTAX))
10508 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10509 "Misplaced _ in number");
10513 base = bases[shift];
10514 Base = Bases[shift];
10515 max = maxima[shift];
10517 /* read the rest of the number */
10519 /* x is used in the overflow test,
10520 b is the digit we're adding on. */
10525 /* if we don't mention it, we're done */
10529 /* _ are ignored -- but warned about if consecutive */
10531 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10532 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10533 "Misplaced _ in number");
10537 /* 8 and 9 are not octal */
10538 case '8': case '9':
10540 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10544 case '2': case '3': case '4':
10545 case '5': case '6': case '7':
10547 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10550 case '0': case '1':
10551 b = *s++ & 15; /* ASCII digit -> value of digit */
10555 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10556 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10557 /* make sure they said 0x */
10560 b = (*s++ & 7) + 9;
10562 /* Prepare to put the digit we have onto the end
10563 of the number so far. We check for overflows.
10569 x = u << shift; /* make room for the digit */
10571 if ((x >> shift) != u
10572 && !(PL_hints & HINT_NEW_BINARY)) {
10575 if (ckWARN_d(WARN_OVERFLOW))
10576 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10577 "Integer overflow in %s number",
10580 u = x | b; /* add the digit to the end */
10583 n *= nvshift[shift];
10584 /* If an NV has not enough bits in its
10585 * mantissa to represent an UV this summing of
10586 * small low-order numbers is a waste of time
10587 * (because the NV cannot preserve the
10588 * low-order bits anyway): we could just
10589 * remember when did we overflow and in the
10590 * end just multiply n by the right
10598 /* if we get here, we had success: make a scalar value from
10603 /* final misplaced underbar check */
10604 if (s[-1] == '_') {
10605 if (ckWARN(WARN_SYNTAX))
10606 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10611 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10612 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10613 "%s number > %s non-portable",
10619 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10620 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10621 "%s number > %s non-portable",
10626 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10627 sv = new_constant(start, s - start, "integer",
10629 else if (PL_hints & HINT_NEW_BINARY)
10630 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10635 handle decimal numbers.
10636 we're also sent here when we read a 0 as the first digit
10638 case '1': case '2': case '3': case '4': case '5':
10639 case '6': case '7': case '8': case '9': case '.':
10642 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10645 /* read next group of digits and _ and copy into d */
10646 while (isDIGIT(*s) || *s == '_') {
10647 /* skip underscores, checking for misplaced ones
10651 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10652 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10653 "Misplaced _ in number");
10657 /* check for end of fixed-length buffer */
10659 Perl_croak(aTHX_ number_too_long);
10660 /* if we're ok, copy the character */
10665 /* final misplaced underbar check */
10666 if (lastub && s == lastub + 1) {
10667 if (ckWARN(WARN_SYNTAX))
10668 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10671 /* read a decimal portion if there is one. avoid
10672 3..5 being interpreted as the number 3. followed
10675 if (*s == '.' && s[1] != '.') {
10680 if (ckWARN(WARN_SYNTAX))
10681 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10682 "Misplaced _ in number");
10686 /* copy, ignoring underbars, until we run out of digits.
10688 for (; isDIGIT(*s) || *s == '_'; s++) {
10689 /* fixed length buffer check */
10691 Perl_croak(aTHX_ number_too_long);
10693 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10694 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10695 "Misplaced _ in number");
10701 /* fractional part ending in underbar? */
10702 if (s[-1] == '_') {
10703 if (ckWARN(WARN_SYNTAX))
10704 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10705 "Misplaced _ in number");
10707 if (*s == '.' && isDIGIT(s[1])) {
10708 /* oops, it's really a v-string, but without the "v" */
10714 /* read exponent part, if present */
10715 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10719 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10720 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10722 /* stray preinitial _ */
10724 if (ckWARN(WARN_SYNTAX))
10725 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10726 "Misplaced _ in number");
10730 /* allow positive or negative exponent */
10731 if (*s == '+' || *s == '-')
10734 /* stray initial _ */
10736 if (ckWARN(WARN_SYNTAX))
10737 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10738 "Misplaced _ in number");
10742 /* read digits of exponent */
10743 while (isDIGIT(*s) || *s == '_') {
10746 Perl_croak(aTHX_ number_too_long);
10750 if (((lastub && s == lastub + 1) ||
10751 (!isDIGIT(s[1]) && s[1] != '_'))
10752 && ckWARN(WARN_SYNTAX))
10753 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10754 "Misplaced _ in number");
10761 /* make an sv from the string */
10765 We try to do an integer conversion first if no characters
10766 indicating "float" have been found.
10771 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10773 if (flags == IS_NUMBER_IN_UV) {
10775 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10778 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10779 if (uv <= (UV) IV_MIN)
10780 sv_setiv(sv, -(IV)uv);
10787 /* terminate the string */
10789 nv = Atof(PL_tokenbuf);
10793 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10794 (PL_hints & HINT_NEW_INTEGER) )
10795 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10796 (floatit ? "float" : "integer"),
10800 /* if it starts with a v, it could be a v-string */
10803 sv = newSV(5); /* preallocate storage space */
10804 s = scan_vstring(s,sv);
10808 /* make the op for the constant and return */
10811 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10813 lvalp->opval = Nullop;
10819 S_scan_formline(pTHX_ register char *s)
10822 register char *eol;
10824 SV * const stuff = newSVpvs("");
10825 bool needargs = FALSE;
10826 bool eofmt = FALSE;
10828 while (!needargs) {
10830 #ifdef PERL_STRICT_CR
10831 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10833 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10835 if (*t == '\n' || t == PL_bufend) {
10840 if (PL_in_eval && !PL_rsfp) {
10841 eol = (char *) memchr(s,'\n',PL_bufend-s);
10846 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10848 for (t = s; t < eol; t++) {
10849 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10851 goto enough; /* ~~ must be first line in formline */
10853 if (*t == '@' || *t == '^')
10857 sv_catpvn(stuff, s, eol-s);
10858 #ifndef PERL_STRICT_CR
10859 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10860 char *end = SvPVX(stuff) + SvCUR(stuff);
10863 SvCUR_set(stuff, SvCUR(stuff) - 1);
10872 s = filter_gets(PL_linestr, PL_rsfp, 0);
10873 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10874 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10875 PL_last_lop = PL_last_uni = Nullch;
10884 if (SvCUR(stuff)) {
10887 PL_lex_state = LEX_NORMAL;
10888 PL_nextval[PL_nexttoke].ival = 0;
10892 PL_lex_state = LEX_FORMLINE;
10894 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10896 else if (PL_encoding)
10897 sv_recode_to_utf8(stuff, PL_encoding);
10899 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10901 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10905 SvREFCNT_dec(stuff);
10907 PL_lex_formbrack = 0;
10919 PL_cshlen = strlen(PL_cshname);
10924 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10927 const I32 oldsavestack_ix = PL_savestack_ix;
10928 CV* const outsidecv = PL_compcv;
10931 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10933 SAVEI32(PL_subline);
10934 save_item(PL_subname);
10935 SAVESPTR(PL_compcv);
10937 PL_compcv = (CV*)newSV(0);
10938 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10939 CvFLAGS(PL_compcv) |= flags;
10941 PL_subline = CopLINE(PL_curcop);
10942 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10943 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10944 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10946 return oldsavestack_ix;
10950 #pragma segment Perl_yylex
10953 Perl_yywarn(pTHX_ const char *s)
10956 PL_in_eval |= EVAL_WARNONLY;
10958 PL_in_eval &= ~EVAL_WARNONLY;
10963 Perl_yyerror(pTHX_ const char *s)
10966 const char *where = NULL;
10967 const char *context = NULL;
10971 if (!yychar || (yychar == ';' && !PL_rsfp))
10973 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10974 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10975 PL_oldbufptr != PL_bufptr) {
10978 The code below is removed for NetWare because it abends/crashes on NetWare
10979 when the script has error such as not having the closing quotes like:
10980 if ($var eq "value)
10981 Checking of white spaces is anyway done in NetWare code.
10984 while (isSPACE(*PL_oldoldbufptr))
10987 context = PL_oldoldbufptr;
10988 contlen = PL_bufptr - PL_oldoldbufptr;
10990 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10991 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10994 The code below is removed for NetWare because it abends/crashes on NetWare
10995 when the script has error such as not having the closing quotes like:
10996 if ($var eq "value)
10997 Checking of white spaces is anyway done in NetWare code.
11000 while (isSPACE(*PL_oldbufptr))
11003 context = PL_oldbufptr;
11004 contlen = PL_bufptr - PL_oldbufptr;
11006 else if (yychar > 255)
11007 where = "next token ???";
11008 else if (yychar == -2) { /* YYEMPTY */
11009 if (PL_lex_state == LEX_NORMAL ||
11010 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11011 where = "at end of line";
11012 else if (PL_lex_inpat)
11013 where = "within pattern";
11015 where = "within string";
11018 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
11020 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11021 else if (isPRINT_LC(yychar))
11022 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
11024 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11025 where = SvPVX_const(where_sv);
11027 msg = sv_2mortal(newSVpv(s, 0));
11028 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11029 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11031 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
11033 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
11034 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11035 Perl_sv_catpvf(aTHX_ msg,
11036 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11037 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11040 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11041 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11044 if (PL_error_count >= 10) {
11045 if (PL_in_eval && SvCUR(ERRSV))
11046 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11047 ERRSV, OutCopFILE(PL_curcop));
11049 Perl_croak(aTHX_ "%s has too many errors.\n",
11050 OutCopFILE(PL_curcop));
11053 PL_in_my_stash = NULL;
11057 #pragma segment Main
11061 S_swallow_bom(pTHX_ U8 *s)
11064 const STRLEN slen = SvCUR(PL_linestr);
11067 if (s[1] == 0xFE) {
11068 /* UTF-16 little-endian? (or UTF32-LE?) */
11069 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11070 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11071 #ifndef PERL_NO_UTF16_FILTER
11072 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11075 if (PL_bufend > (char*)s) {
11079 filter_add(utf16rev_textfilter, NULL);
11080 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11081 utf16_to_utf8_reversed(s, news,
11082 PL_bufend - (char*)s - 1,
11084 sv_setpvn(PL_linestr, (const char*)news, newlen);
11086 SvUTF8_on(PL_linestr);
11087 s = (U8*)SvPVX(PL_linestr);
11088 PL_bufend = SvPVX(PL_linestr) + newlen;
11091 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11096 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11097 #ifndef PERL_NO_UTF16_FILTER
11098 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11101 if (PL_bufend > (char *)s) {
11105 filter_add(utf16_textfilter, NULL);
11106 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11107 utf16_to_utf8(s, news,
11108 PL_bufend - (char*)s,
11110 sv_setpvn(PL_linestr, (const char*)news, newlen);
11112 SvUTF8_on(PL_linestr);
11113 s = (U8*)SvPVX(PL_linestr);
11114 PL_bufend = SvPVX(PL_linestr) + newlen;
11117 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11122 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11123 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11124 s += 3; /* UTF-8 */
11130 if (s[2] == 0xFE && s[3] == 0xFF) {
11131 /* UTF-32 big-endian */
11132 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11135 else if (s[2] == 0 && s[3] != 0) {
11138 * are a good indicator of UTF-16BE. */
11139 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11144 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11147 * are a good indicator of UTF-16LE. */
11148 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11157 * Restore a source filter.
11161 restore_rsfp(pTHX_ void *f)
11164 PerlIO * const fp = (PerlIO*)f;
11166 if (PL_rsfp == PerlIO_stdin())
11167 PerlIO_clearerr(PL_rsfp);
11168 else if (PL_rsfp && (PL_rsfp != fp))
11169 PerlIO_close(PL_rsfp);
11173 #ifndef PERL_NO_UTF16_FILTER
11175 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11178 const STRLEN old = SvCUR(sv);
11179 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11180 DEBUG_P(PerlIO_printf(Perl_debug_log,
11181 "utf16_textfilter(%p): %d %d (%d)\n",
11182 utf16_textfilter, idx, maxlen, (int) count));
11186 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11187 Copy(SvPVX_const(sv), tmps, old, char);
11188 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11189 SvCUR(sv) - old, &newlen);
11190 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11192 DEBUG_P({sv_dump(sv);});
11197 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11200 const STRLEN old = SvCUR(sv);
11201 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11202 DEBUG_P(PerlIO_printf(Perl_debug_log,
11203 "utf16rev_textfilter(%p): %d %d (%d)\n",
11204 utf16rev_textfilter, idx, maxlen, (int) count));
11208 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11209 Copy(SvPVX_const(sv), tmps, old, char);
11210 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11211 SvCUR(sv) - old, &newlen);
11212 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11214 DEBUG_P({ sv_dump(sv); });
11220 Returns a pointer to the next character after the parsed
11221 vstring, as well as updating the passed in sv.
11223 Function must be called like
11226 s = scan_vstring(s,sv);
11228 The sv should already be large enough to store the vstring
11229 passed in, for performance reasons.
11234 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11237 const char *pos = s;
11238 const char *start = s;
11239 if (*pos == 'v') pos++; /* get past 'v' */
11240 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11242 if ( *pos != '.') {
11243 /* this may not be a v-string if followed by => */
11244 const char *next = pos;
11245 while (next < PL_bufend && isSPACE(*next))
11247 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11248 /* return string not v-string */
11249 sv_setpvn(sv,(char *)s,pos-s);
11250 return (char *)pos;
11254 if (!isALPHA(*pos)) {
11255 U8 tmpbuf[UTF8_MAXBYTES+1];
11257 if (*s == 'v') s++; /* get past 'v' */
11259 sv_setpvn(sv, "", 0);
11265 /* this is atoi() that tolerates underscores */
11266 const char *end = pos;
11268 while (--end >= s) {
11273 rev += (*end - '0') * mult;
11275 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11276 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11277 "Integer overflow in decimal number");
11281 if (rev > 0x7FFFFFFF)
11282 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11284 /* Append native character for the rev point */
11285 tmpend = uvchr_to_utf8(tmpbuf, rev);
11286 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11287 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11289 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11295 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11299 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11307 * c-indentation-style: bsd
11308 * c-basic-offset: 4
11309 * indent-tabs-mode: t
11312 * ex: set ts=8 sts=4 sw=4 noet: