3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
287 const char *name = Nullch;
288 enum token_type type = TOKENTYPE_NONE;
289 const struct debug_tokens *p;
290 SV* const report = newSVpvs("<== ");
292 for (p = debug_tokens; p->token; p++) {
293 if (p->token == (int)rv) {
300 Perl_sv_catpv(aTHX_ report, name);
301 else if ((char)rv > ' ' && (char)rv < '~')
302 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
304 sv_catpvs(report, "EOF");
306 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
309 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
312 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
314 case TOKENTYPE_OPNUM:
315 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
316 PL_op_name[yylval.ival]);
319 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
321 case TOKENTYPE_OPVAL:
323 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
324 PL_op_name[yylval.opval->op_type]);
325 if (yylval.opval->op_type == OP_CONST) {
326 Perl_sv_catpvf(aTHX_ report, " %s",
327 SvPEEK(cSVOPx_sv(yylval.opval)));
332 sv_catpvs(report, "(opval=null)");
335 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
341 /* print the buffer with suitable escapes */
344 S_printbuf(pTHX_ const char* fmt, const char* s)
346 SV* const tmp = newSVpvs("");
347 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
356 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
357 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
361 S_ao(pTHX_ int toketype)
363 if (*PL_bufptr == '=') {
365 if (toketype == ANDAND)
366 yylval.ival = OP_ANDASSIGN;
367 else if (toketype == OROR)
368 yylval.ival = OP_ORASSIGN;
369 else if (toketype == DORDOR)
370 yylval.ival = OP_DORASSIGN;
378 * When Perl expects an operator and finds something else, no_op
379 * prints the warning. It always prints "<something> found where
380 * operator expected. It prints "Missing semicolon on previous line?"
381 * if the surprise occurs at the start of the line. "do you need to
382 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
383 * where the compiler doesn't know if foo is a method call or a function.
384 * It prints "Missing operator before end of line" if there's nothing
385 * after the missing operator, or "... before <...>" if there is something
386 * after the missing operator.
390 S_no_op(pTHX_ const char *what, char *s)
392 char * const oldbp = PL_bufptr;
393 const bool is_first = (PL_oldbufptr == PL_linestart);
399 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
400 if (ckWARN_d(WARN_SYNTAX)) {
402 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
403 "\t(Missing semicolon on previous line?)\n");
404 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
406 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
407 if (t < PL_bufptr && isSPACE(*t))
408 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
409 "\t(Do you need to predeclare %.*s?)\n",
410 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
414 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
415 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
423 * Complain about missing quote/regexp/heredoc terminator.
424 * If it's called with (char *)NULL then it cauterizes the line buffer.
425 * If we're in a delimited string and the delimiter is a control
426 * character, it's reformatted into a two-char sequence like ^C.
431 S_missingterm(pTHX_ char *s)
436 char * const nl = strrchr(s,'\n');
442 iscntrl(PL_multi_close)
444 PL_multi_close < 32 || PL_multi_close == 127
448 tmpbuf[1] = (char)toCTRL(PL_multi_close);
453 *tmpbuf = (char)PL_multi_close;
457 q = strchr(s,'"') ? '\'' : '"';
458 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
461 #define FEATURE_IS_ENABLED(name) \
462 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
463 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
465 * S_feature_is_enabled
466 * Check whether the named feature is enabled.
469 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
471 HV * const hinthv = GvHV(PL_hintgv);
472 char he_name[32] = "feature_";
473 (void) strncpy(&he_name[8], name, 24);
475 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
483 Perl_deprecate(pTHX_ const char *s)
485 if (ckWARN(WARN_DEPRECATED))
486 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
490 Perl_deprecate_old(pTHX_ const char *s)
492 /* This function should NOT be called for any new deprecated warnings */
493 /* Use Perl_deprecate instead */
495 /* It is here to maintain backward compatibility with the pre-5.8 */
496 /* warnings category hierarchy. The "deprecated" category used to */
497 /* live under the "syntax" category. It is now a top-level category */
498 /* in its own right. */
500 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
501 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
502 "Use of %s is deprecated", s);
506 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
507 * utf16-to-utf8-reversed.
510 #ifdef PERL_CR_FILTER
514 register const char *s = SvPVX_const(sv);
515 register const char * const e = s + SvCUR(sv);
516 /* outer loop optimized to do nothing if there are no CR-LFs */
518 if (*s++ == '\r' && *s == '\n') {
519 /* hit a CR-LF, need to copy the rest */
520 register char *d = s - 1;
523 if (*s == '\r' && s[1] == '\n')
534 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
536 const I32 count = FILTER_READ(idx+1, sv, maxlen);
537 if (count > 0 && !maxlen)
545 * Initialize variables. Uses the Perl save_stack to save its state (for
546 * recursive calls to the parser).
550 Perl_lex_start(pTHX_ SV *line)
555 SAVEI32(PL_lex_dojoin);
556 SAVEI32(PL_lex_brackets);
557 SAVEI32(PL_lex_casemods);
558 SAVEI32(PL_lex_starts);
559 SAVEI32(PL_lex_state);
560 SAVEVPTR(PL_lex_inpat);
561 SAVEI32(PL_lex_inwhat);
562 if (PL_lex_state == LEX_KNOWNEXT) {
563 I32 toke = PL_nexttoke;
564 while (--toke >= 0) {
565 SAVEI32(PL_nexttype[toke]);
566 SAVEVPTR(PL_nextval[toke]);
568 SAVEI32(PL_nexttoke);
570 SAVECOPLINE(PL_curcop);
573 SAVEPPTR(PL_oldbufptr);
574 SAVEPPTR(PL_oldoldbufptr);
575 SAVEPPTR(PL_last_lop);
576 SAVEPPTR(PL_last_uni);
577 SAVEPPTR(PL_linestart);
578 SAVESPTR(PL_linestr);
579 SAVEGENERICPV(PL_lex_brackstack);
580 SAVEGENERICPV(PL_lex_casestack);
581 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
582 SAVESPTR(PL_lex_stuff);
583 SAVEI32(PL_lex_defer);
584 SAVEI32(PL_sublex_info.sub_inwhat);
585 SAVESPTR(PL_lex_repl);
587 SAVEINT(PL_lex_expect);
589 PL_lex_state = LEX_NORMAL;
593 Newx(PL_lex_brackstack, 120, char);
594 Newx(PL_lex_casestack, 12, char);
596 *PL_lex_casestack = '\0';
599 PL_lex_stuff = Nullsv;
600 PL_lex_repl = Nullsv;
604 PL_sublex_info.sub_inwhat = 0;
606 if (SvREADONLY(PL_linestr))
607 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
608 s = SvPV_const(PL_linestr, len);
609 if (!len || s[len-1] != ';') {
610 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
611 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
612 sv_catpvs(PL_linestr, "\n;");
614 SvTEMP_off(PL_linestr);
615 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
616 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
617 PL_last_lop = PL_last_uni = Nullch;
623 * Finalizer for lexing operations. Must be called when the parser is
624 * done with the lexer.
630 PL_doextract = FALSE;
635 * This subroutine has nothing to do with tilting, whether at windmills
636 * or pinball tables. Its name is short for "increment line". It
637 * increments the current line number in CopLINE(PL_curcop) and checks
638 * to see whether the line starts with a comment of the form
639 * # line 500 "foo.pm"
640 * If so, it sets the current line number and file to the values in the comment.
644 S_incline(pTHX_ char *s)
651 CopLINE_inc(PL_curcop);
654 while (SPACE_OR_TAB(*s)) s++;
655 if (strnEQ(s, "line", 4))
659 if (SPACE_OR_TAB(*s))
663 while (SPACE_OR_TAB(*s)) s++;
669 while (SPACE_OR_TAB(*s))
671 if (*s == '"' && (t = strchr(s+1, '"'))) {
676 for (t = s; !isSPACE(*t); t++) ;
679 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
681 if (*e != '\n' && *e != '\0')
682 return; /* false alarm */
688 const char * const cf = CopFILE(PL_curcop);
689 STRLEN tmplen = cf ? strlen(cf) : 0;
690 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
691 /* must copy *{"::_<(eval N)[oldfilename:L]"}
692 * to *{"::_<newfilename"} */
693 char smallbuf[256], smallbuf2[256];
694 char *tmpbuf, *tmpbuf2;
696 STRLEN tmplen2 = strlen(s);
697 if (tmplen + 3 < sizeof smallbuf)
700 Newx(tmpbuf, tmplen + 3, char);
701 if (tmplen2 + 3 < sizeof smallbuf2)
704 Newx(tmpbuf2, tmplen2 + 3, char);
705 tmpbuf[0] = tmpbuf2[0] = '_';
706 tmpbuf[1] = tmpbuf2[1] = '<';
707 memcpy(tmpbuf + 2, cf, ++tmplen);
708 memcpy(tmpbuf2 + 2, s, ++tmplen2);
710 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
712 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
714 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
715 /* adjust ${"::_<newfilename"} to store the new file name */
716 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
717 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
718 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
720 if (tmpbuf != smallbuf) Safefree(tmpbuf);
721 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
724 CopFILE_free(PL_curcop);
725 CopFILE_set(PL_curcop, s);
728 CopLINE_set(PL_curcop, atoi(n)-1);
733 * Called to gobble the appropriate amount and type of whitespace.
734 * Skips comments as well.
738 S_skipspace(pTHX_ register char *s)
740 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
741 while (s < PL_bufend && SPACE_OR_TAB(*s))
747 SSize_t oldprevlen, oldoldprevlen;
748 SSize_t oldloplen = 0, oldunilen = 0;
749 while (s < PL_bufend && isSPACE(*s)) {
750 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
755 if (s < PL_bufend && *s == '#') {
756 while (s < PL_bufend && *s != '\n')
760 if (PL_in_eval && !PL_rsfp) {
767 /* only continue to recharge the buffer if we're at the end
768 * of the buffer, we're not reading from a source filter, and
769 * we're in normal lexing mode
771 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
772 PL_lex_state == LEX_FORMLINE)
775 /* try to recharge the buffer */
776 if ((s = filter_gets(PL_linestr, PL_rsfp,
777 (prevlen = SvCUR(PL_linestr)))) == Nullch)
779 /* end of file. Add on the -p or -n magic */
782 ";}continue{print or die qq(-p destination: $!\\n);}");
783 PL_minus_n = PL_minus_p = 0;
785 else if (PL_minus_n) {
786 sv_setpvn(PL_linestr, ";}", 2);
790 sv_setpvn(PL_linestr,";", 1);
792 /* reset variables for next time we lex */
793 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
795 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
796 PL_last_lop = PL_last_uni = Nullch;
798 /* Close the filehandle. Could be from -P preprocessor,
799 * STDIN, or a regular file. If we were reading code from
800 * STDIN (because the commandline held no -e or filename)
801 * then we don't close it, we reset it so the code can
802 * read from STDIN too.
805 if (PL_preprocess && !PL_in_eval)
806 (void)PerlProc_pclose(PL_rsfp);
807 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
808 PerlIO_clearerr(PL_rsfp);
810 (void)PerlIO_close(PL_rsfp);
815 /* not at end of file, so we only read another line */
816 /* make corresponding updates to old pointers, for yyerror() */
817 oldprevlen = PL_oldbufptr - PL_bufend;
818 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
820 oldunilen = PL_last_uni - PL_bufend;
822 oldloplen = PL_last_lop - PL_bufend;
823 PL_linestart = PL_bufptr = s + prevlen;
824 PL_bufend = s + SvCUR(PL_linestr);
826 PL_oldbufptr = s + oldprevlen;
827 PL_oldoldbufptr = s + oldoldprevlen;
829 PL_last_uni = s + oldunilen;
831 PL_last_lop = s + oldloplen;
834 /* debugger active and we're not compiling the debugger code,
835 * so store the line into the debugger's array of lines
837 if (PERLDB_LINE && PL_curstash != PL_debstash) {
838 SV * const sv = NEWSV(85,0);
840 sv_upgrade(sv, SVt_PVMG);
841 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
844 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
851 * Check the unary operators to ensure there's no ambiguity in how they're
852 * used. An ambiguous piece of code would be:
854 * This doesn't mean rand() + 5. Because rand() is a unary operator,
855 * the +5 is its argument.
864 if (PL_oldoldbufptr != PL_last_uni)
866 while (isSPACE(*PL_last_uni))
868 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
869 if ((t = strchr(s, '(')) && t < PL_bufptr)
871 if (ckWARN_d(WARN_AMBIGUOUS)){
874 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
875 "Warning: Use of \"%s\" without parentheses is ambiguous",
882 * LOP : macro to build a list operator. Its behaviour has been replaced
883 * with a subroutine, S_lop() for which LOP is just another name.
886 #define LOP(f,x) return lop(f,x,s)
890 * Build a list operator (or something that might be one). The rules:
891 * - if we have a next token, then it's a list operator [why?]
892 * - if the next thing is an opening paren, then it's a function
893 * - else it's a list operator
897 S_lop(pTHX_ I32 f, int x, char *s)
903 PL_last_lop = PL_oldbufptr;
904 PL_last_lop_op = (OPCODE)f;
906 return REPORT(LSTOP);
913 return REPORT(LSTOP);
918 * When the lexer realizes it knows the next token (for instance,
919 * it is reordering tokens for the parser) then it can call S_force_next
920 * to know what token to return the next time the lexer is called. Caller
921 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
922 * handles the token correctly.
926 S_force_next(pTHX_ I32 type)
928 PL_nexttype[PL_nexttoke] = type;
930 if (PL_lex_state != LEX_KNOWNEXT) {
931 PL_lex_defer = PL_lex_state;
932 PL_lex_expect = PL_expect;
933 PL_lex_state = LEX_KNOWNEXT;
938 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
940 SV * const sv = newSVpvn(start,len);
941 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
948 * When the lexer knows the next thing is a word (for instance, it has
949 * just seen -> and it knows that the next char is a word char, then
950 * it calls S_force_word to stick the next word into the PL_next lookahead.
953 * char *start : buffer position (must be within PL_linestr)
954 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
955 * int check_keyword : if true, Perl checks to make sure the word isn't
956 * a keyword (do this if the word is a label, e.g. goto FOO)
957 * int allow_pack : if true, : characters will also be allowed (require,
959 * int allow_initial_tick : used by the "sub" lexer only.
963 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
968 start = skipspace(start);
970 if (isIDFIRST_lazy_if(s,UTF) ||
971 (allow_pack && *s == ':') ||
972 (allow_initial_tick && *s == '\'') )
974 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
975 if (check_keyword && keyword(PL_tokenbuf, len))
977 if (token == METHOD) {
982 PL_expect = XOPERATOR;
985 PL_nextval[PL_nexttoke].opval
986 = (OP*)newSVOP(OP_CONST,0,
987 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
988 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
996 * Called when the lexer wants $foo *foo &foo etc, but the program
997 * text only contains the "foo" portion. The first argument is a pointer
998 * to the "foo", and the second argument is the type symbol to prefix.
999 * Forces the next token to be a "WORD".
1000 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1004 S_force_ident(pTHX_ register const char *s, int kind)
1007 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1008 PL_nextval[PL_nexttoke].opval = o;
1011 o->op_private = OPpCONST_ENTERED;
1012 /* XXX see note in pp_entereval() for why we forgo typo
1013 warnings if the symbol must be introduced in an eval.
1015 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1016 kind == '$' ? SVt_PV :
1017 kind == '@' ? SVt_PVAV :
1018 kind == '%' ? SVt_PVHV :
1026 Perl_str_to_version(pTHX_ SV *sv)
1031 const char *start = SvPV_const(sv,len);
1032 const char * const end = start + len;
1033 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1034 while (start < end) {
1038 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1043 retval += ((NV)n)/nshift;
1052 * Forces the next token to be a version number.
1053 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1054 * and if "guessing" is TRUE, then no new token is created (and the caller
1055 * must use an alternative parsing method).
1059 S_force_version(pTHX_ char *s, int guessing)
1061 OP *version = Nullop;
1070 while (isDIGIT(*d) || *d == '_' || *d == '.')
1072 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1074 s = scan_num(s, &yylval);
1075 version = yylval.opval;
1076 ver = cSVOPx(version)->op_sv;
1077 if (SvPOK(ver) && !SvNIOK(ver)) {
1078 SvUPGRADE(ver, SVt_PVNV);
1079 SvNV_set(ver, str_to_version(ver));
1080 SvNOK_on(ver); /* hint that it is a version */
1087 /* NOTE: The parser sees the package name and the VERSION swapped */
1088 PL_nextval[PL_nexttoke].opval = version;
1096 * Tokenize a quoted string passed in as an SV. It finds the next
1097 * chunk, up to end of string or a backslash. It may make a new
1098 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1103 S_tokeq(pTHX_ SV *sv)
1106 register char *send;
1114 s = SvPV_force(sv, len);
1115 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1118 while (s < send && *s != '\\')
1123 if ( PL_hints & HINT_NEW_STRING ) {
1124 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1130 if (s + 1 < send && (s[1] == '\\'))
1131 s++; /* all that, just for this */
1136 SvCUR_set(sv, d - SvPVX_const(sv));
1138 if ( PL_hints & HINT_NEW_STRING )
1139 return new_constant(NULL, 0, "q", sv, pv, "q");
1144 * Now come three functions related to double-quote context,
1145 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1146 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1147 * interact with PL_lex_state, and create fake ( ... ) argument lists
1148 * to handle functions and concatenation.
1149 * They assume that whoever calls them will be setting up a fake
1150 * join call, because each subthing puts a ',' after it. This lets
1153 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1155 * (I'm not sure whether the spurious commas at the end of lcfirst's
1156 * arguments and join's arguments are created or not).
1161 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1163 * Pattern matching will set PL_lex_op to the pattern-matching op to
1164 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1166 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1168 * Everything else becomes a FUNC.
1170 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1171 * had an OP_CONST or OP_READLINE). This just sets us up for a
1172 * call to S_sublex_push().
1176 S_sublex_start(pTHX)
1178 register const I32 op_type = yylval.ival;
1180 if (op_type == OP_NULL) {
1181 yylval.opval = PL_lex_op;
1185 if (op_type == OP_CONST || op_type == OP_READLINE) {
1186 SV *sv = tokeq(PL_lex_stuff);
1188 if (SvTYPE(sv) == SVt_PVIV) {
1189 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1191 const char *p = SvPV_const(sv, len);
1192 SV * const nsv = newSVpvn(p, len);
1198 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1199 PL_lex_stuff = Nullsv;
1200 /* Allow <FH> // "foo" */
1201 if (op_type == OP_READLINE)
1202 PL_expect = XTERMORDORDOR;
1206 PL_sublex_info.super_state = PL_lex_state;
1207 PL_sublex_info.sub_inwhat = op_type;
1208 PL_sublex_info.sub_op = PL_lex_op;
1209 PL_lex_state = LEX_INTERPPUSH;
1213 yylval.opval = PL_lex_op;
1223 * Create a new scope to save the lexing state. The scope will be
1224 * ended in S_sublex_done. Returns a '(', starting the function arguments
1225 * to the uc, lc, etc. found before.
1226 * Sets PL_lex_state to LEX_INTERPCONCAT.
1235 PL_lex_state = PL_sublex_info.super_state;
1236 SAVEI32(PL_lex_dojoin);
1237 SAVEI32(PL_lex_brackets);
1238 SAVEI32(PL_lex_casemods);
1239 SAVEI32(PL_lex_starts);
1240 SAVEI32(PL_lex_state);
1241 SAVEVPTR(PL_lex_inpat);
1242 SAVEI32(PL_lex_inwhat);
1243 SAVECOPLINE(PL_curcop);
1244 SAVEPPTR(PL_bufptr);
1245 SAVEPPTR(PL_bufend);
1246 SAVEPPTR(PL_oldbufptr);
1247 SAVEPPTR(PL_oldoldbufptr);
1248 SAVEPPTR(PL_last_lop);
1249 SAVEPPTR(PL_last_uni);
1250 SAVEPPTR(PL_linestart);
1251 SAVESPTR(PL_linestr);
1252 SAVEGENERICPV(PL_lex_brackstack);
1253 SAVEGENERICPV(PL_lex_casestack);
1255 PL_linestr = PL_lex_stuff;
1256 PL_lex_stuff = Nullsv;
1258 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1259 = SvPVX(PL_linestr);
1260 PL_bufend += SvCUR(PL_linestr);
1261 PL_last_lop = PL_last_uni = Nullch;
1262 SAVEFREESV(PL_linestr);
1264 PL_lex_dojoin = FALSE;
1265 PL_lex_brackets = 0;
1266 Newx(PL_lex_brackstack, 120, char);
1267 Newx(PL_lex_casestack, 12, char);
1268 PL_lex_casemods = 0;
1269 *PL_lex_casestack = '\0';
1271 PL_lex_state = LEX_INTERPCONCAT;
1272 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1274 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1275 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1276 PL_lex_inpat = PL_sublex_info.sub_op;
1278 PL_lex_inpat = Nullop;
1285 * Restores lexer state after a S_sublex_push.
1292 if (!PL_lex_starts++) {
1293 SV * const sv = newSVpvs("");
1294 if (SvUTF8(PL_linestr))
1296 PL_expect = XOPERATOR;
1297 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1301 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1302 PL_lex_state = LEX_INTERPCASEMOD;
1306 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1307 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1308 PL_linestr = PL_lex_repl;
1310 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1311 PL_bufend += SvCUR(PL_linestr);
1312 PL_last_lop = PL_last_uni = Nullch;
1313 SAVEFREESV(PL_linestr);
1314 PL_lex_dojoin = FALSE;
1315 PL_lex_brackets = 0;
1316 PL_lex_casemods = 0;
1317 *PL_lex_casestack = '\0';
1319 if (SvEVALED(PL_lex_repl)) {
1320 PL_lex_state = LEX_INTERPNORMAL;
1322 /* we don't clear PL_lex_repl here, so that we can check later
1323 whether this is an evalled subst; that means we rely on the
1324 logic to ensure sublex_done() is called again only via the
1325 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1328 PL_lex_state = LEX_INTERPCONCAT;
1329 PL_lex_repl = Nullsv;
1335 PL_bufend = SvPVX(PL_linestr);
1336 PL_bufend += SvCUR(PL_linestr);
1337 PL_expect = XOPERATOR;
1338 PL_sublex_info.sub_inwhat = 0;
1346 Extracts a pattern, double-quoted string, or transliteration. This
1349 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1350 processing a pattern (PL_lex_inpat is true), a transliteration
1351 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1353 Returns a pointer to the character scanned up to. Iff this is
1354 advanced from the start pointer supplied (ie if anything was
1355 successfully parsed), will leave an OP for the substring scanned
1356 in yylval. Caller must intuit reason for not parsing further
1357 by looking at the next characters herself.
1361 double-quoted style: \r and \n
1362 regexp special ones: \D \s
1364 backrefs: \1 (deprecated in substitution replacements)
1365 case and quoting: \U \Q \E
1366 stops on @ and $, but not for $ as tail anchor
1368 In transliterations:
1369 characters are VERY literal, except for - not at the start or end
1370 of the string, which indicates a range. scan_const expands the
1371 range to the full set of intermediate characters.
1373 In double-quoted strings:
1375 double-quoted style: \r and \n
1377 backrefs: \1 (deprecated)
1378 case and quoting: \U \Q \E
1381 scan_const does *not* construct ops to handle interpolated strings.
1382 It stops processing as soon as it finds an embedded $ or @ variable
1383 and leaves it to the caller to work out what's going on.
1385 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1387 $ in pattern could be $foo or could be tail anchor. Assumption:
1388 it's a tail anchor if $ is the last thing in the string, or if it's
1389 followed by one of ")| \n\t"
1391 \1 (backreferences) are turned into $1
1393 The structure of the code is
1394 while (there's a character to process) {
1395 handle transliteration ranges
1396 skip regexp comments
1397 skip # initiated comments in //x patterns
1398 check for embedded @foo
1399 check for embedded scalars
1401 leave intact backslashes from leave (below)
1402 deprecate \1 in strings and sub replacements
1403 handle string-changing backslashes \l \U \Q \E, etc.
1404 switch (what was escaped) {
1405 handle - in a transliteration (becomes a literal -)
1406 handle \132 octal characters
1407 handle 0x15 hex characters
1408 handle \cV (control V)
1409 handle printf backslashes (\f, \r, \n, etc)
1411 } (end if backslash)
1412 } (end while character to read)
1417 S_scan_const(pTHX_ char *start)
1419 register char *send = PL_bufend; /* end of the constant */
1420 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1421 register char *s = start; /* start of the constant */
1422 register char *d = SvPVX(sv); /* destination for copies */
1423 bool dorange = FALSE; /* are we in a translit range? */
1424 bool didrange = FALSE; /* did we just finish a range? */
1425 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1426 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1429 UV literal_endpoint = 0;
1432 const char *leaveit = /* set of acceptably-backslashed characters */
1434 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1437 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1438 /* If we are doing a trans and we know we want UTF8 set expectation */
1439 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1440 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1444 while (s < send || dorange) {
1445 /* get transliterations out of the way (they're most literal) */
1446 if (PL_lex_inwhat == OP_TRANS) {
1447 /* expand a range A-Z to the full set of characters. AIE! */
1449 I32 i; /* current expanded character */
1450 I32 min; /* first character in range */
1451 I32 max; /* last character in range */
1454 char * const c = (char*)utf8_hop((U8*)d, -1);
1458 *c = (char)UTF_TO_NATIVE(0xff);
1459 /* mark the range as done, and continue */
1465 i = d - SvPVX_const(sv); /* remember current offset */
1466 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1467 d = SvPVX(sv) + i; /* refresh d after realloc */
1468 d -= 2; /* eat the first char and the - */
1470 min = (U8)*d; /* first char in range */
1471 max = (U8)d[1]; /* last char in range */
1475 "Invalid range \"%c-%c\" in transliteration operator",
1476 (char)min, (char)max);
1480 if (literal_endpoint == 2 &&
1481 ((isLOWER(min) && isLOWER(max)) ||
1482 (isUPPER(min) && isUPPER(max)))) {
1484 for (i = min; i <= max; i++)
1486 *d++ = NATIVE_TO_NEED(has_utf8,i);
1488 for (i = min; i <= max; i++)
1490 *d++ = NATIVE_TO_NEED(has_utf8,i);
1495 for (i = min; i <= max; i++)
1498 /* mark the range as done, and continue */
1502 literal_endpoint = 0;
1507 /* range begins (ignore - as first or last char) */
1508 else if (*s == '-' && s+1 < send && s != start) {
1510 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1513 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1523 literal_endpoint = 0;
1528 /* if we get here, we're not doing a transliteration */
1530 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1531 except for the last char, which will be done separately. */
1532 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1534 while (s+1 < send && *s != ')')
1535 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1537 else if (s[2] == '{' /* This should match regcomp.c */
1538 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1541 char *regparse = s + (s[2] == '{' ? 3 : 4);
1544 while (count && (c = *regparse)) {
1545 if (c == '\\' && regparse[1])
1553 if (*regparse != ')')
1554 regparse--; /* Leave one char for continuation. */
1555 while (s < regparse)
1556 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1560 /* likewise skip #-initiated comments in //x patterns */
1561 else if (*s == '#' && PL_lex_inpat &&
1562 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1563 while (s+1 < send && *s != '\n')
1564 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1567 /* check for embedded arrays
1568 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1570 else if (*s == '@' && s[1]
1571 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1574 /* check for embedded scalars. only stop if we're sure it's a
1577 else if (*s == '$') {
1578 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1580 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1581 break; /* in regexp, $ might be tail anchor */
1584 /* End of else if chain - OP_TRANS rejoin rest */
1587 if (*s == '\\' && s+1 < send) {
1590 /* some backslashes we leave behind */
1591 if (*leaveit && *s && strchr(leaveit, *s)) {
1592 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1593 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1597 /* deprecate \1 in strings and substitution replacements */
1598 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1599 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1601 if (ckWARN(WARN_SYNTAX))
1602 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1607 /* string-change backslash escapes */
1608 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1613 /* if we get here, it's either a quoted -, or a digit */
1616 /* quoted - in transliterations */
1618 if (PL_lex_inwhat == OP_TRANS) {
1628 Perl_warner(aTHX_ packWARN(WARN_MISC),
1629 "Unrecognized escape \\%c passed through",
1631 /* default action is to copy the quoted character */
1632 goto default_action;
1635 /* \132 indicates an octal constant */
1636 case '0': case '1': case '2': case '3':
1637 case '4': case '5': case '6': case '7':
1641 uv = grok_oct(s, &len, &flags, NULL);
1644 goto NUM_ESCAPE_INSERT;
1646 /* \x24 indicates a hex constant */
1650 char* const e = strchr(s, '}');
1651 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1652 PERL_SCAN_DISALLOW_PREFIX;
1657 yyerror("Missing right brace on \\x{}");
1661 uv = grok_hex(s, &len, &flags, NULL);
1667 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1668 uv = grok_hex(s, &len, &flags, NULL);
1674 /* Insert oct or hex escaped character.
1675 * There will always enough room in sv since such
1676 * escapes will be longer than any UTF-8 sequence
1677 * they can end up as. */
1679 /* We need to map to chars to ASCII before doing the tests
1682 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1683 if (!has_utf8 && uv > 255) {
1684 /* Might need to recode whatever we have
1685 * accumulated so far if it contains any
1688 * (Can't we keep track of that and avoid
1689 * this rescan? --jhi)
1693 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1694 if (!NATIVE_IS_INVARIANT(*c)) {
1699 const STRLEN offset = d - SvPVX_const(sv);
1701 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1705 while (src >= (const U8 *)SvPVX_const(sv)) {
1706 if (!NATIVE_IS_INVARIANT(*src)) {
1707 const U8 ch = NATIVE_TO_ASCII(*src);
1708 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1709 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1719 if (has_utf8 || uv > 255) {
1720 d = (char*)uvchr_to_utf8((U8*)d, uv);
1722 if (PL_lex_inwhat == OP_TRANS &&
1723 PL_sublex_info.sub_op) {
1724 PL_sublex_info.sub_op->op_private |=
1725 (PL_lex_repl ? OPpTRANS_FROM_UTF
1738 /* \N{LATIN SMALL LETTER A} is a named character */
1742 char* e = strchr(s, '}');
1748 yyerror("Missing right brace on \\N{}");
1752 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1754 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1755 PERL_SCAN_DISALLOW_PREFIX;
1758 uv = grok_hex(s, &len, &flags, NULL);
1760 goto NUM_ESCAPE_INSERT;
1762 res = newSVpvn(s + 1, e - s - 1);
1763 res = new_constant( Nullch, 0, "charnames",
1764 res, Nullsv, "\\N{...}" );
1766 sv_utf8_upgrade(res);
1767 str = SvPV_const(res,len);
1768 #ifdef EBCDIC_NEVER_MIND
1769 /* charnames uses pack U and that has been
1770 * recently changed to do the below uni->native
1771 * mapping, so this would be redundant (and wrong,
1772 * the code point would be doubly converted).
1773 * But leave this in just in case the pack U change
1774 * gets revoked, but the semantics is still
1775 * desireable for charnames. --jhi */
1777 UV uv = utf8_to_uvchr((const U8*)str, 0);
1780 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1782 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1783 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1784 str = SvPV_const(res, len);
1788 if (!has_utf8 && SvUTF8(res)) {
1789 const char * const ostart = SvPVX_const(sv);
1790 SvCUR_set(sv, d - ostart);
1793 sv_utf8_upgrade(sv);
1794 /* this just broke our allocation above... */
1795 SvGROW(sv, (STRLEN)(send - start));
1796 d = SvPVX(sv) + SvCUR(sv);
1799 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1800 const char * const odest = SvPVX_const(sv);
1802 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1803 d = SvPVX(sv) + (d - odest);
1805 Copy(str, d, len, char);
1812 yyerror("Missing braces on \\N{}");
1815 /* \c is a control character */
1824 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1827 yyerror("Missing control char name in \\c");
1831 /* printf-style backslashes, formfeeds, newlines, etc */
1833 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1836 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1839 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1842 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1845 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1848 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1851 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1857 } /* end if (backslash) */
1864 /* If we started with encoded form, or already know we want it
1865 and then encode the next character */
1866 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1868 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1869 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1872 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1873 const STRLEN off = d - SvPVX_const(sv);
1874 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1876 d = (char*)uvchr_to_utf8((U8*)d, uv);
1880 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1882 } /* while loop to process each character */
1884 /* terminate the string and set up the sv */
1886 SvCUR_set(sv, d - SvPVX_const(sv));
1887 if (SvCUR(sv) >= SvLEN(sv))
1888 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1891 if (PL_encoding && !has_utf8) {
1892 sv_recode_to_utf8(sv, PL_encoding);
1898 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1899 PL_sublex_info.sub_op->op_private |=
1900 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1904 /* shrink the sv if we allocated more than we used */
1905 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1906 SvPV_shrink_to_cur(sv);
1909 /* return the substring (via yylval) only if we parsed anything */
1910 if (s > PL_bufptr) {
1911 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1912 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1914 ( PL_lex_inwhat == OP_TRANS
1916 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1919 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1926 * Returns TRUE if there's more to the expression (e.g., a subscript),
1929 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1931 * ->[ and ->{ return TRUE
1932 * { and [ outside a pattern are always subscripts, so return TRUE
1933 * if we're outside a pattern and it's not { or [, then return FALSE
1934 * if we're in a pattern and the first char is a {
1935 * {4,5} (any digits around the comma) returns FALSE
1936 * if we're in a pattern and the first char is a [
1938 * [SOMETHING] has a funky algorithm to decide whether it's a
1939 * character class or not. It has to deal with things like
1940 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1941 * anything else returns TRUE
1944 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1947 S_intuit_more(pTHX_ register char *s)
1949 if (PL_lex_brackets)
1951 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1953 if (*s != '{' && *s != '[')
1958 /* In a pattern, so maybe we have {n,m}. */
1975 /* On the other hand, maybe we have a character class */
1978 if (*s == ']' || *s == '^')
1981 /* this is terrifying, and it works */
1982 int weight = 2; /* let's weigh the evidence */
1984 unsigned char un_char = 255, last_un_char;
1985 const char * const send = strchr(s,']');
1986 char tmpbuf[sizeof PL_tokenbuf * 4];
1988 if (!send) /* has to be an expression */
1991 Zero(seen,256,char);
1994 else if (isDIGIT(*s)) {
1996 if (isDIGIT(s[1]) && s[2] == ']')
2002 for (; s < send; s++) {
2003 last_un_char = un_char;
2004 un_char = (unsigned char)*s;
2009 weight -= seen[un_char] * 10;
2010 if (isALNUM_lazy_if(s+1,UTF)) {
2011 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2012 if ((int)strlen(tmpbuf) > 1
2013 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2018 else if (*s == '$' && s[1] &&
2019 strchr("[#!%*<>()-=",s[1])) {
2020 if (/*{*/ strchr("])} =",s[2]))
2029 if (strchr("wds]",s[1]))
2031 else if (seen['\''] || seen['"'])
2033 else if (strchr("rnftbxcav",s[1]))
2035 else if (isDIGIT(s[1])) {
2037 while (s[1] && isDIGIT(s[1]))
2047 if (strchr("aA01! ",last_un_char))
2049 if (strchr("zZ79~",s[1]))
2051 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2052 weight -= 5; /* cope with negative subscript */
2055 if (!isALNUM(last_un_char)
2056 && !(last_un_char == '$' || last_un_char == '@'
2057 || last_un_char == '&')
2058 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2063 if (keyword(tmpbuf, d - tmpbuf))
2066 if (un_char == last_un_char + 1)
2068 weight -= seen[un_char];
2073 if (weight >= 0) /* probably a character class */
2083 * Does all the checking to disambiguate
2085 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2086 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2088 * First argument is the stuff after the first token, e.g. "bar".
2090 * Not a method if bar is a filehandle.
2091 * Not a method if foo is a subroutine prototyped to take a filehandle.
2092 * Not a method if it's really "Foo $bar"
2093 * Method if it's "foo $bar"
2094 * Not a method if it's really "print foo $bar"
2095 * Method if it's really "foo package::" (interpreted as package->foo)
2096 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2097 * Not a method if bar is a filehandle or package, but is quoted with
2102 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2104 char *s = start + (*start == '$');
2105 char tmpbuf[sizeof PL_tokenbuf];
2110 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2114 const char *proto = SvPVX_const(cv);
2125 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2126 /* start is the beginning of the possible filehandle/object,
2127 * and s is the end of it
2128 * tmpbuf is a copy of it
2131 if (*start == '$') {
2132 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2137 return *s == '(' ? FUNCMETH : METHOD;
2139 if (!keyword(tmpbuf, len)) {
2140 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2145 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2146 if (indirgv && GvCVu(indirgv))
2148 /* filehandle or package name makes it a method */
2149 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2151 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2152 return 0; /* no assumptions -- "=>" quotes bearword */
2154 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2155 newSVpvn(tmpbuf,len));
2156 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2160 return *s == '(' ? FUNCMETH : METHOD;
2168 * Return a string of Perl code to load the debugger. If PERL5DB
2169 * is set, it will return the contents of that, otherwise a
2170 * compile-time require of perl5db.pl.
2177 const char * const pdb = PerlEnv_getenv("PERL5DB");
2181 SETERRNO(0,SS_NORMAL);
2182 return "BEGIN { require 'perl5db.pl' }";
2188 /* Encoded script support. filter_add() effectively inserts a
2189 * 'pre-processing' function into the current source input stream.
2190 * Note that the filter function only applies to the current source file
2191 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2193 * The datasv parameter (which may be NULL) can be used to pass
2194 * private data to this instance of the filter. The filter function
2195 * can recover the SV using the FILTER_DATA macro and use it to
2196 * store private buffers and state information.
2198 * The supplied datasv parameter is upgraded to a PVIO type
2199 * and the IoDIRP/IoANY field is used to store the function pointer,
2200 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2201 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2202 * private use must be set using malloc'd pointers.
2206 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2211 if (!PL_rsfp_filters)
2212 PL_rsfp_filters = newAV();
2214 datasv = NEWSV(255,0);
2215 SvUPGRADE(datasv, SVt_PVIO);
2216 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2217 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2218 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2219 IoANY(datasv), SvPV_nolen(datasv)));
2220 av_unshift(PL_rsfp_filters, 1);
2221 av_store(PL_rsfp_filters, 0, datasv) ;
2226 /* Delete most recently added instance of this filter function. */
2228 Perl_filter_del(pTHX_ filter_t funcp)
2233 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2235 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2237 /* if filter is on top of stack (usual case) just pop it off */
2238 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2239 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2240 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2241 IoANY(datasv) = (void *)NULL;
2242 sv_free(av_pop(PL_rsfp_filters));
2246 /* we need to search for the correct entry and clear it */
2247 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2251 /* Invoke the idxth filter function for the current rsfp. */
2252 /* maxlen 0 = read one text line */
2254 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2259 if (!PL_rsfp_filters)
2261 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2262 /* Provide a default input filter to make life easy. */
2263 /* Note that we append to the line. This is handy. */
2264 DEBUG_P(PerlIO_printf(Perl_debug_log,
2265 "filter_read %d: from rsfp\n", idx));
2269 const int old_len = SvCUR(buf_sv);
2271 /* ensure buf_sv is large enough */
2272 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2273 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2274 if (PerlIO_error(PL_rsfp))
2275 return -1; /* error */
2277 return 0 ; /* end of file */
2279 SvCUR_set(buf_sv, old_len + len) ;
2282 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2283 if (PerlIO_error(PL_rsfp))
2284 return -1; /* error */
2286 return 0 ; /* end of file */
2289 return SvCUR(buf_sv);
2291 /* Skip this filter slot if filter has been deleted */
2292 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2293 DEBUG_P(PerlIO_printf(Perl_debug_log,
2294 "filter_read %d: skipped (filter deleted)\n",
2296 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2298 /* Get function pointer hidden within datasv */
2299 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2300 DEBUG_P(PerlIO_printf(Perl_debug_log,
2301 "filter_read %d: via function %p (%s)\n",
2302 idx, datasv, SvPV_nolen_const(datasv)));
2303 /* Call function. The function is expected to */
2304 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2305 /* Return: <0:error, =0:eof, >0:not eof */
2306 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2310 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2312 #ifdef PERL_CR_FILTER
2313 if (!PL_rsfp_filters) {
2314 filter_add(S_cr_textfilter,NULL);
2317 if (PL_rsfp_filters) {
2319 SvCUR_set(sv, 0); /* start with empty line */
2320 if (FILTER_READ(0, sv, 0) > 0)
2321 return ( SvPVX(sv) ) ;
2326 return (sv_gets(sv, fp, append));
2330 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2334 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2338 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2339 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2341 return GvHV(gv); /* Foo:: */
2344 /* use constant CLASS => 'MyClass' */
2345 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2347 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2348 pkgname = SvPV_nolen_const(sv);
2352 return gv_stashpv(pkgname, FALSE);
2356 S_tokenize_use(pTHX_ int is_use, char *s) {
2357 if (PL_expect != XSTATE)
2358 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2359 is_use ? "use" : "no"));
2361 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2362 s = force_version(s, TRUE);
2363 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2364 PL_nextval[PL_nexttoke].opval = Nullop;
2367 else if (*s == 'v') {
2368 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2369 s = force_version(s, FALSE);
2373 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2374 s = force_version(s, FALSE);
2376 yylval.ival = is_use;
2380 static const char* const exp_name[] =
2381 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2382 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2389 Works out what to call the token just pulled out of the input
2390 stream. The yacc parser takes care of taking the ops we return and
2391 stitching them into a tree.
2397 if read an identifier
2398 if we're in a my declaration
2399 croak if they tried to say my($foo::bar)
2400 build the ops for a my() declaration
2401 if it's an access to a my() variable
2402 are we in a sort block?
2403 croak if my($a); $a <=> $b
2404 build ops for access to a my() variable
2405 if in a dq string, and they've said @foo and we can't find @foo
2407 build ops for a bareword
2408 if we already built the token before, use it.
2413 #pragma segment Perl_yylex
2418 register char *s = PL_bufptr;
2424 SV* tmp = newSVpvs("");
2425 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2426 (IV)CopLINE(PL_curcop),
2427 lex_state_names[PL_lex_state],
2428 exp_name[PL_expect],
2429 pv_display(tmp, s, strlen(s), 0, 60));
2432 /* check if there's an identifier for us to look at */
2433 if (PL_pending_ident)
2434 return REPORT(S_pending_ident(aTHX));
2436 /* no identifier pending identification */
2438 switch (PL_lex_state) {
2440 case LEX_NORMAL: /* Some compilers will produce faster */
2441 case LEX_INTERPNORMAL: /* code if we comment these out. */
2445 /* when we've already built the next token, just pull it out of the queue */
2448 yylval = PL_nextval[PL_nexttoke];
2450 PL_lex_state = PL_lex_defer;
2451 PL_expect = PL_lex_expect;
2452 PL_lex_defer = LEX_NORMAL;
2454 return REPORT(PL_nexttype[PL_nexttoke]);
2456 /* interpolated case modifiers like \L \U, including \Q and \E.
2457 when we get here, PL_bufptr is at the \
2459 case LEX_INTERPCASEMOD:
2461 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2462 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2464 /* handle \E or end of string */
2465 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2467 if (PL_lex_casemods) {
2468 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2469 PL_lex_casestack[PL_lex_casemods] = '\0';
2471 if (PL_bufptr != PL_bufend
2472 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2474 PL_lex_state = LEX_INTERPCONCAT;
2478 if (PL_bufptr != PL_bufend)
2480 PL_lex_state = LEX_INTERPCONCAT;
2484 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2485 "### Saw case modifier\n"); });
2487 if (s[1] == '\\' && s[2] == 'E') {
2489 PL_lex_state = LEX_INTERPCONCAT;
2494 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2495 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2496 if ((*s == 'L' || *s == 'U') &&
2497 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2498 PL_lex_casestack[--PL_lex_casemods] = '\0';
2501 if (PL_lex_casemods > 10)
2502 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2503 PL_lex_casestack[PL_lex_casemods++] = *s;
2504 PL_lex_casestack[PL_lex_casemods] = '\0';
2505 PL_lex_state = LEX_INTERPCONCAT;
2506 PL_nextval[PL_nexttoke].ival = 0;
2509 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2511 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2513 PL_nextval[PL_nexttoke].ival = OP_LC;
2515 PL_nextval[PL_nexttoke].ival = OP_UC;
2517 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2519 Perl_croak(aTHX_ "panic: yylex");
2523 if (PL_lex_starts) {
2526 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2527 if (PL_lex_casemods == 1 && PL_lex_inpat)
2536 case LEX_INTERPPUSH:
2537 return REPORT(sublex_push());
2539 case LEX_INTERPSTART:
2540 if (PL_bufptr == PL_bufend)
2541 return REPORT(sublex_done());
2542 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2543 "### Interpolated variable\n"); });
2545 PL_lex_dojoin = (*PL_bufptr == '@');
2546 PL_lex_state = LEX_INTERPNORMAL;
2547 if (PL_lex_dojoin) {
2548 PL_nextval[PL_nexttoke].ival = 0;
2550 force_ident("\"", '$');
2551 PL_nextval[PL_nexttoke].ival = 0;
2553 PL_nextval[PL_nexttoke].ival = 0;
2555 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2558 if (PL_lex_starts++) {
2560 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2561 if (!PL_lex_casemods && PL_lex_inpat)
2568 case LEX_INTERPENDMAYBE:
2569 if (intuit_more(PL_bufptr)) {
2570 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2576 if (PL_lex_dojoin) {
2577 PL_lex_dojoin = FALSE;
2578 PL_lex_state = LEX_INTERPCONCAT;
2581 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2582 && SvEVALED(PL_lex_repl))
2584 if (PL_bufptr != PL_bufend)
2585 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2586 PL_lex_repl = Nullsv;
2589 case LEX_INTERPCONCAT:
2591 if (PL_lex_brackets)
2592 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2594 if (PL_bufptr == PL_bufend)
2595 return REPORT(sublex_done());
2597 if (SvIVX(PL_linestr) == '\'') {
2598 SV *sv = newSVsv(PL_linestr);
2601 else if ( PL_hints & HINT_NEW_RE )
2602 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2603 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2607 s = scan_const(PL_bufptr);
2609 PL_lex_state = LEX_INTERPCASEMOD;
2611 PL_lex_state = LEX_INTERPSTART;
2614 if (s != PL_bufptr) {
2615 PL_nextval[PL_nexttoke] = yylval;
2618 if (PL_lex_starts++) {
2619 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2620 if (!PL_lex_casemods && PL_lex_inpat)
2633 PL_lex_state = LEX_NORMAL;
2634 s = scan_formline(PL_bufptr);
2635 if (!PL_lex_formbrack)
2641 PL_oldoldbufptr = PL_oldbufptr;
2647 if (isIDFIRST_lazy_if(s,UTF))
2649 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2652 goto fake_eof; /* emulate EOF on ^D or ^Z */
2657 if (PL_lex_brackets) {
2658 yyerror(PL_lex_formbrack
2659 ? "Format not terminated"
2660 : "Missing right curly or square bracket");
2662 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2663 "### Tokener got EOF\n");
2667 if (s++ < PL_bufend)
2668 goto retry; /* ignore stray nulls */
2671 if (!PL_in_eval && !PL_preambled) {
2672 PL_preambled = TRUE;
2673 sv_setpv(PL_linestr,incl_perldb());
2674 if (SvCUR(PL_linestr))
2675 sv_catpvs(PL_linestr,";");
2677 while(AvFILLp(PL_preambleav) >= 0) {
2678 SV *tmpsv = av_shift(PL_preambleav);
2679 sv_catsv(PL_linestr, tmpsv);
2680 sv_catpvs(PL_linestr, ";");
2683 sv_free((SV*)PL_preambleav);
2684 PL_preambleav = NULL;
2686 if (PL_minus_n || PL_minus_p) {
2687 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2689 sv_catpvs(PL_linestr,"chomp;");
2692 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2693 || *PL_splitstr == '"')
2694 && strchr(PL_splitstr + 1, *PL_splitstr))
2695 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2697 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2698 bytes can be used as quoting characters. :-) */
2699 const char *splits = PL_splitstr;
2700 sv_catpvs(PL_linestr, "our @F=split(q\0");
2703 if (*splits == '\\')
2704 sv_catpvn(PL_linestr, splits, 1);
2705 sv_catpvn(PL_linestr, splits, 1);
2706 } while (*splits++);
2707 /* This loop will embed the trailing NUL of
2708 PL_linestr as the last thing it does before
2710 sv_catpvs(PL_linestr, ");");
2714 sv_catpvs(PL_linestr,"our @F=split(' ');");
2718 sv_catpvs(PL_linestr,"use feature ':5.10';");
2719 sv_catpvs(PL_linestr, "\n");
2720 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2721 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2722 PL_last_lop = PL_last_uni = Nullch;
2723 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2724 SV * const sv = NEWSV(85,0);
2726 sv_upgrade(sv, SVt_PVMG);
2727 sv_setsv(sv,PL_linestr);
2730 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2735 bof = PL_rsfp ? TRUE : FALSE;
2736 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2739 if (PL_preprocess && !PL_in_eval)
2740 (void)PerlProc_pclose(PL_rsfp);
2741 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2742 PerlIO_clearerr(PL_rsfp);
2744 (void)PerlIO_close(PL_rsfp);
2746 PL_doextract = FALSE;
2748 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2749 sv_setpv(PL_linestr,PL_minus_p
2750 ? ";}continue{print;}" : ";}");
2751 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2752 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2753 PL_last_lop = PL_last_uni = Nullch;
2754 PL_minus_n = PL_minus_p = 0;
2757 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2758 PL_last_lop = PL_last_uni = Nullch;
2759 sv_setpvn(PL_linestr,"",0);
2760 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2762 /* If it looks like the start of a BOM or raw UTF-16,
2763 * check if it in fact is. */
2769 #ifdef PERLIO_IS_STDIO
2770 # ifdef __GNU_LIBRARY__
2771 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2772 # define FTELL_FOR_PIPE_IS_BROKEN
2776 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2777 # define FTELL_FOR_PIPE_IS_BROKEN
2782 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2783 /* This loses the possibility to detect the bof
2784 * situation on perl -P when the libc5 is being used.
2785 * Workaround? Maybe attach some extra state to PL_rsfp?
2788 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2790 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2793 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2794 s = swallow_bom((U8*)s);
2798 /* Incest with pod. */
2799 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2800 sv_setpvn(PL_linestr, "", 0);
2801 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2802 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2803 PL_last_lop = PL_last_uni = Nullch;
2804 PL_doextract = FALSE;
2808 } while (PL_doextract);
2809 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2810 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2811 SV * const sv = NEWSV(85,0);
2813 sv_upgrade(sv, SVt_PVMG);
2814 sv_setsv(sv,PL_linestr);
2817 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2820 PL_last_lop = PL_last_uni = Nullch;
2821 if (CopLINE(PL_curcop) == 1) {
2822 while (s < PL_bufend && isSPACE(*s))
2824 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2828 if (*s == '#' && *(s+1) == '!')
2830 #ifdef ALTERNATE_SHEBANG
2832 static char const as[] = ALTERNATE_SHEBANG;
2833 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2834 d = s + (sizeof(as) - 1);
2836 #endif /* ALTERNATE_SHEBANG */
2845 while (*d && !isSPACE(*d))
2849 #ifdef ARG_ZERO_IS_SCRIPT
2850 if (ipathend > ipath) {
2852 * HP-UX (at least) sets argv[0] to the script name,
2853 * which makes $^X incorrect. And Digital UNIX and Linux,
2854 * at least, set argv[0] to the basename of the Perl
2855 * interpreter. So, having found "#!", we'll set it right.
2858 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2859 assert(SvPOK(x) || SvGMAGICAL(x));
2860 if (sv_eq(x, CopFILESV(PL_curcop))) {
2861 sv_setpvn(x, ipath, ipathend - ipath);
2867 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2868 const char * const lstart = SvPV_const(x,llen);
2870 bstart += blen - llen;
2871 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2872 sv_setpvn(x, ipath, ipathend - ipath);
2877 TAINT_NOT; /* $^X is always tainted, but that's OK */
2879 #endif /* ARG_ZERO_IS_SCRIPT */
2884 d = instr(s,"perl -");
2886 d = instr(s,"perl");
2888 /* avoid getting into infinite loops when shebang
2889 * line contains "Perl" rather than "perl" */
2891 for (d = ipathend-4; d >= ipath; --d) {
2892 if ((*d == 'p' || *d == 'P')
2893 && !ibcmp(d, "perl", 4))
2903 #ifdef ALTERNATE_SHEBANG
2905 * If the ALTERNATE_SHEBANG on this system starts with a
2906 * character that can be part of a Perl expression, then if
2907 * we see it but not "perl", we're probably looking at the
2908 * start of Perl code, not a request to hand off to some
2909 * other interpreter. Similarly, if "perl" is there, but
2910 * not in the first 'word' of the line, we assume the line
2911 * contains the start of the Perl program.
2913 if (d && *s != '#') {
2914 const char *c = ipath;
2915 while (*c && !strchr("; \t\r\n\f\v#", *c))
2918 d = Nullch; /* "perl" not in first word; ignore */
2920 *s = '#'; /* Don't try to parse shebang line */
2922 #endif /* ALTERNATE_SHEBANG */
2923 #ifndef MACOS_TRADITIONAL
2928 !instr(s,"indir") &&
2929 instr(PL_origargv[0],"perl"))
2936 while (s < PL_bufend && isSPACE(*s))
2938 if (s < PL_bufend) {
2939 Newxz(newargv,PL_origargc+3,char*);
2941 while (s < PL_bufend && !isSPACE(*s))
2944 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2947 newargv = PL_origargv;
2950 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2952 Perl_croak(aTHX_ "Can't exec %s", ipath);
2956 const U32 oldpdb = PL_perldb;
2957 const bool oldn = PL_minus_n;
2958 const bool oldp = PL_minus_p;
2960 while (*d && !isSPACE(*d)) d++;
2961 while (SPACE_OR_TAB(*d)) d++;
2964 const bool switches_done = PL_doswitches;
2966 if (*d == 'M' || *d == 'm' || *d == 'C') {
2967 const char * const m = d;
2968 while (*d && !isSPACE(*d)) d++;
2969 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2972 d = moreswitches(d);
2974 if (PL_doswitches && !switches_done) {
2975 int argc = PL_origargc;
2976 char **argv = PL_origargv;
2979 } while (argc && argv[0][0] == '-' && argv[0][1]);
2980 init_argv_symbols(argc,argv);
2982 if ((PERLDB_LINE && !oldpdb) ||
2983 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2984 /* if we have already added "LINE: while (<>) {",
2985 we must not do it again */
2987 sv_setpvn(PL_linestr, "", 0);
2988 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2989 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2990 PL_last_lop = PL_last_uni = Nullch;
2991 PL_preambled = FALSE;
2993 (void)gv_fetchfile(PL_origfilename);
3000 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3002 PL_lex_state = LEX_FORMLINE;
3007 #ifdef PERL_STRICT_CR
3008 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3010 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3012 case ' ': case '\t': case '\f': case 013:
3013 #ifdef MACOS_TRADITIONAL
3020 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3021 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3022 /* handle eval qq[#line 1 "foo"\n ...] */
3023 CopLINE_dec(PL_curcop);
3027 while (s < d && *s != '\n')
3031 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3032 Perl_croak(aTHX_ "panic: input overflow");
3034 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3036 PL_lex_state = LEX_FORMLINE;
3046 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3054 while (s < PL_bufend && SPACE_OR_TAB(*s))
3057 if (strnEQ(s,"=>",2)) {
3058 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3059 DEBUG_T( { S_printbuf(aTHX_
3060 "### Saw unary minus before =>, forcing word %s\n", s);
3062 OPERATOR('-'); /* unary minus */
3064 PL_last_uni = PL_oldbufptr;
3066 case 'r': ftst = OP_FTEREAD; break;
3067 case 'w': ftst = OP_FTEWRITE; break;
3068 case 'x': ftst = OP_FTEEXEC; break;
3069 case 'o': ftst = OP_FTEOWNED; break;
3070 case 'R': ftst = OP_FTRREAD; break;
3071 case 'W': ftst = OP_FTRWRITE; break;
3072 case 'X': ftst = OP_FTREXEC; break;
3073 case 'O': ftst = OP_FTROWNED; break;
3074 case 'e': ftst = OP_FTIS; break;
3075 case 'z': ftst = OP_FTZERO; break;
3076 case 's': ftst = OP_FTSIZE; break;
3077 case 'f': ftst = OP_FTFILE; break;
3078 case 'd': ftst = OP_FTDIR; break;
3079 case 'l': ftst = OP_FTLINK; break;
3080 case 'p': ftst = OP_FTPIPE; break;
3081 case 'S': ftst = OP_FTSOCK; break;
3082 case 'u': ftst = OP_FTSUID; break;
3083 case 'g': ftst = OP_FTSGID; break;
3084 case 'k': ftst = OP_FTSVTX; break;
3085 case 'b': ftst = OP_FTBLK; break;
3086 case 'c': ftst = OP_FTCHR; break;
3087 case 't': ftst = OP_FTTTY; break;
3088 case 'T': ftst = OP_FTTEXT; break;
3089 case 'B': ftst = OP_FTBINARY; break;
3090 case 'M': case 'A': case 'C':
3091 gv_fetchpv("\024",GV_ADD, SVt_PV);
3093 case 'M': ftst = OP_FTMTIME; break;
3094 case 'A': ftst = OP_FTATIME; break;
3095 case 'C': ftst = OP_FTCTIME; break;
3103 PL_last_lop_op = (OPCODE)ftst;
3104 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3105 "### Saw file test %c\n", (int)tmp);
3110 /* Assume it was a minus followed by a one-letter named
3111 * subroutine call (or a -bareword), then. */
3112 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3113 "### '-%c' looked like a file test but was not\n",
3120 const char tmp = *s++;
3123 if (PL_expect == XOPERATOR)
3128 else if (*s == '>') {
3131 if (isIDFIRST_lazy_if(s,UTF)) {
3132 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3140 if (PL_expect == XOPERATOR)
3143 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3145 OPERATOR('-'); /* unary minus */
3151 const char tmp = *s++;
3154 if (PL_expect == XOPERATOR)
3159 if (PL_expect == XOPERATOR)
3162 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3169 if (PL_expect != XOPERATOR) {
3170 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3171 PL_expect = XOPERATOR;
3172 force_ident(PL_tokenbuf, '*');
3185 if (PL_expect == XOPERATOR) {
3189 PL_tokenbuf[0] = '%';
3190 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3191 if (!PL_tokenbuf[1]) {
3194 PL_pending_ident = '%';
3205 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3206 && FEATURE_IS_ENABLED("~~"))
3213 const char tmp = *s++;
3219 goto just_a_word_zero_gv;
3222 switch (PL_expect) {
3225 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3227 PL_bufptr = s; /* update in case we back off */
3233 PL_expect = XTERMBLOCK;
3237 while (isIDFIRST_lazy_if(s,UTF)) {
3239 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3240 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3241 if (tmp < 0) tmp = -tmp;
3257 d = scan_str(d,TRUE,TRUE);
3259 /* MUST advance bufptr here to avoid bogus
3260 "at end of line" context messages from yyerror().
3262 PL_bufptr = s + len;
3263 yyerror("Unterminated attribute parameter in attribute list");
3266 return REPORT(0); /* EOF indicator */
3270 SV *sv = newSVpvn(s, len);
3271 sv_catsv(sv, PL_lex_stuff);
3272 attrs = append_elem(OP_LIST, attrs,
3273 newSVOP(OP_CONST, 0, sv));
3274 SvREFCNT_dec(PL_lex_stuff);
3275 PL_lex_stuff = Nullsv;
3278 if (len == 6 && strnEQ(s, "unique", len)) {
3279 if (PL_in_my == KEY_our)
3281 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3283 ; /* skip to avoid loading attributes.pm */
3286 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3289 /* NOTE: any CV attrs applied here need to be part of
3290 the CVf_BUILTIN_ATTRS define in cv.h! */
3291 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3292 CvLVALUE_on(PL_compcv);
3293 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3294 CvLOCKED_on(PL_compcv);
3295 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3296 CvMETHOD_on(PL_compcv);
3297 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3298 CvASSERTION_on(PL_compcv);
3299 /* After we've set the flags, it could be argued that
3300 we don't need to do the attributes.pm-based setting
3301 process, and shouldn't bother appending recognized
3302 flags. To experiment with that, uncomment the
3303 following "else". (Note that's already been
3304 uncommented. That keeps the above-applied built-in
3305 attributes from being intercepted (and possibly
3306 rejected) by a package's attribute routines, but is
3307 justified by the performance win for the common case
3308 of applying only built-in attributes.) */
3310 attrs = append_elem(OP_LIST, attrs,
3311 newSVOP(OP_CONST, 0,
3315 if (*s == ':' && s[1] != ':')
3318 break; /* require real whitespace or :'s */
3322 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3323 if (*s != ';' && *s != '}' && *s != tmp
3324 && (tmp != '=' || *s != ')')) {
3325 const char q = ((*s == '\'') ? '"' : '\'');
3326 /* If here for an expression, and parsed no attrs, back
3328 if (tmp == '=' && !attrs) {
3332 /* MUST advance bufptr here to avoid bogus "at end of line"
3333 context messages from yyerror().
3337 ? Perl_form(aTHX_ "Invalid separator character "
3338 "%c%c%c in attribute list", q, *s, q)
3339 : "Unterminated attribute list" );
3347 PL_nextval[PL_nexttoke].opval = attrs;
3355 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3356 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3364 const char tmp = *s++;
3369 const char tmp = *s++;
3377 if (PL_lex_brackets <= 0)
3378 yyerror("Unmatched right square bracket");
3381 if (PL_lex_state == LEX_INTERPNORMAL) {
3382 if (PL_lex_brackets == 0) {
3383 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3384 PL_lex_state = LEX_INTERPEND;
3391 if (PL_lex_brackets > 100) {
3392 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3394 switch (PL_expect) {
3396 if (PL_lex_formbrack) {
3400 if (PL_oldoldbufptr == PL_last_lop)
3401 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3403 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3404 OPERATOR(HASHBRACK);
3406 while (s < PL_bufend && SPACE_OR_TAB(*s))
3409 PL_tokenbuf[0] = '\0';
3410 if (d < PL_bufend && *d == '-') {
3411 PL_tokenbuf[0] = '-';
3413 while (d < PL_bufend && SPACE_OR_TAB(*d))
3416 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3417 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3419 while (d < PL_bufend && SPACE_OR_TAB(*d))
3422 const char minus = (PL_tokenbuf[0] == '-');
3423 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3431 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3436 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3441 if (PL_oldoldbufptr == PL_last_lop)
3442 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3444 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3447 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3449 /* This hack is to get the ${} in the message. */
3451 yyerror("syntax error");
3454 OPERATOR(HASHBRACK);
3456 /* This hack serves to disambiguate a pair of curlies
3457 * as being a block or an anon hash. Normally, expectation
3458 * determines that, but in cases where we're not in a
3459 * position to expect anything in particular (like inside
3460 * eval"") we have to resolve the ambiguity. This code
3461 * covers the case where the first term in the curlies is a
3462 * quoted string. Most other cases need to be explicitly
3463 * disambiguated by prepending a "+" before the opening
3464 * curly in order to force resolution as an anon hash.
3466 * XXX should probably propagate the outer expectation
3467 * into eval"" to rely less on this hack, but that could
3468 * potentially break current behavior of eval"".
3472 if (*s == '\'' || *s == '"' || *s == '`') {
3473 /* common case: get past first string, handling escapes */
3474 for (t++; t < PL_bufend && *t != *s;)
3475 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3479 else if (*s == 'q') {
3482 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3485 /* skip q//-like construct */
3487 char open, close, term;
3490 while (t < PL_bufend && isSPACE(*t))
3492 /* check for q => */
3493 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3494 OPERATOR(HASHBRACK);
3498 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3502 for (t++; t < PL_bufend; t++) {
3503 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3505 else if (*t == open)
3509 for (t++; t < PL_bufend; t++) {
3510 if (*t == '\\' && t+1 < PL_bufend)
3512 else if (*t == close && --brackets <= 0)
3514 else if (*t == open)
3521 /* skip plain q word */
3522 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3525 else if (isALNUM_lazy_if(t,UTF)) {
3527 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3530 while (t < PL_bufend && isSPACE(*t))
3532 /* if comma follows first term, call it an anon hash */
3533 /* XXX it could be a comma expression with loop modifiers */
3534 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3535 || (*t == '=' && t[1] == '>')))
3536 OPERATOR(HASHBRACK);
3537 if (PL_expect == XREF)
3540 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3546 yylval.ival = CopLINE(PL_curcop);
3547 if (isSPACE(*s) || *s == '#')
3548 PL_copline = NOLINE; /* invalidate current command line number */
3553 if (PL_lex_brackets <= 0)
3554 yyerror("Unmatched right curly bracket");
3556 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3557 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3558 PL_lex_formbrack = 0;
3559 if (PL_lex_state == LEX_INTERPNORMAL) {
3560 if (PL_lex_brackets == 0) {
3561 if (PL_expect & XFAKEBRACK) {
3562 PL_expect &= XENUMMASK;
3563 PL_lex_state = LEX_INTERPEND;
3565 return yylex(); /* ignore fake brackets */
3567 if (*s == '-' && s[1] == '>')
3568 PL_lex_state = LEX_INTERPENDMAYBE;
3569 else if (*s != '[' && *s != '{')
3570 PL_lex_state = LEX_INTERPEND;
3573 if (PL_expect & XFAKEBRACK) {
3574 PL_expect &= XENUMMASK;
3576 return yylex(); /* ignore fake brackets */
3585 if (PL_expect == XOPERATOR) {
3586 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3587 && isIDFIRST_lazy_if(s,UTF))
3589 CopLINE_dec(PL_curcop);
3590 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3591 CopLINE_inc(PL_curcop);
3596 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3598 PL_expect = XOPERATOR;
3599 force_ident(PL_tokenbuf, '&');
3603 yylval.ival = (OPpENTERSUB_AMPER<<8);
3615 const char tmp = *s++;
3622 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3623 && strchr("+-*/%.^&|<",tmp))
3624 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3625 "Reversed %c= operator",(int)tmp);
3627 if (PL_expect == XSTATE && isALPHA(tmp) &&
3628 (s == PL_linestart+1 || s[-2] == '\n') )
3630 if (PL_in_eval && !PL_rsfp) {
3635 if (strnEQ(s,"=cut",4)) {
3649 PL_doextract = TRUE;
3653 if (PL_lex_brackets < PL_lex_formbrack) {
3655 #ifdef PERL_STRICT_CR
3656 for (t = s; SPACE_OR_TAB(*t); t++) ;
3658 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3660 if (*t == '\n' || *t == '#') {
3671 const char tmp = *s++;
3673 /* was this !=~ where !~ was meant?
3674 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3676 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3677 const char *t = s+1;
3679 while (t < PL_bufend && isSPACE(*t))
3682 if (*t == '/' || *t == '?' ||
3683 ((*t == 'm' || *t == 's' || *t == 'y')
3684 && !isALNUM(t[1])) ||
3685 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3686 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3687 "!=~ should be !~");
3697 if (PL_expect != XOPERATOR) {
3698 if (s[1] != '<' && !strchr(s,'>'))
3701 s = scan_heredoc(s);
3703 s = scan_inputsymbol(s);
3704 TERM(sublex_start());
3710 SHop(OP_LEFT_SHIFT);
3724 const char tmp = *s++;
3726 SHop(OP_RIGHT_SHIFT);
3736 if (PL_expect == XOPERATOR) {
3737 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3739 deprecate_old(commaless_variable_list);
3740 return REPORT(','); /* grandfather non-comma-format format */
3744 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3745 PL_tokenbuf[0] = '@';
3746 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3747 sizeof PL_tokenbuf - 1, FALSE);
3748 if (PL_expect == XOPERATOR)
3749 no_op("Array length", s);
3750 if (!PL_tokenbuf[1])
3752 PL_expect = XOPERATOR;
3753 PL_pending_ident = '#';
3757 PL_tokenbuf[0] = '$';
3758 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3759 sizeof PL_tokenbuf - 1, FALSE);
3760 if (PL_expect == XOPERATOR)
3762 if (!PL_tokenbuf[1]) {
3764 yyerror("Final $ should be \\$ or $name");
3768 /* This kludge not intended to be bulletproof. */
3769 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3770 yylval.opval = newSVOP(OP_CONST, 0,
3771 newSViv(PL_compiling.cop_arybase));
3772 yylval.opval->op_private = OPpCONST_ARYBASE;
3778 const char tmp = *s;
3779 if (PL_lex_state == LEX_NORMAL)
3782 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3783 && intuit_more(s)) {
3785 PL_tokenbuf[0] = '@';
3786 if (ckWARN(WARN_SYNTAX)) {
3789 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3792 PL_bufptr = skipspace(PL_bufptr);
3793 while (t < PL_bufend && *t != ']')
3795 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3796 "Multidimensional syntax %.*s not supported",
3797 (int)((t - PL_bufptr) + 1), PL_bufptr);
3801 else if (*s == '{') {
3803 PL_tokenbuf[0] = '%';
3804 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3805 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3807 char tmpbuf[sizeof PL_tokenbuf];
3808 for (t++; isSPACE(*t); t++) ;
3809 if (isIDFIRST_lazy_if(t,UTF)) {
3811 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3813 for (; isSPACE(*t); t++) ;
3814 if (*t == ';' && get_cv(tmpbuf, FALSE))
3815 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3816 "You need to quote \"%s\"",
3823 PL_expect = XOPERATOR;
3824 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3825 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3826 if (!islop || PL_last_lop_op == OP_GREPSTART)
3827 PL_expect = XOPERATOR;
3828 else if (strchr("$@\"'`q", *s))
3829 PL_expect = XTERM; /* e.g. print $fh "foo" */
3830 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3831 PL_expect = XTERM; /* e.g. print $fh &sub */
3832 else if (isIDFIRST_lazy_if(s,UTF)) {
3833 char tmpbuf[sizeof PL_tokenbuf];
3835 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3836 if ((t2 = keyword(tmpbuf, len))) {
3837 /* binary operators exclude handle interpretations */
3849 PL_expect = XTERM; /* e.g. print $fh length() */
3854 PL_expect = XTERM; /* e.g. print $fh subr() */
3857 else if (isDIGIT(*s))
3858 PL_expect = XTERM; /* e.g. print $fh 3 */
3859 else if (*s == '.' && isDIGIT(s[1]))
3860 PL_expect = XTERM; /* e.g. print $fh .3 */
3861 else if ((*s == '?' || *s == '-' || *s == '+')
3862 && !isSPACE(s[1]) && s[1] != '=')
3863 PL_expect = XTERM; /* e.g. print $fh -1 */
3864 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3866 PL_expect = XTERM; /* e.g. print $fh /.../
3867 XXX except DORDOR operator
3869 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3871 PL_expect = XTERM; /* print $fh <<"EOF" */
3874 PL_pending_ident = '$';
3878 if (PL_expect == XOPERATOR)
3880 PL_tokenbuf[0] = '@';
3881 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3882 if (!PL_tokenbuf[1]) {
3885 if (PL_lex_state == LEX_NORMAL)
3887 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3889 PL_tokenbuf[0] = '%';
3891 /* Warn about @ where they meant $. */
3892 if (*s == '[' || *s == '{') {
3893 if (ckWARN(WARN_SYNTAX)) {
3894 const char *t = s + 1;
3895 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3897 if (*t == '}' || *t == ']') {
3899 PL_bufptr = skipspace(PL_bufptr);
3900 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3901 "Scalar value %.*s better written as $%.*s",
3902 (int)(t-PL_bufptr), PL_bufptr,
3903 (int)(t-PL_bufptr-1), PL_bufptr+1);
3908 PL_pending_ident = '@';
3911 case '/': /* may be division, defined-or, or pattern */
3912 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3916 case '?': /* may either be conditional or pattern */
3917 if(PL_expect == XOPERATOR) {
3925 /* A // operator. */
3935 /* Disable warning on "study /blah/" */
3936 if (PL_oldoldbufptr == PL_last_uni
3937 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3938 || memNE(PL_last_uni, "study", 5)
3939 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3942 s = scan_pat(s,OP_MATCH);
3943 TERM(sublex_start());
3947 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3948 #ifdef PERL_STRICT_CR
3951 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3953 && (s == PL_linestart || s[-1] == '\n') )
3955 PL_lex_formbrack = 0;
3959 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3965 yylval.ival = OPf_SPECIAL;
3971 if (PL_expect != XOPERATOR)
3976 case '0': case '1': case '2': case '3': case '4':
3977 case '5': case '6': case '7': case '8': case '9':
3978 s = scan_num(s, &yylval);
3979 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
3980 if (PL_expect == XOPERATOR)
3985 s = scan_str(s,FALSE,FALSE);
3986 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
3987 if (PL_expect == XOPERATOR) {
3988 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3990 deprecate_old(commaless_variable_list);
3991 return REPORT(','); /* grandfather non-comma-format format */
3997 missingterm((char*)0);
3998 yylval.ival = OP_CONST;
3999 TERM(sublex_start());
4002 s = scan_str(s,FALSE,FALSE);
4003 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4004 if (PL_expect == XOPERATOR) {
4005 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4007 deprecate_old(commaless_variable_list);
4008 return REPORT(','); /* grandfather non-comma-format format */
4014 missingterm((char*)0);
4015 yylval.ival = OP_CONST;
4016 /* FIXME. I think that this can be const if char *d is replaced by
4017 more localised variables. */
4018 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4019 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4020 yylval.ival = OP_STRINGIFY;
4024 TERM(sublex_start());
4027 s = scan_str(s,FALSE,FALSE);
4028 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4029 if (PL_expect == XOPERATOR)
4030 no_op("Backticks",s);
4032 missingterm((char*)0);
4033 yylval.ival = OP_BACKTICK;
4035 TERM(sublex_start());
4039 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4040 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4042 if (PL_expect == XOPERATOR)
4043 no_op("Backslash",s);
4047 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4048 char *start = s + 2;
4049 while (isDIGIT(*start) || *start == '_')
4051 if (*start == '.' && isDIGIT(start[1])) {
4052 s = scan_num(s, &yylval);
4055 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4056 else if (!isALPHA(*start) && (PL_expect == XTERM
4057 || PL_expect == XREF || PL_expect == XSTATE
4058 || PL_expect == XTERMORDORDOR)) {
4059 const char c = *start;
4062 gv = gv_fetchpv(s, 0, SVt_PVCV);
4065 s = scan_num(s, &yylval);
4072 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4108 I32 orig_keyword = 0;
4113 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4115 /* Some keywords can be followed by any delimiter, including ':' */
4116 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4117 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4118 (PL_tokenbuf[0] == 'q' &&
4119 strchr("qwxr", PL_tokenbuf[1])))));
4121 /* x::* is just a word, unless x is "CORE" */
4122 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4126 while (d < PL_bufend && isSPACE(*d))
4127 d++; /* no comments skipped here, or s### is misparsed */
4129 /* Is this a label? */
4130 if (!tmp && PL_expect == XSTATE
4131 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4133 yylval.pval = savepv(PL_tokenbuf);
4138 /* Check for keywords */
4139 tmp = keyword(PL_tokenbuf, len);
4141 /* Is this a word before a => operator? */
4142 if (*d == '=' && d[1] == '>') {
4145 = (OP*)newSVOP(OP_CONST, 0,
4146 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4147 yylval.opval->op_private = OPpCONST_BARE;
4151 if (tmp < 0) { /* second-class keyword? */
4152 GV *ogv = NULL; /* override (winner) */
4153 GV *hgv = NULL; /* hidden (loser) */
4154 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4156 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4159 if (GvIMPORTED_CV(gv))
4161 else if (! CvMETHOD(cv))
4165 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4166 (gv = *gvp) != (GV*)&PL_sv_undef &&
4167 GvCVu(gv) && GvIMPORTED_CV(gv))
4174 tmp = 0; /* overridden by import or by GLOBAL */
4177 && -tmp==KEY_lock /* XXX generalizable kludge */
4179 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4181 tmp = 0; /* any sub overrides "weak" keyword */
4183 else { /* no override */
4185 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4186 Perl_warner(aTHX_ packWARN(WARN_MISC),
4187 "dump() better written as CORE::dump()");
4191 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4192 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4193 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4194 "Ambiguous call resolved as CORE::%s(), %s",
4195 GvENAME(hgv), "qualify as such or use &");
4202 default: /* not a keyword */
4203 /* Trade off - by using this evil construction we can pull the
4204 variable gv into the block labelled keylookup. If not, then
4205 we have to give it function scope so that the goto from the
4206 earlier ':' case doesn't bypass the initialisation. */
4208 just_a_word_zero_gv:
4215 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4218 /* Get the rest if it looks like a package qualifier */
4220 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4222 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4225 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4226 *s == '\'' ? "'" : "::");
4231 if (PL_expect == XOPERATOR) {
4232 if (PL_bufptr == PL_linestart) {
4233 CopLINE_dec(PL_curcop);
4234 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4235 CopLINE_inc(PL_curcop);
4238 no_op("Bareword",s);
4241 /* Look for a subroutine with this name in current package,
4242 unless name is "Foo::", in which case Foo is a bearword
4243 (and a package name). */
4246 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4248 if (ckWARN(WARN_BAREWORD)
4249 && ! gv_fetchpv(PL_tokenbuf, 0, SVt_PVHV))
4250 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4251 "Bareword \"%s\" refers to nonexistent package",
4254 PL_tokenbuf[len] = '\0';
4261 /* Mustn't actually add anything to a symbol table.
4262 But also don't want to "initialise" any placeholder
4263 constants that might already be there into full
4264 blown PVGVs with attached PVCV. */
4265 gv = gv_fetchpv(PL_tokenbuf, GV_NOADD_NOINIT,
4270 /* if we saw a global override before, get the right name */
4273 sv = newSVpvs("CORE::GLOBAL::");
4274 sv_catpv(sv,PL_tokenbuf);
4277 /* If len is 0, newSVpv does strlen(), which is correct.
4278 If len is non-zero, then it will be the true length,
4279 and so the scalar will be created correctly. */
4280 sv = newSVpv(PL_tokenbuf,len);
4283 /* Presume this is going to be a bareword of some sort. */
4286 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4287 yylval.opval->op_private = OPpCONST_BARE;
4288 /* UTF-8 package name? */
4289 if (UTF && !IN_BYTES &&
4290 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4293 /* And if "Foo::", then that's what it certainly is. */
4298 /* Do the explicit type check so that we don't need to force
4299 the initialisation of the symbol table to have a real GV.
4300 Beware - gv may not really be a PVGV, cv may not really be
4301 a PVCV, (because of the space optimisations that gv_init
4302 understands) But they're true if for this symbol there is
4303 respectively a typeglob and a subroutine.
4305 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4306 /* Real typeglob, so get the real subroutine: */
4308 /* A proxy for a subroutine in this package? */
4309 : SvOK(gv) ? (CV *) gv : NULL)
4312 /* See if it's the indirect object for a list operator. */
4314 if (PL_oldoldbufptr &&
4315 PL_oldoldbufptr < PL_bufptr &&
4316 (PL_oldoldbufptr == PL_last_lop
4317 || PL_oldoldbufptr == PL_last_uni) &&
4318 /* NO SKIPSPACE BEFORE HERE! */
4319 (PL_expect == XREF ||
4320 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4322 bool immediate_paren = *s == '(';
4324 /* (Now we can afford to cross potential line boundary.) */
4327 /* Two barewords in a row may indicate method call. */
4329 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4330 (tmp = intuit_method(s, gv, cv)))
4333 /* If not a declared subroutine, it's an indirect object. */
4334 /* (But it's an indir obj regardless for sort.) */
4335 /* Also, if "_" follows a filetest operator, it's a bareword */
4338 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4340 (PL_last_lop_op != OP_MAPSTART &&
4341 PL_last_lop_op != OP_GREPSTART))))
4342 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4343 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4346 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4351 PL_expect = XOPERATOR;
4354 /* Is this a word before a => operator? */
4355 if (*s == '=' && s[1] == '>' && !pkgname) {
4357 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4358 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4359 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4363 /* If followed by a paren, it's certainly a subroutine. */
4367 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4368 if (*d == ')' && (sv = gv_const_sv(gv))) {
4373 PL_nextval[PL_nexttoke].opval = yylval.opval;
4374 PL_expect = XOPERATOR;
4380 /* If followed by var or block, call it a method (unless sub) */
4382 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4383 PL_last_lop = PL_oldbufptr;
4384 PL_last_lop_op = OP_METHOD;
4388 /* If followed by a bareword, see if it looks like indir obj. */
4391 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4392 && (tmp = intuit_method(s, gv, cv)))
4395 /* Not a method, so call it a subroutine (if defined) */
4398 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4399 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4400 "Ambiguous use of -%s resolved as -&%s()",
4401 PL_tokenbuf, PL_tokenbuf);
4402 /* Check for a constant sub */
4403 if ((sv = gv_const_sv(gv))) {
4405 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4406 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4407 yylval.opval->op_private = 0;
4411 /* Resolve to GV now. */
4412 if (SvTYPE(gv) != SVt_PVGV) {
4413 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4414 assert (SvTYPE(gv) == SVt_PVGV);
4415 /* cv must have been some sort of placeholder, so
4416 now needs replacing with a real code reference. */
4420 op_free(yylval.opval);
4421 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4422 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4423 PL_last_lop = PL_oldbufptr;
4424 PL_last_lop_op = OP_ENTERSUB;
4425 /* Is there a prototype? */
4428 const char *proto = SvPV_const((SV*)cv, len);
4431 if (*proto == '$' && proto[1] == '\0')
4433 while (*proto == ';')
4435 if (*proto == '&' && *s == '{') {
4436 sv_setpv(PL_subname, PL_curstash ?
4437 "__ANON__" : "__ANON__::__ANON__");
4441 PL_nextval[PL_nexttoke].opval = yylval.opval;
4447 /* Call it a bare word */
4449 if (PL_hints & HINT_STRICT_SUBS)
4450 yylval.opval->op_private |= OPpCONST_STRICT;
4453 if (lastchar != '-') {
4454 if (ckWARN(WARN_RESERVED)) {
4455 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4456 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4457 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4464 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4465 && ckWARN_d(WARN_AMBIGUOUS)) {
4466 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4467 "Operator or semicolon missing before %c%s",
4468 lastchar, PL_tokenbuf);
4469 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4470 "Ambiguous use of %c resolved as operator %c",
4471 lastchar, lastchar);
4477 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4478 newSVpv(CopFILE(PL_curcop),0));
4482 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4483 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4486 case KEY___PACKAGE__:
4487 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4489 ? newSVhek(HvNAME_HEK(PL_curstash))
4496 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4497 const char *pname = "main";
4498 if (PL_tokenbuf[2] == 'D')
4499 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4500 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4504 GvIOp(gv) = newIO();
4505 IoIFP(GvIOp(gv)) = PL_rsfp;
4506 #if defined(HAS_FCNTL) && defined(F_SETFD)
4508 const int fd = PerlIO_fileno(PL_rsfp);
4509 fcntl(fd,F_SETFD,fd >= 3);
4512 /* Mark this internal pseudo-handle as clean */
4513 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4515 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4516 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4517 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4519 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4520 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4521 /* if the script was opened in binmode, we need to revert
4522 * it to text mode for compatibility; but only iff it has CRs
4523 * XXX this is a questionable hack at best. */
4524 if (PL_bufend-PL_bufptr > 2
4525 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4528 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4529 loc = PerlIO_tell(PL_rsfp);
4530 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4533 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4535 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4536 #endif /* NETWARE */
4537 #ifdef PERLIO_IS_STDIO /* really? */
4538 # if defined(__BORLANDC__)
4539 /* XXX see note in do_binmode() */
4540 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4544 PerlIO_seek(PL_rsfp, loc, 0);
4548 #ifdef PERLIO_LAYERS
4551 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4552 else if (PL_encoding) {
4559 XPUSHs(PL_encoding);
4561 call_method("name", G_SCALAR);
4565 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4566 Perl_form(aTHX_ ":encoding(%"SVf")",
4584 if (PL_expect == XSTATE) {
4591 if (*s == ':' && s[1] == ':') {
4594 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4595 if (!(tmp = keyword(PL_tokenbuf, len)))
4596 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4599 else if (tmp == KEY_require || tmp == KEY_do)
4600 /* that's a way to remember we saw "CORE::" */
4613 LOP(OP_ACCEPT,XTERM);
4619 LOP(OP_ATAN2,XTERM);
4625 LOP(OP_BINMODE,XTERM);
4628 LOP(OP_BLESS,XTERM);
4637 /* When 'use switch' is in effect, continue has a dual
4638 life as a control operator. */
4640 if (!FEATURE_IS_ENABLED("switch"))
4643 /* We have to disambiguate the two senses of
4644 "continue". If the next token is a '{' then
4645 treat it as the start of a continue block;
4646 otherwise treat it as a control operator.
4657 (void)gv_fetchpv("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4674 if (!PL_cryptseen) {
4675 PL_cryptseen = TRUE;
4679 LOP(OP_CRYPT,XTERM);
4682 LOP(OP_CHMOD,XTERM);
4685 LOP(OP_CHOWN,XTERM);
4688 LOP(OP_CONNECT,XTERM);
4707 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4708 if (orig_keyword == KEY_do) {
4717 PL_hints |= HINT_BLOCK_SCOPE;
4727 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4728 LOP(OP_DBMOPEN,XTERM);
4734 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4741 yylval.ival = CopLINE(PL_curcop);
4755 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4756 UNIBRACK(OP_ENTEREVAL);
4774 case KEY_endhostent:
4780 case KEY_endservent:
4783 case KEY_endprotoent:
4794 yylval.ival = CopLINE(PL_curcop);
4796 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4798 if ((PL_bufend - p) >= 3 &&
4799 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4801 else if ((PL_bufend - p) >= 4 &&
4802 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4805 if (isIDFIRST_lazy_if(p,UTF)) {
4806 p = scan_ident(p, PL_bufend,
4807 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4811 Perl_croak(aTHX_ "Missing $ on loop variable");
4816 LOP(OP_FORMLINE,XTERM);
4822 LOP(OP_FCNTL,XTERM);
4828 LOP(OP_FLOCK,XTERM);
4837 LOP(OP_GREPSTART, XREF);
4840 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4855 case KEY_getpriority:
4856 LOP(OP_GETPRIORITY,XTERM);
4858 case KEY_getprotobyname:
4861 case KEY_getprotobynumber:
4862 LOP(OP_GPBYNUMBER,XTERM);
4864 case KEY_getprotoent:
4876 case KEY_getpeername:
4877 UNI(OP_GETPEERNAME);
4879 case KEY_gethostbyname:
4882 case KEY_gethostbyaddr:
4883 LOP(OP_GHBYADDR,XTERM);
4885 case KEY_gethostent:
4888 case KEY_getnetbyname:
4891 case KEY_getnetbyaddr:
4892 LOP(OP_GNBYADDR,XTERM);
4897 case KEY_getservbyname:
4898 LOP(OP_GSBYNAME,XTERM);
4900 case KEY_getservbyport:
4901 LOP(OP_GSBYPORT,XTERM);
4903 case KEY_getservent:
4906 case KEY_getsockname:
4907 UNI(OP_GETSOCKNAME);
4909 case KEY_getsockopt:
4910 LOP(OP_GSOCKOPT,XTERM);
4925 yylval.ival = CopLINE(PL_curcop);
4936 yylval.ival = CopLINE(PL_curcop);
4940 LOP(OP_INDEX,XTERM);
4946 LOP(OP_IOCTL,XTERM);
4958 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4990 LOP(OP_LISTEN,XTERM);
4999 s = scan_pat(s,OP_MATCH);
5000 TERM(sublex_start());
5003 LOP(OP_MAPSTART, XREF);
5006 LOP(OP_MKDIR,XTERM);
5009 LOP(OP_MSGCTL,XTERM);
5012 LOP(OP_MSGGET,XTERM);
5015 LOP(OP_MSGRCV,XTERM);
5018 LOP(OP_MSGSND,XTERM);
5024 if (isIDFIRST_lazy_if(s,UTF)) {
5025 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5026 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5028 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5029 if (!PL_in_my_stash) {
5032 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5040 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5047 s = tokenize_use(0, s);
5051 if (*s == '(' || (s = skipspace(s), *s == '('))
5058 if (isIDFIRST_lazy_if(s,UTF)) {
5060 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5061 for (t=d; *t && isSPACE(*t); t++) ;
5062 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5064 && !(t[0] == '=' && t[1] == '>')
5066 int len = (int)(d-s);
5067 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5068 "Precedence problem: open %.*s should be open(%.*s)",
5075 yylval.ival = OP_OR;
5085 LOP(OP_OPEN_DIR,XTERM);
5088 checkcomma(s,PL_tokenbuf,"filehandle");
5092 checkcomma(s,PL_tokenbuf,"filehandle");
5111 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5115 LOP(OP_PIPE_OP,XTERM);
5118 s = scan_str(s,FALSE,FALSE);
5120 missingterm((char*)0);
5121 yylval.ival = OP_CONST;
5122 TERM(sublex_start());
5128 s = scan_str(s,FALSE,FALSE);
5130 missingterm((char*)0);
5131 PL_expect = XOPERATOR;
5133 if (SvCUR(PL_lex_stuff)) {
5136 d = SvPV_force(PL_lex_stuff, len);
5139 for (; isSPACE(*d) && len; --len, ++d) ;
5142 if (!warned && ckWARN(WARN_QW)) {
5143 for (; !isSPACE(*d) && len; --len, ++d) {
5145 Perl_warner(aTHX_ packWARN(WARN_QW),
5146 "Possible attempt to separate words with commas");
5149 else if (*d == '#') {
5150 Perl_warner(aTHX_ packWARN(WARN_QW),
5151 "Possible attempt to put comments in qw() list");
5157 for (; !isSPACE(*d) && len; --len, ++d) ;
5159 sv = newSVpvn(b, d-b);
5160 if (DO_UTF8(PL_lex_stuff))
5162 words = append_elem(OP_LIST, words,
5163 newSVOP(OP_CONST, 0, tokeq(sv)));
5167 PL_nextval[PL_nexttoke].opval = words;
5172 SvREFCNT_dec(PL_lex_stuff);
5173 PL_lex_stuff = Nullsv;
5179 s = scan_str(s,FALSE,FALSE);
5181 missingterm((char*)0);
5182 yylval.ival = OP_STRINGIFY;
5183 if (SvIVX(PL_lex_stuff) == '\'')
5184 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5185 TERM(sublex_start());
5188 s = scan_pat(s,OP_QR);
5189 TERM(sublex_start());
5192 s = scan_str(s,FALSE,FALSE);
5194 missingterm((char*)0);
5195 yylval.ival = OP_BACKTICK;
5197 TERM(sublex_start());
5205 s = force_version(s, FALSE);
5207 else if (*s != 'v' || !isDIGIT(s[1])
5208 || (s = force_version(s, TRUE), *s == 'v'))
5210 *PL_tokenbuf = '\0';
5211 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5212 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5213 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5215 yyerror("<> should be quotes");
5217 if (orig_keyword == KEY_require) {
5225 PL_last_uni = PL_oldbufptr;
5226 PL_last_lop_op = OP_REQUIRE;
5228 return REPORT( (int)REQUIRE );
5234 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5238 LOP(OP_RENAME,XTERM);
5247 LOP(OP_RINDEX,XTERM);
5257 UNIDOR(OP_READLINE);
5270 LOP(OP_REVERSE,XTERM);
5273 UNIDOR(OP_READLINK);
5281 TERM(sublex_start());
5283 TOKEN(1); /* force error */
5286 checkcomma(s,PL_tokenbuf,"filehandle");
5296 LOP(OP_SELECT,XTERM);
5302 LOP(OP_SEMCTL,XTERM);
5305 LOP(OP_SEMGET,XTERM);
5308 LOP(OP_SEMOP,XTERM);
5314 LOP(OP_SETPGRP,XTERM);
5316 case KEY_setpriority:
5317 LOP(OP_SETPRIORITY,XTERM);
5319 case KEY_sethostent:
5325 case KEY_setservent:
5328 case KEY_setprotoent:
5338 LOP(OP_SEEKDIR,XTERM);
5340 case KEY_setsockopt:
5341 LOP(OP_SSOCKOPT,XTERM);
5347 LOP(OP_SHMCTL,XTERM);
5350 LOP(OP_SHMGET,XTERM);
5353 LOP(OP_SHMREAD,XTERM);
5356 LOP(OP_SHMWRITE,XTERM);
5359 LOP(OP_SHUTDOWN,XTERM);
5368 LOP(OP_SOCKET,XTERM);
5370 case KEY_socketpair:
5371 LOP(OP_SOCKPAIR,XTERM);
5374 checkcomma(s,PL_tokenbuf,"subroutine name");
5376 if (*s == ';' || *s == ')') /* probably a close */
5377 Perl_croak(aTHX_ "sort is now a reserved word");
5379 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5383 LOP(OP_SPLIT,XTERM);
5386 LOP(OP_SPRINTF,XTERM);
5389 LOP(OP_SPLICE,XTERM);
5404 LOP(OP_SUBSTR,XTERM);
5410 char tmpbuf[sizeof PL_tokenbuf];
5411 SSize_t tboffset = 0;
5412 expectation attrful;
5413 bool have_name, have_proto, bad_proto;
5414 const int key = tmp;
5418 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5419 (*s == ':' && s[1] == ':'))
5422 attrful = XATTRBLOCK;
5423 /* remember buffer pos'n for later force_word */
5424 tboffset = s - PL_oldbufptr;
5425 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5426 if (strchr(tmpbuf, ':'))
5427 sv_setpv(PL_subname, tmpbuf);
5429 sv_setsv(PL_subname,PL_curstname);
5430 sv_catpvs(PL_subname,"::");
5431 sv_catpvn(PL_subname,tmpbuf,len);
5438 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5439 PL_expect = XTERMBLOCK;
5440 attrful = XATTRTERM;
5441 sv_setpvn(PL_subname,"?",1);
5445 if (key == KEY_format) {
5447 PL_lex_formbrack = PL_lex_brackets + 1;
5449 (void) force_word(PL_oldbufptr + tboffset, WORD,
5454 /* Look for a prototype */
5458 s = scan_str(s,FALSE,FALSE);
5460 Perl_croak(aTHX_ "Prototype not terminated");
5461 /* strip spaces and check for bad characters */
5462 d = SvPVX(PL_lex_stuff);
5465 for (p = d; *p; ++p) {
5468 if (!strchr("$@%*;[]&\\", *p))
5473 if (bad_proto && ckWARN(WARN_SYNTAX))
5474 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5475 "Illegal character in prototype for %"SVf" : %s",
5477 SvCUR_set(PL_lex_stuff, tmp);
5485 if (*s == ':' && s[1] != ':')
5486 PL_expect = attrful;
5487 else if (*s != '{' && key == KEY_sub) {
5489 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5491 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5495 PL_nextval[PL_nexttoke].opval =
5496 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5497 PL_lex_stuff = Nullsv;
5501 sv_setpv(PL_subname,
5502 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5505 (void) force_word(PL_oldbufptr + tboffset, WORD,
5514 LOP(OP_SYSTEM,XREF);
5517 LOP(OP_SYMLINK,XTERM);
5520 LOP(OP_SYSCALL,XTERM);
5523 LOP(OP_SYSOPEN,XTERM);
5526 LOP(OP_SYSSEEK,XTERM);
5529 LOP(OP_SYSREAD,XTERM);
5532 LOP(OP_SYSWRITE,XTERM);
5536 TERM(sublex_start());
5557 LOP(OP_TRUNCATE,XTERM);
5569 yylval.ival = CopLINE(PL_curcop);
5573 yylval.ival = CopLINE(PL_curcop);
5577 LOP(OP_UNLINK,XTERM);
5583 LOP(OP_UNPACK,XTERM);
5586 LOP(OP_UTIME,XTERM);
5592 LOP(OP_UNSHIFT,XTERM);
5595 s = tokenize_use(1, s);
5605 yylval.ival = CopLINE(PL_curcop);
5609 yylval.ival = CopLINE(PL_curcop);
5613 PL_hints |= HINT_BLOCK_SCOPE;
5620 LOP(OP_WAITPID,XTERM);
5629 ctl_l[0] = toCTRL('L');
5631 gv_fetchpv(ctl_l, GV_ADD, SVt_PV);
5634 gv_fetchpv("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5639 if (PL_expect == XOPERATOR)
5645 yylval.ival = OP_XOR;
5650 TERM(sublex_start());
5655 #pragma segment Main
5659 S_pending_ident(pTHX)
5662 register I32 tmp = 0;
5663 /* pit holds the identifier we read and pending_ident is reset */
5664 char pit = PL_pending_ident;
5665 PL_pending_ident = 0;
5667 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5668 "### Pending identifier '%s'\n", PL_tokenbuf); });
5670 /* if we're in a my(), we can't allow dynamics here.
5671 $foo'bar has already been turned into $foo::bar, so
5672 just check for colons.
5674 if it's a legal name, the OP is a PADANY.
5677 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5678 if (strchr(PL_tokenbuf,':'))
5679 yyerror(Perl_form(aTHX_ "No package name allowed for "
5680 "variable %s in \"our\"",
5682 tmp = allocmy(PL_tokenbuf);
5685 if (strchr(PL_tokenbuf,':'))
5686 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5688 yylval.opval = newOP(OP_PADANY, 0);
5689 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5695 build the ops for accesses to a my() variable.
5697 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5698 then used in a comparison. This catches most, but not
5699 all cases. For instance, it catches
5700 sort { my($a); $a <=> $b }
5702 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5703 (although why you'd do that is anyone's guess).
5706 if (!strchr(PL_tokenbuf,':')) {
5708 tmp = pad_findmy(PL_tokenbuf);
5709 if (tmp != NOT_IN_PAD) {
5710 /* might be an "our" variable" */
5711 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5712 /* build ops for a bareword */
5713 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5714 HEK * const stashname = HvNAME_HEK(stash);
5715 SV * const sym = newSVhek(stashname);
5716 sv_catpvs(sym, "::");
5717 sv_catpv(sym, PL_tokenbuf+1);
5718 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5719 yylval.opval->op_private = OPpCONST_ENTERED;
5722 ? (GV_ADDMULTI | GV_ADDINEVAL)
5725 ((PL_tokenbuf[0] == '$') ? SVt_PV
5726 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5731 /* if it's a sort block and they're naming $a or $b */
5732 if (PL_last_lop_op == OP_SORT &&
5733 PL_tokenbuf[0] == '$' &&
5734 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5737 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5738 d < PL_bufend && *d != '\n';
5741 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5742 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5748 yylval.opval = newOP(OP_PADANY, 0);
5749 yylval.opval->op_targ = tmp;
5755 Whine if they've said @foo in a doublequoted string,
5756 and @foo isn't a variable we can find in the symbol
5759 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5760 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5761 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5762 && ckWARN(WARN_AMBIGUOUS))
5764 /* Downgraded from fatal to warning 20000522 mjd */
5765 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5766 "Possible unintended interpolation of %s in string",
5771 /* build ops for a bareword */
5772 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5773 yylval.opval->op_private = OPpCONST_ENTERED;
5777 ? (GV_ADDMULTI | GV_ADDINEVAL)
5778 /* If the identifier refers to a stash, don't autovivify it.
5779 * Change 24660 had the side effect of causing symbol table
5780 * hashes to always be defined, even if they were freshly
5781 * created and the only reference in the entire program was
5782 * the single statement with the defined %foo::bar:: test.
5783 * It appears that all code in the wild doing this actually
5784 * wants to know whether sub-packages have been loaded, so
5785 * by avoiding auto-vivifying symbol tables, we ensure that
5786 * defined %foo::bar:: continues to be false, and the existing
5787 * tests still give the expected answers, even though what
5788 * they're actually testing has now changed subtly.
5790 : !(*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'),
5791 ((PL_tokenbuf[0] == '$') ? SVt_PV
5792 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5798 * The following code was generated by perl_keyword.pl.
5802 Perl_keyword (pTHX_ const char *name, I32 len)
5806 case 1: /* 5 tokens of length 1 */
5838 case 2: /* 18 tokens of length 2 */
5984 case 3: /* 29 tokens of length 3 */
5988 if (name[1] == 'N' &&
6051 if (name[1] == 'i' &&
6073 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6091 if (name[1] == 'o' &&
6100 if (name[1] == 'e' &&
6109 if (name[1] == 'n' &&
6118 if (name[1] == 'o' &&
6127 if (name[1] == 'a' &&
6136 if (name[1] == 'o' &&
6198 if (name[1] == 'e' &&
6212 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6238 if (name[1] == 'i' &&
6247 if (name[1] == 's' &&
6256 if (name[1] == 'e' &&
6265 if (name[1] == 'o' &&
6277 case 4: /* 41 tokens of length 4 */
6281 if (name[1] == 'O' &&
6291 if (name[1] == 'N' &&
6301 if (name[1] == 'i' &&
6311 if (name[1] == 'h' &&
6321 if (name[1] == 'u' &&
6334 if (name[2] == 'c' &&
6343 if (name[2] == 's' &&
6352 if (name[2] == 'a' &&
6388 if (name[1] == 'o' &&
6401 if (name[2] == 't' &&
6410 if (name[2] == 'o' &&
6419 if (name[2] == 't' &&
6428 if (name[2] == 'e' &&
6441 if (name[1] == 'o' &&
6454 if (name[2] == 'y' &&
6463 if (name[2] == 'l' &&
6479 if (name[2] == 's' &&
6488 if (name[2] == 'n' &&
6497 if (name[2] == 'c' &&
6510 if (name[1] == 'e' &&
6520 if (name[1] == 'p' &&
6533 if (name[2] == 'c' &&
6542 if (name[2] == 'p' &&
6551 if (name[2] == 's' &&
6567 if (name[2] == 'n' &&
6637 if (name[2] == 'r' &&
6646 if (name[2] == 'r' &&
6655 if (name[2] == 'a' &&
6671 if (name[2] == 'l' &&
6733 if (name[2] == 'e' &&
6736 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6749 case 5: /* 38 tokens of length 5 */
6753 if (name[1] == 'E' &&
6764 if (name[1] == 'H' &&
6778 if (name[2] == 'a' &&
6788 if (name[2] == 'a' &&
6805 if (name[2] == 'e' &&
6815 if (name[2] == 'e' &&
6819 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6835 if (name[3] == 'i' &&
6844 if (name[3] == 'o' &&
6880 if (name[2] == 'o' &&
6890 if (name[2] == 'y' &&
6904 if (name[1] == 'l' &&
6918 if (name[2] == 'n' &&
6928 if (name[2] == 'o' &&
6942 if (name[1] == 'i' &&
6947 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
6956 if (name[2] == 'd' &&
6966 if (name[2] == 'c' &&
6983 if (name[2] == 'c' &&
6993 if (name[2] == 't' &&
7007 if (name[1] == 'k' &&
7018 if (name[1] == 'r' &&
7032 if (name[2] == 's' &&
7042 if (name[2] == 'd' &&
7059 if (name[2] == 'm' &&
7069 if (name[2] == 'i' &&
7079 if (name[2] == 'e' &&
7089 if (name[2] == 'l' &&
7099 if (name[2] == 'a' &&
7109 if (name[2] == 'u' &&
7123 if (name[1] == 'i' &&
7137 if (name[2] == 'a' &&
7150 if (name[3] == 'e' &&
7185 if (name[2] == 'i' &&
7202 if (name[2] == 'i' &&
7212 if (name[2] == 'i' &&
7229 case 6: /* 33 tokens of length 6 */
7233 if (name[1] == 'c' &&
7248 if (name[2] == 'l' &&
7259 if (name[2] == 'r' &&
7274 if (name[1] == 'e' &&
7289 if (name[2] == 's' &&
7294 if(ckWARN_d(WARN_SYNTAX))
7295 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7301 if (name[2] == 'i' &&
7319 if (name[2] == 'l' &&
7330 if (name[2] == 'r' &&
7345 if (name[1] == 'm' &&
7360 if (name[2] == 'n' &&
7371 if (name[2] == 's' &&
7386 if (name[1] == 's' &&
7392 if (name[4] == 't' &&
7401 if (name[4] == 'e' &&
7410 if (name[4] == 'c' &&
7419 if (name[4] == 'n' &&
7435 if (name[1] == 'r' &&
7453 if (name[3] == 'a' &&
7463 if (name[3] == 'u' &&
7477 if (name[2] == 'n' &&
7495 if (name[2] == 'a' &&
7509 if (name[3] == 'e' &&
7522 if (name[4] == 't' &&
7531 if (name[4] == 'e' &&
7553 if (name[4] == 't' &&
7562 if (name[4] == 'e' &&
7578 if (name[2] == 'c' &&
7589 if (name[2] == 'l' &&
7600 if (name[2] == 'b' &&
7611 if (name[2] == 's' &&
7634 if (name[4] == 's' &&
7643 if (name[4] == 'n' &&
7656 if (name[3] == 'a' &&
7673 if (name[1] == 'a' &&
7688 case 7: /* 29 tokens of length 7 */
7692 if (name[1] == 'E' &&
7705 if (name[1] == '_' &&
7718 if (name[1] == 'i' &&
7725 return -KEY_binmode;
7731 if (name[1] == 'o' &&
7738 return -KEY_connect;
7747 if (name[2] == 'm' &&
7753 return -KEY_dbmopen;
7764 if (name[4] == 'u' &&
7768 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7774 if (name[4] == 'n' &&
7795 if (name[1] == 'o' &&
7808 if (name[1] == 'e' &&
7815 if (name[5] == 'r' &&
7818 return -KEY_getpgrp;
7824 if (name[5] == 'i' &&
7827 return -KEY_getppid;
7840 if (name[1] == 'c' &&
7847 return -KEY_lcfirst;
7853 if (name[1] == 'p' &&
7860 return -KEY_opendir;
7866 if (name[1] == 'a' &&
7884 if (name[3] == 'd' &&
7889 return -KEY_readdir;
7895 if (name[3] == 'u' &&
7906 if (name[3] == 'e' &&
7911 return -KEY_reverse;
7930 if (name[3] == 'k' &&
7935 return -KEY_seekdir;
7941 if (name[3] == 'p' &&
7946 return -KEY_setpgrp;
7956 if (name[2] == 'm' &&
7962 return -KEY_shmread;
7968 if (name[2] == 'r' &&
7974 return -KEY_sprintf;
7983 if (name[3] == 'l' &&
7988 return -KEY_symlink;
7997 if (name[4] == 'a' &&
8001 return -KEY_syscall;
8007 if (name[4] == 'p' &&
8011 return -KEY_sysopen;
8017 if (name[4] == 'e' &&
8021 return -KEY_sysread;
8027 if (name[4] == 'e' &&
8031 return -KEY_sysseek;
8049 if (name[1] == 'e' &&
8056 return -KEY_telldir;
8065 if (name[2] == 'f' &&
8071 return -KEY_ucfirst;
8077 if (name[2] == 's' &&
8083 return -KEY_unshift;
8093 if (name[1] == 'a' &&
8100 return -KEY_waitpid;
8109 case 8: /* 26 tokens of length 8 */
8113 if (name[1] == 'U' &&
8121 return KEY_AUTOLOAD;
8132 if (name[3] == 'A' &&
8138 return KEY___DATA__;
8144 if (name[3] == 'I' &&
8150 return -KEY___FILE__;
8156 if (name[3] == 'I' &&
8162 return -KEY___LINE__;
8178 if (name[2] == 'o' &&
8185 return -KEY_closedir;
8191 if (name[2] == 'n' &&
8198 return -KEY_continue;
8208 if (name[1] == 'b' &&
8216 return -KEY_dbmclose;
8222 if (name[1] == 'n' &&
8228 if (name[4] == 'r' &&
8233 return -KEY_endgrent;
8239 if (name[4] == 'w' &&
8244 return -KEY_endpwent;
8257 if (name[1] == 'o' &&
8265 return -KEY_formline;
8271 if (name[1] == 'e' &&
8282 if (name[6] == 'n' &&
8285 return -KEY_getgrent;
8291 if (name[6] == 'i' &&
8294 return -KEY_getgrgid;
8300 if (name[6] == 'a' &&
8303 return -KEY_getgrnam;
8316 if (name[4] == 'o' &&
8321 return -KEY_getlogin;
8332 if (name[6] == 'n' &&
8335 return -KEY_getpwent;
8341 if (name[6] == 'a' &&
8344 return -KEY_getpwnam;
8350 if (name[6] == 'i' &&
8353 return -KEY_getpwuid;
8373 if (name[1] == 'e' &&
8380 if (name[5] == 'i' &&
8387 return -KEY_readline;
8392 return -KEY_readlink;
8403 if (name[5] == 'i' &&
8407 return -KEY_readpipe;
8428 if (name[4] == 'r' &&
8433 return -KEY_setgrent;
8439 if (name[4] == 'w' &&
8444 return -KEY_setpwent;
8460 if (name[3] == 'w' &&
8466 return -KEY_shmwrite;
8472 if (name[3] == 't' &&
8478 return -KEY_shutdown;
8488 if (name[2] == 's' &&
8495 return -KEY_syswrite;
8505 if (name[1] == 'r' &&
8513 return -KEY_truncate;
8522 case 9: /* 8 tokens of length 9 */
8526 if (name[1] == 'n' &&
8535 return -KEY_endnetent;
8541 if (name[1] == 'e' &&
8550 return -KEY_getnetent;
8556 if (name[1] == 'o' &&
8565 return -KEY_localtime;
8571 if (name[1] == 'r' &&
8580 return KEY_prototype;
8586 if (name[1] == 'u' &&
8595 return -KEY_quotemeta;
8601 if (name[1] == 'e' &&
8610 return -KEY_rewinddir;
8616 if (name[1] == 'e' &&
8625 return -KEY_setnetent;
8631 if (name[1] == 'a' &&
8640 return -KEY_wantarray;
8649 case 10: /* 9 tokens of length 10 */
8653 if (name[1] == 'n' &&
8659 if (name[4] == 'o' &&
8666 return -KEY_endhostent;
8672 if (name[4] == 'e' &&
8679 return -KEY_endservent;
8692 if (name[1] == 'e' &&
8698 if (name[4] == 'o' &&
8705 return -KEY_gethostent;
8714 if (name[5] == 'r' &&
8720 return -KEY_getservent;
8726 if (name[5] == 'c' &&
8732 return -KEY_getsockopt;
8757 if (name[4] == 'o' &&
8764 return -KEY_sethostent;
8773 if (name[5] == 'r' &&
8779 return -KEY_setservent;
8785 if (name[5] == 'c' &&
8791 return -KEY_setsockopt;
8808 if (name[2] == 'c' &&
8817 return -KEY_socketpair;
8830 case 11: /* 8 tokens of length 11 */
8834 if (name[1] == '_' &&
8845 return -KEY___PACKAGE__;
8851 if (name[1] == 'n' &&
8862 return -KEY_endprotoent;
8868 if (name[1] == 'e' &&
8877 if (name[5] == 'e' &&
8884 return -KEY_getpeername;
8893 if (name[6] == 'o' &&
8899 return -KEY_getpriority;
8905 if (name[6] == 't' &&
8911 return -KEY_getprotoent;
8925 if (name[4] == 'o' &&
8933 return -KEY_getsockname;
8946 if (name[1] == 'e' &&
8954 if (name[6] == 'o' &&
8960 return -KEY_setpriority;
8966 if (name[6] == 't' &&
8972 return -KEY_setprotoent;
8988 case 12: /* 2 tokens of length 12 */
8989 if (name[0] == 'g' &&
9001 if (name[9] == 'd' &&
9004 { /* getnetbyaddr */
9005 return -KEY_getnetbyaddr;
9011 if (name[9] == 'a' &&
9014 { /* getnetbyname */
9015 return -KEY_getnetbyname;
9027 case 13: /* 4 tokens of length 13 */
9028 if (name[0] == 'g' &&
9035 if (name[4] == 'o' &&
9044 if (name[10] == 'd' &&
9047 { /* gethostbyaddr */
9048 return -KEY_gethostbyaddr;
9054 if (name[10] == 'a' &&
9057 { /* gethostbyname */
9058 return -KEY_gethostbyname;
9071 if (name[4] == 'e' &&
9080 if (name[10] == 'a' &&
9083 { /* getservbyname */
9084 return -KEY_getservbyname;
9090 if (name[10] == 'o' &&
9093 { /* getservbyport */
9094 return -KEY_getservbyport;
9113 case 14: /* 1 tokens of length 14 */
9114 if (name[0] == 'g' &&
9128 { /* getprotobyname */
9129 return -KEY_getprotobyname;
9134 case 16: /* 1 tokens of length 16 */
9135 if (name[0] == 'g' &&
9151 { /* getprotobynumber */
9152 return -KEY_getprotobynumber;
9166 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9170 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9171 if (ckWARN(WARN_SYNTAX)) {
9173 for (w = s+2; *w && level; w++) {
9180 for (; *w && isSPACE(*w); w++) ;
9181 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9182 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9183 "%s (...) interpreted as function",name);
9186 while (s < PL_bufend && isSPACE(*s))
9190 while (s < PL_bufend && isSPACE(*s))
9192 if (isIDFIRST_lazy_if(s,UTF)) {
9194 while (isALNUM_lazy_if(s,UTF))
9196 while (s < PL_bufend && isSPACE(*s))
9200 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9201 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9205 Perl_croak(aTHX_ "No comma allowed after %s", what);
9210 /* Either returns sv, or mortalizes sv and returns a new SV*.
9211 Best used as sv=new_constant(..., sv, ...).
9212 If s, pv are NULL, calls subroutine with one argument,
9213 and type is used with error messages only. */
9216 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9220 HV * const table = GvHV(PL_hintgv); /* ^H */
9224 const char *why1 = "", *why2 = "", *why3 = "";
9226 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9229 why2 = strEQ(key,"charnames")
9230 ? "(possibly a missing \"use charnames ...\")"
9232 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9233 (type ? type: "undef"), why2);
9235 /* This is convoluted and evil ("goto considered harmful")
9236 * but I do not understand the intricacies of all the different
9237 * failure modes of %^H in here. The goal here is to make
9238 * the most probable error message user-friendly. --jhi */
9243 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9244 (type ? type: "undef"), why1, why2, why3);
9246 yyerror(SvPVX_const(msg));
9250 cvp = hv_fetch(table, key, strlen(key), FALSE);
9251 if (!cvp || !SvOK(*cvp)) {
9254 why3 = "} is not defined";
9257 sv_2mortal(sv); /* Parent created it permanently */
9260 pv = sv_2mortal(newSVpvn(s, len));
9262 typesv = sv_2mortal(newSVpv(type, 0));
9264 typesv = &PL_sv_undef;
9266 PUSHSTACKi(PERLSI_OVERLOAD);
9278 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9282 /* Check the eval first */
9283 if (!PL_in_eval && SvTRUE(ERRSV)) {
9284 sv_catpvs(ERRSV, "Propagated");
9285 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9287 res = SvREFCNT_inc(sv);
9291 (void)SvREFCNT_inc(res);
9300 why1 = "Call to &{$^H{";
9302 why3 = "}} did not return a defined value";
9310 /* Returns a NUL terminated string, with the length of the string written to
9314 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9316 register char *d = dest;
9317 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9320 Perl_croak(aTHX_ ident_too_long);
9321 if (isALNUM(*s)) /* UTF handled below */
9323 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9328 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9332 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9333 char *t = s + UTF8SKIP(s);
9334 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9336 if (d + (t - s) > e)
9337 Perl_croak(aTHX_ ident_too_long);
9338 Copy(s, d, t - s, char);
9351 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9355 char *bracket = Nullch;
9361 e = d + destlen - 3; /* two-character token, ending NUL */
9363 while (isDIGIT(*s)) {
9365 Perl_croak(aTHX_ ident_too_long);
9372 Perl_croak(aTHX_ ident_too_long);
9373 if (isALNUM(*s)) /* UTF handled below */
9375 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9380 else if (*s == ':' && s[1] == ':') {
9384 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9385 char *t = s + UTF8SKIP(s);
9386 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9388 if (d + (t - s) > e)
9389 Perl_croak(aTHX_ ident_too_long);
9390 Copy(s, d, t - s, char);
9401 if (PL_lex_state != LEX_NORMAL)
9402 PL_lex_state = LEX_INTERPENDMAYBE;
9405 if (*s == '$' && s[1] &&
9406 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9419 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9424 if (isSPACE(s[-1])) {
9426 const char ch = *s++;
9427 if (!SPACE_OR_TAB(ch)) {
9433 if (isIDFIRST_lazy_if(d,UTF)) {
9437 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9439 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9442 Copy(s, d, e - s, char);
9447 while ((isALNUM(*s) || *s == ':') && d < e)
9450 Perl_croak(aTHX_ ident_too_long);
9453 while (s < send && SPACE_OR_TAB(*s)) s++;
9454 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9455 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9456 const char *brack = *s == '[' ? "[...]" : "{...}";
9457 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9458 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9459 funny, dest, brack, funny, dest, brack);
9462 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9466 /* Handle extended ${^Foo} variables
9467 * 1999-02-27 mjd-perl-patch@plover.com */
9468 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9472 while (isALNUM(*s) && d < e) {
9476 Perl_croak(aTHX_ ident_too_long);
9481 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9482 PL_lex_state = LEX_INTERPEND;
9487 if (PL_lex_state == LEX_NORMAL) {
9488 if (ckWARN(WARN_AMBIGUOUS) &&
9489 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9491 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9492 "Ambiguous use of %c{%s} resolved to %c%s",
9493 funny, dest, funny, dest);
9498 s = bracket; /* let the parser handle it */
9502 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9503 PL_lex_state = LEX_INTERPEND;
9508 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9513 *pmfl |= PMf_GLOBAL;
9515 *pmfl |= PMf_CONTINUE;
9519 *pmfl |= PMf_MULTILINE;
9521 *pmfl |= PMf_SINGLELINE;
9523 *pmfl |= PMf_EXTENDED;
9527 S_scan_pat(pTHX_ char *start, I32 type)
9530 char *s = scan_str(start,FALSE,FALSE);
9533 char * const delimiter = skipspace(start);
9534 Perl_croak(aTHX_ *delimiter == '?'
9535 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9536 : "Search pattern not terminated" );
9539 pm = (PMOP*)newPMOP(type, 0);
9540 if (PL_multi_open == '?')
9541 pm->op_pmflags |= PMf_ONCE;
9543 while (*s && strchr("iomsx", *s))
9544 pmflag(&pm->op_pmflags,*s++);
9547 while (*s && strchr("iogcmsx", *s))
9548 pmflag(&pm->op_pmflags,*s++);
9550 /* issue a warning if /c is specified,but /g is not */
9551 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9552 && ckWARN(WARN_REGEXP))
9554 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9557 pm->op_pmpermflags = pm->op_pmflags;
9559 PL_lex_op = (OP*)pm;
9560 yylval.ival = OP_MATCH;
9565 S_scan_subst(pTHX_ char *start)
9573 yylval.ival = OP_NULL;
9575 s = scan_str(start,FALSE,FALSE);
9578 Perl_croak(aTHX_ "Substitution pattern not terminated");
9580 if (s[-1] == PL_multi_open)
9583 first_start = PL_multi_start;
9584 s = scan_str(s,FALSE,FALSE);
9587 SvREFCNT_dec(PL_lex_stuff);
9588 PL_lex_stuff = Nullsv;
9590 Perl_croak(aTHX_ "Substitution replacement not terminated");
9592 PL_multi_start = first_start; /* so whole substitution is taken together */
9594 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9600 else if (strchr("iogcmsx", *s))
9601 pmflag(&pm->op_pmflags,*s++);
9606 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9607 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9612 PL_sublex_info.super_bufptr = s;
9613 PL_sublex_info.super_bufend = PL_bufend;
9615 pm->op_pmflags |= PMf_EVAL;
9616 repl = newSVpvs("");
9618 sv_catpv(repl, es ? "eval " : "do ");
9619 sv_catpvs(repl, "{ ");
9620 sv_catsv(repl, PL_lex_repl);
9621 sv_catpvs(repl, " }");
9623 SvREFCNT_dec(PL_lex_repl);
9627 pm->op_pmpermflags = pm->op_pmflags;
9628 PL_lex_op = (OP*)pm;
9629 yylval.ival = OP_SUBST;
9634 S_scan_trans(pTHX_ char *start)
9643 yylval.ival = OP_NULL;
9645 s = scan_str(start,FALSE,FALSE);
9647 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9648 if (s[-1] == PL_multi_open)
9651 s = scan_str(s,FALSE,FALSE);
9654 SvREFCNT_dec(PL_lex_stuff);
9655 PL_lex_stuff = Nullsv;
9657 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9660 complement = del = squash = 0;
9664 complement = OPpTRANS_COMPLEMENT;
9667 del = OPpTRANS_DELETE;
9670 squash = OPpTRANS_SQUASH;
9679 Newx(tbl, complement&&!del?258:256, short);
9680 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9681 o->op_private &= ~OPpTRANS_ALL;
9682 o->op_private |= del|squash|complement|
9683 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9684 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9687 yylval.ival = OP_TRANS;
9692 S_scan_heredoc(pTHX_ register char *s)
9695 I32 op_type = OP_SCALAR;
9699 const char *found_newline;
9703 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9707 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9710 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9711 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9714 s = delimcpy(d, e, s, PL_bufend, term, &len);
9724 if (!isALNUM_lazy_if(s,UTF))
9725 deprecate_old("bare << to mean <<\"\"");
9726 for (; isALNUM_lazy_if(s,UTF); s++) {
9731 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9732 Perl_croak(aTHX_ "Delimiter for here document is too long");
9735 len = d - PL_tokenbuf;
9736 #ifndef PERL_STRICT_CR
9737 d = strchr(s, '\r');
9739 char * const olds = s;
9741 while (s < PL_bufend) {
9747 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9756 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9760 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9761 herewas = newSVpvn(s,PL_bufend-s);
9765 herewas = newSVpvn(s,found_newline-s);
9767 s += SvCUR(herewas);
9769 tmpstr = NEWSV(87,79);
9770 sv_upgrade(tmpstr, SVt_PVIV);
9773 SvIV_set(tmpstr, -1);
9775 else if (term == '`') {
9776 op_type = OP_BACKTICK;
9777 SvIV_set(tmpstr, '\\');
9781 PL_multi_start = CopLINE(PL_curcop);
9782 PL_multi_open = PL_multi_close = '<';
9783 term = *PL_tokenbuf;
9784 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9785 char *bufptr = PL_sublex_info.super_bufptr;
9786 char *bufend = PL_sublex_info.super_bufend;
9787 char * const olds = s - SvCUR(herewas);
9788 s = strchr(bufptr, '\n');
9792 while (s < bufend &&
9793 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9795 CopLINE_inc(PL_curcop);
9798 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9799 missingterm(PL_tokenbuf);
9801 sv_setpvn(herewas,bufptr,d-bufptr+1);
9802 sv_setpvn(tmpstr,d+1,s-d);
9804 sv_catpvn(herewas,s,bufend-s);
9805 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9812 while (s < PL_bufend &&
9813 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9815 CopLINE_inc(PL_curcop);
9817 if (s >= PL_bufend) {
9818 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9819 missingterm(PL_tokenbuf);
9821 sv_setpvn(tmpstr,d+1,s-d);
9823 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9825 sv_catpvn(herewas,s,PL_bufend-s);
9826 sv_setsv(PL_linestr,herewas);
9827 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9828 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9829 PL_last_lop = PL_last_uni = Nullch;
9832 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9833 while (s >= PL_bufend) { /* multiple line string? */
9835 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9836 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9837 missingterm(PL_tokenbuf);
9839 CopLINE_inc(PL_curcop);
9840 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9841 PL_last_lop = PL_last_uni = Nullch;
9842 #ifndef PERL_STRICT_CR
9843 if (PL_bufend - PL_linestart >= 2) {
9844 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9845 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9847 PL_bufend[-2] = '\n';
9849 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9851 else if (PL_bufend[-1] == '\r')
9852 PL_bufend[-1] = '\n';
9854 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9855 PL_bufend[-1] = '\n';
9857 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9858 SV *sv = NEWSV(88,0);
9860 sv_upgrade(sv, SVt_PVMG);
9861 sv_setsv(sv,PL_linestr);
9864 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9866 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9867 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9868 *(SvPVX(PL_linestr) + off ) = ' ';
9869 sv_catsv(PL_linestr,herewas);
9870 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9871 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9875 sv_catsv(tmpstr,PL_linestr);
9880 PL_multi_end = CopLINE(PL_curcop);
9881 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9882 SvPV_shrink_to_cur(tmpstr);
9884 SvREFCNT_dec(herewas);
9886 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9888 else if (PL_encoding)
9889 sv_recode_to_utf8(tmpstr, PL_encoding);
9891 PL_lex_stuff = tmpstr;
9892 yylval.ival = op_type;
9897 takes: current position in input buffer
9898 returns: new position in input buffer
9899 side-effects: yylval and lex_op are set.
9904 <FH> read from filehandle
9905 <pkg::FH> read from package qualified filehandle
9906 <pkg'FH> read from package qualified filehandle
9907 <$fh> read from filehandle in $fh
9913 S_scan_inputsymbol(pTHX_ char *start)
9915 register char *s = start; /* current position in buffer */
9921 d = PL_tokenbuf; /* start of temp holding space */
9922 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9923 end = strchr(s, '\n');
9926 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9928 /* die if we didn't have space for the contents of the <>,
9929 or if it didn't end, or if we see a newline
9932 if (len >= sizeof PL_tokenbuf)
9933 Perl_croak(aTHX_ "Excessively long <> operator");
9935 Perl_croak(aTHX_ "Unterminated <> operator");
9940 Remember, only scalar variables are interpreted as filehandles by
9941 this code. Anything more complex (e.g., <$fh{$num}>) will be
9942 treated as a glob() call.
9943 This code makes use of the fact that except for the $ at the front,
9944 a scalar variable and a filehandle look the same.
9946 if (*d == '$' && d[1]) d++;
9948 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9949 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9952 /* If we've tried to read what we allow filehandles to look like, and
9953 there's still text left, then it must be a glob() and not a getline.
9954 Use scan_str to pull out the stuff between the <> and treat it
9955 as nothing more than a string.
9958 if (d - PL_tokenbuf != len) {
9959 yylval.ival = OP_GLOB;
9961 s = scan_str(start,FALSE,FALSE);
9963 Perl_croak(aTHX_ "Glob not terminated");
9967 bool readline_overriden = FALSE;
9968 GV *gv_readline = Nullgv;
9970 /* we're in a filehandle read situation */
9973 /* turn <> into <ARGV> */
9975 Copy("ARGV",d,5,char);
9977 /* Check whether readline() is overriden */
9978 if (((gv_readline = gv_fetchpv("readline", 0, SVt_PVCV))
9979 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9981 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9982 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9983 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9984 readline_overriden = TRUE;
9986 /* if <$fh>, create the ops to turn the variable into a
9992 /* try to find it in the pad for this block, otherwise find
9993 add symbol table ops
9995 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
9996 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
9997 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
9998 HEK *stashname = HvNAME_HEK(stash);
9999 SV *sym = sv_2mortal(newSVhek(stashname));
10000 sv_catpvs(sym, "::");
10001 sv_catpv(sym, d+1);
10006 OP *o = newOP(OP_PADSV, 0);
10008 PL_lex_op = readline_overriden
10009 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10010 append_elem(OP_LIST, o,
10011 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10012 : (OP*)newUNOP(OP_READLINE, 0, o);
10021 ? (GV_ADDMULTI | GV_ADDINEVAL)
10024 PL_lex_op = readline_overriden
10025 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10026 append_elem(OP_LIST,
10027 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10028 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10029 : (OP*)newUNOP(OP_READLINE, 0,
10030 newUNOP(OP_RV2SV, 0,
10031 newGVOP(OP_GV, 0, gv)));
10033 if (!readline_overriden)
10034 PL_lex_op->op_flags |= OPf_SPECIAL;
10035 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10036 yylval.ival = OP_NULL;
10039 /* If it's none of the above, it must be a literal filehandle
10040 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10042 GV *gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10043 PL_lex_op = readline_overriden
10044 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10045 append_elem(OP_LIST,
10046 newGVOP(OP_GV, 0, gv),
10047 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10048 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10049 yylval.ival = OP_NULL;
10058 takes: start position in buffer
10059 keep_quoted preserve \ on the embedded delimiter(s)
10060 keep_delims preserve the delimiters around the string
10061 returns: position to continue reading from buffer
10062 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10063 updates the read buffer.
10065 This subroutine pulls a string out of the input. It is called for:
10066 q single quotes q(literal text)
10067 ' single quotes 'literal text'
10068 qq double quotes qq(interpolate $here please)
10069 " double quotes "interpolate $here please"
10070 qx backticks qx(/bin/ls -l)
10071 ` backticks `/bin/ls -l`
10072 qw quote words @EXPORT_OK = qw( func() $spam )
10073 m// regexp match m/this/
10074 s/// regexp substitute s/this/that/
10075 tr/// string transliterate tr/this/that/
10076 y/// string transliterate y/this/that/
10077 ($*@) sub prototypes sub foo ($)
10078 (stuff) sub attr parameters sub foo : attr(stuff)
10079 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10081 In most of these cases (all but <>, patterns and transliterate)
10082 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10083 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10084 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10087 It skips whitespace before the string starts, and treats the first
10088 character as the delimiter. If the delimiter is one of ([{< then
10089 the corresponding "close" character )]}> is used as the closing
10090 delimiter. It allows quoting of delimiters, and if the string has
10091 balanced delimiters ([{<>}]) it allows nesting.
10093 On success, the SV with the resulting string is put into lex_stuff or,
10094 if that is already non-NULL, into lex_repl. The second case occurs only
10095 when parsing the RHS of the special constructs s/// and tr/// (y///).
10096 For convenience, the terminating delimiter character is stuffed into
10101 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10103 SV *sv; /* scalar value: string */
10104 char *tmps; /* temp string, used for delimiter matching */
10105 register char *s = start; /* current position in the buffer */
10106 register char term; /* terminating character */
10107 register char *to; /* current position in the sv's data */
10108 I32 brackets = 1; /* bracket nesting level */
10109 bool has_utf8 = FALSE; /* is there any utf8 content? */
10110 I32 termcode; /* terminating char. code */
10111 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10112 STRLEN termlen; /* length of terminating string */
10113 char *last = NULL; /* last position for nesting bracket */
10115 /* skip space before the delimiter */
10119 /* mark where we are, in case we need to report errors */
10122 /* after skipping whitespace, the next character is the terminator */
10125 termcode = termstr[0] = term;
10129 termcode = utf8_to_uvchr((U8*)s, &termlen);
10130 Copy(s, termstr, termlen, U8);
10131 if (!UTF8_IS_INVARIANT(term))
10135 /* mark where we are */
10136 PL_multi_start = CopLINE(PL_curcop);
10137 PL_multi_open = term;
10139 /* find corresponding closing delimiter */
10140 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10141 termcode = termstr[0] = term = tmps[5];
10143 PL_multi_close = term;
10145 /* create a new SV to hold the contents. 87 is leak category, I'm
10146 assuming. 79 is the SV's initial length. What a random number. */
10148 sv_upgrade(sv, SVt_PVIV);
10149 SvIV_set(sv, termcode);
10150 (void)SvPOK_only(sv); /* validate pointer */
10152 /* move past delimiter and try to read a complete string */
10154 sv_catpvn(sv, s, termlen);
10157 if (PL_encoding && !UTF) {
10161 int offset = s - SvPVX_const(PL_linestr);
10162 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10163 &offset, (char*)termstr, termlen);
10164 const char *ns = SvPVX_const(PL_linestr) + offset;
10165 char *svlast = SvEND(sv) - 1;
10167 for (; s < ns; s++) {
10168 if (*s == '\n' && !PL_rsfp)
10169 CopLINE_inc(PL_curcop);
10172 goto read_more_line;
10174 /* handle quoted delimiters */
10175 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10177 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10179 if ((svlast-1 - t) % 2) {
10180 if (!keep_quoted) {
10181 *(svlast-1) = term;
10183 SvCUR_set(sv, SvCUR(sv) - 1);
10188 if (PL_multi_open == PL_multi_close) {
10196 for (t = w = last; t < svlast; w++, t++) {
10197 /* At here, all closes are "was quoted" one,
10198 so we don't check PL_multi_close. */
10200 if (!keep_quoted && *(t+1) == PL_multi_open)
10205 else if (*t == PL_multi_open)
10213 SvCUR_set(sv, w - SvPVX_const(sv));
10216 if (--brackets <= 0)
10221 if (!keep_delims) {
10222 SvCUR_set(sv, SvCUR(sv) - 1);
10228 /* extend sv if need be */
10229 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10230 /* set 'to' to the next character in the sv's string */
10231 to = SvPVX(sv)+SvCUR(sv);
10233 /* if open delimiter is the close delimiter read unbridle */
10234 if (PL_multi_open == PL_multi_close) {
10235 for (; s < PL_bufend; s++,to++) {
10236 /* embedded newlines increment the current line number */
10237 if (*s == '\n' && !PL_rsfp)
10238 CopLINE_inc(PL_curcop);
10239 /* handle quoted delimiters */
10240 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10241 if (!keep_quoted && s[1] == term)
10243 /* any other quotes are simply copied straight through */
10247 /* terminate when run out of buffer (the for() condition), or
10248 have found the terminator */
10249 else if (*s == term) {
10252 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10255 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10261 /* if the terminator isn't the same as the start character (e.g.,
10262 matched brackets), we have to allow more in the quoting, and
10263 be prepared for nested brackets.
10266 /* read until we run out of string, or we find the terminator */
10267 for (; s < PL_bufend; s++,to++) {
10268 /* embedded newlines increment the line count */
10269 if (*s == '\n' && !PL_rsfp)
10270 CopLINE_inc(PL_curcop);
10271 /* backslashes can escape the open or closing characters */
10272 if (*s == '\\' && s+1 < PL_bufend) {
10273 if (!keep_quoted &&
10274 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10279 /* allow nested opens and closes */
10280 else if (*s == PL_multi_close && --brackets <= 0)
10282 else if (*s == PL_multi_open)
10284 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10289 /* terminate the copied string and update the sv's end-of-string */
10291 SvCUR_set(sv, to - SvPVX_const(sv));
10294 * this next chunk reads more into the buffer if we're not done yet
10298 break; /* handle case where we are done yet :-) */
10300 #ifndef PERL_STRICT_CR
10301 if (to - SvPVX_const(sv) >= 2) {
10302 if ((to[-2] == '\r' && to[-1] == '\n') ||
10303 (to[-2] == '\n' && to[-1] == '\r'))
10307 SvCUR_set(sv, to - SvPVX_const(sv));
10309 else if (to[-1] == '\r')
10312 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10317 /* if we're out of file, or a read fails, bail and reset the current
10318 line marker so we can report where the unterminated string began
10321 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10323 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10326 /* we read a line, so increment our line counter */
10327 CopLINE_inc(PL_curcop);
10329 /* update debugger info */
10330 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10331 SV * const sv = NEWSV(88,0);
10333 sv_upgrade(sv, SVt_PVMG);
10334 sv_setsv(sv,PL_linestr);
10335 (void)SvIOK_on(sv);
10337 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10340 /* having changed the buffer, we must update PL_bufend */
10341 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10342 PL_last_lop = PL_last_uni = Nullch;
10345 /* at this point, we have successfully read the delimited string */
10347 if (!PL_encoding || UTF) {
10349 sv_catpvn(sv, s, termlen);
10352 if (has_utf8 || PL_encoding)
10355 PL_multi_end = CopLINE(PL_curcop);
10357 /* if we allocated too much space, give some back */
10358 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10359 SvLEN_set(sv, SvCUR(sv) + 1);
10360 SvPV_renew(sv, SvLEN(sv));
10363 /* decide whether this is the first or second quoted string we've read
10376 takes: pointer to position in buffer
10377 returns: pointer to new position in buffer
10378 side-effects: builds ops for the constant in yylval.op
10380 Read a number in any of the formats that Perl accepts:
10382 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10383 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10386 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10388 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10391 If it reads a number without a decimal point or an exponent, it will
10392 try converting the number to an integer and see if it can do so
10393 without loss of precision.
10397 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10399 register const char *s = start; /* current position in buffer */
10400 register char *d; /* destination in temp buffer */
10401 register char *e; /* end of temp buffer */
10402 NV nv; /* number read, as a double */
10403 SV *sv = Nullsv; /* place to put the converted number */
10404 bool floatit; /* boolean: int or float? */
10405 const char *lastub = NULL; /* position of last underbar */
10406 static char const number_too_long[] = "Number too long";
10408 /* We use the first character to decide what type of number this is */
10412 Perl_croak(aTHX_ "panic: scan_num");
10414 /* if it starts with a 0, it could be an octal number, a decimal in
10415 0.13 disguise, or a hexadecimal number, or a binary number. */
10419 u holds the "number so far"
10420 shift the power of 2 of the base
10421 (hex == 4, octal == 3, binary == 1)
10422 overflowed was the number more than we can hold?
10424 Shift is used when we add a digit. It also serves as an "are
10425 we in octal/hex/binary?" indicator to disallow hex characters
10426 when in octal mode.
10431 bool overflowed = FALSE;
10432 bool just_zero = TRUE; /* just plain 0 or binary number? */
10433 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10434 static const char* const bases[5] =
10435 { "", "binary", "", "octal", "hexadecimal" };
10436 static const char* const Bases[5] =
10437 { "", "Binary", "", "Octal", "Hexadecimal" };
10438 static const char* const maxima[5] =
10440 "0b11111111111111111111111111111111",
10444 const char *base, *Base, *max;
10446 /* check for hex */
10451 } else if (s[1] == 'b') {
10456 /* check for a decimal in disguise */
10457 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10459 /* so it must be octal */
10466 if (ckWARN(WARN_SYNTAX))
10467 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10468 "Misplaced _ in number");
10472 base = bases[shift];
10473 Base = Bases[shift];
10474 max = maxima[shift];
10476 /* read the rest of the number */
10478 /* x is used in the overflow test,
10479 b is the digit we're adding on. */
10484 /* if we don't mention it, we're done */
10488 /* _ are ignored -- but warned about if consecutive */
10490 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10491 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10492 "Misplaced _ in number");
10496 /* 8 and 9 are not octal */
10497 case '8': case '9':
10499 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10503 case '2': case '3': case '4':
10504 case '5': case '6': case '7':
10506 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10509 case '0': case '1':
10510 b = *s++ & 15; /* ASCII digit -> value of digit */
10514 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10515 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10516 /* make sure they said 0x */
10519 b = (*s++ & 7) + 9;
10521 /* Prepare to put the digit we have onto the end
10522 of the number so far. We check for overflows.
10528 x = u << shift; /* make room for the digit */
10530 if ((x >> shift) != u
10531 && !(PL_hints & HINT_NEW_BINARY)) {
10534 if (ckWARN_d(WARN_OVERFLOW))
10535 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10536 "Integer overflow in %s number",
10539 u = x | b; /* add the digit to the end */
10542 n *= nvshift[shift];
10543 /* If an NV has not enough bits in its
10544 * mantissa to represent an UV this summing of
10545 * small low-order numbers is a waste of time
10546 * (because the NV cannot preserve the
10547 * low-order bits anyway): we could just
10548 * remember when did we overflow and in the
10549 * end just multiply n by the right
10557 /* if we get here, we had success: make a scalar value from
10562 /* final misplaced underbar check */
10563 if (s[-1] == '_') {
10564 if (ckWARN(WARN_SYNTAX))
10565 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10570 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10571 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10572 "%s number > %s non-portable",
10578 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10579 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10580 "%s number > %s non-portable",
10585 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10586 sv = new_constant(start, s - start, "integer",
10588 else if (PL_hints & HINT_NEW_BINARY)
10589 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10594 handle decimal numbers.
10595 we're also sent here when we read a 0 as the first digit
10597 case '1': case '2': case '3': case '4': case '5':
10598 case '6': case '7': case '8': case '9': case '.':
10601 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10604 /* read next group of digits and _ and copy into d */
10605 while (isDIGIT(*s) || *s == '_') {
10606 /* skip underscores, checking for misplaced ones
10610 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10611 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10612 "Misplaced _ in number");
10616 /* check for end of fixed-length buffer */
10618 Perl_croak(aTHX_ number_too_long);
10619 /* if we're ok, copy the character */
10624 /* final misplaced underbar check */
10625 if (lastub && s == lastub + 1) {
10626 if (ckWARN(WARN_SYNTAX))
10627 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10630 /* read a decimal portion if there is one. avoid
10631 3..5 being interpreted as the number 3. followed
10634 if (*s == '.' && s[1] != '.') {
10639 if (ckWARN(WARN_SYNTAX))
10640 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10641 "Misplaced _ in number");
10645 /* copy, ignoring underbars, until we run out of digits.
10647 for (; isDIGIT(*s) || *s == '_'; s++) {
10648 /* fixed length buffer check */
10650 Perl_croak(aTHX_ number_too_long);
10652 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10653 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10654 "Misplaced _ in number");
10660 /* fractional part ending in underbar? */
10661 if (s[-1] == '_') {
10662 if (ckWARN(WARN_SYNTAX))
10663 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10664 "Misplaced _ in number");
10666 if (*s == '.' && isDIGIT(s[1])) {
10667 /* oops, it's really a v-string, but without the "v" */
10673 /* read exponent part, if present */
10674 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10678 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10679 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10681 /* stray preinitial _ */
10683 if (ckWARN(WARN_SYNTAX))
10684 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10685 "Misplaced _ in number");
10689 /* allow positive or negative exponent */
10690 if (*s == '+' || *s == '-')
10693 /* stray initial _ */
10695 if (ckWARN(WARN_SYNTAX))
10696 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10697 "Misplaced _ in number");
10701 /* read digits of exponent */
10702 while (isDIGIT(*s) || *s == '_') {
10705 Perl_croak(aTHX_ number_too_long);
10709 if (((lastub && s == lastub + 1) ||
10710 (!isDIGIT(s[1]) && s[1] != '_'))
10711 && ckWARN(WARN_SYNTAX))
10712 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10713 "Misplaced _ in number");
10720 /* make an sv from the string */
10724 We try to do an integer conversion first if no characters
10725 indicating "float" have been found.
10730 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10732 if (flags == IS_NUMBER_IN_UV) {
10734 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10737 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10738 if (uv <= (UV) IV_MIN)
10739 sv_setiv(sv, -(IV)uv);
10746 /* terminate the string */
10748 nv = Atof(PL_tokenbuf);
10752 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10753 (PL_hints & HINT_NEW_INTEGER) )
10754 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10755 (floatit ? "float" : "integer"),
10759 /* if it starts with a v, it could be a v-string */
10762 sv = NEWSV(92,5); /* preallocate storage space */
10763 s = scan_vstring(s,sv);
10767 /* make the op for the constant and return */
10770 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10772 lvalp->opval = Nullop;
10778 S_scan_formline(pTHX_ register char *s)
10780 register char *eol;
10782 SV *stuff = newSVpvs("");
10783 bool needargs = FALSE;
10784 bool eofmt = FALSE;
10786 while (!needargs) {
10788 #ifdef PERL_STRICT_CR
10789 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10791 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10793 if (*t == '\n' || t == PL_bufend) {
10798 if (PL_in_eval && !PL_rsfp) {
10799 eol = (char *) memchr(s,'\n',PL_bufend-s);
10804 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10806 for (t = s; t < eol; t++) {
10807 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10809 goto enough; /* ~~ must be first line in formline */
10811 if (*t == '@' || *t == '^')
10815 sv_catpvn(stuff, s, eol-s);
10816 #ifndef PERL_STRICT_CR
10817 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10818 char *end = SvPVX(stuff) + SvCUR(stuff);
10821 SvCUR_set(stuff, SvCUR(stuff) - 1);
10830 s = filter_gets(PL_linestr, PL_rsfp, 0);
10831 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10832 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10833 PL_last_lop = PL_last_uni = Nullch;
10842 if (SvCUR(stuff)) {
10845 PL_lex_state = LEX_NORMAL;
10846 PL_nextval[PL_nexttoke].ival = 0;
10850 PL_lex_state = LEX_FORMLINE;
10852 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10854 else if (PL_encoding)
10855 sv_recode_to_utf8(stuff, PL_encoding);
10857 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10859 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10863 SvREFCNT_dec(stuff);
10865 PL_lex_formbrack = 0;
10876 PL_cshlen = strlen(PL_cshname);
10881 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10883 const I32 oldsavestack_ix = PL_savestack_ix;
10884 CV* outsidecv = PL_compcv;
10887 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10889 SAVEI32(PL_subline);
10890 save_item(PL_subname);
10891 SAVESPTR(PL_compcv);
10893 PL_compcv = (CV*)NEWSV(1104,0);
10894 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10895 CvFLAGS(PL_compcv) |= flags;
10897 PL_subline = CopLINE(PL_curcop);
10898 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10899 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10900 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10902 return oldsavestack_ix;
10906 #pragma segment Perl_yylex
10909 Perl_yywarn(pTHX_ const char *s)
10911 PL_in_eval |= EVAL_WARNONLY;
10913 PL_in_eval &= ~EVAL_WARNONLY;
10918 Perl_yyerror(pTHX_ const char *s)
10920 const char *where = NULL;
10921 const char *context = NULL;
10925 if (!yychar || (yychar == ';' && !PL_rsfp))
10927 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10928 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10929 PL_oldbufptr != PL_bufptr) {
10932 The code below is removed for NetWare because it abends/crashes on NetWare
10933 when the script has error such as not having the closing quotes like:
10934 if ($var eq "value)
10935 Checking of white spaces is anyway done in NetWare code.
10938 while (isSPACE(*PL_oldoldbufptr))
10941 context = PL_oldoldbufptr;
10942 contlen = PL_bufptr - PL_oldoldbufptr;
10944 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10945 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10948 The code below is removed for NetWare because it abends/crashes on NetWare
10949 when the script has error such as not having the closing quotes like:
10950 if ($var eq "value)
10951 Checking of white spaces is anyway done in NetWare code.
10954 while (isSPACE(*PL_oldbufptr))
10957 context = PL_oldbufptr;
10958 contlen = PL_bufptr - PL_oldbufptr;
10960 else if (yychar > 255)
10961 where = "next token ???";
10962 else if (yychar == -2) { /* YYEMPTY */
10963 if (PL_lex_state == LEX_NORMAL ||
10964 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10965 where = "at end of line";
10966 else if (PL_lex_inpat)
10967 where = "within pattern";
10969 where = "within string";
10972 SV *where_sv = sv_2mortal(newSVpvs("next char "));
10974 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10975 else if (isPRINT_LC(yychar))
10976 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10978 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10979 where = SvPVX_const(where_sv);
10981 msg = sv_2mortal(newSVpv(s, 0));
10982 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10983 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10985 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10987 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10988 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10989 Perl_sv_catpvf(aTHX_ msg,
10990 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10991 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10994 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
10995 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
10998 if (PL_error_count >= 10) {
10999 if (PL_in_eval && SvCUR(ERRSV))
11000 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11001 ERRSV, OutCopFILE(PL_curcop));
11003 Perl_croak(aTHX_ "%s has too many errors.\n",
11004 OutCopFILE(PL_curcop));
11007 PL_in_my_stash = NULL;
11011 #pragma segment Main
11015 S_swallow_bom(pTHX_ U8 *s)
11017 const STRLEN slen = SvCUR(PL_linestr);
11020 if (s[1] == 0xFE) {
11021 /* UTF-16 little-endian? (or UTF32-LE?) */
11022 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11023 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11024 #ifndef PERL_NO_UTF16_FILTER
11025 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11028 if (PL_bufend > (char*)s) {
11032 filter_add(utf16rev_textfilter, NULL);
11033 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11034 utf16_to_utf8_reversed(s, news,
11035 PL_bufend - (char*)s - 1,
11037 sv_setpvn(PL_linestr, (const char*)news, newlen);
11039 SvUTF8_on(PL_linestr);
11040 s = (U8*)SvPVX(PL_linestr);
11041 PL_bufend = SvPVX(PL_linestr) + newlen;
11044 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11049 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11050 #ifndef PERL_NO_UTF16_FILTER
11051 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11054 if (PL_bufend > (char *)s) {
11058 filter_add(utf16_textfilter, NULL);
11059 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11060 utf16_to_utf8(s, news,
11061 PL_bufend - (char*)s,
11063 sv_setpvn(PL_linestr, (const char*)news, newlen);
11065 SvUTF8_on(PL_linestr);
11066 s = (U8*)SvPVX(PL_linestr);
11067 PL_bufend = SvPVX(PL_linestr) + newlen;
11070 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11075 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11076 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11077 s += 3; /* UTF-8 */
11083 if (s[2] == 0xFE && s[3] == 0xFF) {
11084 /* UTF-32 big-endian */
11085 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11088 else if (s[2] == 0 && s[3] != 0) {
11091 * are a good indicator of UTF-16BE. */
11092 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11097 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11100 * are a good indicator of UTF-16LE. */
11101 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11110 * Restore a source filter.
11114 restore_rsfp(pTHX_ void *f)
11116 PerlIO * const fp = (PerlIO*)f;
11118 if (PL_rsfp == PerlIO_stdin())
11119 PerlIO_clearerr(PL_rsfp);
11120 else if (PL_rsfp && (PL_rsfp != fp))
11121 PerlIO_close(PL_rsfp);
11125 #ifndef PERL_NO_UTF16_FILTER
11127 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11129 const STRLEN old = SvCUR(sv);
11130 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11131 DEBUG_P(PerlIO_printf(Perl_debug_log,
11132 "utf16_textfilter(%p): %d %d (%d)\n",
11133 utf16_textfilter, idx, maxlen, (int) count));
11137 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11138 Copy(SvPVX_const(sv), tmps, old, char);
11139 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11140 SvCUR(sv) - old, &newlen);
11141 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11143 DEBUG_P({sv_dump(sv);});
11148 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11150 const STRLEN old = SvCUR(sv);
11151 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11152 DEBUG_P(PerlIO_printf(Perl_debug_log,
11153 "utf16rev_textfilter(%p): %d %d (%d)\n",
11154 utf16rev_textfilter, idx, maxlen, (int) count));
11158 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11159 Copy(SvPVX_const(sv), tmps, old, char);
11160 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11161 SvCUR(sv) - old, &newlen);
11162 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11164 DEBUG_P({ sv_dump(sv); });
11170 Returns a pointer to the next character after the parsed
11171 vstring, as well as updating the passed in sv.
11173 Function must be called like
11176 s = scan_vstring(s,sv);
11178 The sv should already be large enough to store the vstring
11179 passed in, for performance reasons.
11184 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11186 const char *pos = s;
11187 const char *start = s;
11188 if (*pos == 'v') pos++; /* get past 'v' */
11189 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11191 if ( *pos != '.') {
11192 /* this may not be a v-string if followed by => */
11193 const char *next = pos;
11194 while (next < PL_bufend && isSPACE(*next))
11196 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11197 /* return string not v-string */
11198 sv_setpvn(sv,(char *)s,pos-s);
11199 return (char *)pos;
11203 if (!isALPHA(*pos)) {
11204 U8 tmpbuf[UTF8_MAXBYTES+1];
11206 if (*s == 'v') s++; /* get past 'v' */
11208 sv_setpvn(sv, "", 0);
11214 /* this is atoi() that tolerates underscores */
11215 const char *end = pos;
11217 while (--end >= s) {
11222 rev += (*end - '0') * mult;
11224 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11225 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11226 "Integer overflow in decimal number");
11230 if (rev > 0x7FFFFFFF)
11231 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11233 /* Append native character for the rev point */
11234 tmpend = uvchr_to_utf8(tmpbuf, rev);
11235 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11236 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11238 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11244 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11248 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11256 * c-indentation-style: bsd
11257 * c-basic-offset: 4
11258 * indent-tabs-mode: t
11261 * ex: set ts=8 sts=4 sw=4 noet: