3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
287 const char *name = Nullch;
288 enum token_type type = TOKENTYPE_NONE;
289 const struct debug_tokens *p;
290 SV* const report = newSVpvs("<== ");
292 for (p = debug_tokens; p->token; p++) {
293 if (p->token == (int)rv) {
300 Perl_sv_catpv(aTHX_ report, name);
301 else if ((char)rv > ' ' && (char)rv < '~')
302 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
304 sv_catpvs(report, "EOF");
306 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
309 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
312 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
314 case TOKENTYPE_OPNUM:
315 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
316 PL_op_name[yylval.ival]);
319 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
321 case TOKENTYPE_OPVAL:
323 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
324 PL_op_name[yylval.opval->op_type]);
325 if (yylval.opval->op_type == OP_CONST) {
326 Perl_sv_catpvf(aTHX_ report, " %s",
327 SvPEEK(cSVOPx_sv(yylval.opval)));
332 sv_catpvs(report, "(opval=null)");
335 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
341 /* print the buffer with suitable escapes */
344 S_printbuf(pTHX_ const char* fmt, const char* s)
346 SV* const tmp = newSVpvs("");
347 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
356 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
357 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
361 S_ao(pTHX_ int toketype)
363 if (*PL_bufptr == '=') {
365 if (toketype == ANDAND)
366 yylval.ival = OP_ANDASSIGN;
367 else if (toketype == OROR)
368 yylval.ival = OP_ORASSIGN;
369 else if (toketype == DORDOR)
370 yylval.ival = OP_DORASSIGN;
378 * When Perl expects an operator and finds something else, no_op
379 * prints the warning. It always prints "<something> found where
380 * operator expected. It prints "Missing semicolon on previous line?"
381 * if the surprise occurs at the start of the line. "do you need to
382 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
383 * where the compiler doesn't know if foo is a method call or a function.
384 * It prints "Missing operator before end of line" if there's nothing
385 * after the missing operator, or "... before <...>" if there is something
386 * after the missing operator.
390 S_no_op(pTHX_ const char *what, char *s)
392 char * const oldbp = PL_bufptr;
393 const bool is_first = (PL_oldbufptr == PL_linestart);
399 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
400 if (ckWARN_d(WARN_SYNTAX)) {
402 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
403 "\t(Missing semicolon on previous line?)\n");
404 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
406 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
407 if (t < PL_bufptr && isSPACE(*t))
408 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
409 "\t(Do you need to predeclare %.*s?)\n",
410 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
414 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
415 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
423 * Complain about missing quote/regexp/heredoc terminator.
424 * If it's called with (char *)NULL then it cauterizes the line buffer.
425 * If we're in a delimited string and the delimiter is a control
426 * character, it's reformatted into a two-char sequence like ^C.
431 S_missingterm(pTHX_ char *s)
436 char * const nl = strrchr(s,'\n');
442 iscntrl(PL_multi_close)
444 PL_multi_close < 32 || PL_multi_close == 127
448 tmpbuf[1] = (char)toCTRL(PL_multi_close);
453 *tmpbuf = (char)PL_multi_close;
457 q = strchr(s,'"') ? '\'' : '"';
458 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
461 #define FEATURE_IS_ENABLED(name) \
462 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
463 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
465 * S_feature_is_enabled
466 * Check whether the named feature is enabled.
469 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
471 HV * const hinthv = GvHV(PL_hintgv);
472 char he_name[32] = "feature_";
473 (void) strncpy(&he_name[8], name, 24);
475 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
483 Perl_deprecate(pTHX_ const char *s)
485 if (ckWARN(WARN_DEPRECATED))
486 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
490 Perl_deprecate_old(pTHX_ const char *s)
492 /* This function should NOT be called for any new deprecated warnings */
493 /* Use Perl_deprecate instead */
495 /* It is here to maintain backward compatibility with the pre-5.8 */
496 /* warnings category hierarchy. The "deprecated" category used to */
497 /* live under the "syntax" category. It is now a top-level category */
498 /* in its own right. */
500 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
501 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
502 "Use of %s is deprecated", s);
506 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
507 * utf16-to-utf8-reversed.
510 #ifdef PERL_CR_FILTER
514 register const char *s = SvPVX_const(sv);
515 register const char * const e = s + SvCUR(sv);
516 /* outer loop optimized to do nothing if there are no CR-LFs */
518 if (*s++ == '\r' && *s == '\n') {
519 /* hit a CR-LF, need to copy the rest */
520 register char *d = s - 1;
523 if (*s == '\r' && s[1] == '\n')
534 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
536 const I32 count = FILTER_READ(idx+1, sv, maxlen);
537 if (count > 0 && !maxlen)
545 * Initialize variables. Uses the Perl save_stack to save its state (for
546 * recursive calls to the parser).
550 Perl_lex_start(pTHX_ SV *line)
555 SAVEI32(PL_lex_dojoin);
556 SAVEI32(PL_lex_brackets);
557 SAVEI32(PL_lex_casemods);
558 SAVEI32(PL_lex_starts);
559 SAVEI32(PL_lex_state);
560 SAVEVPTR(PL_lex_inpat);
561 SAVEI32(PL_lex_inwhat);
562 if (PL_lex_state == LEX_KNOWNEXT) {
563 I32 toke = PL_nexttoke;
564 while (--toke >= 0) {
565 SAVEI32(PL_nexttype[toke]);
566 SAVEVPTR(PL_nextval[toke]);
568 SAVEI32(PL_nexttoke);
570 SAVECOPLINE(PL_curcop);
573 SAVEPPTR(PL_oldbufptr);
574 SAVEPPTR(PL_oldoldbufptr);
575 SAVEPPTR(PL_last_lop);
576 SAVEPPTR(PL_last_uni);
577 SAVEPPTR(PL_linestart);
578 SAVESPTR(PL_linestr);
579 SAVEGENERICPV(PL_lex_brackstack);
580 SAVEGENERICPV(PL_lex_casestack);
581 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
582 SAVESPTR(PL_lex_stuff);
583 SAVEI32(PL_lex_defer);
584 SAVEI32(PL_sublex_info.sub_inwhat);
585 SAVESPTR(PL_lex_repl);
587 SAVEINT(PL_lex_expect);
589 PL_lex_state = LEX_NORMAL;
593 Newx(PL_lex_brackstack, 120, char);
594 Newx(PL_lex_casestack, 12, char);
596 *PL_lex_casestack = '\0';
599 PL_lex_stuff = Nullsv;
600 PL_lex_repl = Nullsv;
604 PL_sublex_info.sub_inwhat = 0;
606 if (SvREADONLY(PL_linestr))
607 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
608 s = SvPV_const(PL_linestr, len);
609 if (!len || s[len-1] != ';') {
610 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
611 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
612 sv_catpvs(PL_linestr, "\n;");
614 SvTEMP_off(PL_linestr);
615 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
616 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
617 PL_last_lop = PL_last_uni = Nullch;
623 * Finalizer for lexing operations. Must be called when the parser is
624 * done with the lexer.
630 PL_doextract = FALSE;
635 * This subroutine has nothing to do with tilting, whether at windmills
636 * or pinball tables. Its name is short for "increment line". It
637 * increments the current line number in CopLINE(PL_curcop) and checks
638 * to see whether the line starts with a comment of the form
639 * # line 500 "foo.pm"
640 * If so, it sets the current line number and file to the values in the comment.
644 S_incline(pTHX_ char *s)
651 CopLINE_inc(PL_curcop);
654 while (SPACE_OR_TAB(*s)) s++;
655 if (strnEQ(s, "line", 4))
659 if (SPACE_OR_TAB(*s))
663 while (SPACE_OR_TAB(*s)) s++;
669 while (SPACE_OR_TAB(*s))
671 if (*s == '"' && (t = strchr(s+1, '"'))) {
676 for (t = s; !isSPACE(*t); t++) ;
679 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
681 if (*e != '\n' && *e != '\0')
682 return; /* false alarm */
688 const char * const cf = CopFILE(PL_curcop);
689 STRLEN tmplen = cf ? strlen(cf) : 0;
690 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
691 /* must copy *{"::_<(eval N)[oldfilename:L]"}
692 * to *{"::_<newfilename"} */
693 char smallbuf[256], smallbuf2[256];
694 char *tmpbuf, *tmpbuf2;
696 STRLEN tmplen2 = strlen(s);
697 if (tmplen + 3 < sizeof smallbuf)
700 Newx(tmpbuf, tmplen + 3, char);
701 if (tmplen2 + 3 < sizeof smallbuf2)
704 Newx(tmpbuf2, tmplen2 + 3, char);
705 tmpbuf[0] = tmpbuf2[0] = '_';
706 tmpbuf[1] = tmpbuf2[1] = '<';
707 memcpy(tmpbuf + 2, cf, ++tmplen);
708 memcpy(tmpbuf2 + 2, s, ++tmplen2);
710 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
712 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
714 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
715 /* adjust ${"::_<newfilename"} to store the new file name */
716 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
717 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
718 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
720 if (tmpbuf != smallbuf) Safefree(tmpbuf);
721 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
724 CopFILE_free(PL_curcop);
725 CopFILE_set(PL_curcop, s);
728 CopLINE_set(PL_curcop, atoi(n)-1);
733 * Called to gobble the appropriate amount and type of whitespace.
734 * Skips comments as well.
738 S_skipspace(pTHX_ register char *s)
740 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
741 while (s < PL_bufend && SPACE_OR_TAB(*s))
747 SSize_t oldprevlen, oldoldprevlen;
748 SSize_t oldloplen = 0, oldunilen = 0;
749 while (s < PL_bufend && isSPACE(*s)) {
750 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
755 if (s < PL_bufend && *s == '#') {
756 while (s < PL_bufend && *s != '\n')
760 if (PL_in_eval && !PL_rsfp) {
767 /* only continue to recharge the buffer if we're at the end
768 * of the buffer, we're not reading from a source filter, and
769 * we're in normal lexing mode
771 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
772 PL_lex_state == LEX_FORMLINE)
775 /* try to recharge the buffer */
776 if ((s = filter_gets(PL_linestr, PL_rsfp,
777 (prevlen = SvCUR(PL_linestr)))) == Nullch)
779 /* end of file. Add on the -p or -n magic */
782 ";}continue{print or die qq(-p destination: $!\\n);}");
783 PL_minus_n = PL_minus_p = 0;
785 else if (PL_minus_n) {
786 sv_setpvn(PL_linestr, ";}", 2);
790 sv_setpvn(PL_linestr,";", 1);
792 /* reset variables for next time we lex */
793 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
795 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
796 PL_last_lop = PL_last_uni = Nullch;
798 /* Close the filehandle. Could be from -P preprocessor,
799 * STDIN, or a regular file. If we were reading code from
800 * STDIN (because the commandline held no -e or filename)
801 * then we don't close it, we reset it so the code can
802 * read from STDIN too.
805 if (PL_preprocess && !PL_in_eval)
806 (void)PerlProc_pclose(PL_rsfp);
807 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
808 PerlIO_clearerr(PL_rsfp);
810 (void)PerlIO_close(PL_rsfp);
815 /* not at end of file, so we only read another line */
816 /* make corresponding updates to old pointers, for yyerror() */
817 oldprevlen = PL_oldbufptr - PL_bufend;
818 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
820 oldunilen = PL_last_uni - PL_bufend;
822 oldloplen = PL_last_lop - PL_bufend;
823 PL_linestart = PL_bufptr = s + prevlen;
824 PL_bufend = s + SvCUR(PL_linestr);
826 PL_oldbufptr = s + oldprevlen;
827 PL_oldoldbufptr = s + oldoldprevlen;
829 PL_last_uni = s + oldunilen;
831 PL_last_lop = s + oldloplen;
834 /* debugger active and we're not compiling the debugger code,
835 * so store the line into the debugger's array of lines
837 if (PERLDB_LINE && PL_curstash != PL_debstash) {
838 SV * const sv = NEWSV(85,0);
840 sv_upgrade(sv, SVt_PVMG);
841 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
844 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
851 * Check the unary operators to ensure there's no ambiguity in how they're
852 * used. An ambiguous piece of code would be:
854 * This doesn't mean rand() + 5. Because rand() is a unary operator,
855 * the +5 is its argument.
864 if (PL_oldoldbufptr != PL_last_uni)
866 while (isSPACE(*PL_last_uni))
868 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
869 if ((t = strchr(s, '(')) && t < PL_bufptr)
871 if (ckWARN_d(WARN_AMBIGUOUS)){
874 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
875 "Warning: Use of \"%s\" without parentheses is ambiguous",
882 * LOP : macro to build a list operator. Its behaviour has been replaced
883 * with a subroutine, S_lop() for which LOP is just another name.
886 #define LOP(f,x) return lop(f,x,s)
890 * Build a list operator (or something that might be one). The rules:
891 * - if we have a next token, then it's a list operator [why?]
892 * - if the next thing is an opening paren, then it's a function
893 * - else it's a list operator
897 S_lop(pTHX_ I32 f, int x, char *s)
903 PL_last_lop = PL_oldbufptr;
904 PL_last_lop_op = (OPCODE)f;
906 return REPORT(LSTOP);
913 return REPORT(LSTOP);
918 * When the lexer realizes it knows the next token (for instance,
919 * it is reordering tokens for the parser) then it can call S_force_next
920 * to know what token to return the next time the lexer is called. Caller
921 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
922 * handles the token correctly.
926 S_force_next(pTHX_ I32 type)
928 PL_nexttype[PL_nexttoke] = type;
930 if (PL_lex_state != LEX_KNOWNEXT) {
931 PL_lex_defer = PL_lex_state;
932 PL_lex_expect = PL_expect;
933 PL_lex_state = LEX_KNOWNEXT;
938 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
940 SV * const sv = newSVpvn(start,len);
941 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
948 * When the lexer knows the next thing is a word (for instance, it has
949 * just seen -> and it knows that the next char is a word char, then
950 * it calls S_force_word to stick the next word into the PL_next lookahead.
953 * char *start : buffer position (must be within PL_linestr)
954 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
955 * int check_keyword : if true, Perl checks to make sure the word isn't
956 * a keyword (do this if the word is a label, e.g. goto FOO)
957 * int allow_pack : if true, : characters will also be allowed (require,
959 * int allow_initial_tick : used by the "sub" lexer only.
963 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
968 start = skipspace(start);
970 if (isIDFIRST_lazy_if(s,UTF) ||
971 (allow_pack && *s == ':') ||
972 (allow_initial_tick && *s == '\'') )
974 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
975 if (check_keyword && keyword(PL_tokenbuf, len))
977 if (token == METHOD) {
982 PL_expect = XOPERATOR;
985 PL_nextval[PL_nexttoke].opval
986 = (OP*)newSVOP(OP_CONST,0,
987 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
988 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
996 * Called when the lexer wants $foo *foo &foo etc, but the program
997 * text only contains the "foo" portion. The first argument is a pointer
998 * to the "foo", and the second argument is the type symbol to prefix.
999 * Forces the next token to be a "WORD".
1000 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1004 S_force_ident(pTHX_ register const char *s, int kind)
1007 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1008 PL_nextval[PL_nexttoke].opval = o;
1011 o->op_private = OPpCONST_ENTERED;
1012 /* XXX see note in pp_entereval() for why we forgo typo
1013 warnings if the symbol must be introduced in an eval.
1015 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1016 kind == '$' ? SVt_PV :
1017 kind == '@' ? SVt_PVAV :
1018 kind == '%' ? SVt_PVHV :
1026 Perl_str_to_version(pTHX_ SV *sv)
1031 const char *start = SvPV_const(sv,len);
1032 const char * const end = start + len;
1033 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1034 while (start < end) {
1038 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1043 retval += ((NV)n)/nshift;
1052 * Forces the next token to be a version number.
1053 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1054 * and if "guessing" is TRUE, then no new token is created (and the caller
1055 * must use an alternative parsing method).
1059 S_force_version(pTHX_ char *s, int guessing)
1061 OP *version = Nullop;
1070 while (isDIGIT(*d) || *d == '_' || *d == '.')
1072 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1074 s = scan_num(s, &yylval);
1075 version = yylval.opval;
1076 ver = cSVOPx(version)->op_sv;
1077 if (SvPOK(ver) && !SvNIOK(ver)) {
1078 SvUPGRADE(ver, SVt_PVNV);
1079 SvNV_set(ver, str_to_version(ver));
1080 SvNOK_on(ver); /* hint that it is a version */
1087 /* NOTE: The parser sees the package name and the VERSION swapped */
1088 PL_nextval[PL_nexttoke].opval = version;
1096 * Tokenize a quoted string passed in as an SV. It finds the next
1097 * chunk, up to end of string or a backslash. It may make a new
1098 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1103 S_tokeq(pTHX_ SV *sv)
1106 register char *send;
1114 s = SvPV_force(sv, len);
1115 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1118 while (s < send && *s != '\\')
1123 if ( PL_hints & HINT_NEW_STRING ) {
1124 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1130 if (s + 1 < send && (s[1] == '\\'))
1131 s++; /* all that, just for this */
1136 SvCUR_set(sv, d - SvPVX_const(sv));
1138 if ( PL_hints & HINT_NEW_STRING )
1139 return new_constant(NULL, 0, "q", sv, pv, "q");
1144 * Now come three functions related to double-quote context,
1145 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1146 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1147 * interact with PL_lex_state, and create fake ( ... ) argument lists
1148 * to handle functions and concatenation.
1149 * They assume that whoever calls them will be setting up a fake
1150 * join call, because each subthing puts a ',' after it. This lets
1153 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1155 * (I'm not sure whether the spurious commas at the end of lcfirst's
1156 * arguments and join's arguments are created or not).
1161 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1163 * Pattern matching will set PL_lex_op to the pattern-matching op to
1164 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1166 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1168 * Everything else becomes a FUNC.
1170 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1171 * had an OP_CONST or OP_READLINE). This just sets us up for a
1172 * call to S_sublex_push().
1176 S_sublex_start(pTHX)
1178 register const I32 op_type = yylval.ival;
1180 if (op_type == OP_NULL) {
1181 yylval.opval = PL_lex_op;
1185 if (op_type == OP_CONST || op_type == OP_READLINE) {
1186 SV *sv = tokeq(PL_lex_stuff);
1188 if (SvTYPE(sv) == SVt_PVIV) {
1189 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1191 const char *p = SvPV_const(sv, len);
1192 SV * const nsv = newSVpvn(p, len);
1198 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1199 PL_lex_stuff = Nullsv;
1200 /* Allow <FH> // "foo" */
1201 if (op_type == OP_READLINE)
1202 PL_expect = XTERMORDORDOR;
1206 PL_sublex_info.super_state = PL_lex_state;
1207 PL_sublex_info.sub_inwhat = op_type;
1208 PL_sublex_info.sub_op = PL_lex_op;
1209 PL_lex_state = LEX_INTERPPUSH;
1213 yylval.opval = PL_lex_op;
1223 * Create a new scope to save the lexing state. The scope will be
1224 * ended in S_sublex_done. Returns a '(', starting the function arguments
1225 * to the uc, lc, etc. found before.
1226 * Sets PL_lex_state to LEX_INTERPCONCAT.
1235 PL_lex_state = PL_sublex_info.super_state;
1236 SAVEI32(PL_lex_dojoin);
1237 SAVEI32(PL_lex_brackets);
1238 SAVEI32(PL_lex_casemods);
1239 SAVEI32(PL_lex_starts);
1240 SAVEI32(PL_lex_state);
1241 SAVEVPTR(PL_lex_inpat);
1242 SAVEI32(PL_lex_inwhat);
1243 SAVECOPLINE(PL_curcop);
1244 SAVEPPTR(PL_bufptr);
1245 SAVEPPTR(PL_bufend);
1246 SAVEPPTR(PL_oldbufptr);
1247 SAVEPPTR(PL_oldoldbufptr);
1248 SAVEPPTR(PL_last_lop);
1249 SAVEPPTR(PL_last_uni);
1250 SAVEPPTR(PL_linestart);
1251 SAVESPTR(PL_linestr);
1252 SAVEGENERICPV(PL_lex_brackstack);
1253 SAVEGENERICPV(PL_lex_casestack);
1255 PL_linestr = PL_lex_stuff;
1256 PL_lex_stuff = Nullsv;
1258 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1259 = SvPVX(PL_linestr);
1260 PL_bufend += SvCUR(PL_linestr);
1261 PL_last_lop = PL_last_uni = Nullch;
1262 SAVEFREESV(PL_linestr);
1264 PL_lex_dojoin = FALSE;
1265 PL_lex_brackets = 0;
1266 Newx(PL_lex_brackstack, 120, char);
1267 Newx(PL_lex_casestack, 12, char);
1268 PL_lex_casemods = 0;
1269 *PL_lex_casestack = '\0';
1271 PL_lex_state = LEX_INTERPCONCAT;
1272 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1274 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1275 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1276 PL_lex_inpat = PL_sublex_info.sub_op;
1278 PL_lex_inpat = Nullop;
1285 * Restores lexer state after a S_sublex_push.
1292 if (!PL_lex_starts++) {
1293 SV * const sv = newSVpvs("");
1294 if (SvUTF8(PL_linestr))
1296 PL_expect = XOPERATOR;
1297 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1301 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1302 PL_lex_state = LEX_INTERPCASEMOD;
1306 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1307 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1308 PL_linestr = PL_lex_repl;
1310 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1311 PL_bufend += SvCUR(PL_linestr);
1312 PL_last_lop = PL_last_uni = Nullch;
1313 SAVEFREESV(PL_linestr);
1314 PL_lex_dojoin = FALSE;
1315 PL_lex_brackets = 0;
1316 PL_lex_casemods = 0;
1317 *PL_lex_casestack = '\0';
1319 if (SvEVALED(PL_lex_repl)) {
1320 PL_lex_state = LEX_INTERPNORMAL;
1322 /* we don't clear PL_lex_repl here, so that we can check later
1323 whether this is an evalled subst; that means we rely on the
1324 logic to ensure sublex_done() is called again only via the
1325 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1328 PL_lex_state = LEX_INTERPCONCAT;
1329 PL_lex_repl = Nullsv;
1335 PL_bufend = SvPVX(PL_linestr);
1336 PL_bufend += SvCUR(PL_linestr);
1337 PL_expect = XOPERATOR;
1338 PL_sublex_info.sub_inwhat = 0;
1346 Extracts a pattern, double-quoted string, or transliteration. This
1349 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1350 processing a pattern (PL_lex_inpat is true), a transliteration
1351 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1353 Returns a pointer to the character scanned up to. Iff this is
1354 advanced from the start pointer supplied (ie if anything was
1355 successfully parsed), will leave an OP for the substring scanned
1356 in yylval. Caller must intuit reason for not parsing further
1357 by looking at the next characters herself.
1361 double-quoted style: \r and \n
1362 regexp special ones: \D \s
1364 backrefs: \1 (deprecated in substitution replacements)
1365 case and quoting: \U \Q \E
1366 stops on @ and $, but not for $ as tail anchor
1368 In transliterations:
1369 characters are VERY literal, except for - not at the start or end
1370 of the string, which indicates a range. scan_const expands the
1371 range to the full set of intermediate characters.
1373 In double-quoted strings:
1375 double-quoted style: \r and \n
1377 backrefs: \1 (deprecated)
1378 case and quoting: \U \Q \E
1381 scan_const does *not* construct ops to handle interpolated strings.
1382 It stops processing as soon as it finds an embedded $ or @ variable
1383 and leaves it to the caller to work out what's going on.
1385 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1387 $ in pattern could be $foo or could be tail anchor. Assumption:
1388 it's a tail anchor if $ is the last thing in the string, or if it's
1389 followed by one of ")| \n\t"
1391 \1 (backreferences) are turned into $1
1393 The structure of the code is
1394 while (there's a character to process) {
1395 handle transliteration ranges
1396 skip regexp comments
1397 skip # initiated comments in //x patterns
1398 check for embedded @foo
1399 check for embedded scalars
1401 leave intact backslashes from leave (below)
1402 deprecate \1 in strings and sub replacements
1403 handle string-changing backslashes \l \U \Q \E, etc.
1404 switch (what was escaped) {
1405 handle - in a transliteration (becomes a literal -)
1406 handle \132 octal characters
1407 handle 0x15 hex characters
1408 handle \cV (control V)
1409 handle printf backslashes (\f, \r, \n, etc)
1411 } (end if backslash)
1412 } (end while character to read)
1417 S_scan_const(pTHX_ char *start)
1419 register char *send = PL_bufend; /* end of the constant */
1420 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1421 register char *s = start; /* start of the constant */
1422 register char *d = SvPVX(sv); /* destination for copies */
1423 bool dorange = FALSE; /* are we in a translit range? */
1424 bool didrange = FALSE; /* did we just finish a range? */
1425 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1426 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1429 UV literal_endpoint = 0;
1432 const char *leaveit = /* set of acceptably-backslashed characters */
1434 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1437 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1438 /* If we are doing a trans and we know we want UTF8 set expectation */
1439 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1440 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1444 while (s < send || dorange) {
1445 /* get transliterations out of the way (they're most literal) */
1446 if (PL_lex_inwhat == OP_TRANS) {
1447 /* expand a range A-Z to the full set of characters. AIE! */
1449 I32 i; /* current expanded character */
1450 I32 min; /* first character in range */
1451 I32 max; /* last character in range */
1454 char * const c = (char*)utf8_hop((U8*)d, -1);
1458 *c = (char)UTF_TO_NATIVE(0xff);
1459 /* mark the range as done, and continue */
1465 i = d - SvPVX_const(sv); /* remember current offset */
1466 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1467 d = SvPVX(sv) + i; /* refresh d after realloc */
1468 d -= 2; /* eat the first char and the - */
1470 min = (U8)*d; /* first char in range */
1471 max = (U8)d[1]; /* last char in range */
1475 "Invalid range \"%c-%c\" in transliteration operator",
1476 (char)min, (char)max);
1480 if (literal_endpoint == 2 &&
1481 ((isLOWER(min) && isLOWER(max)) ||
1482 (isUPPER(min) && isUPPER(max)))) {
1484 for (i = min; i <= max; i++)
1486 *d++ = NATIVE_TO_NEED(has_utf8,i);
1488 for (i = min; i <= max; i++)
1490 *d++ = NATIVE_TO_NEED(has_utf8,i);
1495 for (i = min; i <= max; i++)
1498 /* mark the range as done, and continue */
1502 literal_endpoint = 0;
1507 /* range begins (ignore - as first or last char) */
1508 else if (*s == '-' && s+1 < send && s != start) {
1510 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1513 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1523 literal_endpoint = 0;
1528 /* if we get here, we're not doing a transliteration */
1530 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1531 except for the last char, which will be done separately. */
1532 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1534 while (s+1 < send && *s != ')')
1535 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1537 else if (s[2] == '{' /* This should match regcomp.c */
1538 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1541 char *regparse = s + (s[2] == '{' ? 3 : 4);
1544 while (count && (c = *regparse)) {
1545 if (c == '\\' && regparse[1])
1553 if (*regparse != ')')
1554 regparse--; /* Leave one char for continuation. */
1555 while (s < regparse)
1556 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1560 /* likewise skip #-initiated comments in //x patterns */
1561 else if (*s == '#' && PL_lex_inpat &&
1562 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1563 while (s+1 < send && *s != '\n')
1564 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1567 /* check for embedded arrays
1568 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1570 else if (*s == '@' && s[1]
1571 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1574 /* check for embedded scalars. only stop if we're sure it's a
1577 else if (*s == '$') {
1578 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1580 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1581 break; /* in regexp, $ might be tail anchor */
1584 /* End of else if chain - OP_TRANS rejoin rest */
1587 if (*s == '\\' && s+1 < send) {
1590 /* some backslashes we leave behind */
1591 if (*leaveit && *s && strchr(leaveit, *s)) {
1592 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1593 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1597 /* deprecate \1 in strings and substitution replacements */
1598 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1599 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1601 if (ckWARN(WARN_SYNTAX))
1602 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1607 /* string-change backslash escapes */
1608 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1613 /* if we get here, it's either a quoted -, or a digit */
1616 /* quoted - in transliterations */
1618 if (PL_lex_inwhat == OP_TRANS) {
1628 Perl_warner(aTHX_ packWARN(WARN_MISC),
1629 "Unrecognized escape \\%c passed through",
1631 /* default action is to copy the quoted character */
1632 goto default_action;
1635 /* \132 indicates an octal constant */
1636 case '0': case '1': case '2': case '3':
1637 case '4': case '5': case '6': case '7':
1641 uv = grok_oct(s, &len, &flags, NULL);
1644 goto NUM_ESCAPE_INSERT;
1646 /* \x24 indicates a hex constant */
1650 char* const e = strchr(s, '}');
1651 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1652 PERL_SCAN_DISALLOW_PREFIX;
1657 yyerror("Missing right brace on \\x{}");
1661 uv = grok_hex(s, &len, &flags, NULL);
1667 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1668 uv = grok_hex(s, &len, &flags, NULL);
1674 /* Insert oct or hex escaped character.
1675 * There will always enough room in sv since such
1676 * escapes will be longer than any UTF-8 sequence
1677 * they can end up as. */
1679 /* We need to map to chars to ASCII before doing the tests
1682 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1683 if (!has_utf8 && uv > 255) {
1684 /* Might need to recode whatever we have
1685 * accumulated so far if it contains any
1688 * (Can't we keep track of that and avoid
1689 * this rescan? --jhi)
1693 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1694 if (!NATIVE_IS_INVARIANT(*c)) {
1699 const STRLEN offset = d - SvPVX_const(sv);
1701 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1705 while (src >= (const U8 *)SvPVX_const(sv)) {
1706 if (!NATIVE_IS_INVARIANT(*src)) {
1707 const U8 ch = NATIVE_TO_ASCII(*src);
1708 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1709 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1719 if (has_utf8 || uv > 255) {
1720 d = (char*)uvchr_to_utf8((U8*)d, uv);
1722 if (PL_lex_inwhat == OP_TRANS &&
1723 PL_sublex_info.sub_op) {
1724 PL_sublex_info.sub_op->op_private |=
1725 (PL_lex_repl ? OPpTRANS_FROM_UTF
1738 /* \N{LATIN SMALL LETTER A} is a named character */
1742 char* e = strchr(s, '}');
1748 yyerror("Missing right brace on \\N{}");
1752 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1754 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1755 PERL_SCAN_DISALLOW_PREFIX;
1758 uv = grok_hex(s, &len, &flags, NULL);
1760 goto NUM_ESCAPE_INSERT;
1762 res = newSVpvn(s + 1, e - s - 1);
1763 res = new_constant( Nullch, 0, "charnames",
1764 res, Nullsv, "\\N{...}" );
1766 sv_utf8_upgrade(res);
1767 str = SvPV_const(res,len);
1768 #ifdef EBCDIC_NEVER_MIND
1769 /* charnames uses pack U and that has been
1770 * recently changed to do the below uni->native
1771 * mapping, so this would be redundant (and wrong,
1772 * the code point would be doubly converted).
1773 * But leave this in just in case the pack U change
1774 * gets revoked, but the semantics is still
1775 * desireable for charnames. --jhi */
1777 UV uv = utf8_to_uvchr((const U8*)str, 0);
1780 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1782 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1783 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1784 str = SvPV_const(res, len);
1788 if (!has_utf8 && SvUTF8(res)) {
1789 const char * const ostart = SvPVX_const(sv);
1790 SvCUR_set(sv, d - ostart);
1793 sv_utf8_upgrade(sv);
1794 /* this just broke our allocation above... */
1795 SvGROW(sv, (STRLEN)(send - start));
1796 d = SvPVX(sv) + SvCUR(sv);
1799 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1800 const char * const odest = SvPVX_const(sv);
1802 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1803 d = SvPVX(sv) + (d - odest);
1805 Copy(str, d, len, char);
1812 yyerror("Missing braces on \\N{}");
1815 /* \c is a control character */
1824 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1827 yyerror("Missing control char name in \\c");
1831 /* printf-style backslashes, formfeeds, newlines, etc */
1833 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1836 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1839 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1842 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1845 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1848 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1851 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1857 } /* end if (backslash) */
1864 /* If we started with encoded form, or already know we want it
1865 and then encode the next character */
1866 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1868 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1869 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1872 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1873 const STRLEN off = d - SvPVX_const(sv);
1874 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1876 d = (char*)uvchr_to_utf8((U8*)d, uv);
1880 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1882 } /* while loop to process each character */
1884 /* terminate the string and set up the sv */
1886 SvCUR_set(sv, d - SvPVX_const(sv));
1887 if (SvCUR(sv) >= SvLEN(sv))
1888 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1891 if (PL_encoding && !has_utf8) {
1892 sv_recode_to_utf8(sv, PL_encoding);
1898 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1899 PL_sublex_info.sub_op->op_private |=
1900 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1904 /* shrink the sv if we allocated more than we used */
1905 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1906 SvPV_shrink_to_cur(sv);
1909 /* return the substring (via yylval) only if we parsed anything */
1910 if (s > PL_bufptr) {
1911 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1912 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1914 ( PL_lex_inwhat == OP_TRANS
1916 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1919 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1926 * Returns TRUE if there's more to the expression (e.g., a subscript),
1929 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1931 * ->[ and ->{ return TRUE
1932 * { and [ outside a pattern are always subscripts, so return TRUE
1933 * if we're outside a pattern and it's not { or [, then return FALSE
1934 * if we're in a pattern and the first char is a {
1935 * {4,5} (any digits around the comma) returns FALSE
1936 * if we're in a pattern and the first char is a [
1938 * [SOMETHING] has a funky algorithm to decide whether it's a
1939 * character class or not. It has to deal with things like
1940 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1941 * anything else returns TRUE
1944 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1947 S_intuit_more(pTHX_ register char *s)
1949 if (PL_lex_brackets)
1951 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1953 if (*s != '{' && *s != '[')
1958 /* In a pattern, so maybe we have {n,m}. */
1975 /* On the other hand, maybe we have a character class */
1978 if (*s == ']' || *s == '^')
1981 /* this is terrifying, and it works */
1982 int weight = 2; /* let's weigh the evidence */
1984 unsigned char un_char = 255, last_un_char;
1985 const char * const send = strchr(s,']');
1986 char tmpbuf[sizeof PL_tokenbuf * 4];
1988 if (!send) /* has to be an expression */
1991 Zero(seen,256,char);
1994 else if (isDIGIT(*s)) {
1996 if (isDIGIT(s[1]) && s[2] == ']')
2002 for (; s < send; s++) {
2003 last_un_char = un_char;
2004 un_char = (unsigned char)*s;
2009 weight -= seen[un_char] * 10;
2010 if (isALNUM_lazy_if(s+1,UTF)) {
2011 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2012 if ((int)strlen(tmpbuf) > 1
2013 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2018 else if (*s == '$' && s[1] &&
2019 strchr("[#!%*<>()-=",s[1])) {
2020 if (/*{*/ strchr("])} =",s[2]))
2029 if (strchr("wds]",s[1]))
2031 else if (seen['\''] || seen['"'])
2033 else if (strchr("rnftbxcav",s[1]))
2035 else if (isDIGIT(s[1])) {
2037 while (s[1] && isDIGIT(s[1]))
2047 if (strchr("aA01! ",last_un_char))
2049 if (strchr("zZ79~",s[1]))
2051 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2052 weight -= 5; /* cope with negative subscript */
2055 if (!isALNUM(last_un_char)
2056 && !(last_un_char == '$' || last_un_char == '@'
2057 || last_un_char == '&')
2058 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2063 if (keyword(tmpbuf, d - tmpbuf))
2066 if (un_char == last_un_char + 1)
2068 weight -= seen[un_char];
2073 if (weight >= 0) /* probably a character class */
2083 * Does all the checking to disambiguate
2085 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2086 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2088 * First argument is the stuff after the first token, e.g. "bar".
2090 * Not a method if bar is a filehandle.
2091 * Not a method if foo is a subroutine prototyped to take a filehandle.
2092 * Not a method if it's really "Foo $bar"
2093 * Method if it's "foo $bar"
2094 * Not a method if it's really "print foo $bar"
2095 * Method if it's really "foo package::" (interpreted as package->foo)
2096 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2097 * Not a method if bar is a filehandle or package, but is quoted with
2102 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2104 char *s = start + (*start == '$');
2105 char tmpbuf[sizeof PL_tokenbuf];
2110 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2114 const char *proto = SvPVX_const(cv);
2125 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2126 /* start is the beginning of the possible filehandle/object,
2127 * and s is the end of it
2128 * tmpbuf is a copy of it
2131 if (*start == '$') {
2132 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2137 return *s == '(' ? FUNCMETH : METHOD;
2139 if (!keyword(tmpbuf, len)) {
2140 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2145 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2146 if (indirgv && GvCVu(indirgv))
2148 /* filehandle or package name makes it a method */
2149 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2151 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2152 return 0; /* no assumptions -- "=>" quotes bearword */
2154 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2155 newSVpvn(tmpbuf,len));
2156 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2160 return *s == '(' ? FUNCMETH : METHOD;
2168 * Return a string of Perl code to load the debugger. If PERL5DB
2169 * is set, it will return the contents of that, otherwise a
2170 * compile-time require of perl5db.pl.
2177 const char * const pdb = PerlEnv_getenv("PERL5DB");
2181 SETERRNO(0,SS_NORMAL);
2182 return "BEGIN { require 'perl5db.pl' }";
2188 /* Encoded script support. filter_add() effectively inserts a
2189 * 'pre-processing' function into the current source input stream.
2190 * Note that the filter function only applies to the current source file
2191 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2193 * The datasv parameter (which may be NULL) can be used to pass
2194 * private data to this instance of the filter. The filter function
2195 * can recover the SV using the FILTER_DATA macro and use it to
2196 * store private buffers and state information.
2198 * The supplied datasv parameter is upgraded to a PVIO type
2199 * and the IoDIRP/IoANY field is used to store the function pointer,
2200 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2201 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2202 * private use must be set using malloc'd pointers.
2206 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2211 if (!PL_rsfp_filters)
2212 PL_rsfp_filters = newAV();
2214 datasv = NEWSV(255,0);
2215 SvUPGRADE(datasv, SVt_PVIO);
2216 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2217 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2218 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2219 IoANY(datasv), SvPV_nolen(datasv)));
2220 av_unshift(PL_rsfp_filters, 1);
2221 av_store(PL_rsfp_filters, 0, datasv) ;
2226 /* Delete most recently added instance of this filter function. */
2228 Perl_filter_del(pTHX_ filter_t funcp)
2233 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2235 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2237 /* if filter is on top of stack (usual case) just pop it off */
2238 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2239 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2240 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2241 IoANY(datasv) = (void *)NULL;
2242 sv_free(av_pop(PL_rsfp_filters));
2246 /* we need to search for the correct entry and clear it */
2247 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2251 /* Invoke the idxth filter function for the current rsfp. */
2252 /* maxlen 0 = read one text line */
2254 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2259 if (!PL_rsfp_filters)
2261 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2262 /* Provide a default input filter to make life easy. */
2263 /* Note that we append to the line. This is handy. */
2264 DEBUG_P(PerlIO_printf(Perl_debug_log,
2265 "filter_read %d: from rsfp\n", idx));
2269 const int old_len = SvCUR(buf_sv);
2271 /* ensure buf_sv is large enough */
2272 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2273 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2274 if (PerlIO_error(PL_rsfp))
2275 return -1; /* error */
2277 return 0 ; /* end of file */
2279 SvCUR_set(buf_sv, old_len + len) ;
2282 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2283 if (PerlIO_error(PL_rsfp))
2284 return -1; /* error */
2286 return 0 ; /* end of file */
2289 return SvCUR(buf_sv);
2291 /* Skip this filter slot if filter has been deleted */
2292 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2293 DEBUG_P(PerlIO_printf(Perl_debug_log,
2294 "filter_read %d: skipped (filter deleted)\n",
2296 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2298 /* Get function pointer hidden within datasv */
2299 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2300 DEBUG_P(PerlIO_printf(Perl_debug_log,
2301 "filter_read %d: via function %p (%s)\n",
2302 idx, datasv, SvPV_nolen_const(datasv)));
2303 /* Call function. The function is expected to */
2304 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2305 /* Return: <0:error, =0:eof, >0:not eof */
2306 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2310 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2312 #ifdef PERL_CR_FILTER
2313 if (!PL_rsfp_filters) {
2314 filter_add(S_cr_textfilter,NULL);
2317 if (PL_rsfp_filters) {
2319 SvCUR_set(sv, 0); /* start with empty line */
2320 if (FILTER_READ(0, sv, 0) > 0)
2321 return ( SvPVX(sv) ) ;
2326 return (sv_gets(sv, fp, append));
2330 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2334 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2338 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2339 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2341 return GvHV(gv); /* Foo:: */
2344 /* use constant CLASS => 'MyClass' */
2345 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2347 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2348 pkgname = SvPV_nolen_const(sv);
2352 return gv_stashpv(pkgname, FALSE);
2356 S_tokenize_use(pTHX_ int is_use, char *s) {
2357 if (PL_expect != XSTATE)
2358 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2359 is_use ? "use" : "no"));
2361 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2362 s = force_version(s, TRUE);
2363 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2364 PL_nextval[PL_nexttoke].opval = Nullop;
2367 else if (*s == 'v') {
2368 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2369 s = force_version(s, FALSE);
2373 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2374 s = force_version(s, FALSE);
2376 yylval.ival = is_use;
2380 static const char* const exp_name[] =
2381 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2382 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2389 Works out what to call the token just pulled out of the input
2390 stream. The yacc parser takes care of taking the ops we return and
2391 stitching them into a tree.
2397 if read an identifier
2398 if we're in a my declaration
2399 croak if they tried to say my($foo::bar)
2400 build the ops for a my() declaration
2401 if it's an access to a my() variable
2402 are we in a sort block?
2403 croak if my($a); $a <=> $b
2404 build ops for access to a my() variable
2405 if in a dq string, and they've said @foo and we can't find @foo
2407 build ops for a bareword
2408 if we already built the token before, use it.
2413 #pragma segment Perl_yylex
2418 register char *s = PL_bufptr;
2424 SV* tmp = newSVpvs("");
2425 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2426 (IV)CopLINE(PL_curcop),
2427 lex_state_names[PL_lex_state],
2428 exp_name[PL_expect],
2429 pv_display(tmp, s, strlen(s), 0, 60));
2432 /* check if there's an identifier for us to look at */
2433 if (PL_pending_ident)
2434 return REPORT(S_pending_ident(aTHX));
2436 /* no identifier pending identification */
2438 switch (PL_lex_state) {
2440 case LEX_NORMAL: /* Some compilers will produce faster */
2441 case LEX_INTERPNORMAL: /* code if we comment these out. */
2445 /* when we've already built the next token, just pull it out of the queue */
2448 yylval = PL_nextval[PL_nexttoke];
2450 PL_lex_state = PL_lex_defer;
2451 PL_expect = PL_lex_expect;
2452 PL_lex_defer = LEX_NORMAL;
2454 return REPORT(PL_nexttype[PL_nexttoke]);
2456 /* interpolated case modifiers like \L \U, including \Q and \E.
2457 when we get here, PL_bufptr is at the \
2459 case LEX_INTERPCASEMOD:
2461 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2462 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2464 /* handle \E or end of string */
2465 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2467 if (PL_lex_casemods) {
2468 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2469 PL_lex_casestack[PL_lex_casemods] = '\0';
2471 if (PL_bufptr != PL_bufend
2472 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2474 PL_lex_state = LEX_INTERPCONCAT;
2478 if (PL_bufptr != PL_bufend)
2480 PL_lex_state = LEX_INTERPCONCAT;
2484 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2485 "### Saw case modifier\n"); });
2487 if (s[1] == '\\' && s[2] == 'E') {
2489 PL_lex_state = LEX_INTERPCONCAT;
2494 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2495 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2496 if ((*s == 'L' || *s == 'U') &&
2497 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2498 PL_lex_casestack[--PL_lex_casemods] = '\0';
2501 if (PL_lex_casemods > 10)
2502 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2503 PL_lex_casestack[PL_lex_casemods++] = *s;
2504 PL_lex_casestack[PL_lex_casemods] = '\0';
2505 PL_lex_state = LEX_INTERPCONCAT;
2506 PL_nextval[PL_nexttoke].ival = 0;
2509 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2511 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2513 PL_nextval[PL_nexttoke].ival = OP_LC;
2515 PL_nextval[PL_nexttoke].ival = OP_UC;
2517 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2519 Perl_croak(aTHX_ "panic: yylex");
2523 if (PL_lex_starts) {
2526 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2527 if (PL_lex_casemods == 1 && PL_lex_inpat)
2536 case LEX_INTERPPUSH:
2537 return REPORT(sublex_push());
2539 case LEX_INTERPSTART:
2540 if (PL_bufptr == PL_bufend)
2541 return REPORT(sublex_done());
2542 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2543 "### Interpolated variable\n"); });
2545 PL_lex_dojoin = (*PL_bufptr == '@');
2546 PL_lex_state = LEX_INTERPNORMAL;
2547 if (PL_lex_dojoin) {
2548 PL_nextval[PL_nexttoke].ival = 0;
2550 force_ident("\"", '$');
2551 PL_nextval[PL_nexttoke].ival = 0;
2553 PL_nextval[PL_nexttoke].ival = 0;
2555 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2558 if (PL_lex_starts++) {
2560 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2561 if (!PL_lex_casemods && PL_lex_inpat)
2568 case LEX_INTERPENDMAYBE:
2569 if (intuit_more(PL_bufptr)) {
2570 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2576 if (PL_lex_dojoin) {
2577 PL_lex_dojoin = FALSE;
2578 PL_lex_state = LEX_INTERPCONCAT;
2581 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2582 && SvEVALED(PL_lex_repl))
2584 if (PL_bufptr != PL_bufend)
2585 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2586 PL_lex_repl = Nullsv;
2589 case LEX_INTERPCONCAT:
2591 if (PL_lex_brackets)
2592 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2594 if (PL_bufptr == PL_bufend)
2595 return REPORT(sublex_done());
2597 if (SvIVX(PL_linestr) == '\'') {
2598 SV *sv = newSVsv(PL_linestr);
2601 else if ( PL_hints & HINT_NEW_RE )
2602 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2603 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2607 s = scan_const(PL_bufptr);
2609 PL_lex_state = LEX_INTERPCASEMOD;
2611 PL_lex_state = LEX_INTERPSTART;
2614 if (s != PL_bufptr) {
2615 PL_nextval[PL_nexttoke] = yylval;
2618 if (PL_lex_starts++) {
2619 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2620 if (!PL_lex_casemods && PL_lex_inpat)
2633 PL_lex_state = LEX_NORMAL;
2634 s = scan_formline(PL_bufptr);
2635 if (!PL_lex_formbrack)
2641 PL_oldoldbufptr = PL_oldbufptr;
2647 if (isIDFIRST_lazy_if(s,UTF))
2649 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2652 goto fake_eof; /* emulate EOF on ^D or ^Z */
2657 if (PL_lex_brackets) {
2658 yyerror(PL_lex_formbrack
2659 ? "Format not terminated"
2660 : "Missing right curly or square bracket");
2662 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2663 "### Tokener got EOF\n");
2667 if (s++ < PL_bufend)
2668 goto retry; /* ignore stray nulls */
2671 if (!PL_in_eval && !PL_preambled) {
2672 PL_preambled = TRUE;
2673 sv_setpv(PL_linestr,incl_perldb());
2674 if (SvCUR(PL_linestr))
2675 sv_catpvs(PL_linestr,";");
2677 while(AvFILLp(PL_preambleav) >= 0) {
2678 SV *tmpsv = av_shift(PL_preambleav);
2679 sv_catsv(PL_linestr, tmpsv);
2680 sv_catpvs(PL_linestr, ";");
2683 sv_free((SV*)PL_preambleav);
2684 PL_preambleav = NULL;
2686 if (PL_minus_n || PL_minus_p) {
2687 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2689 sv_catpvs(PL_linestr,"chomp;");
2692 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2693 || *PL_splitstr == '"')
2694 && strchr(PL_splitstr + 1, *PL_splitstr))
2695 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2697 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2698 bytes can be used as quoting characters. :-) */
2699 const char *splits = PL_splitstr;
2700 sv_catpvs(PL_linestr, "our @F=split(q\0");
2703 if (*splits == '\\')
2704 sv_catpvn(PL_linestr, splits, 1);
2705 sv_catpvn(PL_linestr, splits, 1);
2706 } while (*splits++);
2707 /* This loop will embed the trailing NUL of
2708 PL_linestr as the last thing it does before
2710 sv_catpvs(PL_linestr, ");");
2714 sv_catpvs(PL_linestr,"our @F=split(' ');");
2718 sv_catpvs(PL_linestr,"use feature ':5.10';");
2719 sv_catpvs(PL_linestr, "\n");
2720 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2721 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2722 PL_last_lop = PL_last_uni = Nullch;
2723 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2724 SV * const sv = NEWSV(85,0);
2726 sv_upgrade(sv, SVt_PVMG);
2727 sv_setsv(sv,PL_linestr);
2730 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2735 bof = PL_rsfp ? TRUE : FALSE;
2736 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2739 if (PL_preprocess && !PL_in_eval)
2740 (void)PerlProc_pclose(PL_rsfp);
2741 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2742 PerlIO_clearerr(PL_rsfp);
2744 (void)PerlIO_close(PL_rsfp);
2746 PL_doextract = FALSE;
2748 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2749 sv_setpv(PL_linestr,PL_minus_p
2750 ? ";}continue{print;}" : ";}");
2751 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2752 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2753 PL_last_lop = PL_last_uni = Nullch;
2754 PL_minus_n = PL_minus_p = 0;
2757 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2758 PL_last_lop = PL_last_uni = Nullch;
2759 sv_setpvn(PL_linestr,"",0);
2760 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2762 /* If it looks like the start of a BOM or raw UTF-16,
2763 * check if it in fact is. */
2769 #ifdef PERLIO_IS_STDIO
2770 # ifdef __GNU_LIBRARY__
2771 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2772 # define FTELL_FOR_PIPE_IS_BROKEN
2776 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2777 # define FTELL_FOR_PIPE_IS_BROKEN
2782 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2783 /* This loses the possibility to detect the bof
2784 * situation on perl -P when the libc5 is being used.
2785 * Workaround? Maybe attach some extra state to PL_rsfp?
2788 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2790 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2793 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2794 s = swallow_bom((U8*)s);
2798 /* Incest with pod. */
2799 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2800 sv_setpvn(PL_linestr, "", 0);
2801 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2802 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2803 PL_last_lop = PL_last_uni = Nullch;
2804 PL_doextract = FALSE;
2808 } while (PL_doextract);
2809 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2810 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2811 SV * const sv = NEWSV(85,0);
2813 sv_upgrade(sv, SVt_PVMG);
2814 sv_setsv(sv,PL_linestr);
2817 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2820 PL_last_lop = PL_last_uni = Nullch;
2821 if (CopLINE(PL_curcop) == 1) {
2822 while (s < PL_bufend && isSPACE(*s))
2824 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2828 if (*s == '#' && *(s+1) == '!')
2830 #ifdef ALTERNATE_SHEBANG
2832 static char const as[] = ALTERNATE_SHEBANG;
2833 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2834 d = s + (sizeof(as) - 1);
2836 #endif /* ALTERNATE_SHEBANG */
2845 while (*d && !isSPACE(*d))
2849 #ifdef ARG_ZERO_IS_SCRIPT
2850 if (ipathend > ipath) {
2852 * HP-UX (at least) sets argv[0] to the script name,
2853 * which makes $^X incorrect. And Digital UNIX and Linux,
2854 * at least, set argv[0] to the basename of the Perl
2855 * interpreter. So, having found "#!", we'll set it right.
2858 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2859 assert(SvPOK(x) || SvGMAGICAL(x));
2860 if (sv_eq(x, CopFILESV(PL_curcop))) {
2861 sv_setpvn(x, ipath, ipathend - ipath);
2867 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2868 const char * const lstart = SvPV_const(x,llen);
2870 bstart += blen - llen;
2871 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2872 sv_setpvn(x, ipath, ipathend - ipath);
2877 TAINT_NOT; /* $^X is always tainted, but that's OK */
2879 #endif /* ARG_ZERO_IS_SCRIPT */
2884 d = instr(s,"perl -");
2886 d = instr(s,"perl");
2888 /* avoid getting into infinite loops when shebang
2889 * line contains "Perl" rather than "perl" */
2891 for (d = ipathend-4; d >= ipath; --d) {
2892 if ((*d == 'p' || *d == 'P')
2893 && !ibcmp(d, "perl", 4))
2903 #ifdef ALTERNATE_SHEBANG
2905 * If the ALTERNATE_SHEBANG on this system starts with a
2906 * character that can be part of a Perl expression, then if
2907 * we see it but not "perl", we're probably looking at the
2908 * start of Perl code, not a request to hand off to some
2909 * other interpreter. Similarly, if "perl" is there, but
2910 * not in the first 'word' of the line, we assume the line
2911 * contains the start of the Perl program.
2913 if (d && *s != '#') {
2914 const char *c = ipath;
2915 while (*c && !strchr("; \t\r\n\f\v#", *c))
2918 d = Nullch; /* "perl" not in first word; ignore */
2920 *s = '#'; /* Don't try to parse shebang line */
2922 #endif /* ALTERNATE_SHEBANG */
2923 #ifndef MACOS_TRADITIONAL
2928 !instr(s,"indir") &&
2929 instr(PL_origargv[0],"perl"))
2936 while (s < PL_bufend && isSPACE(*s))
2938 if (s < PL_bufend) {
2939 Newxz(newargv,PL_origargc+3,char*);
2941 while (s < PL_bufend && !isSPACE(*s))
2944 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2947 newargv = PL_origargv;
2950 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2952 Perl_croak(aTHX_ "Can't exec %s", ipath);
2956 const U32 oldpdb = PL_perldb;
2957 const bool oldn = PL_minus_n;
2958 const bool oldp = PL_minus_p;
2960 while (*d && !isSPACE(*d)) d++;
2961 while (SPACE_OR_TAB(*d)) d++;
2964 const bool switches_done = PL_doswitches;
2966 if (*d == 'M' || *d == 'm' || *d == 'C') {
2967 const char * const m = d;
2968 while (*d && !isSPACE(*d)) d++;
2969 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2972 d = moreswitches(d);
2974 if (PL_doswitches && !switches_done) {
2975 int argc = PL_origargc;
2976 char **argv = PL_origargv;
2979 } while (argc && argv[0][0] == '-' && argv[0][1]);
2980 init_argv_symbols(argc,argv);
2982 if ((PERLDB_LINE && !oldpdb) ||
2983 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2984 /* if we have already added "LINE: while (<>) {",
2985 we must not do it again */
2987 sv_setpvn(PL_linestr, "", 0);
2988 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2989 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2990 PL_last_lop = PL_last_uni = Nullch;
2991 PL_preambled = FALSE;
2993 (void)gv_fetchfile(PL_origfilename);
2996 if (PL_doswitches && !switches_done) {
2997 int argc = PL_origargc;
2998 char **argv = PL_origargv;
3001 } while (argc && argv[0][0] == '-' && argv[0][1]);
3002 init_argv_symbols(argc,argv);
3008 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3010 PL_lex_state = LEX_FORMLINE;
3015 #ifdef PERL_STRICT_CR
3016 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3018 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3020 case ' ': case '\t': case '\f': case 013:
3021 #ifdef MACOS_TRADITIONAL
3028 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3029 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3030 /* handle eval qq[#line 1 "foo"\n ...] */
3031 CopLINE_dec(PL_curcop);
3035 while (s < d && *s != '\n')
3039 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3040 Perl_croak(aTHX_ "panic: input overflow");
3042 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3044 PL_lex_state = LEX_FORMLINE;
3054 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3062 while (s < PL_bufend && SPACE_OR_TAB(*s))
3065 if (strnEQ(s,"=>",2)) {
3066 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3067 DEBUG_T( { S_printbuf(aTHX_
3068 "### Saw unary minus before =>, forcing word %s\n", s);
3070 OPERATOR('-'); /* unary minus */
3072 PL_last_uni = PL_oldbufptr;
3074 case 'r': ftst = OP_FTEREAD; break;
3075 case 'w': ftst = OP_FTEWRITE; break;
3076 case 'x': ftst = OP_FTEEXEC; break;
3077 case 'o': ftst = OP_FTEOWNED; break;
3078 case 'R': ftst = OP_FTRREAD; break;
3079 case 'W': ftst = OP_FTRWRITE; break;
3080 case 'X': ftst = OP_FTREXEC; break;
3081 case 'O': ftst = OP_FTROWNED; break;
3082 case 'e': ftst = OP_FTIS; break;
3083 case 'z': ftst = OP_FTZERO; break;
3084 case 's': ftst = OP_FTSIZE; break;
3085 case 'f': ftst = OP_FTFILE; break;
3086 case 'd': ftst = OP_FTDIR; break;
3087 case 'l': ftst = OP_FTLINK; break;
3088 case 'p': ftst = OP_FTPIPE; break;
3089 case 'S': ftst = OP_FTSOCK; break;
3090 case 'u': ftst = OP_FTSUID; break;
3091 case 'g': ftst = OP_FTSGID; break;
3092 case 'k': ftst = OP_FTSVTX; break;
3093 case 'b': ftst = OP_FTBLK; break;
3094 case 'c': ftst = OP_FTCHR; break;
3095 case 't': ftst = OP_FTTTY; break;
3096 case 'T': ftst = OP_FTTEXT; break;
3097 case 'B': ftst = OP_FTBINARY; break;
3098 case 'M': case 'A': case 'C':
3099 gv_fetchpv("\024",GV_ADD, SVt_PV);
3101 case 'M': ftst = OP_FTMTIME; break;
3102 case 'A': ftst = OP_FTATIME; break;
3103 case 'C': ftst = OP_FTCTIME; break;
3111 PL_last_lop_op = (OPCODE)ftst;
3112 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3113 "### Saw file test %c\n", (int)tmp);
3118 /* Assume it was a minus followed by a one-letter named
3119 * subroutine call (or a -bareword), then. */
3120 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3121 "### '-%c' looked like a file test but was not\n",
3128 const char tmp = *s++;
3131 if (PL_expect == XOPERATOR)
3136 else if (*s == '>') {
3139 if (isIDFIRST_lazy_if(s,UTF)) {
3140 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3148 if (PL_expect == XOPERATOR)
3151 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3153 OPERATOR('-'); /* unary minus */
3159 const char tmp = *s++;
3162 if (PL_expect == XOPERATOR)
3167 if (PL_expect == XOPERATOR)
3170 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3177 if (PL_expect != XOPERATOR) {
3178 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3179 PL_expect = XOPERATOR;
3180 force_ident(PL_tokenbuf, '*');
3193 if (PL_expect == XOPERATOR) {
3197 PL_tokenbuf[0] = '%';
3198 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3199 if (!PL_tokenbuf[1]) {
3202 PL_pending_ident = '%';
3213 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3214 && FEATURE_IS_ENABLED("~~"))
3221 const char tmp = *s++;
3227 goto just_a_word_zero_gv;
3230 switch (PL_expect) {
3233 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3235 PL_bufptr = s; /* update in case we back off */
3241 PL_expect = XTERMBLOCK;
3245 while (isIDFIRST_lazy_if(s,UTF)) {
3247 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3248 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3249 if (tmp < 0) tmp = -tmp;
3265 d = scan_str(d,TRUE,TRUE);
3267 /* MUST advance bufptr here to avoid bogus
3268 "at end of line" context messages from yyerror().
3270 PL_bufptr = s + len;
3271 yyerror("Unterminated attribute parameter in attribute list");
3274 return REPORT(0); /* EOF indicator */
3278 SV *sv = newSVpvn(s, len);
3279 sv_catsv(sv, PL_lex_stuff);
3280 attrs = append_elem(OP_LIST, attrs,
3281 newSVOP(OP_CONST, 0, sv));
3282 SvREFCNT_dec(PL_lex_stuff);
3283 PL_lex_stuff = Nullsv;
3286 if (len == 6 && strnEQ(s, "unique", len)) {
3287 if (PL_in_my == KEY_our)
3289 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3291 ; /* skip to avoid loading attributes.pm */
3294 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3297 /* NOTE: any CV attrs applied here need to be part of
3298 the CVf_BUILTIN_ATTRS define in cv.h! */
3299 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3300 CvLVALUE_on(PL_compcv);
3301 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3302 CvLOCKED_on(PL_compcv);
3303 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3304 CvMETHOD_on(PL_compcv);
3305 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3306 CvASSERTION_on(PL_compcv);
3307 /* After we've set the flags, it could be argued that
3308 we don't need to do the attributes.pm-based setting
3309 process, and shouldn't bother appending recognized
3310 flags. To experiment with that, uncomment the
3311 following "else". (Note that's already been
3312 uncommented. That keeps the above-applied built-in
3313 attributes from being intercepted (and possibly
3314 rejected) by a package's attribute routines, but is
3315 justified by the performance win for the common case
3316 of applying only built-in attributes.) */
3318 attrs = append_elem(OP_LIST, attrs,
3319 newSVOP(OP_CONST, 0,
3323 if (*s == ':' && s[1] != ':')
3326 break; /* require real whitespace or :'s */
3330 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3331 if (*s != ';' && *s != '}' && *s != tmp
3332 && (tmp != '=' || *s != ')')) {
3333 const char q = ((*s == '\'') ? '"' : '\'');
3334 /* If here for an expression, and parsed no attrs, back
3336 if (tmp == '=' && !attrs) {
3340 /* MUST advance bufptr here to avoid bogus "at end of line"
3341 context messages from yyerror().
3345 ? Perl_form(aTHX_ "Invalid separator character "
3346 "%c%c%c in attribute list", q, *s, q)
3347 : "Unterminated attribute list" );
3355 PL_nextval[PL_nexttoke].opval = attrs;
3363 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3364 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3372 const char tmp = *s++;
3377 const char tmp = *s++;
3385 if (PL_lex_brackets <= 0)
3386 yyerror("Unmatched right square bracket");
3389 if (PL_lex_state == LEX_INTERPNORMAL) {
3390 if (PL_lex_brackets == 0) {
3391 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3392 PL_lex_state = LEX_INTERPEND;
3399 if (PL_lex_brackets > 100) {
3400 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3402 switch (PL_expect) {
3404 if (PL_lex_formbrack) {
3408 if (PL_oldoldbufptr == PL_last_lop)
3409 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3411 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3412 OPERATOR(HASHBRACK);
3414 while (s < PL_bufend && SPACE_OR_TAB(*s))
3417 PL_tokenbuf[0] = '\0';
3418 if (d < PL_bufend && *d == '-') {
3419 PL_tokenbuf[0] = '-';
3421 while (d < PL_bufend && SPACE_OR_TAB(*d))
3424 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3425 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3427 while (d < PL_bufend && SPACE_OR_TAB(*d))
3430 const char minus = (PL_tokenbuf[0] == '-');
3431 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3439 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3444 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3449 if (PL_oldoldbufptr == PL_last_lop)
3450 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3452 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3455 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3457 /* This hack is to get the ${} in the message. */
3459 yyerror("syntax error");
3462 OPERATOR(HASHBRACK);
3464 /* This hack serves to disambiguate a pair of curlies
3465 * as being a block or an anon hash. Normally, expectation
3466 * determines that, but in cases where we're not in a
3467 * position to expect anything in particular (like inside
3468 * eval"") we have to resolve the ambiguity. This code
3469 * covers the case where the first term in the curlies is a
3470 * quoted string. Most other cases need to be explicitly
3471 * disambiguated by prepending a "+" before the opening
3472 * curly in order to force resolution as an anon hash.
3474 * XXX should probably propagate the outer expectation
3475 * into eval"" to rely less on this hack, but that could
3476 * potentially break current behavior of eval"".
3480 if (*s == '\'' || *s == '"' || *s == '`') {
3481 /* common case: get past first string, handling escapes */
3482 for (t++; t < PL_bufend && *t != *s;)
3483 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3487 else if (*s == 'q') {
3490 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3493 /* skip q//-like construct */
3495 char open, close, term;
3498 while (t < PL_bufend && isSPACE(*t))
3500 /* check for q => */
3501 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3502 OPERATOR(HASHBRACK);
3506 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3510 for (t++; t < PL_bufend; t++) {
3511 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3513 else if (*t == open)
3517 for (t++; t < PL_bufend; t++) {
3518 if (*t == '\\' && t+1 < PL_bufend)
3520 else if (*t == close && --brackets <= 0)
3522 else if (*t == open)
3529 /* skip plain q word */
3530 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3533 else if (isALNUM_lazy_if(t,UTF)) {
3535 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3538 while (t < PL_bufend && isSPACE(*t))
3540 /* if comma follows first term, call it an anon hash */
3541 /* XXX it could be a comma expression with loop modifiers */
3542 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3543 || (*t == '=' && t[1] == '>')))
3544 OPERATOR(HASHBRACK);
3545 if (PL_expect == XREF)
3548 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3554 yylval.ival = CopLINE(PL_curcop);
3555 if (isSPACE(*s) || *s == '#')
3556 PL_copline = NOLINE; /* invalidate current command line number */
3561 if (PL_lex_brackets <= 0)
3562 yyerror("Unmatched right curly bracket");
3564 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3565 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3566 PL_lex_formbrack = 0;
3567 if (PL_lex_state == LEX_INTERPNORMAL) {
3568 if (PL_lex_brackets == 0) {
3569 if (PL_expect & XFAKEBRACK) {
3570 PL_expect &= XENUMMASK;
3571 PL_lex_state = LEX_INTERPEND;
3573 return yylex(); /* ignore fake brackets */
3575 if (*s == '-' && s[1] == '>')
3576 PL_lex_state = LEX_INTERPENDMAYBE;
3577 else if (*s != '[' && *s != '{')
3578 PL_lex_state = LEX_INTERPEND;
3581 if (PL_expect & XFAKEBRACK) {
3582 PL_expect &= XENUMMASK;
3584 return yylex(); /* ignore fake brackets */
3593 if (PL_expect == XOPERATOR) {
3594 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3595 && isIDFIRST_lazy_if(s,UTF))
3597 CopLINE_dec(PL_curcop);
3598 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3599 CopLINE_inc(PL_curcop);
3604 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3606 PL_expect = XOPERATOR;
3607 force_ident(PL_tokenbuf, '&');
3611 yylval.ival = (OPpENTERSUB_AMPER<<8);
3623 const char tmp = *s++;
3630 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3631 && strchr("+-*/%.^&|<",tmp))
3632 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3633 "Reversed %c= operator",(int)tmp);
3635 if (PL_expect == XSTATE && isALPHA(tmp) &&
3636 (s == PL_linestart+1 || s[-2] == '\n') )
3638 if (PL_in_eval && !PL_rsfp) {
3643 if (strnEQ(s,"=cut",4)) {
3657 PL_doextract = TRUE;
3661 if (PL_lex_brackets < PL_lex_formbrack) {
3663 #ifdef PERL_STRICT_CR
3664 for (t = s; SPACE_OR_TAB(*t); t++) ;
3666 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3668 if (*t == '\n' || *t == '#') {
3679 const char tmp = *s++;
3681 /* was this !=~ where !~ was meant?
3682 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3684 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3685 const char *t = s+1;
3687 while (t < PL_bufend && isSPACE(*t))
3690 if (*t == '/' || *t == '?' ||
3691 ((*t == 'm' || *t == 's' || *t == 'y')
3692 && !isALNUM(t[1])) ||
3693 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3694 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3695 "!=~ should be !~");
3705 if (PL_expect != XOPERATOR) {
3706 if (s[1] != '<' && !strchr(s,'>'))
3709 s = scan_heredoc(s);
3711 s = scan_inputsymbol(s);
3712 TERM(sublex_start());
3718 SHop(OP_LEFT_SHIFT);
3732 const char tmp = *s++;
3734 SHop(OP_RIGHT_SHIFT);
3744 if (PL_expect == XOPERATOR) {
3745 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3747 deprecate_old(commaless_variable_list);
3748 return REPORT(','); /* grandfather non-comma-format format */
3752 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3753 PL_tokenbuf[0] = '@';
3754 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3755 sizeof PL_tokenbuf - 1, FALSE);
3756 if (PL_expect == XOPERATOR)
3757 no_op("Array length", s);
3758 if (!PL_tokenbuf[1])
3760 PL_expect = XOPERATOR;
3761 PL_pending_ident = '#';
3765 PL_tokenbuf[0] = '$';
3766 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3767 sizeof PL_tokenbuf - 1, FALSE);
3768 if (PL_expect == XOPERATOR)
3770 if (!PL_tokenbuf[1]) {
3772 yyerror("Final $ should be \\$ or $name");
3776 /* This kludge not intended to be bulletproof. */
3777 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3778 yylval.opval = newSVOP(OP_CONST, 0,
3779 newSViv(PL_compiling.cop_arybase));
3780 yylval.opval->op_private = OPpCONST_ARYBASE;
3786 const char tmp = *s;
3787 if (PL_lex_state == LEX_NORMAL)
3790 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3791 && intuit_more(s)) {
3793 PL_tokenbuf[0] = '@';
3794 if (ckWARN(WARN_SYNTAX)) {
3797 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3800 PL_bufptr = skipspace(PL_bufptr);
3801 while (t < PL_bufend && *t != ']')
3803 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3804 "Multidimensional syntax %.*s not supported",
3805 (int)((t - PL_bufptr) + 1), PL_bufptr);
3809 else if (*s == '{') {
3811 PL_tokenbuf[0] = '%';
3812 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3813 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3815 char tmpbuf[sizeof PL_tokenbuf];
3816 for (t++; isSPACE(*t); t++) ;
3817 if (isIDFIRST_lazy_if(t,UTF)) {
3819 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3821 for (; isSPACE(*t); t++) ;
3822 if (*t == ';' && get_cv(tmpbuf, FALSE))
3823 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3824 "You need to quote \"%s\"",
3831 PL_expect = XOPERATOR;
3832 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3833 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3834 if (!islop || PL_last_lop_op == OP_GREPSTART)
3835 PL_expect = XOPERATOR;
3836 else if (strchr("$@\"'`q", *s))
3837 PL_expect = XTERM; /* e.g. print $fh "foo" */
3838 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3839 PL_expect = XTERM; /* e.g. print $fh &sub */
3840 else if (isIDFIRST_lazy_if(s,UTF)) {
3841 char tmpbuf[sizeof PL_tokenbuf];
3843 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3844 if ((t2 = keyword(tmpbuf, len))) {
3845 /* binary operators exclude handle interpretations */
3857 PL_expect = XTERM; /* e.g. print $fh length() */
3862 PL_expect = XTERM; /* e.g. print $fh subr() */
3865 else if (isDIGIT(*s))
3866 PL_expect = XTERM; /* e.g. print $fh 3 */
3867 else if (*s == '.' && isDIGIT(s[1]))
3868 PL_expect = XTERM; /* e.g. print $fh .3 */
3869 else if ((*s == '?' || *s == '-' || *s == '+')
3870 && !isSPACE(s[1]) && s[1] != '=')
3871 PL_expect = XTERM; /* e.g. print $fh -1 */
3872 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3874 PL_expect = XTERM; /* e.g. print $fh /.../
3875 XXX except DORDOR operator
3877 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3879 PL_expect = XTERM; /* print $fh <<"EOF" */
3882 PL_pending_ident = '$';
3886 if (PL_expect == XOPERATOR)
3888 PL_tokenbuf[0] = '@';
3889 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3890 if (!PL_tokenbuf[1]) {
3893 if (PL_lex_state == LEX_NORMAL)
3895 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3897 PL_tokenbuf[0] = '%';
3899 /* Warn about @ where they meant $. */
3900 if (*s == '[' || *s == '{') {
3901 if (ckWARN(WARN_SYNTAX)) {
3902 const char *t = s + 1;
3903 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3905 if (*t == '}' || *t == ']') {
3907 PL_bufptr = skipspace(PL_bufptr);
3908 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3909 "Scalar value %.*s better written as $%.*s",
3910 (int)(t-PL_bufptr), PL_bufptr,
3911 (int)(t-PL_bufptr-1), PL_bufptr+1);
3916 PL_pending_ident = '@';
3919 case '/': /* may be division, defined-or, or pattern */
3920 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3924 case '?': /* may either be conditional or pattern */
3925 if(PL_expect == XOPERATOR) {
3933 /* A // operator. */
3943 /* Disable warning on "study /blah/" */
3944 if (PL_oldoldbufptr == PL_last_uni
3945 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3946 || memNE(PL_last_uni, "study", 5)
3947 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3950 s = scan_pat(s,OP_MATCH);
3951 TERM(sublex_start());
3955 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3956 #ifdef PERL_STRICT_CR
3959 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3961 && (s == PL_linestart || s[-1] == '\n') )
3963 PL_lex_formbrack = 0;
3967 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3973 yylval.ival = OPf_SPECIAL;
3979 if (PL_expect != XOPERATOR)
3984 case '0': case '1': case '2': case '3': case '4':
3985 case '5': case '6': case '7': case '8': case '9':
3986 s = scan_num(s, &yylval);
3987 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
3988 if (PL_expect == XOPERATOR)
3993 s = scan_str(s,FALSE,FALSE);
3994 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
3995 if (PL_expect == XOPERATOR) {
3996 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3998 deprecate_old(commaless_variable_list);
3999 return REPORT(','); /* grandfather non-comma-format format */
4005 missingterm((char*)0);
4006 yylval.ival = OP_CONST;
4007 TERM(sublex_start());
4010 s = scan_str(s,FALSE,FALSE);
4011 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4012 if (PL_expect == XOPERATOR) {
4013 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4015 deprecate_old(commaless_variable_list);
4016 return REPORT(','); /* grandfather non-comma-format format */
4022 missingterm((char*)0);
4023 yylval.ival = OP_CONST;
4024 /* FIXME. I think that this can be const if char *d is replaced by
4025 more localised variables. */
4026 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4027 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4028 yylval.ival = OP_STRINGIFY;
4032 TERM(sublex_start());
4035 s = scan_str(s,FALSE,FALSE);
4036 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4037 if (PL_expect == XOPERATOR)
4038 no_op("Backticks",s);
4040 missingterm((char*)0);
4041 yylval.ival = OP_BACKTICK;
4043 TERM(sublex_start());
4047 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4048 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4050 if (PL_expect == XOPERATOR)
4051 no_op("Backslash",s);
4055 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4056 char *start = s + 2;
4057 while (isDIGIT(*start) || *start == '_')
4059 if (*start == '.' && isDIGIT(start[1])) {
4060 s = scan_num(s, &yylval);
4063 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4064 else if (!isALPHA(*start) && (PL_expect == XTERM
4065 || PL_expect == XREF || PL_expect == XSTATE
4066 || PL_expect == XTERMORDORDOR)) {
4067 const char c = *start;
4070 gv = gv_fetchpv(s, 0, SVt_PVCV);
4073 s = scan_num(s, &yylval);
4080 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4116 I32 orig_keyword = 0;
4121 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4123 /* Some keywords can be followed by any delimiter, including ':' */
4124 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4125 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4126 (PL_tokenbuf[0] == 'q' &&
4127 strchr("qwxr", PL_tokenbuf[1])))));
4129 /* x::* is just a word, unless x is "CORE" */
4130 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4134 while (d < PL_bufend && isSPACE(*d))
4135 d++; /* no comments skipped here, or s### is misparsed */
4137 /* Is this a label? */
4138 if (!tmp && PL_expect == XSTATE
4139 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4141 yylval.pval = savepv(PL_tokenbuf);
4146 /* Check for keywords */
4147 tmp = keyword(PL_tokenbuf, len);
4149 /* Is this a word before a => operator? */
4150 if (*d == '=' && d[1] == '>') {
4153 = (OP*)newSVOP(OP_CONST, 0,
4154 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4155 yylval.opval->op_private = OPpCONST_BARE;
4159 if (tmp < 0) { /* second-class keyword? */
4160 GV *ogv = NULL; /* override (winner) */
4161 GV *hgv = NULL; /* hidden (loser) */
4162 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4164 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4167 if (GvIMPORTED_CV(gv))
4169 else if (! CvMETHOD(cv))
4173 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4174 (gv = *gvp) != (GV*)&PL_sv_undef &&
4175 GvCVu(gv) && GvIMPORTED_CV(gv))
4182 tmp = 0; /* overridden by import or by GLOBAL */
4185 && -tmp==KEY_lock /* XXX generalizable kludge */
4187 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4189 tmp = 0; /* any sub overrides "weak" keyword */
4191 else { /* no override */
4193 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4194 Perl_warner(aTHX_ packWARN(WARN_MISC),
4195 "dump() better written as CORE::dump()");
4199 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4200 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4201 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4202 "Ambiguous call resolved as CORE::%s(), %s",
4203 GvENAME(hgv), "qualify as such or use &");
4210 default: /* not a keyword */
4211 /* Trade off - by using this evil construction we can pull the
4212 variable gv into the block labelled keylookup. If not, then
4213 we have to give it function scope so that the goto from the
4214 earlier ':' case doesn't bypass the initialisation. */
4216 just_a_word_zero_gv:
4223 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4226 /* Get the rest if it looks like a package qualifier */
4228 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4230 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4233 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4234 *s == '\'' ? "'" : "::");
4239 if (PL_expect == XOPERATOR) {
4240 if (PL_bufptr == PL_linestart) {
4241 CopLINE_dec(PL_curcop);
4242 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4243 CopLINE_inc(PL_curcop);
4246 no_op("Bareword",s);
4249 /* Look for a subroutine with this name in current package,
4250 unless name is "Foo::", in which case Foo is a bearword
4251 (and a package name). */
4254 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4256 if (ckWARN(WARN_BAREWORD)
4257 && ! gv_fetchpv(PL_tokenbuf, 0, SVt_PVHV))
4258 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4259 "Bareword \"%s\" refers to nonexistent package",
4262 PL_tokenbuf[len] = '\0';
4269 /* Mustn't actually add anything to a symbol table.
4270 But also don't want to "initialise" any placeholder
4271 constants that might already be there into full
4272 blown PVGVs with attached PVCV. */
4273 gv = gv_fetchpv(PL_tokenbuf, GV_NOADD_NOINIT,
4278 /* if we saw a global override before, get the right name */
4281 sv = newSVpvs("CORE::GLOBAL::");
4282 sv_catpv(sv,PL_tokenbuf);
4285 /* If len is 0, newSVpv does strlen(), which is correct.
4286 If len is non-zero, then it will be the true length,
4287 and so the scalar will be created correctly. */
4288 sv = newSVpv(PL_tokenbuf,len);
4291 /* Presume this is going to be a bareword of some sort. */
4294 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4295 yylval.opval->op_private = OPpCONST_BARE;
4296 /* UTF-8 package name? */
4297 if (UTF && !IN_BYTES &&
4298 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4301 /* And if "Foo::", then that's what it certainly is. */
4306 /* Do the explicit type check so that we don't need to force
4307 the initialisation of the symbol table to have a real GV.
4308 Beware - gv may not really be a PVGV, cv may not really be
4309 a PVCV, (because of the space optimisations that gv_init
4310 understands) But they're true if for this symbol there is
4311 respectively a typeglob and a subroutine.
4313 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4314 /* Real typeglob, so get the real subroutine: */
4316 /* A proxy for a subroutine in this package? */
4317 : SvOK(gv) ? (CV *) gv : NULL)
4320 /* See if it's the indirect object for a list operator. */
4322 if (PL_oldoldbufptr &&
4323 PL_oldoldbufptr < PL_bufptr &&
4324 (PL_oldoldbufptr == PL_last_lop
4325 || PL_oldoldbufptr == PL_last_uni) &&
4326 /* NO SKIPSPACE BEFORE HERE! */
4327 (PL_expect == XREF ||
4328 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4330 bool immediate_paren = *s == '(';
4332 /* (Now we can afford to cross potential line boundary.) */
4335 /* Two barewords in a row may indicate method call. */
4337 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4338 (tmp = intuit_method(s, gv, cv)))
4341 /* If not a declared subroutine, it's an indirect object. */
4342 /* (But it's an indir obj regardless for sort.) */
4343 /* Also, if "_" follows a filetest operator, it's a bareword */
4346 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4348 (PL_last_lop_op != OP_MAPSTART &&
4349 PL_last_lop_op != OP_GREPSTART))))
4350 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4351 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4354 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4359 PL_expect = XOPERATOR;
4362 /* Is this a word before a => operator? */
4363 if (*s == '=' && s[1] == '>' && !pkgname) {
4365 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4366 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4367 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4371 /* If followed by a paren, it's certainly a subroutine. */
4375 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4376 if (*d == ')' && (sv = gv_const_sv(gv))) {
4381 PL_nextval[PL_nexttoke].opval = yylval.opval;
4382 PL_expect = XOPERATOR;
4388 /* If followed by var or block, call it a method (unless sub) */
4390 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4391 PL_last_lop = PL_oldbufptr;
4392 PL_last_lop_op = OP_METHOD;
4396 /* If followed by a bareword, see if it looks like indir obj. */
4399 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4400 && (tmp = intuit_method(s, gv, cv)))
4403 /* Not a method, so call it a subroutine (if defined) */
4406 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4407 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4408 "Ambiguous use of -%s resolved as -&%s()",
4409 PL_tokenbuf, PL_tokenbuf);
4410 /* Check for a constant sub */
4411 if ((sv = gv_const_sv(gv))) {
4413 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4414 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4415 yylval.opval->op_private = 0;
4419 /* Resolve to GV now. */
4420 if (SvTYPE(gv) != SVt_PVGV) {
4421 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4422 assert (SvTYPE(gv) == SVt_PVGV);
4423 /* cv must have been some sort of placeholder, so
4424 now needs replacing with a real code reference. */
4428 op_free(yylval.opval);
4429 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4430 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4431 PL_last_lop = PL_oldbufptr;
4432 PL_last_lop_op = OP_ENTERSUB;
4433 /* Is there a prototype? */
4436 const char *proto = SvPV_const((SV*)cv, len);
4439 if (*proto == '$' && proto[1] == '\0')
4441 while (*proto == ';')
4443 if (*proto == '&' && *s == '{') {
4444 sv_setpv(PL_subname, PL_curstash ?
4445 "__ANON__" : "__ANON__::__ANON__");
4449 PL_nextval[PL_nexttoke].opval = yylval.opval;
4455 /* Call it a bare word */
4457 if (PL_hints & HINT_STRICT_SUBS)
4458 yylval.opval->op_private |= OPpCONST_STRICT;
4461 if (lastchar != '-') {
4462 if (ckWARN(WARN_RESERVED)) {
4463 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4464 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4465 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4472 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4473 && ckWARN_d(WARN_AMBIGUOUS)) {
4474 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4475 "Operator or semicolon missing before %c%s",
4476 lastchar, PL_tokenbuf);
4477 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4478 "Ambiguous use of %c resolved as operator %c",
4479 lastchar, lastchar);
4485 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4486 newSVpv(CopFILE(PL_curcop),0));
4490 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4491 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4494 case KEY___PACKAGE__:
4495 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4497 ? newSVhek(HvNAME_HEK(PL_curstash))
4504 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4505 const char *pname = "main";
4506 if (PL_tokenbuf[2] == 'D')
4507 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4508 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4512 GvIOp(gv) = newIO();
4513 IoIFP(GvIOp(gv)) = PL_rsfp;
4514 #if defined(HAS_FCNTL) && defined(F_SETFD)
4516 const int fd = PerlIO_fileno(PL_rsfp);
4517 fcntl(fd,F_SETFD,fd >= 3);
4520 /* Mark this internal pseudo-handle as clean */
4521 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4523 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4524 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4525 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4527 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4528 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4529 /* if the script was opened in binmode, we need to revert
4530 * it to text mode for compatibility; but only iff it has CRs
4531 * XXX this is a questionable hack at best. */
4532 if (PL_bufend-PL_bufptr > 2
4533 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4536 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4537 loc = PerlIO_tell(PL_rsfp);
4538 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4541 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4543 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4544 #endif /* NETWARE */
4545 #ifdef PERLIO_IS_STDIO /* really? */
4546 # if defined(__BORLANDC__)
4547 /* XXX see note in do_binmode() */
4548 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4552 PerlIO_seek(PL_rsfp, loc, 0);
4556 #ifdef PERLIO_LAYERS
4559 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4560 else if (PL_encoding) {
4567 XPUSHs(PL_encoding);
4569 call_method("name", G_SCALAR);
4573 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4574 Perl_form(aTHX_ ":encoding(%"SVf")",
4592 if (PL_expect == XSTATE) {
4599 if (*s == ':' && s[1] == ':') {
4602 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4603 if (!(tmp = keyword(PL_tokenbuf, len)))
4604 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4607 else if (tmp == KEY_require || tmp == KEY_do)
4608 /* that's a way to remember we saw "CORE::" */
4621 LOP(OP_ACCEPT,XTERM);
4627 LOP(OP_ATAN2,XTERM);
4633 LOP(OP_BINMODE,XTERM);
4636 LOP(OP_BLESS,XTERM);
4645 /* When 'use switch' is in effect, continue has a dual
4646 life as a control operator. */
4648 if (!FEATURE_IS_ENABLED("switch"))
4651 /* We have to disambiguate the two senses of
4652 "continue". If the next token is a '{' then
4653 treat it as the start of a continue block;
4654 otherwise treat it as a control operator.
4665 (void)gv_fetchpv("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4682 if (!PL_cryptseen) {
4683 PL_cryptseen = TRUE;
4687 LOP(OP_CRYPT,XTERM);
4690 LOP(OP_CHMOD,XTERM);
4693 LOP(OP_CHOWN,XTERM);
4696 LOP(OP_CONNECT,XTERM);
4715 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4716 if (orig_keyword == KEY_do) {
4725 PL_hints |= HINT_BLOCK_SCOPE;
4735 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4736 LOP(OP_DBMOPEN,XTERM);
4742 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4749 yylval.ival = CopLINE(PL_curcop);
4763 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4764 UNIBRACK(OP_ENTEREVAL);
4782 case KEY_endhostent:
4788 case KEY_endservent:
4791 case KEY_endprotoent:
4802 yylval.ival = CopLINE(PL_curcop);
4804 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4806 if ((PL_bufend - p) >= 3 &&
4807 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4809 else if ((PL_bufend - p) >= 4 &&
4810 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4813 if (isIDFIRST_lazy_if(p,UTF)) {
4814 p = scan_ident(p, PL_bufend,
4815 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4819 Perl_croak(aTHX_ "Missing $ on loop variable");
4824 LOP(OP_FORMLINE,XTERM);
4830 LOP(OP_FCNTL,XTERM);
4836 LOP(OP_FLOCK,XTERM);
4845 LOP(OP_GREPSTART, XREF);
4848 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4863 case KEY_getpriority:
4864 LOP(OP_GETPRIORITY,XTERM);
4866 case KEY_getprotobyname:
4869 case KEY_getprotobynumber:
4870 LOP(OP_GPBYNUMBER,XTERM);
4872 case KEY_getprotoent:
4884 case KEY_getpeername:
4885 UNI(OP_GETPEERNAME);
4887 case KEY_gethostbyname:
4890 case KEY_gethostbyaddr:
4891 LOP(OP_GHBYADDR,XTERM);
4893 case KEY_gethostent:
4896 case KEY_getnetbyname:
4899 case KEY_getnetbyaddr:
4900 LOP(OP_GNBYADDR,XTERM);
4905 case KEY_getservbyname:
4906 LOP(OP_GSBYNAME,XTERM);
4908 case KEY_getservbyport:
4909 LOP(OP_GSBYPORT,XTERM);
4911 case KEY_getservent:
4914 case KEY_getsockname:
4915 UNI(OP_GETSOCKNAME);
4917 case KEY_getsockopt:
4918 LOP(OP_GSOCKOPT,XTERM);
4933 yylval.ival = CopLINE(PL_curcop);
4944 yylval.ival = CopLINE(PL_curcop);
4948 LOP(OP_INDEX,XTERM);
4954 LOP(OP_IOCTL,XTERM);
4966 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4998 LOP(OP_LISTEN,XTERM);
5007 s = scan_pat(s,OP_MATCH);
5008 TERM(sublex_start());
5011 LOP(OP_MAPSTART, XREF);
5014 LOP(OP_MKDIR,XTERM);
5017 LOP(OP_MSGCTL,XTERM);
5020 LOP(OP_MSGGET,XTERM);
5023 LOP(OP_MSGRCV,XTERM);
5026 LOP(OP_MSGSND,XTERM);
5032 if (isIDFIRST_lazy_if(s,UTF)) {
5033 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5034 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5036 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5037 if (!PL_in_my_stash) {
5040 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5048 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5055 s = tokenize_use(0, s);
5059 if (*s == '(' || (s = skipspace(s), *s == '('))
5066 if (isIDFIRST_lazy_if(s,UTF)) {
5068 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5069 for (t=d; *t && isSPACE(*t); t++) ;
5070 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5072 && !(t[0] == '=' && t[1] == '>')
5074 int len = (int)(d-s);
5075 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5076 "Precedence problem: open %.*s should be open(%.*s)",
5083 yylval.ival = OP_OR;
5093 LOP(OP_OPEN_DIR,XTERM);
5096 checkcomma(s,PL_tokenbuf,"filehandle");
5100 checkcomma(s,PL_tokenbuf,"filehandle");
5119 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5123 LOP(OP_PIPE_OP,XTERM);
5126 s = scan_str(s,FALSE,FALSE);
5128 missingterm((char*)0);
5129 yylval.ival = OP_CONST;
5130 TERM(sublex_start());
5136 s = scan_str(s,FALSE,FALSE);
5138 missingterm((char*)0);
5139 PL_expect = XOPERATOR;
5141 if (SvCUR(PL_lex_stuff)) {
5144 d = SvPV_force(PL_lex_stuff, len);
5147 for (; isSPACE(*d) && len; --len, ++d) ;
5150 if (!warned && ckWARN(WARN_QW)) {
5151 for (; !isSPACE(*d) && len; --len, ++d) {
5153 Perl_warner(aTHX_ packWARN(WARN_QW),
5154 "Possible attempt to separate words with commas");
5157 else if (*d == '#') {
5158 Perl_warner(aTHX_ packWARN(WARN_QW),
5159 "Possible attempt to put comments in qw() list");
5165 for (; !isSPACE(*d) && len; --len, ++d) ;
5167 sv = newSVpvn(b, d-b);
5168 if (DO_UTF8(PL_lex_stuff))
5170 words = append_elem(OP_LIST, words,
5171 newSVOP(OP_CONST, 0, tokeq(sv)));
5175 PL_nextval[PL_nexttoke].opval = words;
5180 SvREFCNT_dec(PL_lex_stuff);
5181 PL_lex_stuff = Nullsv;
5187 s = scan_str(s,FALSE,FALSE);
5189 missingterm((char*)0);
5190 yylval.ival = OP_STRINGIFY;
5191 if (SvIVX(PL_lex_stuff) == '\'')
5192 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5193 TERM(sublex_start());
5196 s = scan_pat(s,OP_QR);
5197 TERM(sublex_start());
5200 s = scan_str(s,FALSE,FALSE);
5202 missingterm((char*)0);
5203 yylval.ival = OP_BACKTICK;
5205 TERM(sublex_start());
5213 s = force_version(s, FALSE);
5215 else if (*s != 'v' || !isDIGIT(s[1])
5216 || (s = force_version(s, TRUE), *s == 'v'))
5218 *PL_tokenbuf = '\0';
5219 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5220 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5221 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5223 yyerror("<> should be quotes");
5225 if (orig_keyword == KEY_require) {
5233 PL_last_uni = PL_oldbufptr;
5234 PL_last_lop_op = OP_REQUIRE;
5236 return REPORT( (int)REQUIRE );
5242 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5246 LOP(OP_RENAME,XTERM);
5255 LOP(OP_RINDEX,XTERM);
5265 UNIDOR(OP_READLINE);
5278 LOP(OP_REVERSE,XTERM);
5281 UNIDOR(OP_READLINK);
5289 TERM(sublex_start());
5291 TOKEN(1); /* force error */
5294 checkcomma(s,PL_tokenbuf,"filehandle");
5304 LOP(OP_SELECT,XTERM);
5310 LOP(OP_SEMCTL,XTERM);
5313 LOP(OP_SEMGET,XTERM);
5316 LOP(OP_SEMOP,XTERM);
5322 LOP(OP_SETPGRP,XTERM);
5324 case KEY_setpriority:
5325 LOP(OP_SETPRIORITY,XTERM);
5327 case KEY_sethostent:
5333 case KEY_setservent:
5336 case KEY_setprotoent:
5346 LOP(OP_SEEKDIR,XTERM);
5348 case KEY_setsockopt:
5349 LOP(OP_SSOCKOPT,XTERM);
5355 LOP(OP_SHMCTL,XTERM);
5358 LOP(OP_SHMGET,XTERM);
5361 LOP(OP_SHMREAD,XTERM);
5364 LOP(OP_SHMWRITE,XTERM);
5367 LOP(OP_SHUTDOWN,XTERM);
5376 LOP(OP_SOCKET,XTERM);
5378 case KEY_socketpair:
5379 LOP(OP_SOCKPAIR,XTERM);
5382 checkcomma(s,PL_tokenbuf,"subroutine name");
5384 if (*s == ';' || *s == ')') /* probably a close */
5385 Perl_croak(aTHX_ "sort is now a reserved word");
5387 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5391 LOP(OP_SPLIT,XTERM);
5394 LOP(OP_SPRINTF,XTERM);
5397 LOP(OP_SPLICE,XTERM);
5412 LOP(OP_SUBSTR,XTERM);
5418 char tmpbuf[sizeof PL_tokenbuf];
5419 SSize_t tboffset = 0;
5420 expectation attrful;
5421 bool have_name, have_proto, bad_proto;
5422 const int key = tmp;
5426 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5427 (*s == ':' && s[1] == ':'))
5430 attrful = XATTRBLOCK;
5431 /* remember buffer pos'n for later force_word */
5432 tboffset = s - PL_oldbufptr;
5433 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5434 if (strchr(tmpbuf, ':'))
5435 sv_setpv(PL_subname, tmpbuf);
5437 sv_setsv(PL_subname,PL_curstname);
5438 sv_catpvs(PL_subname,"::");
5439 sv_catpvn(PL_subname,tmpbuf,len);
5446 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5447 PL_expect = XTERMBLOCK;
5448 attrful = XATTRTERM;
5449 sv_setpvn(PL_subname,"?",1);
5453 if (key == KEY_format) {
5455 PL_lex_formbrack = PL_lex_brackets + 1;
5457 (void) force_word(PL_oldbufptr + tboffset, WORD,
5462 /* Look for a prototype */
5466 s = scan_str(s,FALSE,FALSE);
5468 Perl_croak(aTHX_ "Prototype not terminated");
5469 /* strip spaces and check for bad characters */
5470 d = SvPVX(PL_lex_stuff);
5473 for (p = d; *p; ++p) {
5476 if (!strchr("$@%*;[]&\\", *p))
5481 if (bad_proto && ckWARN(WARN_SYNTAX))
5482 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5483 "Illegal character in prototype for %"SVf" : %s",
5485 SvCUR_set(PL_lex_stuff, tmp);
5493 if (*s == ':' && s[1] != ':')
5494 PL_expect = attrful;
5495 else if (*s != '{' && key == KEY_sub) {
5497 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5499 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5503 PL_nextval[PL_nexttoke].opval =
5504 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5505 PL_lex_stuff = Nullsv;
5509 sv_setpv(PL_subname,
5510 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5513 (void) force_word(PL_oldbufptr + tboffset, WORD,
5522 LOP(OP_SYSTEM,XREF);
5525 LOP(OP_SYMLINK,XTERM);
5528 LOP(OP_SYSCALL,XTERM);
5531 LOP(OP_SYSOPEN,XTERM);
5534 LOP(OP_SYSSEEK,XTERM);
5537 LOP(OP_SYSREAD,XTERM);
5540 LOP(OP_SYSWRITE,XTERM);
5544 TERM(sublex_start());
5565 LOP(OP_TRUNCATE,XTERM);
5577 yylval.ival = CopLINE(PL_curcop);
5581 yylval.ival = CopLINE(PL_curcop);
5585 LOP(OP_UNLINK,XTERM);
5591 LOP(OP_UNPACK,XTERM);
5594 LOP(OP_UTIME,XTERM);
5600 LOP(OP_UNSHIFT,XTERM);
5603 s = tokenize_use(1, s);
5613 yylval.ival = CopLINE(PL_curcop);
5617 yylval.ival = CopLINE(PL_curcop);
5621 PL_hints |= HINT_BLOCK_SCOPE;
5628 LOP(OP_WAITPID,XTERM);
5637 ctl_l[0] = toCTRL('L');
5639 gv_fetchpv(ctl_l, GV_ADD, SVt_PV);
5642 gv_fetchpv("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5647 if (PL_expect == XOPERATOR)
5653 yylval.ival = OP_XOR;
5658 TERM(sublex_start());
5663 #pragma segment Main
5667 S_pending_ident(pTHX)
5670 register I32 tmp = 0;
5671 /* pit holds the identifier we read and pending_ident is reset */
5672 char pit = PL_pending_ident;
5673 PL_pending_ident = 0;
5675 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5676 "### Pending identifier '%s'\n", PL_tokenbuf); });
5678 /* if we're in a my(), we can't allow dynamics here.
5679 $foo'bar has already been turned into $foo::bar, so
5680 just check for colons.
5682 if it's a legal name, the OP is a PADANY.
5685 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5686 if (strchr(PL_tokenbuf,':'))
5687 yyerror(Perl_form(aTHX_ "No package name allowed for "
5688 "variable %s in \"our\"",
5690 tmp = allocmy(PL_tokenbuf);
5693 if (strchr(PL_tokenbuf,':'))
5694 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5696 yylval.opval = newOP(OP_PADANY, 0);
5697 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5703 build the ops for accesses to a my() variable.
5705 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5706 then used in a comparison. This catches most, but not
5707 all cases. For instance, it catches
5708 sort { my($a); $a <=> $b }
5710 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5711 (although why you'd do that is anyone's guess).
5714 if (!strchr(PL_tokenbuf,':')) {
5716 tmp = pad_findmy(PL_tokenbuf);
5717 if (tmp != NOT_IN_PAD) {
5718 /* might be an "our" variable" */
5719 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5720 /* build ops for a bareword */
5721 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5722 HEK * const stashname = HvNAME_HEK(stash);
5723 SV * const sym = newSVhek(stashname);
5724 sv_catpvs(sym, "::");
5725 sv_catpv(sym, PL_tokenbuf+1);
5726 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5727 yylval.opval->op_private = OPpCONST_ENTERED;
5730 ? (GV_ADDMULTI | GV_ADDINEVAL)
5733 ((PL_tokenbuf[0] == '$') ? SVt_PV
5734 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5739 /* if it's a sort block and they're naming $a or $b */
5740 if (PL_last_lop_op == OP_SORT &&
5741 PL_tokenbuf[0] == '$' &&
5742 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5745 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5746 d < PL_bufend && *d != '\n';
5749 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5750 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5756 yylval.opval = newOP(OP_PADANY, 0);
5757 yylval.opval->op_targ = tmp;
5763 Whine if they've said @foo in a doublequoted string,
5764 and @foo isn't a variable we can find in the symbol
5767 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5768 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5769 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5770 && ckWARN(WARN_AMBIGUOUS))
5772 /* Downgraded from fatal to warning 20000522 mjd */
5773 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5774 "Possible unintended interpolation of %s in string",
5779 /* build ops for a bareword */
5780 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5781 yylval.opval->op_private = OPpCONST_ENTERED;
5785 ? (GV_ADDMULTI | GV_ADDINEVAL)
5786 /* If the identifier refers to a stash, don't autovivify it.
5787 * Change 24660 had the side effect of causing symbol table
5788 * hashes to always be defined, even if they were freshly
5789 * created and the only reference in the entire program was
5790 * the single statement with the defined %foo::bar:: test.
5791 * It appears that all code in the wild doing this actually
5792 * wants to know whether sub-packages have been loaded, so
5793 * by avoiding auto-vivifying symbol tables, we ensure that
5794 * defined %foo::bar:: continues to be false, and the existing
5795 * tests still give the expected answers, even though what
5796 * they're actually testing has now changed subtly.
5798 : !(*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'),
5799 ((PL_tokenbuf[0] == '$') ? SVt_PV
5800 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5806 * The following code was generated by perl_keyword.pl.
5810 Perl_keyword (pTHX_ const char *name, I32 len)
5814 case 1: /* 5 tokens of length 1 */
5846 case 2: /* 18 tokens of length 2 */
5992 case 3: /* 29 tokens of length 3 */
5996 if (name[1] == 'N' &&
6059 if (name[1] == 'i' &&
6081 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6099 if (name[1] == 'o' &&
6108 if (name[1] == 'e' &&
6117 if (name[1] == 'n' &&
6126 if (name[1] == 'o' &&
6135 if (name[1] == 'a' &&
6144 if (name[1] == 'o' &&
6206 if (name[1] == 'e' &&
6220 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6246 if (name[1] == 'i' &&
6255 if (name[1] == 's' &&
6264 if (name[1] == 'e' &&
6273 if (name[1] == 'o' &&
6285 case 4: /* 41 tokens of length 4 */
6289 if (name[1] == 'O' &&
6299 if (name[1] == 'N' &&
6309 if (name[1] == 'i' &&
6319 if (name[1] == 'h' &&
6329 if (name[1] == 'u' &&
6342 if (name[2] == 'c' &&
6351 if (name[2] == 's' &&
6360 if (name[2] == 'a' &&
6396 if (name[1] == 'o' &&
6409 if (name[2] == 't' &&
6418 if (name[2] == 'o' &&
6427 if (name[2] == 't' &&
6436 if (name[2] == 'e' &&
6449 if (name[1] == 'o' &&
6462 if (name[2] == 'y' &&
6471 if (name[2] == 'l' &&
6487 if (name[2] == 's' &&
6496 if (name[2] == 'n' &&
6505 if (name[2] == 'c' &&
6518 if (name[1] == 'e' &&
6528 if (name[1] == 'p' &&
6541 if (name[2] == 'c' &&
6550 if (name[2] == 'p' &&
6559 if (name[2] == 's' &&
6575 if (name[2] == 'n' &&
6645 if (name[2] == 'r' &&
6654 if (name[2] == 'r' &&
6663 if (name[2] == 'a' &&
6679 if (name[2] == 'l' &&
6741 if (name[2] == 'e' &&
6744 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6757 case 5: /* 38 tokens of length 5 */
6761 if (name[1] == 'E' &&
6772 if (name[1] == 'H' &&
6786 if (name[2] == 'a' &&
6796 if (name[2] == 'a' &&
6813 if (name[2] == 'e' &&
6823 if (name[2] == 'e' &&
6827 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6843 if (name[3] == 'i' &&
6852 if (name[3] == 'o' &&
6888 if (name[2] == 'o' &&
6898 if (name[2] == 'y' &&
6912 if (name[1] == 'l' &&
6926 if (name[2] == 'n' &&
6936 if (name[2] == 'o' &&
6950 if (name[1] == 'i' &&
6955 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
6964 if (name[2] == 'd' &&
6974 if (name[2] == 'c' &&
6991 if (name[2] == 'c' &&
7001 if (name[2] == 't' &&
7015 if (name[1] == 'k' &&
7026 if (name[1] == 'r' &&
7040 if (name[2] == 's' &&
7050 if (name[2] == 'd' &&
7067 if (name[2] == 'm' &&
7077 if (name[2] == 'i' &&
7087 if (name[2] == 'e' &&
7097 if (name[2] == 'l' &&
7107 if (name[2] == 'a' &&
7117 if (name[2] == 'u' &&
7131 if (name[1] == 'i' &&
7145 if (name[2] == 'a' &&
7158 if (name[3] == 'e' &&
7193 if (name[2] == 'i' &&
7210 if (name[2] == 'i' &&
7220 if (name[2] == 'i' &&
7237 case 6: /* 33 tokens of length 6 */
7241 if (name[1] == 'c' &&
7256 if (name[2] == 'l' &&
7267 if (name[2] == 'r' &&
7282 if (name[1] == 'e' &&
7297 if (name[2] == 's' &&
7302 if(ckWARN_d(WARN_SYNTAX))
7303 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7309 if (name[2] == 'i' &&
7327 if (name[2] == 'l' &&
7338 if (name[2] == 'r' &&
7353 if (name[1] == 'm' &&
7368 if (name[2] == 'n' &&
7379 if (name[2] == 's' &&
7394 if (name[1] == 's' &&
7400 if (name[4] == 't' &&
7409 if (name[4] == 'e' &&
7418 if (name[4] == 'c' &&
7427 if (name[4] == 'n' &&
7443 if (name[1] == 'r' &&
7461 if (name[3] == 'a' &&
7471 if (name[3] == 'u' &&
7485 if (name[2] == 'n' &&
7503 if (name[2] == 'a' &&
7517 if (name[3] == 'e' &&
7530 if (name[4] == 't' &&
7539 if (name[4] == 'e' &&
7561 if (name[4] == 't' &&
7570 if (name[4] == 'e' &&
7586 if (name[2] == 'c' &&
7597 if (name[2] == 'l' &&
7608 if (name[2] == 'b' &&
7619 if (name[2] == 's' &&
7642 if (name[4] == 's' &&
7651 if (name[4] == 'n' &&
7664 if (name[3] == 'a' &&
7681 if (name[1] == 'a' &&
7696 case 7: /* 29 tokens of length 7 */
7700 if (name[1] == 'E' &&
7713 if (name[1] == '_' &&
7726 if (name[1] == 'i' &&
7733 return -KEY_binmode;
7739 if (name[1] == 'o' &&
7746 return -KEY_connect;
7755 if (name[2] == 'm' &&
7761 return -KEY_dbmopen;
7772 if (name[4] == 'u' &&
7776 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7782 if (name[4] == 'n' &&
7803 if (name[1] == 'o' &&
7816 if (name[1] == 'e' &&
7823 if (name[5] == 'r' &&
7826 return -KEY_getpgrp;
7832 if (name[5] == 'i' &&
7835 return -KEY_getppid;
7848 if (name[1] == 'c' &&
7855 return -KEY_lcfirst;
7861 if (name[1] == 'p' &&
7868 return -KEY_opendir;
7874 if (name[1] == 'a' &&
7892 if (name[3] == 'd' &&
7897 return -KEY_readdir;
7903 if (name[3] == 'u' &&
7914 if (name[3] == 'e' &&
7919 return -KEY_reverse;
7938 if (name[3] == 'k' &&
7943 return -KEY_seekdir;
7949 if (name[3] == 'p' &&
7954 return -KEY_setpgrp;
7964 if (name[2] == 'm' &&
7970 return -KEY_shmread;
7976 if (name[2] == 'r' &&
7982 return -KEY_sprintf;
7991 if (name[3] == 'l' &&
7996 return -KEY_symlink;
8005 if (name[4] == 'a' &&
8009 return -KEY_syscall;
8015 if (name[4] == 'p' &&
8019 return -KEY_sysopen;
8025 if (name[4] == 'e' &&
8029 return -KEY_sysread;
8035 if (name[4] == 'e' &&
8039 return -KEY_sysseek;
8057 if (name[1] == 'e' &&
8064 return -KEY_telldir;
8073 if (name[2] == 'f' &&
8079 return -KEY_ucfirst;
8085 if (name[2] == 's' &&
8091 return -KEY_unshift;
8101 if (name[1] == 'a' &&
8108 return -KEY_waitpid;
8117 case 8: /* 26 tokens of length 8 */
8121 if (name[1] == 'U' &&
8129 return KEY_AUTOLOAD;
8140 if (name[3] == 'A' &&
8146 return KEY___DATA__;
8152 if (name[3] == 'I' &&
8158 return -KEY___FILE__;
8164 if (name[3] == 'I' &&
8170 return -KEY___LINE__;
8186 if (name[2] == 'o' &&
8193 return -KEY_closedir;
8199 if (name[2] == 'n' &&
8206 return -KEY_continue;
8216 if (name[1] == 'b' &&
8224 return -KEY_dbmclose;
8230 if (name[1] == 'n' &&
8236 if (name[4] == 'r' &&
8241 return -KEY_endgrent;
8247 if (name[4] == 'w' &&
8252 return -KEY_endpwent;
8265 if (name[1] == 'o' &&
8273 return -KEY_formline;
8279 if (name[1] == 'e' &&
8290 if (name[6] == 'n' &&
8293 return -KEY_getgrent;
8299 if (name[6] == 'i' &&
8302 return -KEY_getgrgid;
8308 if (name[6] == 'a' &&
8311 return -KEY_getgrnam;
8324 if (name[4] == 'o' &&
8329 return -KEY_getlogin;
8340 if (name[6] == 'n' &&
8343 return -KEY_getpwent;
8349 if (name[6] == 'a' &&
8352 return -KEY_getpwnam;
8358 if (name[6] == 'i' &&
8361 return -KEY_getpwuid;
8381 if (name[1] == 'e' &&
8388 if (name[5] == 'i' &&
8395 return -KEY_readline;
8400 return -KEY_readlink;
8411 if (name[5] == 'i' &&
8415 return -KEY_readpipe;
8436 if (name[4] == 'r' &&
8441 return -KEY_setgrent;
8447 if (name[4] == 'w' &&
8452 return -KEY_setpwent;
8468 if (name[3] == 'w' &&
8474 return -KEY_shmwrite;
8480 if (name[3] == 't' &&
8486 return -KEY_shutdown;
8496 if (name[2] == 's' &&
8503 return -KEY_syswrite;
8513 if (name[1] == 'r' &&
8521 return -KEY_truncate;
8530 case 9: /* 8 tokens of length 9 */
8534 if (name[1] == 'n' &&
8543 return -KEY_endnetent;
8549 if (name[1] == 'e' &&
8558 return -KEY_getnetent;
8564 if (name[1] == 'o' &&
8573 return -KEY_localtime;
8579 if (name[1] == 'r' &&
8588 return KEY_prototype;
8594 if (name[1] == 'u' &&
8603 return -KEY_quotemeta;
8609 if (name[1] == 'e' &&
8618 return -KEY_rewinddir;
8624 if (name[1] == 'e' &&
8633 return -KEY_setnetent;
8639 if (name[1] == 'a' &&
8648 return -KEY_wantarray;
8657 case 10: /* 9 tokens of length 10 */
8661 if (name[1] == 'n' &&
8667 if (name[4] == 'o' &&
8674 return -KEY_endhostent;
8680 if (name[4] == 'e' &&
8687 return -KEY_endservent;
8700 if (name[1] == 'e' &&
8706 if (name[4] == 'o' &&
8713 return -KEY_gethostent;
8722 if (name[5] == 'r' &&
8728 return -KEY_getservent;
8734 if (name[5] == 'c' &&
8740 return -KEY_getsockopt;
8765 if (name[4] == 'o' &&
8772 return -KEY_sethostent;
8781 if (name[5] == 'r' &&
8787 return -KEY_setservent;
8793 if (name[5] == 'c' &&
8799 return -KEY_setsockopt;
8816 if (name[2] == 'c' &&
8825 return -KEY_socketpair;
8838 case 11: /* 8 tokens of length 11 */
8842 if (name[1] == '_' &&
8853 return -KEY___PACKAGE__;
8859 if (name[1] == 'n' &&
8870 return -KEY_endprotoent;
8876 if (name[1] == 'e' &&
8885 if (name[5] == 'e' &&
8892 return -KEY_getpeername;
8901 if (name[6] == 'o' &&
8907 return -KEY_getpriority;
8913 if (name[6] == 't' &&
8919 return -KEY_getprotoent;
8933 if (name[4] == 'o' &&
8941 return -KEY_getsockname;
8954 if (name[1] == 'e' &&
8962 if (name[6] == 'o' &&
8968 return -KEY_setpriority;
8974 if (name[6] == 't' &&
8980 return -KEY_setprotoent;
8996 case 12: /* 2 tokens of length 12 */
8997 if (name[0] == 'g' &&
9009 if (name[9] == 'd' &&
9012 { /* getnetbyaddr */
9013 return -KEY_getnetbyaddr;
9019 if (name[9] == 'a' &&
9022 { /* getnetbyname */
9023 return -KEY_getnetbyname;
9035 case 13: /* 4 tokens of length 13 */
9036 if (name[0] == 'g' &&
9043 if (name[4] == 'o' &&
9052 if (name[10] == 'd' &&
9055 { /* gethostbyaddr */
9056 return -KEY_gethostbyaddr;
9062 if (name[10] == 'a' &&
9065 { /* gethostbyname */
9066 return -KEY_gethostbyname;
9079 if (name[4] == 'e' &&
9088 if (name[10] == 'a' &&
9091 { /* getservbyname */
9092 return -KEY_getservbyname;
9098 if (name[10] == 'o' &&
9101 { /* getservbyport */
9102 return -KEY_getservbyport;
9121 case 14: /* 1 tokens of length 14 */
9122 if (name[0] == 'g' &&
9136 { /* getprotobyname */
9137 return -KEY_getprotobyname;
9142 case 16: /* 1 tokens of length 16 */
9143 if (name[0] == 'g' &&
9159 { /* getprotobynumber */
9160 return -KEY_getprotobynumber;
9174 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9178 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9179 if (ckWARN(WARN_SYNTAX)) {
9181 for (w = s+2; *w && level; w++) {
9188 for (; *w && isSPACE(*w); w++) ;
9189 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9190 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9191 "%s (...) interpreted as function",name);
9194 while (s < PL_bufend && isSPACE(*s))
9198 while (s < PL_bufend && isSPACE(*s))
9200 if (isIDFIRST_lazy_if(s,UTF)) {
9202 while (isALNUM_lazy_if(s,UTF))
9204 while (s < PL_bufend && isSPACE(*s))
9208 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9209 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9213 Perl_croak(aTHX_ "No comma allowed after %s", what);
9218 /* Either returns sv, or mortalizes sv and returns a new SV*.
9219 Best used as sv=new_constant(..., sv, ...).
9220 If s, pv are NULL, calls subroutine with one argument,
9221 and type is used with error messages only. */
9224 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9228 HV * const table = GvHV(PL_hintgv); /* ^H */
9232 const char *why1 = "", *why2 = "", *why3 = "";
9234 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9237 why2 = strEQ(key,"charnames")
9238 ? "(possibly a missing \"use charnames ...\")"
9240 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9241 (type ? type: "undef"), why2);
9243 /* This is convoluted and evil ("goto considered harmful")
9244 * but I do not understand the intricacies of all the different
9245 * failure modes of %^H in here. The goal here is to make
9246 * the most probable error message user-friendly. --jhi */
9251 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9252 (type ? type: "undef"), why1, why2, why3);
9254 yyerror(SvPVX_const(msg));
9258 cvp = hv_fetch(table, key, strlen(key), FALSE);
9259 if (!cvp || !SvOK(*cvp)) {
9262 why3 = "} is not defined";
9265 sv_2mortal(sv); /* Parent created it permanently */
9268 pv = sv_2mortal(newSVpvn(s, len));
9270 typesv = sv_2mortal(newSVpv(type, 0));
9272 typesv = &PL_sv_undef;
9274 PUSHSTACKi(PERLSI_OVERLOAD);
9286 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9290 /* Check the eval first */
9291 if (!PL_in_eval && SvTRUE(ERRSV)) {
9292 sv_catpvs(ERRSV, "Propagated");
9293 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9295 res = SvREFCNT_inc(sv);
9299 (void)SvREFCNT_inc(res);
9308 why1 = "Call to &{$^H{";
9310 why3 = "}} did not return a defined value";
9318 /* Returns a NUL terminated string, with the length of the string written to
9322 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9324 register char *d = dest;
9325 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9328 Perl_croak(aTHX_ ident_too_long);
9329 if (isALNUM(*s)) /* UTF handled below */
9331 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9336 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9340 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9341 char *t = s + UTF8SKIP(s);
9342 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9344 if (d + (t - s) > e)
9345 Perl_croak(aTHX_ ident_too_long);
9346 Copy(s, d, t - s, char);
9359 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9363 char *bracket = Nullch;
9369 e = d + destlen - 3; /* two-character token, ending NUL */
9371 while (isDIGIT(*s)) {
9373 Perl_croak(aTHX_ ident_too_long);
9380 Perl_croak(aTHX_ ident_too_long);
9381 if (isALNUM(*s)) /* UTF handled below */
9383 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9388 else if (*s == ':' && s[1] == ':') {
9392 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9393 char *t = s + UTF8SKIP(s);
9394 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9396 if (d + (t - s) > e)
9397 Perl_croak(aTHX_ ident_too_long);
9398 Copy(s, d, t - s, char);
9409 if (PL_lex_state != LEX_NORMAL)
9410 PL_lex_state = LEX_INTERPENDMAYBE;
9413 if (*s == '$' && s[1] &&
9414 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9427 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9432 if (isSPACE(s[-1])) {
9434 const char ch = *s++;
9435 if (!SPACE_OR_TAB(ch)) {
9441 if (isIDFIRST_lazy_if(d,UTF)) {
9445 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9447 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9450 Copy(s, d, e - s, char);
9455 while ((isALNUM(*s) || *s == ':') && d < e)
9458 Perl_croak(aTHX_ ident_too_long);
9461 while (s < send && SPACE_OR_TAB(*s)) s++;
9462 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9463 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9464 const char *brack = *s == '[' ? "[...]" : "{...}";
9465 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9466 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9467 funny, dest, brack, funny, dest, brack);
9470 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9474 /* Handle extended ${^Foo} variables
9475 * 1999-02-27 mjd-perl-patch@plover.com */
9476 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9480 while (isALNUM(*s) && d < e) {
9484 Perl_croak(aTHX_ ident_too_long);
9489 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9490 PL_lex_state = LEX_INTERPEND;
9495 if (PL_lex_state == LEX_NORMAL) {
9496 if (ckWARN(WARN_AMBIGUOUS) &&
9497 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9499 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9500 "Ambiguous use of %c{%s} resolved to %c%s",
9501 funny, dest, funny, dest);
9506 s = bracket; /* let the parser handle it */
9510 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9511 PL_lex_state = LEX_INTERPEND;
9516 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9521 *pmfl |= PMf_GLOBAL;
9523 *pmfl |= PMf_CONTINUE;
9527 *pmfl |= PMf_MULTILINE;
9529 *pmfl |= PMf_SINGLELINE;
9531 *pmfl |= PMf_EXTENDED;
9535 S_scan_pat(pTHX_ char *start, I32 type)
9538 char *s = scan_str(start,FALSE,FALSE);
9541 char * const delimiter = skipspace(start);
9542 Perl_croak(aTHX_ *delimiter == '?'
9543 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9544 : "Search pattern not terminated" );
9547 pm = (PMOP*)newPMOP(type, 0);
9548 if (PL_multi_open == '?')
9549 pm->op_pmflags |= PMf_ONCE;
9551 while (*s && strchr("iomsx", *s))
9552 pmflag(&pm->op_pmflags,*s++);
9555 while (*s && strchr("iogcmsx", *s))
9556 pmflag(&pm->op_pmflags,*s++);
9558 /* issue a warning if /c is specified,but /g is not */
9559 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9560 && ckWARN(WARN_REGEXP))
9562 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9565 pm->op_pmpermflags = pm->op_pmflags;
9567 PL_lex_op = (OP*)pm;
9568 yylval.ival = OP_MATCH;
9573 S_scan_subst(pTHX_ char *start)
9581 yylval.ival = OP_NULL;
9583 s = scan_str(start,FALSE,FALSE);
9586 Perl_croak(aTHX_ "Substitution pattern not terminated");
9588 if (s[-1] == PL_multi_open)
9591 first_start = PL_multi_start;
9592 s = scan_str(s,FALSE,FALSE);
9595 SvREFCNT_dec(PL_lex_stuff);
9596 PL_lex_stuff = Nullsv;
9598 Perl_croak(aTHX_ "Substitution replacement not terminated");
9600 PL_multi_start = first_start; /* so whole substitution is taken together */
9602 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9608 else if (strchr("iogcmsx", *s))
9609 pmflag(&pm->op_pmflags,*s++);
9614 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9615 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9620 PL_sublex_info.super_bufptr = s;
9621 PL_sublex_info.super_bufend = PL_bufend;
9623 pm->op_pmflags |= PMf_EVAL;
9624 repl = newSVpvs("");
9626 sv_catpv(repl, es ? "eval " : "do ");
9627 sv_catpvs(repl, "{ ");
9628 sv_catsv(repl, PL_lex_repl);
9629 sv_catpvs(repl, " }");
9631 SvREFCNT_dec(PL_lex_repl);
9635 pm->op_pmpermflags = pm->op_pmflags;
9636 PL_lex_op = (OP*)pm;
9637 yylval.ival = OP_SUBST;
9642 S_scan_trans(pTHX_ char *start)
9651 yylval.ival = OP_NULL;
9653 s = scan_str(start,FALSE,FALSE);
9655 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9656 if (s[-1] == PL_multi_open)
9659 s = scan_str(s,FALSE,FALSE);
9662 SvREFCNT_dec(PL_lex_stuff);
9663 PL_lex_stuff = Nullsv;
9665 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9668 complement = del = squash = 0;
9672 complement = OPpTRANS_COMPLEMENT;
9675 del = OPpTRANS_DELETE;
9678 squash = OPpTRANS_SQUASH;
9687 Newx(tbl, complement&&!del?258:256, short);
9688 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9689 o->op_private &= ~OPpTRANS_ALL;
9690 o->op_private |= del|squash|complement|
9691 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9692 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9695 yylval.ival = OP_TRANS;
9700 S_scan_heredoc(pTHX_ register char *s)
9703 I32 op_type = OP_SCALAR;
9707 const char *found_newline;
9711 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9715 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9718 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9719 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9722 s = delimcpy(d, e, s, PL_bufend, term, &len);
9732 if (!isALNUM_lazy_if(s,UTF))
9733 deprecate_old("bare << to mean <<\"\"");
9734 for (; isALNUM_lazy_if(s,UTF); s++) {
9739 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9740 Perl_croak(aTHX_ "Delimiter for here document is too long");
9743 len = d - PL_tokenbuf;
9744 #ifndef PERL_STRICT_CR
9745 d = strchr(s, '\r');
9747 char * const olds = s;
9749 while (s < PL_bufend) {
9755 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9764 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9768 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9769 herewas = newSVpvn(s,PL_bufend-s);
9773 herewas = newSVpvn(s,found_newline-s);
9775 s += SvCUR(herewas);
9777 tmpstr = NEWSV(87,79);
9778 sv_upgrade(tmpstr, SVt_PVIV);
9781 SvIV_set(tmpstr, -1);
9783 else if (term == '`') {
9784 op_type = OP_BACKTICK;
9785 SvIV_set(tmpstr, '\\');
9789 PL_multi_start = CopLINE(PL_curcop);
9790 PL_multi_open = PL_multi_close = '<';
9791 term = *PL_tokenbuf;
9792 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9793 char *bufptr = PL_sublex_info.super_bufptr;
9794 char *bufend = PL_sublex_info.super_bufend;
9795 char * const olds = s - SvCUR(herewas);
9796 s = strchr(bufptr, '\n');
9800 while (s < bufend &&
9801 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9803 CopLINE_inc(PL_curcop);
9806 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9807 missingterm(PL_tokenbuf);
9809 sv_setpvn(herewas,bufptr,d-bufptr+1);
9810 sv_setpvn(tmpstr,d+1,s-d);
9812 sv_catpvn(herewas,s,bufend-s);
9813 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9820 while (s < PL_bufend &&
9821 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9823 CopLINE_inc(PL_curcop);
9825 if (s >= PL_bufend) {
9826 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9827 missingterm(PL_tokenbuf);
9829 sv_setpvn(tmpstr,d+1,s-d);
9831 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9833 sv_catpvn(herewas,s,PL_bufend-s);
9834 sv_setsv(PL_linestr,herewas);
9835 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9836 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9837 PL_last_lop = PL_last_uni = Nullch;
9840 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9841 while (s >= PL_bufend) { /* multiple line string? */
9843 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9844 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9845 missingterm(PL_tokenbuf);
9847 CopLINE_inc(PL_curcop);
9848 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9849 PL_last_lop = PL_last_uni = Nullch;
9850 #ifndef PERL_STRICT_CR
9851 if (PL_bufend - PL_linestart >= 2) {
9852 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9853 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9855 PL_bufend[-2] = '\n';
9857 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9859 else if (PL_bufend[-1] == '\r')
9860 PL_bufend[-1] = '\n';
9862 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9863 PL_bufend[-1] = '\n';
9865 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9866 SV *sv = NEWSV(88,0);
9868 sv_upgrade(sv, SVt_PVMG);
9869 sv_setsv(sv,PL_linestr);
9872 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9874 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9875 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9876 *(SvPVX(PL_linestr) + off ) = ' ';
9877 sv_catsv(PL_linestr,herewas);
9878 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9879 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9883 sv_catsv(tmpstr,PL_linestr);
9888 PL_multi_end = CopLINE(PL_curcop);
9889 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9890 SvPV_shrink_to_cur(tmpstr);
9892 SvREFCNT_dec(herewas);
9894 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9896 else if (PL_encoding)
9897 sv_recode_to_utf8(tmpstr, PL_encoding);
9899 PL_lex_stuff = tmpstr;
9900 yylval.ival = op_type;
9905 takes: current position in input buffer
9906 returns: new position in input buffer
9907 side-effects: yylval and lex_op are set.
9912 <FH> read from filehandle
9913 <pkg::FH> read from package qualified filehandle
9914 <pkg'FH> read from package qualified filehandle
9915 <$fh> read from filehandle in $fh
9921 S_scan_inputsymbol(pTHX_ char *start)
9923 register char *s = start; /* current position in buffer */
9929 d = PL_tokenbuf; /* start of temp holding space */
9930 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9931 end = strchr(s, '\n');
9934 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9936 /* die if we didn't have space for the contents of the <>,
9937 or if it didn't end, or if we see a newline
9940 if (len >= sizeof PL_tokenbuf)
9941 Perl_croak(aTHX_ "Excessively long <> operator");
9943 Perl_croak(aTHX_ "Unterminated <> operator");
9948 Remember, only scalar variables are interpreted as filehandles by
9949 this code. Anything more complex (e.g., <$fh{$num}>) will be
9950 treated as a glob() call.
9951 This code makes use of the fact that except for the $ at the front,
9952 a scalar variable and a filehandle look the same.
9954 if (*d == '$' && d[1]) d++;
9956 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9957 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9960 /* If we've tried to read what we allow filehandles to look like, and
9961 there's still text left, then it must be a glob() and not a getline.
9962 Use scan_str to pull out the stuff between the <> and treat it
9963 as nothing more than a string.
9966 if (d - PL_tokenbuf != len) {
9967 yylval.ival = OP_GLOB;
9969 s = scan_str(start,FALSE,FALSE);
9971 Perl_croak(aTHX_ "Glob not terminated");
9975 bool readline_overriden = FALSE;
9976 GV *gv_readline = Nullgv;
9978 /* we're in a filehandle read situation */
9981 /* turn <> into <ARGV> */
9983 Copy("ARGV",d,5,char);
9985 /* Check whether readline() is overriden */
9986 if (((gv_readline = gv_fetchpv("readline", 0, SVt_PVCV))
9987 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9989 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9990 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9991 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9992 readline_overriden = TRUE;
9994 /* if <$fh>, create the ops to turn the variable into a
10000 /* try to find it in the pad for this block, otherwise find
10001 add symbol table ops
10003 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10004 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
10005 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
10006 HEK *stashname = HvNAME_HEK(stash);
10007 SV *sym = sv_2mortal(newSVhek(stashname));
10008 sv_catpvs(sym, "::");
10009 sv_catpv(sym, d+1);
10014 OP *o = newOP(OP_PADSV, 0);
10016 PL_lex_op = readline_overriden
10017 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10018 append_elem(OP_LIST, o,
10019 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10020 : (OP*)newUNOP(OP_READLINE, 0, o);
10029 ? (GV_ADDMULTI | GV_ADDINEVAL)
10032 PL_lex_op = readline_overriden
10033 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10034 append_elem(OP_LIST,
10035 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10036 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10037 : (OP*)newUNOP(OP_READLINE, 0,
10038 newUNOP(OP_RV2SV, 0,
10039 newGVOP(OP_GV, 0, gv)));
10041 if (!readline_overriden)
10042 PL_lex_op->op_flags |= OPf_SPECIAL;
10043 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10044 yylval.ival = OP_NULL;
10047 /* If it's none of the above, it must be a literal filehandle
10048 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10050 GV *gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10051 PL_lex_op = readline_overriden
10052 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10053 append_elem(OP_LIST,
10054 newGVOP(OP_GV, 0, gv),
10055 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10056 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10057 yylval.ival = OP_NULL;
10066 takes: start position in buffer
10067 keep_quoted preserve \ on the embedded delimiter(s)
10068 keep_delims preserve the delimiters around the string
10069 returns: position to continue reading from buffer
10070 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10071 updates the read buffer.
10073 This subroutine pulls a string out of the input. It is called for:
10074 q single quotes q(literal text)
10075 ' single quotes 'literal text'
10076 qq double quotes qq(interpolate $here please)
10077 " double quotes "interpolate $here please"
10078 qx backticks qx(/bin/ls -l)
10079 ` backticks `/bin/ls -l`
10080 qw quote words @EXPORT_OK = qw( func() $spam )
10081 m// regexp match m/this/
10082 s/// regexp substitute s/this/that/
10083 tr/// string transliterate tr/this/that/
10084 y/// string transliterate y/this/that/
10085 ($*@) sub prototypes sub foo ($)
10086 (stuff) sub attr parameters sub foo : attr(stuff)
10087 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10089 In most of these cases (all but <>, patterns and transliterate)
10090 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10091 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10092 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10095 It skips whitespace before the string starts, and treats the first
10096 character as the delimiter. If the delimiter is one of ([{< then
10097 the corresponding "close" character )]}> is used as the closing
10098 delimiter. It allows quoting of delimiters, and if the string has
10099 balanced delimiters ([{<>}]) it allows nesting.
10101 On success, the SV with the resulting string is put into lex_stuff or,
10102 if that is already non-NULL, into lex_repl. The second case occurs only
10103 when parsing the RHS of the special constructs s/// and tr/// (y///).
10104 For convenience, the terminating delimiter character is stuffed into
10109 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10111 SV *sv; /* scalar value: string */
10112 char *tmps; /* temp string, used for delimiter matching */
10113 register char *s = start; /* current position in the buffer */
10114 register char term; /* terminating character */
10115 register char *to; /* current position in the sv's data */
10116 I32 brackets = 1; /* bracket nesting level */
10117 bool has_utf8 = FALSE; /* is there any utf8 content? */
10118 I32 termcode; /* terminating char. code */
10119 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10120 STRLEN termlen; /* length of terminating string */
10121 char *last = NULL; /* last position for nesting bracket */
10123 /* skip space before the delimiter */
10127 /* mark where we are, in case we need to report errors */
10130 /* after skipping whitespace, the next character is the terminator */
10133 termcode = termstr[0] = term;
10137 termcode = utf8_to_uvchr((U8*)s, &termlen);
10138 Copy(s, termstr, termlen, U8);
10139 if (!UTF8_IS_INVARIANT(term))
10143 /* mark where we are */
10144 PL_multi_start = CopLINE(PL_curcop);
10145 PL_multi_open = term;
10147 /* find corresponding closing delimiter */
10148 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10149 termcode = termstr[0] = term = tmps[5];
10151 PL_multi_close = term;
10153 /* create a new SV to hold the contents. 87 is leak category, I'm
10154 assuming. 79 is the SV's initial length. What a random number. */
10156 sv_upgrade(sv, SVt_PVIV);
10157 SvIV_set(sv, termcode);
10158 (void)SvPOK_only(sv); /* validate pointer */
10160 /* move past delimiter and try to read a complete string */
10162 sv_catpvn(sv, s, termlen);
10165 if (PL_encoding && !UTF) {
10169 int offset = s - SvPVX_const(PL_linestr);
10170 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10171 &offset, (char*)termstr, termlen);
10172 const char *ns = SvPVX_const(PL_linestr) + offset;
10173 char *svlast = SvEND(sv) - 1;
10175 for (; s < ns; s++) {
10176 if (*s == '\n' && !PL_rsfp)
10177 CopLINE_inc(PL_curcop);
10180 goto read_more_line;
10182 /* handle quoted delimiters */
10183 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10185 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10187 if ((svlast-1 - t) % 2) {
10188 if (!keep_quoted) {
10189 *(svlast-1) = term;
10191 SvCUR_set(sv, SvCUR(sv) - 1);
10196 if (PL_multi_open == PL_multi_close) {
10204 for (t = w = last; t < svlast; w++, t++) {
10205 /* At here, all closes are "was quoted" one,
10206 so we don't check PL_multi_close. */
10208 if (!keep_quoted && *(t+1) == PL_multi_open)
10213 else if (*t == PL_multi_open)
10221 SvCUR_set(sv, w - SvPVX_const(sv));
10224 if (--brackets <= 0)
10229 if (!keep_delims) {
10230 SvCUR_set(sv, SvCUR(sv) - 1);
10236 /* extend sv if need be */
10237 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10238 /* set 'to' to the next character in the sv's string */
10239 to = SvPVX(sv)+SvCUR(sv);
10241 /* if open delimiter is the close delimiter read unbridle */
10242 if (PL_multi_open == PL_multi_close) {
10243 for (; s < PL_bufend; s++,to++) {
10244 /* embedded newlines increment the current line number */
10245 if (*s == '\n' && !PL_rsfp)
10246 CopLINE_inc(PL_curcop);
10247 /* handle quoted delimiters */
10248 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10249 if (!keep_quoted && s[1] == term)
10251 /* any other quotes are simply copied straight through */
10255 /* terminate when run out of buffer (the for() condition), or
10256 have found the terminator */
10257 else if (*s == term) {
10260 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10263 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10269 /* if the terminator isn't the same as the start character (e.g.,
10270 matched brackets), we have to allow more in the quoting, and
10271 be prepared for nested brackets.
10274 /* read until we run out of string, or we find the terminator */
10275 for (; s < PL_bufend; s++,to++) {
10276 /* embedded newlines increment the line count */
10277 if (*s == '\n' && !PL_rsfp)
10278 CopLINE_inc(PL_curcop);
10279 /* backslashes can escape the open or closing characters */
10280 if (*s == '\\' && s+1 < PL_bufend) {
10281 if (!keep_quoted &&
10282 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10287 /* allow nested opens and closes */
10288 else if (*s == PL_multi_close && --brackets <= 0)
10290 else if (*s == PL_multi_open)
10292 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10297 /* terminate the copied string and update the sv's end-of-string */
10299 SvCUR_set(sv, to - SvPVX_const(sv));
10302 * this next chunk reads more into the buffer if we're not done yet
10306 break; /* handle case where we are done yet :-) */
10308 #ifndef PERL_STRICT_CR
10309 if (to - SvPVX_const(sv) >= 2) {
10310 if ((to[-2] == '\r' && to[-1] == '\n') ||
10311 (to[-2] == '\n' && to[-1] == '\r'))
10315 SvCUR_set(sv, to - SvPVX_const(sv));
10317 else if (to[-1] == '\r')
10320 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10325 /* if we're out of file, or a read fails, bail and reset the current
10326 line marker so we can report where the unterminated string began
10329 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10331 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10334 /* we read a line, so increment our line counter */
10335 CopLINE_inc(PL_curcop);
10337 /* update debugger info */
10338 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10339 SV * const sv = NEWSV(88,0);
10341 sv_upgrade(sv, SVt_PVMG);
10342 sv_setsv(sv,PL_linestr);
10343 (void)SvIOK_on(sv);
10345 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10348 /* having changed the buffer, we must update PL_bufend */
10349 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10350 PL_last_lop = PL_last_uni = Nullch;
10353 /* at this point, we have successfully read the delimited string */
10355 if (!PL_encoding || UTF) {
10357 sv_catpvn(sv, s, termlen);
10360 if (has_utf8 || PL_encoding)
10363 PL_multi_end = CopLINE(PL_curcop);
10365 /* if we allocated too much space, give some back */
10366 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10367 SvLEN_set(sv, SvCUR(sv) + 1);
10368 SvPV_renew(sv, SvLEN(sv));
10371 /* decide whether this is the first or second quoted string we've read
10384 takes: pointer to position in buffer
10385 returns: pointer to new position in buffer
10386 side-effects: builds ops for the constant in yylval.op
10388 Read a number in any of the formats that Perl accepts:
10390 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10391 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10394 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10396 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10399 If it reads a number without a decimal point or an exponent, it will
10400 try converting the number to an integer and see if it can do so
10401 without loss of precision.
10405 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10407 register const char *s = start; /* current position in buffer */
10408 register char *d; /* destination in temp buffer */
10409 register char *e; /* end of temp buffer */
10410 NV nv; /* number read, as a double */
10411 SV *sv = Nullsv; /* place to put the converted number */
10412 bool floatit; /* boolean: int or float? */
10413 const char *lastub = NULL; /* position of last underbar */
10414 static char const number_too_long[] = "Number too long";
10416 /* We use the first character to decide what type of number this is */
10420 Perl_croak(aTHX_ "panic: scan_num");
10422 /* if it starts with a 0, it could be an octal number, a decimal in
10423 0.13 disguise, or a hexadecimal number, or a binary number. */
10427 u holds the "number so far"
10428 shift the power of 2 of the base
10429 (hex == 4, octal == 3, binary == 1)
10430 overflowed was the number more than we can hold?
10432 Shift is used when we add a digit. It also serves as an "are
10433 we in octal/hex/binary?" indicator to disallow hex characters
10434 when in octal mode.
10439 bool overflowed = FALSE;
10440 bool just_zero = TRUE; /* just plain 0 or binary number? */
10441 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10442 static const char* const bases[5] =
10443 { "", "binary", "", "octal", "hexadecimal" };
10444 static const char* const Bases[5] =
10445 { "", "Binary", "", "Octal", "Hexadecimal" };
10446 static const char* const maxima[5] =
10448 "0b11111111111111111111111111111111",
10452 const char *base, *Base, *max;
10454 /* check for hex */
10459 } else if (s[1] == 'b') {
10464 /* check for a decimal in disguise */
10465 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10467 /* so it must be octal */
10474 if (ckWARN(WARN_SYNTAX))
10475 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10476 "Misplaced _ in number");
10480 base = bases[shift];
10481 Base = Bases[shift];
10482 max = maxima[shift];
10484 /* read the rest of the number */
10486 /* x is used in the overflow test,
10487 b is the digit we're adding on. */
10492 /* if we don't mention it, we're done */
10496 /* _ are ignored -- but warned about if consecutive */
10498 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10499 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10500 "Misplaced _ in number");
10504 /* 8 and 9 are not octal */
10505 case '8': case '9':
10507 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10511 case '2': case '3': case '4':
10512 case '5': case '6': case '7':
10514 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10517 case '0': case '1':
10518 b = *s++ & 15; /* ASCII digit -> value of digit */
10522 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10523 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10524 /* make sure they said 0x */
10527 b = (*s++ & 7) + 9;
10529 /* Prepare to put the digit we have onto the end
10530 of the number so far. We check for overflows.
10536 x = u << shift; /* make room for the digit */
10538 if ((x >> shift) != u
10539 && !(PL_hints & HINT_NEW_BINARY)) {
10542 if (ckWARN_d(WARN_OVERFLOW))
10543 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10544 "Integer overflow in %s number",
10547 u = x | b; /* add the digit to the end */
10550 n *= nvshift[shift];
10551 /* If an NV has not enough bits in its
10552 * mantissa to represent an UV this summing of
10553 * small low-order numbers is a waste of time
10554 * (because the NV cannot preserve the
10555 * low-order bits anyway): we could just
10556 * remember when did we overflow and in the
10557 * end just multiply n by the right
10565 /* if we get here, we had success: make a scalar value from
10570 /* final misplaced underbar check */
10571 if (s[-1] == '_') {
10572 if (ckWARN(WARN_SYNTAX))
10573 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10578 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10579 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10580 "%s number > %s non-portable",
10586 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10587 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10588 "%s number > %s non-portable",
10593 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10594 sv = new_constant(start, s - start, "integer",
10596 else if (PL_hints & HINT_NEW_BINARY)
10597 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10602 handle decimal numbers.
10603 we're also sent here when we read a 0 as the first digit
10605 case '1': case '2': case '3': case '4': case '5':
10606 case '6': case '7': case '8': case '9': case '.':
10609 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10612 /* read next group of digits and _ and copy into d */
10613 while (isDIGIT(*s) || *s == '_') {
10614 /* skip underscores, checking for misplaced ones
10618 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10619 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10620 "Misplaced _ in number");
10624 /* check for end of fixed-length buffer */
10626 Perl_croak(aTHX_ number_too_long);
10627 /* if we're ok, copy the character */
10632 /* final misplaced underbar check */
10633 if (lastub && s == lastub + 1) {
10634 if (ckWARN(WARN_SYNTAX))
10635 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10638 /* read a decimal portion if there is one. avoid
10639 3..5 being interpreted as the number 3. followed
10642 if (*s == '.' && s[1] != '.') {
10647 if (ckWARN(WARN_SYNTAX))
10648 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10649 "Misplaced _ in number");
10653 /* copy, ignoring underbars, until we run out of digits.
10655 for (; isDIGIT(*s) || *s == '_'; s++) {
10656 /* fixed length buffer check */
10658 Perl_croak(aTHX_ number_too_long);
10660 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10661 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10662 "Misplaced _ in number");
10668 /* fractional part ending in underbar? */
10669 if (s[-1] == '_') {
10670 if (ckWARN(WARN_SYNTAX))
10671 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10672 "Misplaced _ in number");
10674 if (*s == '.' && isDIGIT(s[1])) {
10675 /* oops, it's really a v-string, but without the "v" */
10681 /* read exponent part, if present */
10682 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10686 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10687 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10689 /* stray preinitial _ */
10691 if (ckWARN(WARN_SYNTAX))
10692 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10693 "Misplaced _ in number");
10697 /* allow positive or negative exponent */
10698 if (*s == '+' || *s == '-')
10701 /* stray initial _ */
10703 if (ckWARN(WARN_SYNTAX))
10704 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10705 "Misplaced _ in number");
10709 /* read digits of exponent */
10710 while (isDIGIT(*s) || *s == '_') {
10713 Perl_croak(aTHX_ number_too_long);
10717 if (((lastub && s == lastub + 1) ||
10718 (!isDIGIT(s[1]) && s[1] != '_'))
10719 && ckWARN(WARN_SYNTAX))
10720 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10721 "Misplaced _ in number");
10728 /* make an sv from the string */
10732 We try to do an integer conversion first if no characters
10733 indicating "float" have been found.
10738 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10740 if (flags == IS_NUMBER_IN_UV) {
10742 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10745 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10746 if (uv <= (UV) IV_MIN)
10747 sv_setiv(sv, -(IV)uv);
10754 /* terminate the string */
10756 nv = Atof(PL_tokenbuf);
10760 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10761 (PL_hints & HINT_NEW_INTEGER) )
10762 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10763 (floatit ? "float" : "integer"),
10767 /* if it starts with a v, it could be a v-string */
10770 sv = NEWSV(92,5); /* preallocate storage space */
10771 s = scan_vstring(s,sv);
10775 /* make the op for the constant and return */
10778 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10780 lvalp->opval = Nullop;
10786 S_scan_formline(pTHX_ register char *s)
10788 register char *eol;
10790 SV *stuff = newSVpvs("");
10791 bool needargs = FALSE;
10792 bool eofmt = FALSE;
10794 while (!needargs) {
10796 #ifdef PERL_STRICT_CR
10797 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10799 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10801 if (*t == '\n' || t == PL_bufend) {
10806 if (PL_in_eval && !PL_rsfp) {
10807 eol = (char *) memchr(s,'\n',PL_bufend-s);
10812 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10814 for (t = s; t < eol; t++) {
10815 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10817 goto enough; /* ~~ must be first line in formline */
10819 if (*t == '@' || *t == '^')
10823 sv_catpvn(stuff, s, eol-s);
10824 #ifndef PERL_STRICT_CR
10825 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10826 char *end = SvPVX(stuff) + SvCUR(stuff);
10829 SvCUR_set(stuff, SvCUR(stuff) - 1);
10838 s = filter_gets(PL_linestr, PL_rsfp, 0);
10839 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10840 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10841 PL_last_lop = PL_last_uni = Nullch;
10850 if (SvCUR(stuff)) {
10853 PL_lex_state = LEX_NORMAL;
10854 PL_nextval[PL_nexttoke].ival = 0;
10858 PL_lex_state = LEX_FORMLINE;
10860 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10862 else if (PL_encoding)
10863 sv_recode_to_utf8(stuff, PL_encoding);
10865 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10867 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10871 SvREFCNT_dec(stuff);
10873 PL_lex_formbrack = 0;
10884 PL_cshlen = strlen(PL_cshname);
10889 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10891 const I32 oldsavestack_ix = PL_savestack_ix;
10892 CV* outsidecv = PL_compcv;
10895 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10897 SAVEI32(PL_subline);
10898 save_item(PL_subname);
10899 SAVESPTR(PL_compcv);
10901 PL_compcv = (CV*)NEWSV(1104,0);
10902 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10903 CvFLAGS(PL_compcv) |= flags;
10905 PL_subline = CopLINE(PL_curcop);
10906 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10907 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10908 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10910 return oldsavestack_ix;
10914 #pragma segment Perl_yylex
10917 Perl_yywarn(pTHX_ const char *s)
10919 PL_in_eval |= EVAL_WARNONLY;
10921 PL_in_eval &= ~EVAL_WARNONLY;
10926 Perl_yyerror(pTHX_ const char *s)
10928 const char *where = NULL;
10929 const char *context = NULL;
10933 if (!yychar || (yychar == ';' && !PL_rsfp))
10935 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10936 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10937 PL_oldbufptr != PL_bufptr) {
10940 The code below is removed for NetWare because it abends/crashes on NetWare
10941 when the script has error such as not having the closing quotes like:
10942 if ($var eq "value)
10943 Checking of white spaces is anyway done in NetWare code.
10946 while (isSPACE(*PL_oldoldbufptr))
10949 context = PL_oldoldbufptr;
10950 contlen = PL_bufptr - PL_oldoldbufptr;
10952 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10953 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10956 The code below is removed for NetWare because it abends/crashes on NetWare
10957 when the script has error such as not having the closing quotes like:
10958 if ($var eq "value)
10959 Checking of white spaces is anyway done in NetWare code.
10962 while (isSPACE(*PL_oldbufptr))
10965 context = PL_oldbufptr;
10966 contlen = PL_bufptr - PL_oldbufptr;
10968 else if (yychar > 255)
10969 where = "next token ???";
10970 else if (yychar == -2) { /* YYEMPTY */
10971 if (PL_lex_state == LEX_NORMAL ||
10972 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10973 where = "at end of line";
10974 else if (PL_lex_inpat)
10975 where = "within pattern";
10977 where = "within string";
10980 SV *where_sv = sv_2mortal(newSVpvs("next char "));
10982 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10983 else if (isPRINT_LC(yychar))
10984 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10986 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10987 where = SvPVX_const(where_sv);
10989 msg = sv_2mortal(newSVpv(s, 0));
10990 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10991 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10993 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10995 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10996 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10997 Perl_sv_catpvf(aTHX_ msg,
10998 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10999 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11002 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11003 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11006 if (PL_error_count >= 10) {
11007 if (PL_in_eval && SvCUR(ERRSV))
11008 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11009 ERRSV, OutCopFILE(PL_curcop));
11011 Perl_croak(aTHX_ "%s has too many errors.\n",
11012 OutCopFILE(PL_curcop));
11015 PL_in_my_stash = NULL;
11019 #pragma segment Main
11023 S_swallow_bom(pTHX_ U8 *s)
11025 const STRLEN slen = SvCUR(PL_linestr);
11028 if (s[1] == 0xFE) {
11029 /* UTF-16 little-endian? (or UTF32-LE?) */
11030 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11031 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11032 #ifndef PERL_NO_UTF16_FILTER
11033 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11036 if (PL_bufend > (char*)s) {
11040 filter_add(utf16rev_textfilter, NULL);
11041 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11042 utf16_to_utf8_reversed(s, news,
11043 PL_bufend - (char*)s - 1,
11045 sv_setpvn(PL_linestr, (const char*)news, newlen);
11047 SvUTF8_on(PL_linestr);
11048 s = (U8*)SvPVX(PL_linestr);
11049 PL_bufend = SvPVX(PL_linestr) + newlen;
11052 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11057 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11058 #ifndef PERL_NO_UTF16_FILTER
11059 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11062 if (PL_bufend > (char *)s) {
11066 filter_add(utf16_textfilter, NULL);
11067 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11068 utf16_to_utf8(s, news,
11069 PL_bufend - (char*)s,
11071 sv_setpvn(PL_linestr, (const char*)news, newlen);
11073 SvUTF8_on(PL_linestr);
11074 s = (U8*)SvPVX(PL_linestr);
11075 PL_bufend = SvPVX(PL_linestr) + newlen;
11078 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11083 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11084 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11085 s += 3; /* UTF-8 */
11091 if (s[2] == 0xFE && s[3] == 0xFF) {
11092 /* UTF-32 big-endian */
11093 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11096 else if (s[2] == 0 && s[3] != 0) {
11099 * are a good indicator of UTF-16BE. */
11100 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11105 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11108 * are a good indicator of UTF-16LE. */
11109 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11118 * Restore a source filter.
11122 restore_rsfp(pTHX_ void *f)
11124 PerlIO * const fp = (PerlIO*)f;
11126 if (PL_rsfp == PerlIO_stdin())
11127 PerlIO_clearerr(PL_rsfp);
11128 else if (PL_rsfp && (PL_rsfp != fp))
11129 PerlIO_close(PL_rsfp);
11133 #ifndef PERL_NO_UTF16_FILTER
11135 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11137 const STRLEN old = SvCUR(sv);
11138 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11139 DEBUG_P(PerlIO_printf(Perl_debug_log,
11140 "utf16_textfilter(%p): %d %d (%d)\n",
11141 utf16_textfilter, idx, maxlen, (int) count));
11145 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11146 Copy(SvPVX_const(sv), tmps, old, char);
11147 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11148 SvCUR(sv) - old, &newlen);
11149 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11151 DEBUG_P({sv_dump(sv);});
11156 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11158 const STRLEN old = SvCUR(sv);
11159 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11160 DEBUG_P(PerlIO_printf(Perl_debug_log,
11161 "utf16rev_textfilter(%p): %d %d (%d)\n",
11162 utf16rev_textfilter, idx, maxlen, (int) count));
11166 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11167 Copy(SvPVX_const(sv), tmps, old, char);
11168 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11169 SvCUR(sv) - old, &newlen);
11170 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11172 DEBUG_P({ sv_dump(sv); });
11178 Returns a pointer to the next character after the parsed
11179 vstring, as well as updating the passed in sv.
11181 Function must be called like
11184 s = scan_vstring(s,sv);
11186 The sv should already be large enough to store the vstring
11187 passed in, for performance reasons.
11192 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11194 const char *pos = s;
11195 const char *start = s;
11196 if (*pos == 'v') pos++; /* get past 'v' */
11197 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11199 if ( *pos != '.') {
11200 /* this may not be a v-string if followed by => */
11201 const char *next = pos;
11202 while (next < PL_bufend && isSPACE(*next))
11204 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11205 /* return string not v-string */
11206 sv_setpvn(sv,(char *)s,pos-s);
11207 return (char *)pos;
11211 if (!isALPHA(*pos)) {
11212 U8 tmpbuf[UTF8_MAXBYTES+1];
11214 if (*s == 'v') s++; /* get past 'v' */
11216 sv_setpvn(sv, "", 0);
11222 /* this is atoi() that tolerates underscores */
11223 const char *end = pos;
11225 while (--end >= s) {
11230 rev += (*end - '0') * mult;
11232 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11233 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11234 "Integer overflow in decimal number");
11238 if (rev > 0x7FFFFFFF)
11239 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11241 /* Append native character for the rev point */
11242 tmpend = uvchr_to_utf8(tmpbuf, rev);
11243 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11244 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11246 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11252 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11256 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11264 * c-indentation-style: bsd
11265 * c-basic-offset: 4
11266 * indent-tabs-mode: t
11269 * ex: set ts=8 sts=4 sw=4 noet: