3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
287 const char *name = Nullch;
288 enum token_type type = TOKENTYPE_NONE;
289 const struct debug_tokens *p;
290 SV* const report = newSVpvn("<== ", 4);
292 for (p = debug_tokens; p->token; p++) {
293 if (p->token == (int)rv) {
300 Perl_sv_catpv(aTHX_ report, name);
301 else if ((char)rv > ' ' && (char)rv < '~')
302 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
304 Perl_sv_catpv(aTHX_ report, "EOF");
306 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
309 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
312 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
314 case TOKENTYPE_OPNUM:
315 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
316 PL_op_name[yylval.ival]);
319 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
321 case TOKENTYPE_OPVAL:
323 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
324 PL_op_name[yylval.opval->op_type]);
325 if (yylval.opval->op_type == OP_CONST) {
326 Perl_sv_catpvf(aTHX_ report, " %s",
327 SvPEEK(cSVOPx_sv(yylval.opval)));
332 Perl_sv_catpv(aTHX_ report, "(opval=null)");
335 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
341 /* print the buffer with suitable escapes */
344 S_printbuf(pTHX_ const char* fmt, const char* s)
346 SV* const tmp = newSVpvn("", 0);
347 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
356 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
357 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
361 S_ao(pTHX_ int toketype)
363 if (*PL_bufptr == '=') {
365 if (toketype == ANDAND)
366 yylval.ival = OP_ANDASSIGN;
367 else if (toketype == OROR)
368 yylval.ival = OP_ORASSIGN;
369 else if (toketype == DORDOR)
370 yylval.ival = OP_DORASSIGN;
378 * When Perl expects an operator and finds something else, no_op
379 * prints the warning. It always prints "<something> found where
380 * operator expected. It prints "Missing semicolon on previous line?"
381 * if the surprise occurs at the start of the line. "do you need to
382 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
383 * where the compiler doesn't know if foo is a method call or a function.
384 * It prints "Missing operator before end of line" if there's nothing
385 * after the missing operator, or "... before <...>" if there is something
386 * after the missing operator.
390 S_no_op(pTHX_ const char *what, char *s)
392 char * const oldbp = PL_bufptr;
393 const bool is_first = (PL_oldbufptr == PL_linestart);
399 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
400 if (ckWARN_d(WARN_SYNTAX)) {
402 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
403 "\t(Missing semicolon on previous line?)\n");
404 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
406 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
407 if (t < PL_bufptr && isSPACE(*t))
408 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
409 "\t(Do you need to predeclare %.*s?)\n",
410 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
414 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
415 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
423 * Complain about missing quote/regexp/heredoc terminator.
424 * If it's called with (char *)NULL then it cauterizes the line buffer.
425 * If we're in a delimited string and the delimiter is a control
426 * character, it's reformatted into a two-char sequence like ^C.
431 S_missingterm(pTHX_ char *s)
436 char * const nl = strrchr(s,'\n');
442 iscntrl(PL_multi_close)
444 PL_multi_close < 32 || PL_multi_close == 127
448 tmpbuf[1] = (char)toCTRL(PL_multi_close);
453 *tmpbuf = (char)PL_multi_close;
457 q = strchr(s,'"') ? '\'' : '"';
458 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
461 #define FEATURE_IS_ENABLED(name) \
462 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
463 && feature_is_enabled(STR_WITH_LEN(name)))
465 * S_feature_is_enabled
466 * Check whether the named feature is enabled.
469 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
471 HV * const hinthv = GvHV(PL_hintgv);
472 char he_name[32] = "feature_";
473 (void) strncpy(&he_name[8], name, 24);
475 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
483 Perl_deprecate(pTHX_ const char *s)
485 if (ckWARN(WARN_DEPRECATED))
486 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
490 Perl_deprecate_old(pTHX_ const char *s)
492 /* This function should NOT be called for any new deprecated warnings */
493 /* Use Perl_deprecate instead */
495 /* It is here to maintain backward compatibility with the pre-5.8 */
496 /* warnings category hierarchy. The "deprecated" category used to */
497 /* live under the "syntax" category. It is now a top-level category */
498 /* in its own right. */
500 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
501 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
502 "Use of %s is deprecated", s);
506 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
507 * utf16-to-utf8-reversed.
510 #ifdef PERL_CR_FILTER
514 register const char *s = SvPVX_const(sv);
515 register const char * const e = s + SvCUR(sv);
516 /* outer loop optimized to do nothing if there are no CR-LFs */
518 if (*s++ == '\r' && *s == '\n') {
519 /* hit a CR-LF, need to copy the rest */
520 register char *d = s - 1;
523 if (*s == '\r' && s[1] == '\n')
534 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
536 const I32 count = FILTER_READ(idx+1, sv, maxlen);
537 if (count > 0 && !maxlen)
545 * Initialize variables. Uses the Perl save_stack to save its state (for
546 * recursive calls to the parser).
550 Perl_lex_start(pTHX_ SV *line)
555 SAVEI32(PL_lex_dojoin);
556 SAVEI32(PL_lex_brackets);
557 SAVEI32(PL_lex_casemods);
558 SAVEI32(PL_lex_starts);
559 SAVEI32(PL_lex_state);
560 SAVEVPTR(PL_lex_inpat);
561 SAVEI32(PL_lex_inwhat);
562 if (PL_lex_state == LEX_KNOWNEXT) {
563 I32 toke = PL_nexttoke;
564 while (--toke >= 0) {
565 SAVEI32(PL_nexttype[toke]);
566 SAVEVPTR(PL_nextval[toke]);
568 SAVEI32(PL_nexttoke);
570 SAVECOPLINE(PL_curcop);
573 SAVEPPTR(PL_oldbufptr);
574 SAVEPPTR(PL_oldoldbufptr);
575 SAVEPPTR(PL_last_lop);
576 SAVEPPTR(PL_last_uni);
577 SAVEPPTR(PL_linestart);
578 SAVESPTR(PL_linestr);
579 SAVEGENERICPV(PL_lex_brackstack);
580 SAVEGENERICPV(PL_lex_casestack);
581 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
582 SAVESPTR(PL_lex_stuff);
583 SAVEI32(PL_lex_defer);
584 SAVEI32(PL_sublex_info.sub_inwhat);
585 SAVESPTR(PL_lex_repl);
587 SAVEINT(PL_lex_expect);
589 PL_lex_state = LEX_NORMAL;
593 Newx(PL_lex_brackstack, 120, char);
594 Newx(PL_lex_casestack, 12, char);
596 *PL_lex_casestack = '\0';
599 PL_lex_stuff = Nullsv;
600 PL_lex_repl = Nullsv;
604 PL_sublex_info.sub_inwhat = 0;
606 if (SvREADONLY(PL_linestr))
607 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
608 s = SvPV_const(PL_linestr, len);
609 if (!len || s[len-1] != ';') {
610 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
611 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
612 sv_catpvn(PL_linestr, "\n;", 2);
614 SvTEMP_off(PL_linestr);
615 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
616 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
617 PL_last_lop = PL_last_uni = Nullch;
623 * Finalizer for lexing operations. Must be called when the parser is
624 * done with the lexer.
630 PL_doextract = FALSE;
635 * This subroutine has nothing to do with tilting, whether at windmills
636 * or pinball tables. Its name is short for "increment line". It
637 * increments the current line number in CopLINE(PL_curcop) and checks
638 * to see whether the line starts with a comment of the form
639 * # line 500 "foo.pm"
640 * If so, it sets the current line number and file to the values in the comment.
644 S_incline(pTHX_ char *s)
651 CopLINE_inc(PL_curcop);
654 while (SPACE_OR_TAB(*s)) s++;
655 if (strnEQ(s, "line", 4))
659 if (SPACE_OR_TAB(*s))
663 while (SPACE_OR_TAB(*s)) s++;
669 while (SPACE_OR_TAB(*s))
671 if (*s == '"' && (t = strchr(s+1, '"'))) {
676 for (t = s; !isSPACE(*t); t++) ;
679 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
681 if (*e != '\n' && *e != '\0')
682 return; /* false alarm */
688 const char * const cf = CopFILE(PL_curcop);
689 STRLEN tmplen = cf ? strlen(cf) : 0;
690 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
691 /* must copy *{"::_<(eval N)[oldfilename:L]"}
692 * to *{"::_<newfilename"} */
693 char smallbuf[256], smallbuf2[256];
694 char *tmpbuf, *tmpbuf2;
696 STRLEN tmplen2 = strlen(s);
697 if (tmplen + 3 < sizeof smallbuf)
700 Newx(tmpbuf, tmplen + 3, char);
701 if (tmplen2 + 3 < sizeof smallbuf2)
704 Newx(tmpbuf2, tmplen2 + 3, char);
705 tmpbuf[0] = tmpbuf2[0] = '_';
706 tmpbuf[1] = tmpbuf2[1] = '<';
707 memcpy(tmpbuf + 2, cf, ++tmplen);
708 memcpy(tmpbuf2 + 2, s, ++tmplen2);
710 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
712 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
714 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
715 /* adjust ${"::_<newfilename"} to store the new file name */
716 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
717 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
718 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
720 if (tmpbuf != smallbuf) Safefree(tmpbuf);
721 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
724 CopFILE_free(PL_curcop);
725 CopFILE_set(PL_curcop, s);
728 CopLINE_set(PL_curcop, atoi(n)-1);
733 * Called to gobble the appropriate amount and type of whitespace.
734 * Skips comments as well.
738 S_skipspace(pTHX_ register char *s)
740 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
741 while (s < PL_bufend && SPACE_OR_TAB(*s))
747 SSize_t oldprevlen, oldoldprevlen;
748 SSize_t oldloplen = 0, oldunilen = 0;
749 while (s < PL_bufend && isSPACE(*s)) {
750 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
755 if (s < PL_bufend && *s == '#') {
756 while (s < PL_bufend && *s != '\n')
760 if (PL_in_eval && !PL_rsfp) {
767 /* only continue to recharge the buffer if we're at the end
768 * of the buffer, we're not reading from a source filter, and
769 * we're in normal lexing mode
771 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
772 PL_lex_state == LEX_FORMLINE)
775 /* try to recharge the buffer */
776 if ((s = filter_gets(PL_linestr, PL_rsfp,
777 (prevlen = SvCUR(PL_linestr)))) == Nullch)
779 /* end of file. Add on the -p or -n magic */
782 ";}continue{print or die qq(-p destination: $!\\n);}");
783 PL_minus_n = PL_minus_p = 0;
785 else if (PL_minus_n) {
786 sv_setpvn(PL_linestr, ";}", 2);
790 sv_setpvn(PL_linestr,";", 1);
792 /* reset variables for next time we lex */
793 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
795 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
796 PL_last_lop = PL_last_uni = Nullch;
798 /* Close the filehandle. Could be from -P preprocessor,
799 * STDIN, or a regular file. If we were reading code from
800 * STDIN (because the commandline held no -e or filename)
801 * then we don't close it, we reset it so the code can
802 * read from STDIN too.
805 if (PL_preprocess && !PL_in_eval)
806 (void)PerlProc_pclose(PL_rsfp);
807 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
808 PerlIO_clearerr(PL_rsfp);
810 (void)PerlIO_close(PL_rsfp);
815 /* not at end of file, so we only read another line */
816 /* make corresponding updates to old pointers, for yyerror() */
817 oldprevlen = PL_oldbufptr - PL_bufend;
818 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
820 oldunilen = PL_last_uni - PL_bufend;
822 oldloplen = PL_last_lop - PL_bufend;
823 PL_linestart = PL_bufptr = s + prevlen;
824 PL_bufend = s + SvCUR(PL_linestr);
826 PL_oldbufptr = s + oldprevlen;
827 PL_oldoldbufptr = s + oldoldprevlen;
829 PL_last_uni = s + oldunilen;
831 PL_last_lop = s + oldloplen;
834 /* debugger active and we're not compiling the debugger code,
835 * so store the line into the debugger's array of lines
837 if (PERLDB_LINE && PL_curstash != PL_debstash) {
838 SV * const sv = NEWSV(85,0);
840 sv_upgrade(sv, SVt_PVMG);
841 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
844 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
851 * Check the unary operators to ensure there's no ambiguity in how they're
852 * used. An ambiguous piece of code would be:
854 * This doesn't mean rand() + 5. Because rand() is a unary operator,
855 * the +5 is its argument.
864 if (PL_oldoldbufptr != PL_last_uni)
866 while (isSPACE(*PL_last_uni))
868 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
869 if ((t = strchr(s, '(')) && t < PL_bufptr)
871 if (ckWARN_d(WARN_AMBIGUOUS)){
874 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
875 "Warning: Use of \"%s\" without parentheses is ambiguous",
882 * LOP : macro to build a list operator. Its behaviour has been replaced
883 * with a subroutine, S_lop() for which LOP is just another name.
886 #define LOP(f,x) return lop(f,x,s)
890 * Build a list operator (or something that might be one). The rules:
891 * - if we have a next token, then it's a list operator [why?]
892 * - if the next thing is an opening paren, then it's a function
893 * - else it's a list operator
897 S_lop(pTHX_ I32 f, int x, char *s)
903 PL_last_lop = PL_oldbufptr;
904 PL_last_lop_op = (OPCODE)f;
906 return REPORT(LSTOP);
913 return REPORT(LSTOP);
918 * When the lexer realizes it knows the next token (for instance,
919 * it is reordering tokens for the parser) then it can call S_force_next
920 * to know what token to return the next time the lexer is called. Caller
921 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
922 * handles the token correctly.
926 S_force_next(pTHX_ I32 type)
928 PL_nexttype[PL_nexttoke] = type;
930 if (PL_lex_state != LEX_KNOWNEXT) {
931 PL_lex_defer = PL_lex_state;
932 PL_lex_expect = PL_expect;
933 PL_lex_state = LEX_KNOWNEXT;
938 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
940 SV * const sv = newSVpvn(start,len);
941 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
948 * When the lexer knows the next thing is a word (for instance, it has
949 * just seen -> and it knows that the next char is a word char, then
950 * it calls S_force_word to stick the next word into the PL_next lookahead.
953 * char *start : buffer position (must be within PL_linestr)
954 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
955 * int check_keyword : if true, Perl checks to make sure the word isn't
956 * a keyword (do this if the word is a label, e.g. goto FOO)
957 * int allow_pack : if true, : characters will also be allowed (require,
959 * int allow_initial_tick : used by the "sub" lexer only.
963 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
968 start = skipspace(start);
970 if (isIDFIRST_lazy_if(s,UTF) ||
971 (allow_pack && *s == ':') ||
972 (allow_initial_tick && *s == '\'') )
974 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
975 if (check_keyword && keyword(PL_tokenbuf, len))
977 if (token == METHOD) {
982 PL_expect = XOPERATOR;
985 PL_nextval[PL_nexttoke].opval
986 = (OP*)newSVOP(OP_CONST,0,
987 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
988 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
996 * Called when the lexer wants $foo *foo &foo etc, but the program
997 * text only contains the "foo" portion. The first argument is a pointer
998 * to the "foo", and the second argument is the type symbol to prefix.
999 * Forces the next token to be a "WORD".
1000 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1004 S_force_ident(pTHX_ register const char *s, int kind)
1007 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1008 PL_nextval[PL_nexttoke].opval = o;
1011 o->op_private = OPpCONST_ENTERED;
1012 /* XXX see note in pp_entereval() for why we forgo typo
1013 warnings if the symbol must be introduced in an eval.
1015 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1016 kind == '$' ? SVt_PV :
1017 kind == '@' ? SVt_PVAV :
1018 kind == '%' ? SVt_PVHV :
1026 Perl_str_to_version(pTHX_ SV *sv)
1031 const char *start = SvPV_const(sv,len);
1032 const char * const end = start + len;
1033 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1034 while (start < end) {
1038 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1043 retval += ((NV)n)/nshift;
1052 * Forces the next token to be a version number.
1053 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1054 * and if "guessing" is TRUE, then no new token is created (and the caller
1055 * must use an alternative parsing method).
1059 S_force_version(pTHX_ char *s, int guessing)
1061 OP *version = Nullop;
1070 while (isDIGIT(*d) || *d == '_' || *d == '.')
1072 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1074 s = scan_num(s, &yylval);
1075 version = yylval.opval;
1076 ver = cSVOPx(version)->op_sv;
1077 if (SvPOK(ver) && !SvNIOK(ver)) {
1078 SvUPGRADE(ver, SVt_PVNV);
1079 SvNV_set(ver, str_to_version(ver));
1080 SvNOK_on(ver); /* hint that it is a version */
1087 /* NOTE: The parser sees the package name and the VERSION swapped */
1088 PL_nextval[PL_nexttoke].opval = version;
1096 * Tokenize a quoted string passed in as an SV. It finds the next
1097 * chunk, up to end of string or a backslash. It may make a new
1098 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1103 S_tokeq(pTHX_ SV *sv)
1106 register char *send;
1114 s = SvPV_force(sv, len);
1115 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1118 while (s < send && *s != '\\')
1123 if ( PL_hints & HINT_NEW_STRING ) {
1124 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1130 if (s + 1 < send && (s[1] == '\\'))
1131 s++; /* all that, just for this */
1136 SvCUR_set(sv, d - SvPVX_const(sv));
1138 if ( PL_hints & HINT_NEW_STRING )
1139 return new_constant(NULL, 0, "q", sv, pv, "q");
1144 * Now come three functions related to double-quote context,
1145 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1146 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1147 * interact with PL_lex_state, and create fake ( ... ) argument lists
1148 * to handle functions and concatenation.
1149 * They assume that whoever calls them will be setting up a fake
1150 * join call, because each subthing puts a ',' after it. This lets
1153 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1155 * (I'm not sure whether the spurious commas at the end of lcfirst's
1156 * arguments and join's arguments are created or not).
1161 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1163 * Pattern matching will set PL_lex_op to the pattern-matching op to
1164 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1166 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1168 * Everything else becomes a FUNC.
1170 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1171 * had an OP_CONST or OP_READLINE). This just sets us up for a
1172 * call to S_sublex_push().
1176 S_sublex_start(pTHX)
1178 register const I32 op_type = yylval.ival;
1180 if (op_type == OP_NULL) {
1181 yylval.opval = PL_lex_op;
1185 if (op_type == OP_CONST || op_type == OP_READLINE) {
1186 SV *sv = tokeq(PL_lex_stuff);
1188 if (SvTYPE(sv) == SVt_PVIV) {
1189 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1191 const char *p = SvPV_const(sv, len);
1192 SV * const nsv = newSVpvn(p, len);
1198 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1199 PL_lex_stuff = Nullsv;
1200 /* Allow <FH> // "foo" */
1201 if (op_type == OP_READLINE)
1202 PL_expect = XTERMORDORDOR;
1206 PL_sublex_info.super_state = PL_lex_state;
1207 PL_sublex_info.sub_inwhat = op_type;
1208 PL_sublex_info.sub_op = PL_lex_op;
1209 PL_lex_state = LEX_INTERPPUSH;
1213 yylval.opval = PL_lex_op;
1223 * Create a new scope to save the lexing state. The scope will be
1224 * ended in S_sublex_done. Returns a '(', starting the function arguments
1225 * to the uc, lc, etc. found before.
1226 * Sets PL_lex_state to LEX_INTERPCONCAT.
1235 PL_lex_state = PL_sublex_info.super_state;
1236 SAVEI32(PL_lex_dojoin);
1237 SAVEI32(PL_lex_brackets);
1238 SAVEI32(PL_lex_casemods);
1239 SAVEI32(PL_lex_starts);
1240 SAVEI32(PL_lex_state);
1241 SAVEVPTR(PL_lex_inpat);
1242 SAVEI32(PL_lex_inwhat);
1243 SAVECOPLINE(PL_curcop);
1244 SAVEPPTR(PL_bufptr);
1245 SAVEPPTR(PL_bufend);
1246 SAVEPPTR(PL_oldbufptr);
1247 SAVEPPTR(PL_oldoldbufptr);
1248 SAVEPPTR(PL_last_lop);
1249 SAVEPPTR(PL_last_uni);
1250 SAVEPPTR(PL_linestart);
1251 SAVESPTR(PL_linestr);
1252 SAVEGENERICPV(PL_lex_brackstack);
1253 SAVEGENERICPV(PL_lex_casestack);
1255 PL_linestr = PL_lex_stuff;
1256 PL_lex_stuff = Nullsv;
1258 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1259 = SvPVX(PL_linestr);
1260 PL_bufend += SvCUR(PL_linestr);
1261 PL_last_lop = PL_last_uni = Nullch;
1262 SAVEFREESV(PL_linestr);
1264 PL_lex_dojoin = FALSE;
1265 PL_lex_brackets = 0;
1266 Newx(PL_lex_brackstack, 120, char);
1267 Newx(PL_lex_casestack, 12, char);
1268 PL_lex_casemods = 0;
1269 *PL_lex_casestack = '\0';
1271 PL_lex_state = LEX_INTERPCONCAT;
1272 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1274 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1275 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1276 PL_lex_inpat = PL_sublex_info.sub_op;
1278 PL_lex_inpat = Nullop;
1285 * Restores lexer state after a S_sublex_push.
1292 if (!PL_lex_starts++) {
1293 SV * const sv = newSVpvn("",0);
1294 if (SvUTF8(PL_linestr))
1296 PL_expect = XOPERATOR;
1297 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1301 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1302 PL_lex_state = LEX_INTERPCASEMOD;
1306 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1307 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1308 PL_linestr = PL_lex_repl;
1310 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1311 PL_bufend += SvCUR(PL_linestr);
1312 PL_last_lop = PL_last_uni = Nullch;
1313 SAVEFREESV(PL_linestr);
1314 PL_lex_dojoin = FALSE;
1315 PL_lex_brackets = 0;
1316 PL_lex_casemods = 0;
1317 *PL_lex_casestack = '\0';
1319 if (SvEVALED(PL_lex_repl)) {
1320 PL_lex_state = LEX_INTERPNORMAL;
1322 /* we don't clear PL_lex_repl here, so that we can check later
1323 whether this is an evalled subst; that means we rely on the
1324 logic to ensure sublex_done() is called again only via the
1325 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1328 PL_lex_state = LEX_INTERPCONCAT;
1329 PL_lex_repl = Nullsv;
1335 PL_bufend = SvPVX(PL_linestr);
1336 PL_bufend += SvCUR(PL_linestr);
1337 PL_expect = XOPERATOR;
1338 PL_sublex_info.sub_inwhat = 0;
1346 Extracts a pattern, double-quoted string, or transliteration. This
1349 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1350 processing a pattern (PL_lex_inpat is true), a transliteration
1351 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1353 Returns a pointer to the character scanned up to. Iff this is
1354 advanced from the start pointer supplied (ie if anything was
1355 successfully parsed), will leave an OP for the substring scanned
1356 in yylval. Caller must intuit reason for not parsing further
1357 by looking at the next characters herself.
1361 double-quoted style: \r and \n
1362 regexp special ones: \D \s
1364 backrefs: \1 (deprecated in substitution replacements)
1365 case and quoting: \U \Q \E
1366 stops on @ and $, but not for $ as tail anchor
1368 In transliterations:
1369 characters are VERY literal, except for - not at the start or end
1370 of the string, which indicates a range. scan_const expands the
1371 range to the full set of intermediate characters.
1373 In double-quoted strings:
1375 double-quoted style: \r and \n
1377 backrefs: \1 (deprecated)
1378 case and quoting: \U \Q \E
1381 scan_const does *not* construct ops to handle interpolated strings.
1382 It stops processing as soon as it finds an embedded $ or @ variable
1383 and leaves it to the caller to work out what's going on.
1385 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1387 $ in pattern could be $foo or could be tail anchor. Assumption:
1388 it's a tail anchor if $ is the last thing in the string, or if it's
1389 followed by one of ")| \n\t"
1391 \1 (backreferences) are turned into $1
1393 The structure of the code is
1394 while (there's a character to process) {
1395 handle transliteration ranges
1396 skip regexp comments
1397 skip # initiated comments in //x patterns
1398 check for embedded @foo
1399 check for embedded scalars
1401 leave intact backslashes from leave (below)
1402 deprecate \1 in strings and sub replacements
1403 handle string-changing backslashes \l \U \Q \E, etc.
1404 switch (what was escaped) {
1405 handle - in a transliteration (becomes a literal -)
1406 handle \132 octal characters
1407 handle 0x15 hex characters
1408 handle \cV (control V)
1409 handle printf backslashes (\f, \r, \n, etc)
1411 } (end if backslash)
1412 } (end while character to read)
1417 S_scan_const(pTHX_ char *start)
1419 register char *send = PL_bufend; /* end of the constant */
1420 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1421 register char *s = start; /* start of the constant */
1422 register char *d = SvPVX(sv); /* destination for copies */
1423 bool dorange = FALSE; /* are we in a translit range? */
1424 bool didrange = FALSE; /* did we just finish a range? */
1425 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1426 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1429 UV literal_endpoint = 0;
1432 const char *leaveit = /* set of acceptably-backslashed characters */
1434 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1437 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1438 /* If we are doing a trans and we know we want UTF8 set expectation */
1439 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1440 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1444 while (s < send || dorange) {
1445 /* get transliterations out of the way (they're most literal) */
1446 if (PL_lex_inwhat == OP_TRANS) {
1447 /* expand a range A-Z to the full set of characters. AIE! */
1449 I32 i; /* current expanded character */
1450 I32 min; /* first character in range */
1451 I32 max; /* last character in range */
1454 char * const c = (char*)utf8_hop((U8*)d, -1);
1458 *c = (char)UTF_TO_NATIVE(0xff);
1459 /* mark the range as done, and continue */
1465 i = d - SvPVX_const(sv); /* remember current offset */
1466 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1467 d = SvPVX(sv) + i; /* refresh d after realloc */
1468 d -= 2; /* eat the first char and the - */
1470 min = (U8)*d; /* first char in range */
1471 max = (U8)d[1]; /* last char in range */
1475 "Invalid range \"%c-%c\" in transliteration operator",
1476 (char)min, (char)max);
1480 if (literal_endpoint == 2 &&
1481 ((isLOWER(min) && isLOWER(max)) ||
1482 (isUPPER(min) && isUPPER(max)))) {
1484 for (i = min; i <= max; i++)
1486 *d++ = NATIVE_TO_NEED(has_utf8,i);
1488 for (i = min; i <= max; i++)
1490 *d++ = NATIVE_TO_NEED(has_utf8,i);
1495 for (i = min; i <= max; i++)
1498 /* mark the range as done, and continue */
1502 literal_endpoint = 0;
1507 /* range begins (ignore - as first or last char) */
1508 else if (*s == '-' && s+1 < send && s != start) {
1510 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1513 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1523 literal_endpoint = 0;
1528 /* if we get here, we're not doing a transliteration */
1530 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1531 except for the last char, which will be done separately. */
1532 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1534 while (s+1 < send && *s != ')')
1535 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1537 else if (s[2] == '{' /* This should match regcomp.c */
1538 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1541 char *regparse = s + (s[2] == '{' ? 3 : 4);
1544 while (count && (c = *regparse)) {
1545 if (c == '\\' && regparse[1])
1553 if (*regparse != ')')
1554 regparse--; /* Leave one char for continuation. */
1555 while (s < regparse)
1556 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1560 /* likewise skip #-initiated comments in //x patterns */
1561 else if (*s == '#' && PL_lex_inpat &&
1562 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1563 while (s+1 < send && *s != '\n')
1564 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1567 /* check for embedded arrays
1568 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1570 else if (*s == '@' && s[1]
1571 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1574 /* check for embedded scalars. only stop if we're sure it's a
1577 else if (*s == '$') {
1578 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1580 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1581 break; /* in regexp, $ might be tail anchor */
1584 /* End of else if chain - OP_TRANS rejoin rest */
1587 if (*s == '\\' && s+1 < send) {
1590 /* some backslashes we leave behind */
1591 if (*leaveit && *s && strchr(leaveit, *s)) {
1592 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1593 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1597 /* deprecate \1 in strings and substitution replacements */
1598 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1599 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1601 if (ckWARN(WARN_SYNTAX))
1602 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1607 /* string-change backslash escapes */
1608 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1613 /* if we get here, it's either a quoted -, or a digit */
1616 /* quoted - in transliterations */
1618 if (PL_lex_inwhat == OP_TRANS) {
1628 Perl_warner(aTHX_ packWARN(WARN_MISC),
1629 "Unrecognized escape \\%c passed through",
1631 /* default action is to copy the quoted character */
1632 goto default_action;
1635 /* \132 indicates an octal constant */
1636 case '0': case '1': case '2': case '3':
1637 case '4': case '5': case '6': case '7':
1641 uv = grok_oct(s, &len, &flags, NULL);
1644 goto NUM_ESCAPE_INSERT;
1646 /* \x24 indicates a hex constant */
1650 char* const e = strchr(s, '}');
1651 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1652 PERL_SCAN_DISALLOW_PREFIX;
1657 yyerror("Missing right brace on \\x{}");
1661 uv = grok_hex(s, &len, &flags, NULL);
1667 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1668 uv = grok_hex(s, &len, &flags, NULL);
1674 /* Insert oct or hex escaped character.
1675 * There will always enough room in sv since such
1676 * escapes will be longer than any UTF-8 sequence
1677 * they can end up as. */
1679 /* We need to map to chars to ASCII before doing the tests
1682 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1683 if (!has_utf8 && uv > 255) {
1684 /* Might need to recode whatever we have
1685 * accumulated so far if it contains any
1688 * (Can't we keep track of that and avoid
1689 * this rescan? --jhi)
1693 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1694 if (!NATIVE_IS_INVARIANT(*c)) {
1699 const STRLEN offset = d - SvPVX_const(sv);
1701 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1705 while (src >= (const U8 *)SvPVX_const(sv)) {
1706 if (!NATIVE_IS_INVARIANT(*src)) {
1707 const U8 ch = NATIVE_TO_ASCII(*src);
1708 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1709 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1719 if (has_utf8 || uv > 255) {
1720 d = (char*)uvchr_to_utf8((U8*)d, uv);
1722 if (PL_lex_inwhat == OP_TRANS &&
1723 PL_sublex_info.sub_op) {
1724 PL_sublex_info.sub_op->op_private |=
1725 (PL_lex_repl ? OPpTRANS_FROM_UTF
1738 /* \N{LATIN SMALL LETTER A} is a named character */
1742 char* e = strchr(s, '}');
1748 yyerror("Missing right brace on \\N{}");
1752 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1754 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1755 PERL_SCAN_DISALLOW_PREFIX;
1758 uv = grok_hex(s, &len, &flags, NULL);
1760 goto NUM_ESCAPE_INSERT;
1762 res = newSVpvn(s + 1, e - s - 1);
1763 res = new_constant( Nullch, 0, "charnames",
1764 res, Nullsv, "\\N{...}" );
1766 sv_utf8_upgrade(res);
1767 str = SvPV_const(res,len);
1768 #ifdef EBCDIC_NEVER_MIND
1769 /* charnames uses pack U and that has been
1770 * recently changed to do the below uni->native
1771 * mapping, so this would be redundant (and wrong,
1772 * the code point would be doubly converted).
1773 * But leave this in just in case the pack U change
1774 * gets revoked, but the semantics is still
1775 * desireable for charnames. --jhi */
1777 UV uv = utf8_to_uvchr((const U8*)str, 0);
1780 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1782 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1783 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1784 str = SvPV_const(res, len);
1788 if (!has_utf8 && SvUTF8(res)) {
1789 const char * const ostart = SvPVX_const(sv);
1790 SvCUR_set(sv, d - ostart);
1793 sv_utf8_upgrade(sv);
1794 /* this just broke our allocation above... */
1795 SvGROW(sv, (STRLEN)(send - start));
1796 d = SvPVX(sv) + SvCUR(sv);
1799 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1800 const char * const odest = SvPVX_const(sv);
1802 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1803 d = SvPVX(sv) + (d - odest);
1805 Copy(str, d, len, char);
1812 yyerror("Missing braces on \\N{}");
1815 /* \c is a control character */
1824 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1827 yyerror("Missing control char name in \\c");
1831 /* printf-style backslashes, formfeeds, newlines, etc */
1833 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1836 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1839 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1842 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1845 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1848 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1851 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1857 } /* end if (backslash) */
1864 /* If we started with encoded form, or already know we want it
1865 and then encode the next character */
1866 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1868 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1869 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1872 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1873 const STRLEN off = d - SvPVX_const(sv);
1874 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1876 d = (char*)uvchr_to_utf8((U8*)d, uv);
1880 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1882 } /* while loop to process each character */
1884 /* terminate the string and set up the sv */
1886 SvCUR_set(sv, d - SvPVX_const(sv));
1887 if (SvCUR(sv) >= SvLEN(sv))
1888 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1891 if (PL_encoding && !has_utf8) {
1892 sv_recode_to_utf8(sv, PL_encoding);
1898 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1899 PL_sublex_info.sub_op->op_private |=
1900 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1904 /* shrink the sv if we allocated more than we used */
1905 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1906 SvPV_shrink_to_cur(sv);
1909 /* return the substring (via yylval) only if we parsed anything */
1910 if (s > PL_bufptr) {
1911 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1912 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1914 ( PL_lex_inwhat == OP_TRANS
1916 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1919 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1926 * Returns TRUE if there's more to the expression (e.g., a subscript),
1929 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1931 * ->[ and ->{ return TRUE
1932 * { and [ outside a pattern are always subscripts, so return TRUE
1933 * if we're outside a pattern and it's not { or [, then return FALSE
1934 * if we're in a pattern and the first char is a {
1935 * {4,5} (any digits around the comma) returns FALSE
1936 * if we're in a pattern and the first char is a [
1938 * [SOMETHING] has a funky algorithm to decide whether it's a
1939 * character class or not. It has to deal with things like
1940 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1941 * anything else returns TRUE
1944 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1947 S_intuit_more(pTHX_ register char *s)
1949 if (PL_lex_brackets)
1951 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1953 if (*s != '{' && *s != '[')
1958 /* In a pattern, so maybe we have {n,m}. */
1975 /* On the other hand, maybe we have a character class */
1978 if (*s == ']' || *s == '^')
1981 /* this is terrifying, and it works */
1982 int weight = 2; /* let's weigh the evidence */
1984 unsigned char un_char = 255, last_un_char;
1985 const char * const send = strchr(s,']');
1986 char tmpbuf[sizeof PL_tokenbuf * 4];
1988 if (!send) /* has to be an expression */
1991 Zero(seen,256,char);
1994 else if (isDIGIT(*s)) {
1996 if (isDIGIT(s[1]) && s[2] == ']')
2002 for (; s < send; s++) {
2003 last_un_char = un_char;
2004 un_char = (unsigned char)*s;
2009 weight -= seen[un_char] * 10;
2010 if (isALNUM_lazy_if(s+1,UTF)) {
2011 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2012 if ((int)strlen(tmpbuf) > 1
2013 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2018 else if (*s == '$' && s[1] &&
2019 strchr("[#!%*<>()-=",s[1])) {
2020 if (/*{*/ strchr("])} =",s[2]))
2029 if (strchr("wds]",s[1]))
2031 else if (seen['\''] || seen['"'])
2033 else if (strchr("rnftbxcav",s[1]))
2035 else if (isDIGIT(s[1])) {
2037 while (s[1] && isDIGIT(s[1]))
2047 if (strchr("aA01! ",last_un_char))
2049 if (strchr("zZ79~",s[1]))
2051 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2052 weight -= 5; /* cope with negative subscript */
2055 if (!isALNUM(last_un_char)
2056 && !(last_un_char == '$' || last_un_char == '@'
2057 || last_un_char == '&')
2058 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2063 if (keyword(tmpbuf, d - tmpbuf))
2066 if (un_char == last_un_char + 1)
2068 weight -= seen[un_char];
2073 if (weight >= 0) /* probably a character class */
2083 * Does all the checking to disambiguate
2085 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2086 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2088 * First argument is the stuff after the first token, e.g. "bar".
2090 * Not a method if bar is a filehandle.
2091 * Not a method if foo is a subroutine prototyped to take a filehandle.
2092 * Not a method if it's really "Foo $bar"
2093 * Method if it's "foo $bar"
2094 * Not a method if it's really "print foo $bar"
2095 * Method if it's really "foo package::" (interpreted as package->foo)
2096 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2097 * Not a method if bar is a filehandle or package, but is quoted with
2102 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2104 char *s = start + (*start == '$');
2105 char tmpbuf[sizeof PL_tokenbuf];
2110 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2114 const char *proto = SvPVX_const(cv);
2125 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2126 /* start is the beginning of the possible filehandle/object,
2127 * and s is the end of it
2128 * tmpbuf is a copy of it
2131 if (*start == '$') {
2132 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2137 return *s == '(' ? FUNCMETH : METHOD;
2139 if (!keyword(tmpbuf, len)) {
2140 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2145 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2146 if (indirgv && GvCVu(indirgv))
2148 /* filehandle or package name makes it a method */
2149 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2151 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2152 return 0; /* no assumptions -- "=>" quotes bearword */
2154 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2155 newSVpvn(tmpbuf,len));
2156 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2160 return *s == '(' ? FUNCMETH : METHOD;
2168 * Return a string of Perl code to load the debugger. If PERL5DB
2169 * is set, it will return the contents of that, otherwise a
2170 * compile-time require of perl5db.pl.
2177 const char * const pdb = PerlEnv_getenv("PERL5DB");
2181 SETERRNO(0,SS_NORMAL);
2182 return "BEGIN { require 'perl5db.pl' }";
2188 /* Encoded script support. filter_add() effectively inserts a
2189 * 'pre-processing' function into the current source input stream.
2190 * Note that the filter function only applies to the current source file
2191 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2193 * The datasv parameter (which may be NULL) can be used to pass
2194 * private data to this instance of the filter. The filter function
2195 * can recover the SV using the FILTER_DATA macro and use it to
2196 * store private buffers and state information.
2198 * The supplied datasv parameter is upgraded to a PVIO type
2199 * and the IoDIRP/IoANY field is used to store the function pointer,
2200 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2201 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2202 * private use must be set using malloc'd pointers.
2206 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2211 if (!PL_rsfp_filters)
2212 PL_rsfp_filters = newAV();
2214 datasv = NEWSV(255,0);
2215 SvUPGRADE(datasv, SVt_PVIO);
2216 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2217 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2218 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2219 IoANY(datasv), SvPV_nolen(datasv)));
2220 av_unshift(PL_rsfp_filters, 1);
2221 av_store(PL_rsfp_filters, 0, datasv) ;
2226 /* Delete most recently added instance of this filter function. */
2228 Perl_filter_del(pTHX_ filter_t funcp)
2233 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2235 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2237 /* if filter is on top of stack (usual case) just pop it off */
2238 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2239 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2240 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2241 IoANY(datasv) = (void *)NULL;
2242 sv_free(av_pop(PL_rsfp_filters));
2246 /* we need to search for the correct entry and clear it */
2247 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2251 /* Invoke the idxth filter function for the current rsfp. */
2252 /* maxlen 0 = read one text line */
2254 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2259 if (!PL_rsfp_filters)
2261 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2262 /* Provide a default input filter to make life easy. */
2263 /* Note that we append to the line. This is handy. */
2264 DEBUG_P(PerlIO_printf(Perl_debug_log,
2265 "filter_read %d: from rsfp\n", idx));
2269 const int old_len = SvCUR(buf_sv);
2271 /* ensure buf_sv is large enough */
2272 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2273 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2274 if (PerlIO_error(PL_rsfp))
2275 return -1; /* error */
2277 return 0 ; /* end of file */
2279 SvCUR_set(buf_sv, old_len + len) ;
2282 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2283 if (PerlIO_error(PL_rsfp))
2284 return -1; /* error */
2286 return 0 ; /* end of file */
2289 return SvCUR(buf_sv);
2291 /* Skip this filter slot if filter has been deleted */
2292 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2293 DEBUG_P(PerlIO_printf(Perl_debug_log,
2294 "filter_read %d: skipped (filter deleted)\n",
2296 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2298 /* Get function pointer hidden within datasv */
2299 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2300 DEBUG_P(PerlIO_printf(Perl_debug_log,
2301 "filter_read %d: via function %p (%s)\n",
2302 idx, datasv, SvPV_nolen_const(datasv)));
2303 /* Call function. The function is expected to */
2304 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2305 /* Return: <0:error, =0:eof, >0:not eof */
2306 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2310 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2312 #ifdef PERL_CR_FILTER
2313 if (!PL_rsfp_filters) {
2314 filter_add(S_cr_textfilter,NULL);
2317 if (PL_rsfp_filters) {
2319 SvCUR_set(sv, 0); /* start with empty line */
2320 if (FILTER_READ(0, sv, 0) > 0)
2321 return ( SvPVX(sv) ) ;
2326 return (sv_gets(sv, fp, append));
2330 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2334 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2338 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2339 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2341 return GvHV(gv); /* Foo:: */
2344 /* use constant CLASS => 'MyClass' */
2345 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2347 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2348 pkgname = SvPV_nolen_const(sv);
2352 return gv_stashpv(pkgname, FALSE);
2356 S_tokenize_use(pTHX_ int is_use, char *s) {
2357 if (PL_expect != XSTATE)
2358 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2359 is_use ? "use" : "no"));
2361 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2362 s = force_version(s, TRUE);
2363 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2364 PL_nextval[PL_nexttoke].opval = Nullop;
2367 else if (*s == 'v') {
2368 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2369 s = force_version(s, FALSE);
2373 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2374 s = force_version(s, FALSE);
2376 yylval.ival = is_use;
2380 static const char* const exp_name[] =
2381 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2382 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2389 Works out what to call the token just pulled out of the input
2390 stream. The yacc parser takes care of taking the ops we return and
2391 stitching them into a tree.
2397 if read an identifier
2398 if we're in a my declaration
2399 croak if they tried to say my($foo::bar)
2400 build the ops for a my() declaration
2401 if it's an access to a my() variable
2402 are we in a sort block?
2403 croak if my($a); $a <=> $b
2404 build ops for access to a my() variable
2405 if in a dq string, and they've said @foo and we can't find @foo
2407 build ops for a bareword
2408 if we already built the token before, use it.
2413 #pragma segment Perl_yylex
2418 register char *s = PL_bufptr;
2424 SV* tmp = newSVpvn("", 0);
2425 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2426 (IV)CopLINE(PL_curcop),
2427 lex_state_names[PL_lex_state],
2428 exp_name[PL_expect],
2429 pv_display(tmp, s, strlen(s), 0, 60));
2432 /* check if there's an identifier for us to look at */
2433 if (PL_pending_ident)
2434 return REPORT(S_pending_ident(aTHX));
2436 /* no identifier pending identification */
2438 switch (PL_lex_state) {
2440 case LEX_NORMAL: /* Some compilers will produce faster */
2441 case LEX_INTERPNORMAL: /* code if we comment these out. */
2445 /* when we've already built the next token, just pull it out of the queue */
2448 yylval = PL_nextval[PL_nexttoke];
2450 PL_lex_state = PL_lex_defer;
2451 PL_expect = PL_lex_expect;
2452 PL_lex_defer = LEX_NORMAL;
2454 return REPORT(PL_nexttype[PL_nexttoke]);
2456 /* interpolated case modifiers like \L \U, including \Q and \E.
2457 when we get here, PL_bufptr is at the \
2459 case LEX_INTERPCASEMOD:
2461 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2462 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2464 /* handle \E or end of string */
2465 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2467 if (PL_lex_casemods) {
2468 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2469 PL_lex_casestack[PL_lex_casemods] = '\0';
2471 if (PL_bufptr != PL_bufend
2472 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2474 PL_lex_state = LEX_INTERPCONCAT;
2478 if (PL_bufptr != PL_bufend)
2480 PL_lex_state = LEX_INTERPCONCAT;
2484 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2485 "### Saw case modifier\n"); });
2487 if (s[1] == '\\' && s[2] == 'E') {
2489 PL_lex_state = LEX_INTERPCONCAT;
2494 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2495 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2496 if ((*s == 'L' || *s == 'U') &&
2497 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2498 PL_lex_casestack[--PL_lex_casemods] = '\0';
2501 if (PL_lex_casemods > 10)
2502 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2503 PL_lex_casestack[PL_lex_casemods++] = *s;
2504 PL_lex_casestack[PL_lex_casemods] = '\0';
2505 PL_lex_state = LEX_INTERPCONCAT;
2506 PL_nextval[PL_nexttoke].ival = 0;
2509 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2511 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2513 PL_nextval[PL_nexttoke].ival = OP_LC;
2515 PL_nextval[PL_nexttoke].ival = OP_UC;
2517 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2519 Perl_croak(aTHX_ "panic: yylex");
2523 if (PL_lex_starts) {
2526 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2527 if (PL_lex_casemods == 1 && PL_lex_inpat)
2536 case LEX_INTERPPUSH:
2537 return REPORT(sublex_push());
2539 case LEX_INTERPSTART:
2540 if (PL_bufptr == PL_bufend)
2541 return REPORT(sublex_done());
2542 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2543 "### Interpolated variable\n"); });
2545 PL_lex_dojoin = (*PL_bufptr == '@');
2546 PL_lex_state = LEX_INTERPNORMAL;
2547 if (PL_lex_dojoin) {
2548 PL_nextval[PL_nexttoke].ival = 0;
2550 force_ident("\"", '$');
2551 PL_nextval[PL_nexttoke].ival = 0;
2553 PL_nextval[PL_nexttoke].ival = 0;
2555 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2558 if (PL_lex_starts++) {
2560 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2561 if (!PL_lex_casemods && PL_lex_inpat)
2568 case LEX_INTERPENDMAYBE:
2569 if (intuit_more(PL_bufptr)) {
2570 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2576 if (PL_lex_dojoin) {
2577 PL_lex_dojoin = FALSE;
2578 PL_lex_state = LEX_INTERPCONCAT;
2581 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2582 && SvEVALED(PL_lex_repl))
2584 if (PL_bufptr != PL_bufend)
2585 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2586 PL_lex_repl = Nullsv;
2589 case LEX_INTERPCONCAT:
2591 if (PL_lex_brackets)
2592 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2594 if (PL_bufptr == PL_bufend)
2595 return REPORT(sublex_done());
2597 if (SvIVX(PL_linestr) == '\'') {
2598 SV *sv = newSVsv(PL_linestr);
2601 else if ( PL_hints & HINT_NEW_RE )
2602 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2603 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2607 s = scan_const(PL_bufptr);
2609 PL_lex_state = LEX_INTERPCASEMOD;
2611 PL_lex_state = LEX_INTERPSTART;
2614 if (s != PL_bufptr) {
2615 PL_nextval[PL_nexttoke] = yylval;
2618 if (PL_lex_starts++) {
2619 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2620 if (!PL_lex_casemods && PL_lex_inpat)
2633 PL_lex_state = LEX_NORMAL;
2634 s = scan_formline(PL_bufptr);
2635 if (!PL_lex_formbrack)
2641 PL_oldoldbufptr = PL_oldbufptr;
2647 if (isIDFIRST_lazy_if(s,UTF))
2649 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2652 goto fake_eof; /* emulate EOF on ^D or ^Z */
2657 if (PL_lex_brackets) {
2658 yyerror(PL_lex_formbrack
2659 ? "Format not terminated"
2660 : "Missing right curly or square bracket");
2662 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2663 "### Tokener got EOF\n");
2667 if (s++ < PL_bufend)
2668 goto retry; /* ignore stray nulls */
2671 if (!PL_in_eval && !PL_preambled) {
2672 PL_preambled = TRUE;
2673 sv_setpv(PL_linestr,incl_perldb());
2674 if (SvCUR(PL_linestr))
2675 sv_catpvn(PL_linestr,";", 1);
2677 while(AvFILLp(PL_preambleav) >= 0) {
2678 SV *tmpsv = av_shift(PL_preambleav);
2679 sv_catsv(PL_linestr, tmpsv);
2680 sv_catpvn(PL_linestr, ";", 1);
2683 sv_free((SV*)PL_preambleav);
2684 PL_preambleav = NULL;
2686 if (PL_minus_n || PL_minus_p) {
2687 sv_catpv(PL_linestr, "LINE: while (<>) {");
2689 sv_catpv(PL_linestr,"chomp;");
2692 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2693 || *PL_splitstr == '"')
2694 && strchr(PL_splitstr + 1, *PL_splitstr))
2695 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2697 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2698 bytes can be used as quoting characters. :-) */
2699 /* The count here deliberately includes the NUL
2700 that terminates the C string constant. This
2701 embeds the opening NUL into the string. */
2702 const char *splits = PL_splitstr;
2703 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2706 if (*splits == '\\')
2707 sv_catpvn(PL_linestr, splits, 1);
2708 sv_catpvn(PL_linestr, splits, 1);
2709 } while (*splits++);
2710 /* This loop will embed the trailing NUL of
2711 PL_linestr as the last thing it does before
2713 sv_catpvn(PL_linestr, ");", 2);
2717 sv_catpv(PL_linestr,"our @F=split(' ');");
2721 sv_catpv(PL_linestr,"use feature ':5.10';");
2722 sv_catpvn(PL_linestr, "\n", 1);
2723 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2724 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2725 PL_last_lop = PL_last_uni = Nullch;
2726 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2727 SV * const sv = NEWSV(85,0);
2729 sv_upgrade(sv, SVt_PVMG);
2730 sv_setsv(sv,PL_linestr);
2733 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2738 bof = PL_rsfp ? TRUE : FALSE;
2739 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2742 if (PL_preprocess && !PL_in_eval)
2743 (void)PerlProc_pclose(PL_rsfp);
2744 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2745 PerlIO_clearerr(PL_rsfp);
2747 (void)PerlIO_close(PL_rsfp);
2749 PL_doextract = FALSE;
2751 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2752 sv_setpv(PL_linestr,PL_minus_p
2753 ? ";}continue{print;}" : ";}");
2754 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2755 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2756 PL_last_lop = PL_last_uni = Nullch;
2757 PL_minus_n = PL_minus_p = 0;
2760 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2761 PL_last_lop = PL_last_uni = Nullch;
2762 sv_setpvn(PL_linestr,"",0);
2763 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2765 /* If it looks like the start of a BOM or raw UTF-16,
2766 * check if it in fact is. */
2772 #ifdef PERLIO_IS_STDIO
2773 # ifdef __GNU_LIBRARY__
2774 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2775 # define FTELL_FOR_PIPE_IS_BROKEN
2779 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2780 # define FTELL_FOR_PIPE_IS_BROKEN
2785 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2786 /* This loses the possibility to detect the bof
2787 * situation on perl -P when the libc5 is being used.
2788 * Workaround? Maybe attach some extra state to PL_rsfp?
2791 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2793 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2796 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2797 s = swallow_bom((U8*)s);
2801 /* Incest with pod. */
2802 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2803 sv_setpvn(PL_linestr, "", 0);
2804 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2805 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2806 PL_last_lop = PL_last_uni = Nullch;
2807 PL_doextract = FALSE;
2811 } while (PL_doextract);
2812 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2813 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2814 SV * const sv = NEWSV(85,0);
2816 sv_upgrade(sv, SVt_PVMG);
2817 sv_setsv(sv,PL_linestr);
2820 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2822 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2823 PL_last_lop = PL_last_uni = Nullch;
2824 if (CopLINE(PL_curcop) == 1) {
2825 while (s < PL_bufend && isSPACE(*s))
2827 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2831 if (*s == '#' && *(s+1) == '!')
2833 #ifdef ALTERNATE_SHEBANG
2835 static char const as[] = ALTERNATE_SHEBANG;
2836 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2837 d = s + (sizeof(as) - 1);
2839 #endif /* ALTERNATE_SHEBANG */
2848 while (*d && !isSPACE(*d))
2852 #ifdef ARG_ZERO_IS_SCRIPT
2853 if (ipathend > ipath) {
2855 * HP-UX (at least) sets argv[0] to the script name,
2856 * which makes $^X incorrect. And Digital UNIX and Linux,
2857 * at least, set argv[0] to the basename of the Perl
2858 * interpreter. So, having found "#!", we'll set it right.
2861 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2862 assert(SvPOK(x) || SvGMAGICAL(x));
2863 if (sv_eq(x, CopFILESV(PL_curcop))) {
2864 sv_setpvn(x, ipath, ipathend - ipath);
2870 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2871 const char * const lstart = SvPV_const(x,llen);
2873 bstart += blen - llen;
2874 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2875 sv_setpvn(x, ipath, ipathend - ipath);
2880 TAINT_NOT; /* $^X is always tainted, but that's OK */
2882 #endif /* ARG_ZERO_IS_SCRIPT */
2887 d = instr(s,"perl -");
2889 d = instr(s,"perl");
2891 /* avoid getting into infinite loops when shebang
2892 * line contains "Perl" rather than "perl" */
2894 for (d = ipathend-4; d >= ipath; --d) {
2895 if ((*d == 'p' || *d == 'P')
2896 && !ibcmp(d, "perl", 4))
2906 #ifdef ALTERNATE_SHEBANG
2908 * If the ALTERNATE_SHEBANG on this system starts with a
2909 * character that can be part of a Perl expression, then if
2910 * we see it but not "perl", we're probably looking at the
2911 * start of Perl code, not a request to hand off to some
2912 * other interpreter. Similarly, if "perl" is there, but
2913 * not in the first 'word' of the line, we assume the line
2914 * contains the start of the Perl program.
2916 if (d && *s != '#') {
2917 const char *c = ipath;
2918 while (*c && !strchr("; \t\r\n\f\v#", *c))
2921 d = Nullch; /* "perl" not in first word; ignore */
2923 *s = '#'; /* Don't try to parse shebang line */
2925 #endif /* ALTERNATE_SHEBANG */
2926 #ifndef MACOS_TRADITIONAL
2931 !instr(s,"indir") &&
2932 instr(PL_origargv[0],"perl"))
2939 while (s < PL_bufend && isSPACE(*s))
2941 if (s < PL_bufend) {
2942 Newxz(newargv,PL_origargc+3,char*);
2944 while (s < PL_bufend && !isSPACE(*s))
2947 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2950 newargv = PL_origargv;
2953 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2955 Perl_croak(aTHX_ "Can't exec %s", ipath);
2959 const U32 oldpdb = PL_perldb;
2960 const bool oldn = PL_minus_n;
2961 const bool oldp = PL_minus_p;
2963 while (*d && !isSPACE(*d)) d++;
2964 while (SPACE_OR_TAB(*d)) d++;
2967 const bool switches_done = PL_doswitches;
2969 if (*d == 'M' || *d == 'm' || *d == 'C') {
2970 const char * const m = d;
2971 while (*d && !isSPACE(*d)) d++;
2972 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2975 d = moreswitches(d);
2977 if (PL_doswitches && !switches_done) {
2978 int argc = PL_origargc;
2979 char **argv = PL_origargv;
2982 } while (argc && argv[0][0] == '-' && argv[0][1]);
2983 init_argv_symbols(argc,argv);
2985 if ((PERLDB_LINE && !oldpdb) ||
2986 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2987 /* if we have already added "LINE: while (<>) {",
2988 we must not do it again */
2990 sv_setpvn(PL_linestr, "", 0);
2991 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2992 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2993 PL_last_lop = PL_last_uni = Nullch;
2994 PL_preambled = FALSE;
2996 (void)gv_fetchfile(PL_origfilename);
2999 if (PL_doswitches && !switches_done) {
3000 int argc = PL_origargc;
3001 char **argv = PL_origargv;
3004 } while (argc && argv[0][0] == '-' && argv[0][1]);
3005 init_argv_symbols(argc,argv);
3011 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3013 PL_lex_state = LEX_FORMLINE;
3018 #ifdef PERL_STRICT_CR
3019 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3021 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3023 case ' ': case '\t': case '\f': case 013:
3024 #ifdef MACOS_TRADITIONAL
3031 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3032 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3033 /* handle eval qq[#line 1 "foo"\n ...] */
3034 CopLINE_dec(PL_curcop);
3038 while (s < d && *s != '\n')
3042 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3043 Perl_croak(aTHX_ "panic: input overflow");
3045 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3047 PL_lex_state = LEX_FORMLINE;
3057 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3065 while (s < PL_bufend && SPACE_OR_TAB(*s))
3068 if (strnEQ(s,"=>",2)) {
3069 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3070 DEBUG_T( { S_printbuf(aTHX_
3071 "### Saw unary minus before =>, forcing word %s\n", s);
3073 OPERATOR('-'); /* unary minus */
3075 PL_last_uni = PL_oldbufptr;
3077 case 'r': ftst = OP_FTEREAD; break;
3078 case 'w': ftst = OP_FTEWRITE; break;
3079 case 'x': ftst = OP_FTEEXEC; break;
3080 case 'o': ftst = OP_FTEOWNED; break;
3081 case 'R': ftst = OP_FTRREAD; break;
3082 case 'W': ftst = OP_FTRWRITE; break;
3083 case 'X': ftst = OP_FTREXEC; break;
3084 case 'O': ftst = OP_FTROWNED; break;
3085 case 'e': ftst = OP_FTIS; break;
3086 case 'z': ftst = OP_FTZERO; break;
3087 case 's': ftst = OP_FTSIZE; break;
3088 case 'f': ftst = OP_FTFILE; break;
3089 case 'd': ftst = OP_FTDIR; break;
3090 case 'l': ftst = OP_FTLINK; break;
3091 case 'p': ftst = OP_FTPIPE; break;
3092 case 'S': ftst = OP_FTSOCK; break;
3093 case 'u': ftst = OP_FTSUID; break;
3094 case 'g': ftst = OP_FTSGID; break;
3095 case 'k': ftst = OP_FTSVTX; break;
3096 case 'b': ftst = OP_FTBLK; break;
3097 case 'c': ftst = OP_FTCHR; break;
3098 case 't': ftst = OP_FTTTY; break;
3099 case 'T': ftst = OP_FTTEXT; break;
3100 case 'B': ftst = OP_FTBINARY; break;
3101 case 'M': case 'A': case 'C':
3102 gv_fetchpv("\024",GV_ADD, SVt_PV);
3104 case 'M': ftst = OP_FTMTIME; break;
3105 case 'A': ftst = OP_FTATIME; break;
3106 case 'C': ftst = OP_FTCTIME; break;
3114 PL_last_lop_op = (OPCODE)ftst;
3115 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3116 "### Saw file test %c\n", (int)tmp);
3121 /* Assume it was a minus followed by a one-letter named
3122 * subroutine call (or a -bareword), then. */
3123 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3124 "### '-%c' looked like a file test but was not\n",
3131 const char tmp = *s++;
3134 if (PL_expect == XOPERATOR)
3139 else if (*s == '>') {
3142 if (isIDFIRST_lazy_if(s,UTF)) {
3143 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3151 if (PL_expect == XOPERATOR)
3154 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3156 OPERATOR('-'); /* unary minus */
3162 const char tmp = *s++;
3165 if (PL_expect == XOPERATOR)
3170 if (PL_expect == XOPERATOR)
3173 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3180 if (PL_expect != XOPERATOR) {
3181 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3182 PL_expect = XOPERATOR;
3183 force_ident(PL_tokenbuf, '*');
3196 if (PL_expect == XOPERATOR) {
3200 PL_tokenbuf[0] = '%';
3201 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3202 if (!PL_tokenbuf[1]) {
3205 PL_pending_ident = '%';
3216 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3217 && FEATURE_IS_ENABLED("~~"))
3224 const char tmp = *s++;
3230 goto just_a_word_zero_gv;
3233 switch (PL_expect) {
3236 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3238 PL_bufptr = s; /* update in case we back off */
3244 PL_expect = XTERMBLOCK;
3248 while (isIDFIRST_lazy_if(s,UTF)) {
3250 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3251 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3252 if (tmp < 0) tmp = -tmp;
3268 d = scan_str(d,TRUE,TRUE);
3270 /* MUST advance bufptr here to avoid bogus
3271 "at end of line" context messages from yyerror().
3273 PL_bufptr = s + len;
3274 yyerror("Unterminated attribute parameter in attribute list");
3277 return REPORT(0); /* EOF indicator */
3281 SV *sv = newSVpvn(s, len);
3282 sv_catsv(sv, PL_lex_stuff);
3283 attrs = append_elem(OP_LIST, attrs,
3284 newSVOP(OP_CONST, 0, sv));
3285 SvREFCNT_dec(PL_lex_stuff);
3286 PL_lex_stuff = Nullsv;
3289 if (len == 6 && strnEQ(s, "unique", len)) {
3290 if (PL_in_my == KEY_our)
3292 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3294 ; /* skip to avoid loading attributes.pm */
3297 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3300 /* NOTE: any CV attrs applied here need to be part of
3301 the CVf_BUILTIN_ATTRS define in cv.h! */
3302 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3303 CvLVALUE_on(PL_compcv);
3304 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3305 CvLOCKED_on(PL_compcv);
3306 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3307 CvMETHOD_on(PL_compcv);
3308 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3309 CvASSERTION_on(PL_compcv);
3310 /* After we've set the flags, it could be argued that
3311 we don't need to do the attributes.pm-based setting
3312 process, and shouldn't bother appending recognized
3313 flags. To experiment with that, uncomment the
3314 following "else". (Note that's already been
3315 uncommented. That keeps the above-applied built-in
3316 attributes from being intercepted (and possibly
3317 rejected) by a package's attribute routines, but is
3318 justified by the performance win for the common case
3319 of applying only built-in attributes.) */
3321 attrs = append_elem(OP_LIST, attrs,
3322 newSVOP(OP_CONST, 0,
3326 if (*s == ':' && s[1] != ':')
3329 break; /* require real whitespace or :'s */
3333 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3334 if (*s != ';' && *s != '}' && *s != tmp
3335 && (tmp != '=' || *s != ')')) {
3336 const char q = ((*s == '\'') ? '"' : '\'');
3337 /* If here for an expression, and parsed no attrs, back
3339 if (tmp == '=' && !attrs) {
3343 /* MUST advance bufptr here to avoid bogus "at end of line"
3344 context messages from yyerror().
3348 ? Perl_form(aTHX_ "Invalid separator character "
3349 "%c%c%c in attribute list", q, *s, q)
3350 : "Unterminated attribute list" );
3358 PL_nextval[PL_nexttoke].opval = attrs;
3366 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3367 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3375 const char tmp = *s++;
3380 const char tmp = *s++;
3388 if (PL_lex_brackets <= 0)
3389 yyerror("Unmatched right square bracket");
3392 if (PL_lex_state == LEX_INTERPNORMAL) {
3393 if (PL_lex_brackets == 0) {
3394 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3395 PL_lex_state = LEX_INTERPEND;
3402 if (PL_lex_brackets > 100) {
3403 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3405 switch (PL_expect) {
3407 if (PL_lex_formbrack) {
3411 if (PL_oldoldbufptr == PL_last_lop)
3412 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3414 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3415 OPERATOR(HASHBRACK);
3417 while (s < PL_bufend && SPACE_OR_TAB(*s))
3420 PL_tokenbuf[0] = '\0';
3421 if (d < PL_bufend && *d == '-') {
3422 PL_tokenbuf[0] = '-';
3424 while (d < PL_bufend && SPACE_OR_TAB(*d))
3427 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3428 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3430 while (d < PL_bufend && SPACE_OR_TAB(*d))
3433 const char minus = (PL_tokenbuf[0] == '-');
3434 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3442 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3447 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3452 if (PL_oldoldbufptr == PL_last_lop)
3453 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3455 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3458 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3460 /* This hack is to get the ${} in the message. */
3462 yyerror("syntax error");
3465 OPERATOR(HASHBRACK);
3467 /* This hack serves to disambiguate a pair of curlies
3468 * as being a block or an anon hash. Normally, expectation
3469 * determines that, but in cases where we're not in a
3470 * position to expect anything in particular (like inside
3471 * eval"") we have to resolve the ambiguity. This code
3472 * covers the case where the first term in the curlies is a
3473 * quoted string. Most other cases need to be explicitly
3474 * disambiguated by prepending a "+" before the opening
3475 * curly in order to force resolution as an anon hash.
3477 * XXX should probably propagate the outer expectation
3478 * into eval"" to rely less on this hack, but that could
3479 * potentially break current behavior of eval"".
3483 if (*s == '\'' || *s == '"' || *s == '`') {
3484 /* common case: get past first string, handling escapes */
3485 for (t++; t < PL_bufend && *t != *s;)
3486 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3490 else if (*s == 'q') {
3493 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3496 /* skip q//-like construct */
3498 char open, close, term;
3501 while (t < PL_bufend && isSPACE(*t))
3503 /* check for q => */
3504 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3505 OPERATOR(HASHBRACK);
3509 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3513 for (t++; t < PL_bufend; t++) {
3514 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3516 else if (*t == open)
3520 for (t++; t < PL_bufend; t++) {
3521 if (*t == '\\' && t+1 < PL_bufend)
3523 else if (*t == close && --brackets <= 0)
3525 else if (*t == open)
3532 /* skip plain q word */
3533 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3536 else if (isALNUM_lazy_if(t,UTF)) {
3538 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3541 while (t < PL_bufend && isSPACE(*t))
3543 /* if comma follows first term, call it an anon hash */
3544 /* XXX it could be a comma expression with loop modifiers */
3545 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3546 || (*t == '=' && t[1] == '>')))
3547 OPERATOR(HASHBRACK);
3548 if (PL_expect == XREF)
3551 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3557 yylval.ival = CopLINE(PL_curcop);
3558 if (isSPACE(*s) || *s == '#')
3559 PL_copline = NOLINE; /* invalidate current command line number */
3564 if (PL_lex_brackets <= 0)
3565 yyerror("Unmatched right curly bracket");
3567 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3568 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3569 PL_lex_formbrack = 0;
3570 if (PL_lex_state == LEX_INTERPNORMAL) {
3571 if (PL_lex_brackets == 0) {
3572 if (PL_expect & XFAKEBRACK) {
3573 PL_expect &= XENUMMASK;
3574 PL_lex_state = LEX_INTERPEND;
3576 return yylex(); /* ignore fake brackets */
3578 if (*s == '-' && s[1] == '>')
3579 PL_lex_state = LEX_INTERPENDMAYBE;
3580 else if (*s != '[' && *s != '{')
3581 PL_lex_state = LEX_INTERPEND;
3584 if (PL_expect & XFAKEBRACK) {
3585 PL_expect &= XENUMMASK;
3587 return yylex(); /* ignore fake brackets */
3596 if (PL_expect == XOPERATOR) {
3597 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3598 && isIDFIRST_lazy_if(s,UTF))
3600 CopLINE_dec(PL_curcop);
3601 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3602 CopLINE_inc(PL_curcop);
3607 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3609 PL_expect = XOPERATOR;
3610 force_ident(PL_tokenbuf, '&');
3614 yylval.ival = (OPpENTERSUB_AMPER<<8);
3626 const char tmp = *s++;
3633 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3634 && strchr("+-*/%.^&|<",tmp))
3635 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3636 "Reversed %c= operator",(int)tmp);
3638 if (PL_expect == XSTATE && isALPHA(tmp) &&
3639 (s == PL_linestart+1 || s[-2] == '\n') )
3641 if (PL_in_eval && !PL_rsfp) {
3646 if (strnEQ(s,"=cut",4)) {
3660 PL_doextract = TRUE;
3664 if (PL_lex_brackets < PL_lex_formbrack) {
3666 #ifdef PERL_STRICT_CR
3667 for (t = s; SPACE_OR_TAB(*t); t++) ;
3669 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3671 if (*t == '\n' || *t == '#') {
3682 const char tmp = *s++;
3684 /* was this !=~ where !~ was meant?
3685 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3687 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3688 const char *t = s+1;
3690 while (t < PL_bufend && isSPACE(*t))
3693 if (*t == '/' || *t == '?' ||
3694 ((*t == 'm' || *t == 's' || *t == 'y')
3695 && !isALNUM(t[1])) ||
3696 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3697 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3698 "!=~ should be !~");
3708 if (PL_expect != XOPERATOR) {
3709 if (s[1] != '<' && !strchr(s,'>'))
3712 s = scan_heredoc(s);
3714 s = scan_inputsymbol(s);
3715 TERM(sublex_start());
3721 SHop(OP_LEFT_SHIFT);
3735 const char tmp = *s++;
3737 SHop(OP_RIGHT_SHIFT);
3747 if (PL_expect == XOPERATOR) {
3748 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3750 deprecate_old(commaless_variable_list);
3751 return REPORT(','); /* grandfather non-comma-format format */
3755 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3756 PL_tokenbuf[0] = '@';
3757 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3758 sizeof PL_tokenbuf - 1, FALSE);
3759 if (PL_expect == XOPERATOR)
3760 no_op("Array length", s);
3761 if (!PL_tokenbuf[1])
3763 PL_expect = XOPERATOR;
3764 PL_pending_ident = '#';
3768 PL_tokenbuf[0] = '$';
3769 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3770 sizeof PL_tokenbuf - 1, FALSE);
3771 if (PL_expect == XOPERATOR)
3773 if (!PL_tokenbuf[1]) {
3775 yyerror("Final $ should be \\$ or $name");
3779 /* This kludge not intended to be bulletproof. */
3780 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3781 yylval.opval = newSVOP(OP_CONST, 0,
3782 newSViv(PL_compiling.cop_arybase));
3783 yylval.opval->op_private = OPpCONST_ARYBASE;
3789 const char tmp = *s;
3790 if (PL_lex_state == LEX_NORMAL)
3793 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3794 && intuit_more(s)) {
3796 PL_tokenbuf[0] = '@';
3797 if (ckWARN(WARN_SYNTAX)) {
3800 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3803 PL_bufptr = skipspace(PL_bufptr);
3804 while (t < PL_bufend && *t != ']')
3806 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3807 "Multidimensional syntax %.*s not supported",
3808 (int)((t - PL_bufptr) + 1), PL_bufptr);
3812 else if (*s == '{') {
3814 PL_tokenbuf[0] = '%';
3815 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3816 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3818 char tmpbuf[sizeof PL_tokenbuf];
3819 for (t++; isSPACE(*t); t++) ;
3820 if (isIDFIRST_lazy_if(t,UTF)) {
3822 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3824 for (; isSPACE(*t); t++) ;
3825 if (*t == ';' && get_cv(tmpbuf, FALSE))
3826 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3827 "You need to quote \"%s\"",
3834 PL_expect = XOPERATOR;
3835 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3836 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3837 if (!islop || PL_last_lop_op == OP_GREPSTART)
3838 PL_expect = XOPERATOR;
3839 else if (strchr("$@\"'`q", *s))
3840 PL_expect = XTERM; /* e.g. print $fh "foo" */
3841 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3842 PL_expect = XTERM; /* e.g. print $fh &sub */
3843 else if (isIDFIRST_lazy_if(s,UTF)) {
3844 char tmpbuf[sizeof PL_tokenbuf];
3846 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3847 if ((t2 = keyword(tmpbuf, len))) {
3848 /* binary operators exclude handle interpretations */
3860 PL_expect = XTERM; /* e.g. print $fh length() */
3865 PL_expect = XTERM; /* e.g. print $fh subr() */
3868 else if (isDIGIT(*s))
3869 PL_expect = XTERM; /* e.g. print $fh 3 */
3870 else if (*s == '.' && isDIGIT(s[1]))
3871 PL_expect = XTERM; /* e.g. print $fh .3 */
3872 else if ((*s == '?' || *s == '-' || *s == '+')
3873 && !isSPACE(s[1]) && s[1] != '=')
3874 PL_expect = XTERM; /* e.g. print $fh -1 */
3875 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3877 PL_expect = XTERM; /* e.g. print $fh /.../
3878 XXX except DORDOR operator
3880 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3882 PL_expect = XTERM; /* print $fh <<"EOF" */
3885 PL_pending_ident = '$';
3889 if (PL_expect == XOPERATOR)
3891 PL_tokenbuf[0] = '@';
3892 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3893 if (!PL_tokenbuf[1]) {
3896 if (PL_lex_state == LEX_NORMAL)
3898 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3900 PL_tokenbuf[0] = '%';
3902 /* Warn about @ where they meant $. */
3903 if (*s == '[' || *s == '{') {
3904 if (ckWARN(WARN_SYNTAX)) {
3905 const char *t = s + 1;
3906 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3908 if (*t == '}' || *t == ']') {
3910 PL_bufptr = skipspace(PL_bufptr);
3911 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3912 "Scalar value %.*s better written as $%.*s",
3913 (int)(t-PL_bufptr), PL_bufptr,
3914 (int)(t-PL_bufptr-1), PL_bufptr+1);
3919 PL_pending_ident = '@';
3922 case '/': /* may be division, defined-or, or pattern */
3923 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3927 case '?': /* may either be conditional or pattern */
3928 if(PL_expect == XOPERATOR) {
3936 /* A // operator. */
3946 /* Disable warning on "study /blah/" */
3947 if (PL_oldoldbufptr == PL_last_uni
3948 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3949 || memNE(PL_last_uni, "study", 5)
3950 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3953 s = scan_pat(s,OP_MATCH);
3954 TERM(sublex_start());
3958 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3959 #ifdef PERL_STRICT_CR
3962 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3964 && (s == PL_linestart || s[-1] == '\n') )
3966 PL_lex_formbrack = 0;
3970 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3976 yylval.ival = OPf_SPECIAL;
3982 if (PL_expect != XOPERATOR)
3987 case '0': case '1': case '2': case '3': case '4':
3988 case '5': case '6': case '7': case '8': case '9':
3989 s = scan_num(s, &yylval);
3990 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
3991 if (PL_expect == XOPERATOR)
3996 s = scan_str(s,FALSE,FALSE);
3997 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
3998 if (PL_expect == XOPERATOR) {
3999 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4001 deprecate_old(commaless_variable_list);
4002 return REPORT(','); /* grandfather non-comma-format format */
4008 missingterm((char*)0);
4009 yylval.ival = OP_CONST;
4010 TERM(sublex_start());
4013 s = scan_str(s,FALSE,FALSE);
4014 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4015 if (PL_expect == XOPERATOR) {
4016 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4018 deprecate_old(commaless_variable_list);
4019 return REPORT(','); /* grandfather non-comma-format format */
4025 missingterm((char*)0);
4026 yylval.ival = OP_CONST;
4027 /* FIXME. I think that this can be const if char *d is replaced by
4028 more localised variables. */
4029 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4030 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4031 yylval.ival = OP_STRINGIFY;
4035 TERM(sublex_start());
4038 s = scan_str(s,FALSE,FALSE);
4039 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4040 if (PL_expect == XOPERATOR)
4041 no_op("Backticks",s);
4043 missingterm((char*)0);
4044 yylval.ival = OP_BACKTICK;
4046 TERM(sublex_start());
4050 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4051 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4053 if (PL_expect == XOPERATOR)
4054 no_op("Backslash",s);
4058 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4059 char *start = s + 2;
4060 while (isDIGIT(*start) || *start == '_')
4062 if (*start == '.' && isDIGIT(start[1])) {
4063 s = scan_num(s, &yylval);
4066 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4067 else if (!isALPHA(*start) && (PL_expect == XTERM
4068 || PL_expect == XREF || PL_expect == XSTATE
4069 || PL_expect == XTERMORDORDOR)) {
4070 const char c = *start;
4073 gv = gv_fetchpv(s, 0, SVt_PVCV);
4076 s = scan_num(s, &yylval);
4083 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4119 I32 orig_keyword = 0;
4124 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4126 /* Some keywords can be followed by any delimiter, including ':' */
4127 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4128 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4129 (PL_tokenbuf[0] == 'q' &&
4130 strchr("qwxr", PL_tokenbuf[1])))));
4132 /* x::* is just a word, unless x is "CORE" */
4133 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4137 while (d < PL_bufend && isSPACE(*d))
4138 d++; /* no comments skipped here, or s### is misparsed */
4140 /* Is this a label? */
4141 if (!tmp && PL_expect == XSTATE
4142 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4144 yylval.pval = savepv(PL_tokenbuf);
4149 /* Check for keywords */
4150 tmp = keyword(PL_tokenbuf, len);
4152 /* Is this a word before a => operator? */
4153 if (*d == '=' && d[1] == '>') {
4156 = (OP*)newSVOP(OP_CONST, 0,
4157 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4158 yylval.opval->op_private = OPpCONST_BARE;
4162 if (tmp < 0) { /* second-class keyword? */
4163 GV *ogv = NULL; /* override (winner) */
4164 GV *hgv = NULL; /* hidden (loser) */
4165 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4167 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4170 if (GvIMPORTED_CV(gv))
4172 else if (! CvMETHOD(cv))
4176 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4177 (gv = *gvp) != (GV*)&PL_sv_undef &&
4178 GvCVu(gv) && GvIMPORTED_CV(gv))
4185 tmp = 0; /* overridden by import or by GLOBAL */
4188 && -tmp==KEY_lock /* XXX generalizable kludge */
4190 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4192 tmp = 0; /* any sub overrides "weak" keyword */
4194 else { /* no override */
4196 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4197 Perl_warner(aTHX_ packWARN(WARN_MISC),
4198 "dump() better written as CORE::dump()");
4202 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4203 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4204 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4205 "Ambiguous call resolved as CORE::%s(), %s",
4206 GvENAME(hgv), "qualify as such or use &");
4213 default: /* not a keyword */
4214 /* Trade off - by using this evil construction we can pull the
4215 variable gv into the block labelled keylookup. If not, then
4216 we have to give it function scope so that the goto from the
4217 earlier ':' case doesn't bypass the initialisation. */
4219 just_a_word_zero_gv:
4226 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4229 /* Get the rest if it looks like a package qualifier */
4231 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4233 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4236 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4237 *s == '\'' ? "'" : "::");
4242 if (PL_expect == XOPERATOR) {
4243 if (PL_bufptr == PL_linestart) {
4244 CopLINE_dec(PL_curcop);
4245 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4246 CopLINE_inc(PL_curcop);
4249 no_op("Bareword",s);
4252 /* Look for a subroutine with this name in current package,
4253 unless name is "Foo::", in which case Foo is a bearword
4254 (and a package name). */
4257 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4259 if (ckWARN(WARN_BAREWORD)
4260 && ! gv_fetchpv(PL_tokenbuf, 0, SVt_PVHV))
4261 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4262 "Bareword \"%s\" refers to nonexistent package",
4265 PL_tokenbuf[len] = '\0';
4272 /* Mustn't actually add anything to a symbol table.
4273 But also don't want to "initialise" any placeholder
4274 constants that might already be there into full
4275 blown PVGVs with attached PVCV. */
4276 gv = gv_fetchpv(PL_tokenbuf, GV_NOADD_NOINIT,
4281 /* if we saw a global override before, get the right name */
4284 sv = newSVpvn("CORE::GLOBAL::",14);
4285 sv_catpv(sv,PL_tokenbuf);
4288 /* If len is 0, newSVpv does strlen(), which is correct.
4289 If len is non-zero, then it will be the true length,
4290 and so the scalar will be created correctly. */
4291 sv = newSVpv(PL_tokenbuf,len);
4294 /* Presume this is going to be a bareword of some sort. */
4297 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4298 yylval.opval->op_private = OPpCONST_BARE;
4299 /* UTF-8 package name? */
4300 if (UTF && !IN_BYTES &&
4301 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4304 /* And if "Foo::", then that's what it certainly is. */
4309 /* Do the explicit type check so that we don't need to force
4310 the initialisation of the symbol table to have a real GV.
4311 Beware - gv may not really be a PVGV, cv may not really be
4312 a PVCV, (because of the space optimisations that gv_init
4313 understands) But they're true if for this symbol there is
4314 respectively a typeglob and a subroutine.
4316 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4317 /* Real typeglob, so get the real subroutine: */
4319 /* A proxy for a subroutine in this package? */
4320 : SvOK(gv) ? (CV *) gv : NULL)
4323 /* See if it's the indirect object for a list operator. */
4325 if (PL_oldoldbufptr &&
4326 PL_oldoldbufptr < PL_bufptr &&
4327 (PL_oldoldbufptr == PL_last_lop
4328 || PL_oldoldbufptr == PL_last_uni) &&
4329 /* NO SKIPSPACE BEFORE HERE! */
4330 (PL_expect == XREF ||
4331 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4333 bool immediate_paren = *s == '(';
4335 /* (Now we can afford to cross potential line boundary.) */
4338 /* Two barewords in a row may indicate method call. */
4340 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4341 (tmp = intuit_method(s, gv, cv)))
4344 /* If not a declared subroutine, it's an indirect object. */
4345 /* (But it's an indir obj regardless for sort.) */
4346 /* Also, if "_" follows a filetest operator, it's a bareword */
4349 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4351 (PL_last_lop_op != OP_MAPSTART &&
4352 PL_last_lop_op != OP_GREPSTART))))
4353 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4354 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4357 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4362 PL_expect = XOPERATOR;
4365 /* Is this a word before a => operator? */
4366 if (*s == '=' && s[1] == '>' && !pkgname) {
4368 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4369 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4370 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4374 /* If followed by a paren, it's certainly a subroutine. */
4378 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4379 if (*d == ')' && (sv = gv_const_sv(gv))) {
4384 PL_nextval[PL_nexttoke].opval = yylval.opval;
4385 PL_expect = XOPERATOR;
4391 /* If followed by var or block, call it a method (unless sub) */
4393 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4394 PL_last_lop = PL_oldbufptr;
4395 PL_last_lop_op = OP_METHOD;
4399 /* If followed by a bareword, see if it looks like indir obj. */
4402 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4403 && (tmp = intuit_method(s, gv, cv)))
4406 /* Not a method, so call it a subroutine (if defined) */
4409 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4410 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4411 "Ambiguous use of -%s resolved as -&%s()",
4412 PL_tokenbuf, PL_tokenbuf);
4413 /* Check for a constant sub */
4414 if ((sv = gv_const_sv(gv))) {
4416 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4417 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4418 yylval.opval->op_private = 0;
4422 /* Resolve to GV now. */
4423 if (SvTYPE(gv) != SVt_PVGV) {
4424 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4425 assert (SvTYPE(gv) == SVt_PVGV);
4426 /* cv must have been some sort of placeholder, so
4427 now needs replacing with a real code reference. */
4431 op_free(yylval.opval);
4432 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4433 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4434 PL_last_lop = PL_oldbufptr;
4435 PL_last_lop_op = OP_ENTERSUB;
4436 /* Is there a prototype? */
4439 const char *proto = SvPV_const((SV*)cv, len);
4442 if (*proto == '$' && proto[1] == '\0')
4444 while (*proto == ';')
4446 if (*proto == '&' && *s == '{') {
4447 sv_setpv(PL_subname, PL_curstash ?
4448 "__ANON__" : "__ANON__::__ANON__");
4452 PL_nextval[PL_nexttoke].opval = yylval.opval;
4458 /* Call it a bare word */
4460 if (PL_hints & HINT_STRICT_SUBS)
4461 yylval.opval->op_private |= OPpCONST_STRICT;
4464 if (lastchar != '-') {
4465 if (ckWARN(WARN_RESERVED)) {
4466 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4467 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4468 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4475 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4476 && ckWARN_d(WARN_AMBIGUOUS)) {
4477 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4478 "Operator or semicolon missing before %c%s",
4479 lastchar, PL_tokenbuf);
4480 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4481 "Ambiguous use of %c resolved as operator %c",
4482 lastchar, lastchar);
4488 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4489 newSVpv(CopFILE(PL_curcop),0));
4493 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4494 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4497 case KEY___PACKAGE__:
4498 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4500 ? newSVhek(HvNAME_HEK(PL_curstash))
4507 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4508 const char *pname = "main";
4509 if (PL_tokenbuf[2] == 'D')
4510 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4511 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4515 GvIOp(gv) = newIO();
4516 IoIFP(GvIOp(gv)) = PL_rsfp;
4517 #if defined(HAS_FCNTL) && defined(F_SETFD)
4519 const int fd = PerlIO_fileno(PL_rsfp);
4520 fcntl(fd,F_SETFD,fd >= 3);
4523 /* Mark this internal pseudo-handle as clean */
4524 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4526 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4527 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4528 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4530 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4531 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4532 /* if the script was opened in binmode, we need to revert
4533 * it to text mode for compatibility; but only iff it has CRs
4534 * XXX this is a questionable hack at best. */
4535 if (PL_bufend-PL_bufptr > 2
4536 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4539 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4540 loc = PerlIO_tell(PL_rsfp);
4541 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4544 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4546 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4547 #endif /* NETWARE */
4548 #ifdef PERLIO_IS_STDIO /* really? */
4549 # if defined(__BORLANDC__)
4550 /* XXX see note in do_binmode() */
4551 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4555 PerlIO_seek(PL_rsfp, loc, 0);
4559 #ifdef PERLIO_LAYERS
4562 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4563 else if (PL_encoding) {
4570 XPUSHs(PL_encoding);
4572 call_method("name", G_SCALAR);
4576 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4577 Perl_form(aTHX_ ":encoding(%"SVf")",
4595 if (PL_expect == XSTATE) {
4602 if (*s == ':' && s[1] == ':') {
4605 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4606 if (!(tmp = keyword(PL_tokenbuf, len)))
4607 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4610 else if (tmp == KEY_require || tmp == KEY_do)
4611 /* that's a way to remember we saw "CORE::" */
4624 LOP(OP_ACCEPT,XTERM);
4630 LOP(OP_ATAN2,XTERM);
4636 LOP(OP_BINMODE,XTERM);
4639 LOP(OP_BLESS,XTERM);
4648 /* When 'use switch' is in effect, continue has a dual
4649 life as a control operator. */
4651 if (!FEATURE_IS_ENABLED("switch"))
4654 /* We have to disambiguate the two senses of
4655 "continue". If the next token is a '{' then
4656 treat it as the start of a continue block;
4657 otherwise treat it as a control operator.
4668 (void)gv_fetchpv("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4685 if (!PL_cryptseen) {
4686 PL_cryptseen = TRUE;
4690 LOP(OP_CRYPT,XTERM);
4693 LOP(OP_CHMOD,XTERM);
4696 LOP(OP_CHOWN,XTERM);
4699 LOP(OP_CONNECT,XTERM);
4718 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4719 if (orig_keyword == KEY_do) {
4728 PL_hints |= HINT_BLOCK_SCOPE;
4738 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4739 LOP(OP_DBMOPEN,XTERM);
4745 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4752 yylval.ival = CopLINE(PL_curcop);
4766 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4767 UNIBRACK(OP_ENTEREVAL);
4785 case KEY_endhostent:
4791 case KEY_endservent:
4794 case KEY_endprotoent:
4805 yylval.ival = CopLINE(PL_curcop);
4807 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4809 if ((PL_bufend - p) >= 3 &&
4810 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4812 else if ((PL_bufend - p) >= 4 &&
4813 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4816 if (isIDFIRST_lazy_if(p,UTF)) {
4817 p = scan_ident(p, PL_bufend,
4818 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4822 Perl_croak(aTHX_ "Missing $ on loop variable");
4827 LOP(OP_FORMLINE,XTERM);
4833 LOP(OP_FCNTL,XTERM);
4839 LOP(OP_FLOCK,XTERM);
4848 LOP(OP_GREPSTART, XREF);
4851 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4866 case KEY_getpriority:
4867 LOP(OP_GETPRIORITY,XTERM);
4869 case KEY_getprotobyname:
4872 case KEY_getprotobynumber:
4873 LOP(OP_GPBYNUMBER,XTERM);
4875 case KEY_getprotoent:
4887 case KEY_getpeername:
4888 UNI(OP_GETPEERNAME);
4890 case KEY_gethostbyname:
4893 case KEY_gethostbyaddr:
4894 LOP(OP_GHBYADDR,XTERM);
4896 case KEY_gethostent:
4899 case KEY_getnetbyname:
4902 case KEY_getnetbyaddr:
4903 LOP(OP_GNBYADDR,XTERM);
4908 case KEY_getservbyname:
4909 LOP(OP_GSBYNAME,XTERM);
4911 case KEY_getservbyport:
4912 LOP(OP_GSBYPORT,XTERM);
4914 case KEY_getservent:
4917 case KEY_getsockname:
4918 UNI(OP_GETSOCKNAME);
4920 case KEY_getsockopt:
4921 LOP(OP_GSOCKOPT,XTERM);
4936 yylval.ival = CopLINE(PL_curcop);
4947 yylval.ival = CopLINE(PL_curcop);
4951 LOP(OP_INDEX,XTERM);
4957 LOP(OP_IOCTL,XTERM);
4969 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5001 LOP(OP_LISTEN,XTERM);
5010 s = scan_pat(s,OP_MATCH);
5011 TERM(sublex_start());
5014 LOP(OP_MAPSTART, XREF);
5017 LOP(OP_MKDIR,XTERM);
5020 LOP(OP_MSGCTL,XTERM);
5023 LOP(OP_MSGGET,XTERM);
5026 LOP(OP_MSGRCV,XTERM);
5029 LOP(OP_MSGSND,XTERM);
5035 if (isIDFIRST_lazy_if(s,UTF)) {
5036 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5037 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5039 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5040 if (!PL_in_my_stash) {
5043 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5051 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5058 s = tokenize_use(0, s);
5062 if (*s == '(' || (s = skipspace(s), *s == '('))
5069 if (isIDFIRST_lazy_if(s,UTF)) {
5071 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5072 for (t=d; *t && isSPACE(*t); t++) ;
5073 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5075 && !(t[0] == '=' && t[1] == '>')
5077 int len = (int)(d-s);
5078 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5079 "Precedence problem: open %.*s should be open(%.*s)",
5086 yylval.ival = OP_OR;
5096 LOP(OP_OPEN_DIR,XTERM);
5099 checkcomma(s,PL_tokenbuf,"filehandle");
5103 checkcomma(s,PL_tokenbuf,"filehandle");
5122 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5126 LOP(OP_PIPE_OP,XTERM);
5129 s = scan_str(s,FALSE,FALSE);
5131 missingterm((char*)0);
5132 yylval.ival = OP_CONST;
5133 TERM(sublex_start());
5139 s = scan_str(s,FALSE,FALSE);
5141 missingterm((char*)0);
5142 PL_expect = XOPERATOR;
5144 if (SvCUR(PL_lex_stuff)) {
5147 d = SvPV_force(PL_lex_stuff, len);
5150 for (; isSPACE(*d) && len; --len, ++d) ;
5153 if (!warned && ckWARN(WARN_QW)) {
5154 for (; !isSPACE(*d) && len; --len, ++d) {
5156 Perl_warner(aTHX_ packWARN(WARN_QW),
5157 "Possible attempt to separate words with commas");
5160 else if (*d == '#') {
5161 Perl_warner(aTHX_ packWARN(WARN_QW),
5162 "Possible attempt to put comments in qw() list");
5168 for (; !isSPACE(*d) && len; --len, ++d) ;
5170 sv = newSVpvn(b, d-b);
5171 if (DO_UTF8(PL_lex_stuff))
5173 words = append_elem(OP_LIST, words,
5174 newSVOP(OP_CONST, 0, tokeq(sv)));
5178 PL_nextval[PL_nexttoke].opval = words;
5183 SvREFCNT_dec(PL_lex_stuff);
5184 PL_lex_stuff = Nullsv;
5190 s = scan_str(s,FALSE,FALSE);
5192 missingterm((char*)0);
5193 yylval.ival = OP_STRINGIFY;
5194 if (SvIVX(PL_lex_stuff) == '\'')
5195 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5196 TERM(sublex_start());
5199 s = scan_pat(s,OP_QR);
5200 TERM(sublex_start());
5203 s = scan_str(s,FALSE,FALSE);
5205 missingterm((char*)0);
5206 yylval.ival = OP_BACKTICK;
5208 TERM(sublex_start());
5216 s = force_version(s, FALSE);
5218 else if (*s != 'v' || !isDIGIT(s[1])
5219 || (s = force_version(s, TRUE), *s == 'v'))
5221 *PL_tokenbuf = '\0';
5222 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5223 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5224 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5226 yyerror("<> should be quotes");
5228 if (orig_keyword == KEY_require) {
5236 PL_last_uni = PL_oldbufptr;
5237 PL_last_lop_op = OP_REQUIRE;
5239 return REPORT( (int)REQUIRE );
5245 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5249 LOP(OP_RENAME,XTERM);
5258 LOP(OP_RINDEX,XTERM);
5268 UNIDOR(OP_READLINE);
5281 LOP(OP_REVERSE,XTERM);
5284 UNIDOR(OP_READLINK);
5292 TERM(sublex_start());
5294 TOKEN(1); /* force error */
5297 checkcomma(s,PL_tokenbuf,"filehandle");
5307 LOP(OP_SELECT,XTERM);
5313 LOP(OP_SEMCTL,XTERM);
5316 LOP(OP_SEMGET,XTERM);
5319 LOP(OP_SEMOP,XTERM);
5325 LOP(OP_SETPGRP,XTERM);
5327 case KEY_setpriority:
5328 LOP(OP_SETPRIORITY,XTERM);
5330 case KEY_sethostent:
5336 case KEY_setservent:
5339 case KEY_setprotoent:
5349 LOP(OP_SEEKDIR,XTERM);
5351 case KEY_setsockopt:
5352 LOP(OP_SSOCKOPT,XTERM);
5358 LOP(OP_SHMCTL,XTERM);
5361 LOP(OP_SHMGET,XTERM);
5364 LOP(OP_SHMREAD,XTERM);
5367 LOP(OP_SHMWRITE,XTERM);
5370 LOP(OP_SHUTDOWN,XTERM);
5379 LOP(OP_SOCKET,XTERM);
5381 case KEY_socketpair:
5382 LOP(OP_SOCKPAIR,XTERM);
5385 checkcomma(s,PL_tokenbuf,"subroutine name");
5387 if (*s == ';' || *s == ')') /* probably a close */
5388 Perl_croak(aTHX_ "sort is now a reserved word");
5390 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5394 LOP(OP_SPLIT,XTERM);
5397 LOP(OP_SPRINTF,XTERM);
5400 LOP(OP_SPLICE,XTERM);
5415 LOP(OP_SUBSTR,XTERM);
5421 char tmpbuf[sizeof PL_tokenbuf];
5422 SSize_t tboffset = 0;
5423 expectation attrful;
5424 bool have_name, have_proto, bad_proto;
5425 const int key = tmp;
5429 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5430 (*s == ':' && s[1] == ':'))
5433 attrful = XATTRBLOCK;
5434 /* remember buffer pos'n for later force_word */
5435 tboffset = s - PL_oldbufptr;
5436 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5437 if (strchr(tmpbuf, ':'))
5438 sv_setpv(PL_subname, tmpbuf);
5440 sv_setsv(PL_subname,PL_curstname);
5441 sv_catpvn(PL_subname,"::",2);
5442 sv_catpvn(PL_subname,tmpbuf,len);
5449 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5450 PL_expect = XTERMBLOCK;
5451 attrful = XATTRTERM;
5452 sv_setpvn(PL_subname,"?",1);
5456 if (key == KEY_format) {
5458 PL_lex_formbrack = PL_lex_brackets + 1;
5460 (void) force_word(PL_oldbufptr + tboffset, WORD,
5465 /* Look for a prototype */
5469 s = scan_str(s,FALSE,FALSE);
5471 Perl_croak(aTHX_ "Prototype not terminated");
5472 /* strip spaces and check for bad characters */
5473 d = SvPVX(PL_lex_stuff);
5476 for (p = d; *p; ++p) {
5479 if (!strchr("$@%*;[]&\\", *p))
5484 if (bad_proto && ckWARN(WARN_SYNTAX))
5485 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5486 "Illegal character in prototype for %"SVf" : %s",
5488 SvCUR_set(PL_lex_stuff, tmp);
5496 if (*s == ':' && s[1] != ':')
5497 PL_expect = attrful;
5498 else if (*s != '{' && key == KEY_sub) {
5500 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5502 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5506 PL_nextval[PL_nexttoke].opval =
5507 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5508 PL_lex_stuff = Nullsv;
5512 sv_setpv(PL_subname,
5513 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5516 (void) force_word(PL_oldbufptr + tboffset, WORD,
5525 LOP(OP_SYSTEM,XREF);
5528 LOP(OP_SYMLINK,XTERM);
5531 LOP(OP_SYSCALL,XTERM);
5534 LOP(OP_SYSOPEN,XTERM);
5537 LOP(OP_SYSSEEK,XTERM);
5540 LOP(OP_SYSREAD,XTERM);
5543 LOP(OP_SYSWRITE,XTERM);
5547 TERM(sublex_start());
5568 LOP(OP_TRUNCATE,XTERM);
5580 yylval.ival = CopLINE(PL_curcop);
5584 yylval.ival = CopLINE(PL_curcop);
5588 LOP(OP_UNLINK,XTERM);
5594 LOP(OP_UNPACK,XTERM);
5597 LOP(OP_UTIME,XTERM);
5603 LOP(OP_UNSHIFT,XTERM);
5606 s = tokenize_use(1, s);
5616 yylval.ival = CopLINE(PL_curcop);
5620 yylval.ival = CopLINE(PL_curcop);
5624 PL_hints |= HINT_BLOCK_SCOPE;
5631 LOP(OP_WAITPID,XTERM);
5640 ctl_l[0] = toCTRL('L');
5642 gv_fetchpv(ctl_l, GV_ADD, SVt_PV);
5645 gv_fetchpv("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5650 if (PL_expect == XOPERATOR)
5656 yylval.ival = OP_XOR;
5661 TERM(sublex_start());
5666 #pragma segment Main
5670 S_pending_ident(pTHX)
5673 register I32 tmp = 0;
5674 /* pit holds the identifier we read and pending_ident is reset */
5675 char pit = PL_pending_ident;
5676 PL_pending_ident = 0;
5678 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5679 "### Pending identifier '%s'\n", PL_tokenbuf); });
5681 /* if we're in a my(), we can't allow dynamics here.
5682 $foo'bar has already been turned into $foo::bar, so
5683 just check for colons.
5685 if it's a legal name, the OP is a PADANY.
5688 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5689 if (strchr(PL_tokenbuf,':'))
5690 yyerror(Perl_form(aTHX_ "No package name allowed for "
5691 "variable %s in \"our\"",
5693 tmp = allocmy(PL_tokenbuf);
5696 if (strchr(PL_tokenbuf,':'))
5697 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5699 yylval.opval = newOP(OP_PADANY, 0);
5700 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5706 build the ops for accesses to a my() variable.
5708 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5709 then used in a comparison. This catches most, but not
5710 all cases. For instance, it catches
5711 sort { my($a); $a <=> $b }
5713 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5714 (although why you'd do that is anyone's guess).
5717 if (!strchr(PL_tokenbuf,':')) {
5719 tmp = pad_findmy(PL_tokenbuf);
5720 if (tmp != NOT_IN_PAD) {
5721 /* might be an "our" variable" */
5722 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5723 /* build ops for a bareword */
5724 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5725 HEK * const stashname = HvNAME_HEK(stash);
5726 SV * const sym = newSVhek(stashname);
5727 sv_catpvn(sym, "::", 2);
5728 sv_catpv(sym, PL_tokenbuf+1);
5729 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5730 yylval.opval->op_private = OPpCONST_ENTERED;
5733 ? (GV_ADDMULTI | GV_ADDINEVAL)
5736 ((PL_tokenbuf[0] == '$') ? SVt_PV
5737 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5742 /* if it's a sort block and they're naming $a or $b */
5743 if (PL_last_lop_op == OP_SORT &&
5744 PL_tokenbuf[0] == '$' &&
5745 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5748 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5749 d < PL_bufend && *d != '\n';
5752 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5753 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5759 yylval.opval = newOP(OP_PADANY, 0);
5760 yylval.opval->op_targ = tmp;
5766 Whine if they've said @foo in a doublequoted string,
5767 and @foo isn't a variable we can find in the symbol
5770 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5771 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5772 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5773 && ckWARN(WARN_AMBIGUOUS))
5775 /* Downgraded from fatal to warning 20000522 mjd */
5776 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5777 "Possible unintended interpolation of %s in string",
5782 /* build ops for a bareword */
5783 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5784 yylval.opval->op_private = OPpCONST_ENTERED;
5788 ? (GV_ADDMULTI | GV_ADDINEVAL)
5789 /* If the identifier refers to a stash, don't autovivify it.
5790 * Change 24660 had the side effect of causing symbol table
5791 * hashes to always be defined, even if they were freshly
5792 * created and the only reference in the entire program was
5793 * the single statement with the defined %foo::bar:: test.
5794 * It appears that all code in the wild doing this actually
5795 * wants to know whether sub-packages have been loaded, so
5796 * by avoiding auto-vivifying symbol tables, we ensure that
5797 * defined %foo::bar:: continues to be false, and the existing
5798 * tests still give the expected answers, even though what
5799 * they're actually testing has now changed subtly.
5801 : !(*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'),
5802 ((PL_tokenbuf[0] == '$') ? SVt_PV
5803 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5809 * The following code was generated by perl_keyword.pl.
5813 Perl_keyword (pTHX_ const char *name, I32 len)
5817 case 1: /* 5 tokens of length 1 */
5849 case 2: /* 18 tokens of length 2 */
5995 case 3: /* 29 tokens of length 3 */
5999 if (name[1] == 'N' &&
6062 if (name[1] == 'i' &&
6084 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6102 if (name[1] == 'o' &&
6111 if (name[1] == 'e' &&
6120 if (name[1] == 'n' &&
6129 if (name[1] == 'o' &&
6138 if (name[1] == 'a' &&
6147 if (name[1] == 'o' &&
6209 if (name[1] == 'e' &&
6223 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6249 if (name[1] == 'i' &&
6258 if (name[1] == 's' &&
6267 if (name[1] == 'e' &&
6276 if (name[1] == 'o' &&
6288 case 4: /* 41 tokens of length 4 */
6292 if (name[1] == 'O' &&
6302 if (name[1] == 'N' &&
6312 if (name[1] == 'i' &&
6322 if (name[1] == 'h' &&
6332 if (name[1] == 'u' &&
6345 if (name[2] == 'c' &&
6354 if (name[2] == 's' &&
6363 if (name[2] == 'a' &&
6399 if (name[1] == 'o' &&
6412 if (name[2] == 't' &&
6421 if (name[2] == 'o' &&
6430 if (name[2] == 't' &&
6439 if (name[2] == 'e' &&
6452 if (name[1] == 'o' &&
6465 if (name[2] == 'y' &&
6474 if (name[2] == 'l' &&
6490 if (name[2] == 's' &&
6499 if (name[2] == 'n' &&
6508 if (name[2] == 'c' &&
6521 if (name[1] == 'e' &&
6531 if (name[1] == 'p' &&
6544 if (name[2] == 'c' &&
6553 if (name[2] == 'p' &&
6562 if (name[2] == 's' &&
6578 if (name[2] == 'n' &&
6648 if (name[2] == 'r' &&
6657 if (name[2] == 'r' &&
6666 if (name[2] == 'a' &&
6682 if (name[2] == 'l' &&
6744 if (name[2] == 'e' &&
6747 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6760 case 5: /* 38 tokens of length 5 */
6764 if (name[1] == 'E' &&
6775 if (name[1] == 'H' &&
6789 if (name[2] == 'a' &&
6799 if (name[2] == 'a' &&
6816 if (name[2] == 'e' &&
6826 if (name[2] == 'e' &&
6830 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6846 if (name[3] == 'i' &&
6855 if (name[3] == 'o' &&
6891 if (name[2] == 'o' &&
6901 if (name[2] == 'y' &&
6915 if (name[1] == 'l' &&
6929 if (name[2] == 'n' &&
6939 if (name[2] == 'o' &&
6953 if (name[1] == 'i' &&
6958 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
6967 if (name[2] == 'd' &&
6977 if (name[2] == 'c' &&
6994 if (name[2] == 'c' &&
7004 if (name[2] == 't' &&
7018 if (name[1] == 'k' &&
7029 if (name[1] == 'r' &&
7043 if (name[2] == 's' &&
7053 if (name[2] == 'd' &&
7070 if (name[2] == 'm' &&
7080 if (name[2] == 'i' &&
7090 if (name[2] == 'e' &&
7100 if (name[2] == 'l' &&
7110 if (name[2] == 'a' &&
7120 if (name[2] == 'u' &&
7134 if (name[1] == 'i' &&
7148 if (name[2] == 'a' &&
7161 if (name[3] == 'e' &&
7196 if (name[2] == 'i' &&
7213 if (name[2] == 'i' &&
7223 if (name[2] == 'i' &&
7240 case 6: /* 33 tokens of length 6 */
7244 if (name[1] == 'c' &&
7259 if (name[2] == 'l' &&
7270 if (name[2] == 'r' &&
7285 if (name[1] == 'e' &&
7300 if (name[2] == 's' &&
7305 if(ckWARN_d(WARN_SYNTAX))
7306 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7312 if (name[2] == 'i' &&
7330 if (name[2] == 'l' &&
7341 if (name[2] == 'r' &&
7356 if (name[1] == 'm' &&
7371 if (name[2] == 'n' &&
7382 if (name[2] == 's' &&
7397 if (name[1] == 's' &&
7403 if (name[4] == 't' &&
7412 if (name[4] == 'e' &&
7421 if (name[4] == 'c' &&
7430 if (name[4] == 'n' &&
7446 if (name[1] == 'r' &&
7464 if (name[3] == 'a' &&
7474 if (name[3] == 'u' &&
7488 if (name[2] == 'n' &&
7506 if (name[2] == 'a' &&
7520 if (name[3] == 'e' &&
7533 if (name[4] == 't' &&
7542 if (name[4] == 'e' &&
7564 if (name[4] == 't' &&
7573 if (name[4] == 'e' &&
7589 if (name[2] == 'c' &&
7600 if (name[2] == 'l' &&
7611 if (name[2] == 'b' &&
7622 if (name[2] == 's' &&
7645 if (name[4] == 's' &&
7654 if (name[4] == 'n' &&
7667 if (name[3] == 'a' &&
7684 if (name[1] == 'a' &&
7699 case 7: /* 29 tokens of length 7 */
7703 if (name[1] == 'E' &&
7716 if (name[1] == '_' &&
7729 if (name[1] == 'i' &&
7736 return -KEY_binmode;
7742 if (name[1] == 'o' &&
7749 return -KEY_connect;
7758 if (name[2] == 'm' &&
7764 return -KEY_dbmopen;
7775 if (name[4] == 'u' &&
7779 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7785 if (name[4] == 'n' &&
7806 if (name[1] == 'o' &&
7819 if (name[1] == 'e' &&
7826 if (name[5] == 'r' &&
7829 return -KEY_getpgrp;
7835 if (name[5] == 'i' &&
7838 return -KEY_getppid;
7851 if (name[1] == 'c' &&
7858 return -KEY_lcfirst;
7864 if (name[1] == 'p' &&
7871 return -KEY_opendir;
7877 if (name[1] == 'a' &&
7895 if (name[3] == 'd' &&
7900 return -KEY_readdir;
7906 if (name[3] == 'u' &&
7917 if (name[3] == 'e' &&
7922 return -KEY_reverse;
7941 if (name[3] == 'k' &&
7946 return -KEY_seekdir;
7952 if (name[3] == 'p' &&
7957 return -KEY_setpgrp;
7967 if (name[2] == 'm' &&
7973 return -KEY_shmread;
7979 if (name[2] == 'r' &&
7985 return -KEY_sprintf;
7994 if (name[3] == 'l' &&
7999 return -KEY_symlink;
8008 if (name[4] == 'a' &&
8012 return -KEY_syscall;
8018 if (name[4] == 'p' &&
8022 return -KEY_sysopen;
8028 if (name[4] == 'e' &&
8032 return -KEY_sysread;
8038 if (name[4] == 'e' &&
8042 return -KEY_sysseek;
8060 if (name[1] == 'e' &&
8067 return -KEY_telldir;
8076 if (name[2] == 'f' &&
8082 return -KEY_ucfirst;
8088 if (name[2] == 's' &&
8094 return -KEY_unshift;
8104 if (name[1] == 'a' &&
8111 return -KEY_waitpid;
8120 case 8: /* 26 tokens of length 8 */
8124 if (name[1] == 'U' &&
8132 return KEY_AUTOLOAD;
8143 if (name[3] == 'A' &&
8149 return KEY___DATA__;
8155 if (name[3] == 'I' &&
8161 return -KEY___FILE__;
8167 if (name[3] == 'I' &&
8173 return -KEY___LINE__;
8189 if (name[2] == 'o' &&
8196 return -KEY_closedir;
8202 if (name[2] == 'n' &&
8209 return -KEY_continue;
8219 if (name[1] == 'b' &&
8227 return -KEY_dbmclose;
8233 if (name[1] == 'n' &&
8239 if (name[4] == 'r' &&
8244 return -KEY_endgrent;
8250 if (name[4] == 'w' &&
8255 return -KEY_endpwent;
8268 if (name[1] == 'o' &&
8276 return -KEY_formline;
8282 if (name[1] == 'e' &&
8293 if (name[6] == 'n' &&
8296 return -KEY_getgrent;
8302 if (name[6] == 'i' &&
8305 return -KEY_getgrgid;
8311 if (name[6] == 'a' &&
8314 return -KEY_getgrnam;
8327 if (name[4] == 'o' &&
8332 return -KEY_getlogin;
8343 if (name[6] == 'n' &&
8346 return -KEY_getpwent;
8352 if (name[6] == 'a' &&
8355 return -KEY_getpwnam;
8361 if (name[6] == 'i' &&
8364 return -KEY_getpwuid;
8384 if (name[1] == 'e' &&
8391 if (name[5] == 'i' &&
8398 return -KEY_readline;
8403 return -KEY_readlink;
8414 if (name[5] == 'i' &&
8418 return -KEY_readpipe;
8439 if (name[4] == 'r' &&
8444 return -KEY_setgrent;
8450 if (name[4] == 'w' &&
8455 return -KEY_setpwent;
8471 if (name[3] == 'w' &&
8477 return -KEY_shmwrite;
8483 if (name[3] == 't' &&
8489 return -KEY_shutdown;
8499 if (name[2] == 's' &&
8506 return -KEY_syswrite;
8516 if (name[1] == 'r' &&
8524 return -KEY_truncate;
8533 case 9: /* 8 tokens of length 9 */
8537 if (name[1] == 'n' &&
8546 return -KEY_endnetent;
8552 if (name[1] == 'e' &&
8561 return -KEY_getnetent;
8567 if (name[1] == 'o' &&
8576 return -KEY_localtime;
8582 if (name[1] == 'r' &&
8591 return KEY_prototype;
8597 if (name[1] == 'u' &&
8606 return -KEY_quotemeta;
8612 if (name[1] == 'e' &&
8621 return -KEY_rewinddir;
8627 if (name[1] == 'e' &&
8636 return -KEY_setnetent;
8642 if (name[1] == 'a' &&
8651 return -KEY_wantarray;
8660 case 10: /* 9 tokens of length 10 */
8664 if (name[1] == 'n' &&
8670 if (name[4] == 'o' &&
8677 return -KEY_endhostent;
8683 if (name[4] == 'e' &&
8690 return -KEY_endservent;
8703 if (name[1] == 'e' &&
8709 if (name[4] == 'o' &&
8716 return -KEY_gethostent;
8725 if (name[5] == 'r' &&
8731 return -KEY_getservent;
8737 if (name[5] == 'c' &&
8743 return -KEY_getsockopt;
8768 if (name[4] == 'o' &&
8775 return -KEY_sethostent;
8784 if (name[5] == 'r' &&
8790 return -KEY_setservent;
8796 if (name[5] == 'c' &&
8802 return -KEY_setsockopt;
8819 if (name[2] == 'c' &&
8828 return -KEY_socketpair;
8841 case 11: /* 8 tokens of length 11 */
8845 if (name[1] == '_' &&
8856 return -KEY___PACKAGE__;
8862 if (name[1] == 'n' &&
8873 return -KEY_endprotoent;
8879 if (name[1] == 'e' &&
8888 if (name[5] == 'e' &&
8895 return -KEY_getpeername;
8904 if (name[6] == 'o' &&
8910 return -KEY_getpriority;
8916 if (name[6] == 't' &&
8922 return -KEY_getprotoent;
8936 if (name[4] == 'o' &&
8944 return -KEY_getsockname;
8957 if (name[1] == 'e' &&
8965 if (name[6] == 'o' &&
8971 return -KEY_setpriority;
8977 if (name[6] == 't' &&
8983 return -KEY_setprotoent;
8999 case 12: /* 2 tokens of length 12 */
9000 if (name[0] == 'g' &&
9012 if (name[9] == 'd' &&
9015 { /* getnetbyaddr */
9016 return -KEY_getnetbyaddr;
9022 if (name[9] == 'a' &&
9025 { /* getnetbyname */
9026 return -KEY_getnetbyname;
9038 case 13: /* 4 tokens of length 13 */
9039 if (name[0] == 'g' &&
9046 if (name[4] == 'o' &&
9055 if (name[10] == 'd' &&
9058 { /* gethostbyaddr */
9059 return -KEY_gethostbyaddr;
9065 if (name[10] == 'a' &&
9068 { /* gethostbyname */
9069 return -KEY_gethostbyname;
9082 if (name[4] == 'e' &&
9091 if (name[10] == 'a' &&
9094 { /* getservbyname */
9095 return -KEY_getservbyname;
9101 if (name[10] == 'o' &&
9104 { /* getservbyport */
9105 return -KEY_getservbyport;
9124 case 14: /* 1 tokens of length 14 */
9125 if (name[0] == 'g' &&
9139 { /* getprotobyname */
9140 return -KEY_getprotobyname;
9145 case 16: /* 1 tokens of length 16 */
9146 if (name[0] == 'g' &&
9162 { /* getprotobynumber */
9163 return -KEY_getprotobynumber;
9177 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9181 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9182 if (ckWARN(WARN_SYNTAX)) {
9184 for (w = s+2; *w && level; w++) {
9191 for (; *w && isSPACE(*w); w++) ;
9192 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9193 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9194 "%s (...) interpreted as function",name);
9197 while (s < PL_bufend && isSPACE(*s))
9201 while (s < PL_bufend && isSPACE(*s))
9203 if (isIDFIRST_lazy_if(s,UTF)) {
9205 while (isALNUM_lazy_if(s,UTF))
9207 while (s < PL_bufend && isSPACE(*s))
9211 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9212 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9216 Perl_croak(aTHX_ "No comma allowed after %s", what);
9221 /* Either returns sv, or mortalizes sv and returns a new SV*.
9222 Best used as sv=new_constant(..., sv, ...).
9223 If s, pv are NULL, calls subroutine with one argument,
9224 and type is used with error messages only. */
9227 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9231 HV * const table = GvHV(PL_hintgv); /* ^H */
9235 const char *why1 = "", *why2 = "", *why3 = "";
9237 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9240 why2 = strEQ(key,"charnames")
9241 ? "(possibly a missing \"use charnames ...\")"
9243 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9244 (type ? type: "undef"), why2);
9246 /* This is convoluted and evil ("goto considered harmful")
9247 * but I do not understand the intricacies of all the different
9248 * failure modes of %^H in here. The goal here is to make
9249 * the most probable error message user-friendly. --jhi */
9254 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9255 (type ? type: "undef"), why1, why2, why3);
9257 yyerror(SvPVX_const(msg));
9261 cvp = hv_fetch(table, key, strlen(key), FALSE);
9262 if (!cvp || !SvOK(*cvp)) {
9265 why3 = "} is not defined";
9268 sv_2mortal(sv); /* Parent created it permanently */
9271 pv = sv_2mortal(newSVpvn(s, len));
9273 typesv = sv_2mortal(newSVpv(type, 0));
9275 typesv = &PL_sv_undef;
9277 PUSHSTACKi(PERLSI_OVERLOAD);
9289 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9293 /* Check the eval first */
9294 if (!PL_in_eval && SvTRUE(ERRSV)) {
9295 sv_catpv(ERRSV, "Propagated");
9296 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9298 res = SvREFCNT_inc(sv);
9302 (void)SvREFCNT_inc(res);
9311 why1 = "Call to &{$^H{";
9313 why3 = "}} did not return a defined value";
9321 /* Returns a NUL terminated string, with the length of the string written to
9325 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9327 register char *d = dest;
9328 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9331 Perl_croak(aTHX_ ident_too_long);
9332 if (isALNUM(*s)) /* UTF handled below */
9334 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9339 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9343 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9344 char *t = s + UTF8SKIP(s);
9345 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9347 if (d + (t - s) > e)
9348 Perl_croak(aTHX_ ident_too_long);
9349 Copy(s, d, t - s, char);
9362 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9366 char *bracket = Nullch;
9372 e = d + destlen - 3; /* two-character token, ending NUL */
9374 while (isDIGIT(*s)) {
9376 Perl_croak(aTHX_ ident_too_long);
9383 Perl_croak(aTHX_ ident_too_long);
9384 if (isALNUM(*s)) /* UTF handled below */
9386 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9391 else if (*s == ':' && s[1] == ':') {
9395 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9396 char *t = s + UTF8SKIP(s);
9397 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9399 if (d + (t - s) > e)
9400 Perl_croak(aTHX_ ident_too_long);
9401 Copy(s, d, t - s, char);
9412 if (PL_lex_state != LEX_NORMAL)
9413 PL_lex_state = LEX_INTERPENDMAYBE;
9416 if (*s == '$' && s[1] &&
9417 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9430 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9435 if (isSPACE(s[-1])) {
9437 const char ch = *s++;
9438 if (!SPACE_OR_TAB(ch)) {
9444 if (isIDFIRST_lazy_if(d,UTF)) {
9448 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9450 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9453 Copy(s, d, e - s, char);
9458 while ((isALNUM(*s) || *s == ':') && d < e)
9461 Perl_croak(aTHX_ ident_too_long);
9464 while (s < send && SPACE_OR_TAB(*s)) s++;
9465 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9466 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9467 const char *brack = *s == '[' ? "[...]" : "{...}";
9468 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9469 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9470 funny, dest, brack, funny, dest, brack);
9473 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9477 /* Handle extended ${^Foo} variables
9478 * 1999-02-27 mjd-perl-patch@plover.com */
9479 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9483 while (isALNUM(*s) && d < e) {
9487 Perl_croak(aTHX_ ident_too_long);
9492 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9493 PL_lex_state = LEX_INTERPEND;
9498 if (PL_lex_state == LEX_NORMAL) {
9499 if (ckWARN(WARN_AMBIGUOUS) &&
9500 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9502 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9503 "Ambiguous use of %c{%s} resolved to %c%s",
9504 funny, dest, funny, dest);
9509 s = bracket; /* let the parser handle it */
9513 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9514 PL_lex_state = LEX_INTERPEND;
9519 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9524 *pmfl |= PMf_GLOBAL;
9526 *pmfl |= PMf_CONTINUE;
9530 *pmfl |= PMf_MULTILINE;
9532 *pmfl |= PMf_SINGLELINE;
9534 *pmfl |= PMf_EXTENDED;
9538 S_scan_pat(pTHX_ char *start, I32 type)
9541 char *s = scan_str(start,FALSE,FALSE);
9544 char * const delimiter = skipspace(start);
9545 Perl_croak(aTHX_ *delimiter == '?'
9546 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9547 : "Search pattern not terminated" );
9550 pm = (PMOP*)newPMOP(type, 0);
9551 if (PL_multi_open == '?')
9552 pm->op_pmflags |= PMf_ONCE;
9554 while (*s && strchr("iomsx", *s))
9555 pmflag(&pm->op_pmflags,*s++);
9558 while (*s && strchr("iogcmsx", *s))
9559 pmflag(&pm->op_pmflags,*s++);
9561 /* issue a warning if /c is specified,but /g is not */
9562 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9563 && ckWARN(WARN_REGEXP))
9565 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9568 pm->op_pmpermflags = pm->op_pmflags;
9570 PL_lex_op = (OP*)pm;
9571 yylval.ival = OP_MATCH;
9576 S_scan_subst(pTHX_ char *start)
9584 yylval.ival = OP_NULL;
9586 s = scan_str(start,FALSE,FALSE);
9589 Perl_croak(aTHX_ "Substitution pattern not terminated");
9591 if (s[-1] == PL_multi_open)
9594 first_start = PL_multi_start;
9595 s = scan_str(s,FALSE,FALSE);
9598 SvREFCNT_dec(PL_lex_stuff);
9599 PL_lex_stuff = Nullsv;
9601 Perl_croak(aTHX_ "Substitution replacement not terminated");
9603 PL_multi_start = first_start; /* so whole substitution is taken together */
9605 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9611 else if (strchr("iogcmsx", *s))
9612 pmflag(&pm->op_pmflags,*s++);
9617 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9618 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9623 PL_sublex_info.super_bufptr = s;
9624 PL_sublex_info.super_bufend = PL_bufend;
9626 pm->op_pmflags |= PMf_EVAL;
9627 repl = newSVpvn("",0);
9629 sv_catpv(repl, es ? "eval " : "do ");
9630 sv_catpvn(repl, "{ ", 2);
9631 sv_catsv(repl, PL_lex_repl);
9632 sv_catpvn(repl, " };", 2);
9634 SvREFCNT_dec(PL_lex_repl);
9638 pm->op_pmpermflags = pm->op_pmflags;
9639 PL_lex_op = (OP*)pm;
9640 yylval.ival = OP_SUBST;
9645 S_scan_trans(pTHX_ char *start)
9654 yylval.ival = OP_NULL;
9656 s = scan_str(start,FALSE,FALSE);
9658 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9659 if (s[-1] == PL_multi_open)
9662 s = scan_str(s,FALSE,FALSE);
9665 SvREFCNT_dec(PL_lex_stuff);
9666 PL_lex_stuff = Nullsv;
9668 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9671 complement = del = squash = 0;
9675 complement = OPpTRANS_COMPLEMENT;
9678 del = OPpTRANS_DELETE;
9681 squash = OPpTRANS_SQUASH;
9690 Newx(tbl, complement&&!del?258:256, short);
9691 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9692 o->op_private &= ~OPpTRANS_ALL;
9693 o->op_private |= del|squash|complement|
9694 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9695 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9698 yylval.ival = OP_TRANS;
9703 S_scan_heredoc(pTHX_ register char *s)
9706 I32 op_type = OP_SCALAR;
9710 const char *found_newline;
9714 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9718 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9721 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9722 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9725 s = delimcpy(d, e, s, PL_bufend, term, &len);
9735 if (!isALNUM_lazy_if(s,UTF))
9736 deprecate_old("bare << to mean <<\"\"");
9737 for (; isALNUM_lazy_if(s,UTF); s++) {
9742 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9743 Perl_croak(aTHX_ "Delimiter for here document is too long");
9746 len = d - PL_tokenbuf;
9747 #ifndef PERL_STRICT_CR
9748 d = strchr(s, '\r');
9750 char * const olds = s;
9752 while (s < PL_bufend) {
9758 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9767 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9771 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9772 herewas = newSVpvn(s,PL_bufend-s);
9776 herewas = newSVpvn(s,found_newline-s);
9778 s += SvCUR(herewas);
9780 tmpstr = NEWSV(87,79);
9781 sv_upgrade(tmpstr, SVt_PVIV);
9784 SvIV_set(tmpstr, -1);
9786 else if (term == '`') {
9787 op_type = OP_BACKTICK;
9788 SvIV_set(tmpstr, '\\');
9792 PL_multi_start = CopLINE(PL_curcop);
9793 PL_multi_open = PL_multi_close = '<';
9794 term = *PL_tokenbuf;
9795 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9796 char *bufptr = PL_sublex_info.super_bufptr;
9797 char *bufend = PL_sublex_info.super_bufend;
9798 char * const olds = s - SvCUR(herewas);
9799 s = strchr(bufptr, '\n');
9803 while (s < bufend &&
9804 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9806 CopLINE_inc(PL_curcop);
9809 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9810 missingterm(PL_tokenbuf);
9812 sv_setpvn(herewas,bufptr,d-bufptr+1);
9813 sv_setpvn(tmpstr,d+1,s-d);
9815 sv_catpvn(herewas,s,bufend-s);
9816 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9823 while (s < PL_bufend &&
9824 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9826 CopLINE_inc(PL_curcop);
9828 if (s >= PL_bufend) {
9829 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9830 missingterm(PL_tokenbuf);
9832 sv_setpvn(tmpstr,d+1,s-d);
9834 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9836 sv_catpvn(herewas,s,PL_bufend-s);
9837 sv_setsv(PL_linestr,herewas);
9838 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9839 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9840 PL_last_lop = PL_last_uni = Nullch;
9843 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9844 while (s >= PL_bufend) { /* multiple line string? */
9846 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9847 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9848 missingterm(PL_tokenbuf);
9850 CopLINE_inc(PL_curcop);
9851 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9852 PL_last_lop = PL_last_uni = Nullch;
9853 #ifndef PERL_STRICT_CR
9854 if (PL_bufend - PL_linestart >= 2) {
9855 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9856 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9858 PL_bufend[-2] = '\n';
9860 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9862 else if (PL_bufend[-1] == '\r')
9863 PL_bufend[-1] = '\n';
9865 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9866 PL_bufend[-1] = '\n';
9868 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9869 SV *sv = NEWSV(88,0);
9871 sv_upgrade(sv, SVt_PVMG);
9872 sv_setsv(sv,PL_linestr);
9875 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9877 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9878 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9879 *(SvPVX(PL_linestr) + off ) = ' ';
9880 sv_catsv(PL_linestr,herewas);
9881 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9882 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9886 sv_catsv(tmpstr,PL_linestr);
9891 PL_multi_end = CopLINE(PL_curcop);
9892 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9893 SvPV_shrink_to_cur(tmpstr);
9895 SvREFCNT_dec(herewas);
9897 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9899 else if (PL_encoding)
9900 sv_recode_to_utf8(tmpstr, PL_encoding);
9902 PL_lex_stuff = tmpstr;
9903 yylval.ival = op_type;
9908 takes: current position in input buffer
9909 returns: new position in input buffer
9910 side-effects: yylval and lex_op are set.
9915 <FH> read from filehandle
9916 <pkg::FH> read from package qualified filehandle
9917 <pkg'FH> read from package qualified filehandle
9918 <$fh> read from filehandle in $fh
9924 S_scan_inputsymbol(pTHX_ char *start)
9926 register char *s = start; /* current position in buffer */
9932 d = PL_tokenbuf; /* start of temp holding space */
9933 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9934 end = strchr(s, '\n');
9937 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9939 /* die if we didn't have space for the contents of the <>,
9940 or if it didn't end, or if we see a newline
9943 if (len >= sizeof PL_tokenbuf)
9944 Perl_croak(aTHX_ "Excessively long <> operator");
9946 Perl_croak(aTHX_ "Unterminated <> operator");
9951 Remember, only scalar variables are interpreted as filehandles by
9952 this code. Anything more complex (e.g., <$fh{$num}>) will be
9953 treated as a glob() call.
9954 This code makes use of the fact that except for the $ at the front,
9955 a scalar variable and a filehandle look the same.
9957 if (*d == '$' && d[1]) d++;
9959 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9960 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9963 /* If we've tried to read what we allow filehandles to look like, and
9964 there's still text left, then it must be a glob() and not a getline.
9965 Use scan_str to pull out the stuff between the <> and treat it
9966 as nothing more than a string.
9969 if (d - PL_tokenbuf != len) {
9970 yylval.ival = OP_GLOB;
9972 s = scan_str(start,FALSE,FALSE);
9974 Perl_croak(aTHX_ "Glob not terminated");
9978 bool readline_overriden = FALSE;
9979 GV *gv_readline = Nullgv;
9981 /* we're in a filehandle read situation */
9984 /* turn <> into <ARGV> */
9986 Copy("ARGV",d,5,char);
9988 /* Check whether readline() is overriden */
9989 if (((gv_readline = gv_fetchpv("readline", 0, SVt_PVCV))
9990 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9992 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9993 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9994 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9995 readline_overriden = TRUE;
9997 /* if <$fh>, create the ops to turn the variable into a
10003 /* try to find it in the pad for this block, otherwise find
10004 add symbol table ops
10006 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10007 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
10008 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
10009 HEK *stashname = HvNAME_HEK(stash);
10010 SV *sym = sv_2mortal(newSVhek(stashname));
10011 sv_catpvn(sym, "::", 2);
10012 sv_catpv(sym, d+1);
10017 OP *o = newOP(OP_PADSV, 0);
10019 PL_lex_op = readline_overriden
10020 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10021 append_elem(OP_LIST, o,
10022 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10023 : (OP*)newUNOP(OP_READLINE, 0, o);
10032 ? (GV_ADDMULTI | GV_ADDINEVAL)
10035 PL_lex_op = readline_overriden
10036 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10037 append_elem(OP_LIST,
10038 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10039 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10040 : (OP*)newUNOP(OP_READLINE, 0,
10041 newUNOP(OP_RV2SV, 0,
10042 newGVOP(OP_GV, 0, gv)));
10044 if (!readline_overriden)
10045 PL_lex_op->op_flags |= OPf_SPECIAL;
10046 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10047 yylval.ival = OP_NULL;
10050 /* If it's none of the above, it must be a literal filehandle
10051 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10053 GV *gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10054 PL_lex_op = readline_overriden
10055 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10056 append_elem(OP_LIST,
10057 newGVOP(OP_GV, 0, gv),
10058 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10059 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10060 yylval.ival = OP_NULL;
10069 takes: start position in buffer
10070 keep_quoted preserve \ on the embedded delimiter(s)
10071 keep_delims preserve the delimiters around the string
10072 returns: position to continue reading from buffer
10073 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10074 updates the read buffer.
10076 This subroutine pulls a string out of the input. It is called for:
10077 q single quotes q(literal text)
10078 ' single quotes 'literal text'
10079 qq double quotes qq(interpolate $here please)
10080 " double quotes "interpolate $here please"
10081 qx backticks qx(/bin/ls -l)
10082 ` backticks `/bin/ls -l`
10083 qw quote words @EXPORT_OK = qw( func() $spam )
10084 m// regexp match m/this/
10085 s/// regexp substitute s/this/that/
10086 tr/// string transliterate tr/this/that/
10087 y/// string transliterate y/this/that/
10088 ($*@) sub prototypes sub foo ($)
10089 (stuff) sub attr parameters sub foo : attr(stuff)
10090 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10092 In most of these cases (all but <>, patterns and transliterate)
10093 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10094 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10095 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10098 It skips whitespace before the string starts, and treats the first
10099 character as the delimiter. If the delimiter is one of ([{< then
10100 the corresponding "close" character )]}> is used as the closing
10101 delimiter. It allows quoting of delimiters, and if the string has
10102 balanced delimiters ([{<>}]) it allows nesting.
10104 On success, the SV with the resulting string is put into lex_stuff or,
10105 if that is already non-NULL, into lex_repl. The second case occurs only
10106 when parsing the RHS of the special constructs s/// and tr/// (y///).
10107 For convenience, the terminating delimiter character is stuffed into
10112 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10114 SV *sv; /* scalar value: string */
10115 char *tmps; /* temp string, used for delimiter matching */
10116 register char *s = start; /* current position in the buffer */
10117 register char term; /* terminating character */
10118 register char *to; /* current position in the sv's data */
10119 I32 brackets = 1; /* bracket nesting level */
10120 bool has_utf8 = FALSE; /* is there any utf8 content? */
10121 I32 termcode; /* terminating char. code */
10122 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10123 STRLEN termlen; /* length of terminating string */
10124 char *last = NULL; /* last position for nesting bracket */
10126 /* skip space before the delimiter */
10130 /* mark where we are, in case we need to report errors */
10133 /* after skipping whitespace, the next character is the terminator */
10136 termcode = termstr[0] = term;
10140 termcode = utf8_to_uvchr((U8*)s, &termlen);
10141 Copy(s, termstr, termlen, U8);
10142 if (!UTF8_IS_INVARIANT(term))
10146 /* mark where we are */
10147 PL_multi_start = CopLINE(PL_curcop);
10148 PL_multi_open = term;
10150 /* find corresponding closing delimiter */
10151 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10152 termcode = termstr[0] = term = tmps[5];
10154 PL_multi_close = term;
10156 /* create a new SV to hold the contents. 87 is leak category, I'm
10157 assuming. 79 is the SV's initial length. What a random number. */
10159 sv_upgrade(sv, SVt_PVIV);
10160 SvIV_set(sv, termcode);
10161 (void)SvPOK_only(sv); /* validate pointer */
10163 /* move past delimiter and try to read a complete string */
10165 sv_catpvn(sv, s, termlen);
10168 if (PL_encoding && !UTF) {
10172 int offset = s - SvPVX_const(PL_linestr);
10173 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10174 &offset, (char*)termstr, termlen);
10175 const char *ns = SvPVX_const(PL_linestr) + offset;
10176 char *svlast = SvEND(sv) - 1;
10178 for (; s < ns; s++) {
10179 if (*s == '\n' && !PL_rsfp)
10180 CopLINE_inc(PL_curcop);
10183 goto read_more_line;
10185 /* handle quoted delimiters */
10186 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10188 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10190 if ((svlast-1 - t) % 2) {
10191 if (!keep_quoted) {
10192 *(svlast-1) = term;
10194 SvCUR_set(sv, SvCUR(sv) - 1);
10199 if (PL_multi_open == PL_multi_close) {
10207 for (t = w = last; t < svlast; w++, t++) {
10208 /* At here, all closes are "was quoted" one,
10209 so we don't check PL_multi_close. */
10211 if (!keep_quoted && *(t+1) == PL_multi_open)
10216 else if (*t == PL_multi_open)
10224 SvCUR_set(sv, w - SvPVX_const(sv));
10227 if (--brackets <= 0)
10232 if (!keep_delims) {
10233 SvCUR_set(sv, SvCUR(sv) - 1);
10239 /* extend sv if need be */
10240 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10241 /* set 'to' to the next character in the sv's string */
10242 to = SvPVX(sv)+SvCUR(sv);
10244 /* if open delimiter is the close delimiter read unbridle */
10245 if (PL_multi_open == PL_multi_close) {
10246 for (; s < PL_bufend; s++,to++) {
10247 /* embedded newlines increment the current line number */
10248 if (*s == '\n' && !PL_rsfp)
10249 CopLINE_inc(PL_curcop);
10250 /* handle quoted delimiters */
10251 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10252 if (!keep_quoted && s[1] == term)
10254 /* any other quotes are simply copied straight through */
10258 /* terminate when run out of buffer (the for() condition), or
10259 have found the terminator */
10260 else if (*s == term) {
10263 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10266 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10272 /* if the terminator isn't the same as the start character (e.g.,
10273 matched brackets), we have to allow more in the quoting, and
10274 be prepared for nested brackets.
10277 /* read until we run out of string, or we find the terminator */
10278 for (; s < PL_bufend; s++,to++) {
10279 /* embedded newlines increment the line count */
10280 if (*s == '\n' && !PL_rsfp)
10281 CopLINE_inc(PL_curcop);
10282 /* backslashes can escape the open or closing characters */
10283 if (*s == '\\' && s+1 < PL_bufend) {
10284 if (!keep_quoted &&
10285 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10290 /* allow nested opens and closes */
10291 else if (*s == PL_multi_close && --brackets <= 0)
10293 else if (*s == PL_multi_open)
10295 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10300 /* terminate the copied string and update the sv's end-of-string */
10302 SvCUR_set(sv, to - SvPVX_const(sv));
10305 * this next chunk reads more into the buffer if we're not done yet
10309 break; /* handle case where we are done yet :-) */
10311 #ifndef PERL_STRICT_CR
10312 if (to - SvPVX_const(sv) >= 2) {
10313 if ((to[-2] == '\r' && to[-1] == '\n') ||
10314 (to[-2] == '\n' && to[-1] == '\r'))
10318 SvCUR_set(sv, to - SvPVX_const(sv));
10320 else if (to[-1] == '\r')
10323 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10328 /* if we're out of file, or a read fails, bail and reset the current
10329 line marker so we can report where the unterminated string began
10332 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10334 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10337 /* we read a line, so increment our line counter */
10338 CopLINE_inc(PL_curcop);
10340 /* update debugger info */
10341 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10342 SV * const sv = NEWSV(88,0);
10344 sv_upgrade(sv, SVt_PVMG);
10345 sv_setsv(sv,PL_linestr);
10346 (void)SvIOK_on(sv);
10348 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10351 /* having changed the buffer, we must update PL_bufend */
10352 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10353 PL_last_lop = PL_last_uni = Nullch;
10356 /* at this point, we have successfully read the delimited string */
10358 if (!PL_encoding || UTF) {
10360 sv_catpvn(sv, s, termlen);
10363 if (has_utf8 || PL_encoding)
10366 PL_multi_end = CopLINE(PL_curcop);
10368 /* if we allocated too much space, give some back */
10369 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10370 SvLEN_set(sv, SvCUR(sv) + 1);
10371 SvPV_renew(sv, SvLEN(sv));
10374 /* decide whether this is the first or second quoted string we've read
10387 takes: pointer to position in buffer
10388 returns: pointer to new position in buffer
10389 side-effects: builds ops for the constant in yylval.op
10391 Read a number in any of the formats that Perl accepts:
10393 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10394 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10397 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10399 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10402 If it reads a number without a decimal point or an exponent, it will
10403 try converting the number to an integer and see if it can do so
10404 without loss of precision.
10408 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10410 register const char *s = start; /* current position in buffer */
10411 register char *d; /* destination in temp buffer */
10412 register char *e; /* end of temp buffer */
10413 NV nv; /* number read, as a double */
10414 SV *sv = Nullsv; /* place to put the converted number */
10415 bool floatit; /* boolean: int or float? */
10416 const char *lastub = NULL; /* position of last underbar */
10417 static char const number_too_long[] = "Number too long";
10419 /* We use the first character to decide what type of number this is */
10423 Perl_croak(aTHX_ "panic: scan_num");
10425 /* if it starts with a 0, it could be an octal number, a decimal in
10426 0.13 disguise, or a hexadecimal number, or a binary number. */
10430 u holds the "number so far"
10431 shift the power of 2 of the base
10432 (hex == 4, octal == 3, binary == 1)
10433 overflowed was the number more than we can hold?
10435 Shift is used when we add a digit. It also serves as an "are
10436 we in octal/hex/binary?" indicator to disallow hex characters
10437 when in octal mode.
10442 bool overflowed = FALSE;
10443 bool just_zero = TRUE; /* just plain 0 or binary number? */
10444 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10445 static const char* const bases[5] =
10446 { "", "binary", "", "octal", "hexadecimal" };
10447 static const char* const Bases[5] =
10448 { "", "Binary", "", "Octal", "Hexadecimal" };
10449 static const char* const maxima[5] =
10451 "0b11111111111111111111111111111111",
10455 const char *base, *Base, *max;
10457 /* check for hex */
10462 } else if (s[1] == 'b') {
10467 /* check for a decimal in disguise */
10468 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10470 /* so it must be octal */
10477 if (ckWARN(WARN_SYNTAX))
10478 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10479 "Misplaced _ in number");
10483 base = bases[shift];
10484 Base = Bases[shift];
10485 max = maxima[shift];
10487 /* read the rest of the number */
10489 /* x is used in the overflow test,
10490 b is the digit we're adding on. */
10495 /* if we don't mention it, we're done */
10499 /* _ are ignored -- but warned about if consecutive */
10501 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10502 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10503 "Misplaced _ in number");
10507 /* 8 and 9 are not octal */
10508 case '8': case '9':
10510 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10514 case '2': case '3': case '4':
10515 case '5': case '6': case '7':
10517 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10520 case '0': case '1':
10521 b = *s++ & 15; /* ASCII digit -> value of digit */
10525 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10526 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10527 /* make sure they said 0x */
10530 b = (*s++ & 7) + 9;
10532 /* Prepare to put the digit we have onto the end
10533 of the number so far. We check for overflows.
10539 x = u << shift; /* make room for the digit */
10541 if ((x >> shift) != u
10542 && !(PL_hints & HINT_NEW_BINARY)) {
10545 if (ckWARN_d(WARN_OVERFLOW))
10546 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10547 "Integer overflow in %s number",
10550 u = x | b; /* add the digit to the end */
10553 n *= nvshift[shift];
10554 /* If an NV has not enough bits in its
10555 * mantissa to represent an UV this summing of
10556 * small low-order numbers is a waste of time
10557 * (because the NV cannot preserve the
10558 * low-order bits anyway): we could just
10559 * remember when did we overflow and in the
10560 * end just multiply n by the right
10568 /* if we get here, we had success: make a scalar value from
10573 /* final misplaced underbar check */
10574 if (s[-1] == '_') {
10575 if (ckWARN(WARN_SYNTAX))
10576 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10581 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10582 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10583 "%s number > %s non-portable",
10589 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10590 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10591 "%s number > %s non-portable",
10596 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10597 sv = new_constant(start, s - start, "integer",
10599 else if (PL_hints & HINT_NEW_BINARY)
10600 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10605 handle decimal numbers.
10606 we're also sent here when we read a 0 as the first digit
10608 case '1': case '2': case '3': case '4': case '5':
10609 case '6': case '7': case '8': case '9': case '.':
10612 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10615 /* read next group of digits and _ and copy into d */
10616 while (isDIGIT(*s) || *s == '_') {
10617 /* skip underscores, checking for misplaced ones
10621 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10622 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10623 "Misplaced _ in number");
10627 /* check for end of fixed-length buffer */
10629 Perl_croak(aTHX_ number_too_long);
10630 /* if we're ok, copy the character */
10635 /* final misplaced underbar check */
10636 if (lastub && s == lastub + 1) {
10637 if (ckWARN(WARN_SYNTAX))
10638 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10641 /* read a decimal portion if there is one. avoid
10642 3..5 being interpreted as the number 3. followed
10645 if (*s == '.' && s[1] != '.') {
10650 if (ckWARN(WARN_SYNTAX))
10651 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10652 "Misplaced _ in number");
10656 /* copy, ignoring underbars, until we run out of digits.
10658 for (; isDIGIT(*s) || *s == '_'; s++) {
10659 /* fixed length buffer check */
10661 Perl_croak(aTHX_ number_too_long);
10663 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10664 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10665 "Misplaced _ in number");
10671 /* fractional part ending in underbar? */
10672 if (s[-1] == '_') {
10673 if (ckWARN(WARN_SYNTAX))
10674 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10675 "Misplaced _ in number");
10677 if (*s == '.' && isDIGIT(s[1])) {
10678 /* oops, it's really a v-string, but without the "v" */
10684 /* read exponent part, if present */
10685 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10689 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10690 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10692 /* stray preinitial _ */
10694 if (ckWARN(WARN_SYNTAX))
10695 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10696 "Misplaced _ in number");
10700 /* allow positive or negative exponent */
10701 if (*s == '+' || *s == '-')
10704 /* stray initial _ */
10706 if (ckWARN(WARN_SYNTAX))
10707 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10708 "Misplaced _ in number");
10712 /* read digits of exponent */
10713 while (isDIGIT(*s) || *s == '_') {
10716 Perl_croak(aTHX_ number_too_long);
10720 if (((lastub && s == lastub + 1) ||
10721 (!isDIGIT(s[1]) && s[1] != '_'))
10722 && ckWARN(WARN_SYNTAX))
10723 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10724 "Misplaced _ in number");
10731 /* make an sv from the string */
10735 We try to do an integer conversion first if no characters
10736 indicating "float" have been found.
10741 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10743 if (flags == IS_NUMBER_IN_UV) {
10745 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10748 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10749 if (uv <= (UV) IV_MIN)
10750 sv_setiv(sv, -(IV)uv);
10757 /* terminate the string */
10759 nv = Atof(PL_tokenbuf);
10763 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10764 (PL_hints & HINT_NEW_INTEGER) )
10765 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10766 (floatit ? "float" : "integer"),
10770 /* if it starts with a v, it could be a v-string */
10773 sv = NEWSV(92,5); /* preallocate storage space */
10774 s = scan_vstring(s,sv);
10778 /* make the op for the constant and return */
10781 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10783 lvalp->opval = Nullop;
10789 S_scan_formline(pTHX_ register char *s)
10791 register char *eol;
10793 SV *stuff = newSVpvn("",0);
10794 bool needargs = FALSE;
10795 bool eofmt = FALSE;
10797 while (!needargs) {
10799 #ifdef PERL_STRICT_CR
10800 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10802 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10804 if (*t == '\n' || t == PL_bufend) {
10809 if (PL_in_eval && !PL_rsfp) {
10810 eol = (char *) memchr(s,'\n',PL_bufend-s);
10815 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10817 for (t = s; t < eol; t++) {
10818 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10820 goto enough; /* ~~ must be first line in formline */
10822 if (*t == '@' || *t == '^')
10826 sv_catpvn(stuff, s, eol-s);
10827 #ifndef PERL_STRICT_CR
10828 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10829 char *end = SvPVX(stuff) + SvCUR(stuff);
10832 SvCUR_set(stuff, SvCUR(stuff) - 1);
10841 s = filter_gets(PL_linestr, PL_rsfp, 0);
10842 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10843 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10844 PL_last_lop = PL_last_uni = Nullch;
10853 if (SvCUR(stuff)) {
10856 PL_lex_state = LEX_NORMAL;
10857 PL_nextval[PL_nexttoke].ival = 0;
10861 PL_lex_state = LEX_FORMLINE;
10863 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10865 else if (PL_encoding)
10866 sv_recode_to_utf8(stuff, PL_encoding);
10868 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10870 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10874 SvREFCNT_dec(stuff);
10876 PL_lex_formbrack = 0;
10887 PL_cshlen = strlen(PL_cshname);
10892 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10894 const I32 oldsavestack_ix = PL_savestack_ix;
10895 CV* outsidecv = PL_compcv;
10898 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10900 SAVEI32(PL_subline);
10901 save_item(PL_subname);
10902 SAVESPTR(PL_compcv);
10904 PL_compcv = (CV*)NEWSV(1104,0);
10905 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10906 CvFLAGS(PL_compcv) |= flags;
10908 PL_subline = CopLINE(PL_curcop);
10909 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10910 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10911 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10913 return oldsavestack_ix;
10917 #pragma segment Perl_yylex
10920 Perl_yywarn(pTHX_ const char *s)
10922 PL_in_eval |= EVAL_WARNONLY;
10924 PL_in_eval &= ~EVAL_WARNONLY;
10929 Perl_yyerror(pTHX_ const char *s)
10931 const char *where = NULL;
10932 const char *context = NULL;
10936 if (!yychar || (yychar == ';' && !PL_rsfp))
10938 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10939 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10940 PL_oldbufptr != PL_bufptr) {
10943 The code below is removed for NetWare because it abends/crashes on NetWare
10944 when the script has error such as not having the closing quotes like:
10945 if ($var eq "value)
10946 Checking of white spaces is anyway done in NetWare code.
10949 while (isSPACE(*PL_oldoldbufptr))
10952 context = PL_oldoldbufptr;
10953 contlen = PL_bufptr - PL_oldoldbufptr;
10955 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10956 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10959 The code below is removed for NetWare because it abends/crashes on NetWare
10960 when the script has error such as not having the closing quotes like:
10961 if ($var eq "value)
10962 Checking of white spaces is anyway done in NetWare code.
10965 while (isSPACE(*PL_oldbufptr))
10968 context = PL_oldbufptr;
10969 contlen = PL_bufptr - PL_oldbufptr;
10971 else if (yychar > 255)
10972 where = "next token ???";
10973 else if (yychar == -2) { /* YYEMPTY */
10974 if (PL_lex_state == LEX_NORMAL ||
10975 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10976 where = "at end of line";
10977 else if (PL_lex_inpat)
10978 where = "within pattern";
10980 where = "within string";
10983 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
10985 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10986 else if (isPRINT_LC(yychar))
10987 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10989 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10990 where = SvPVX_const(where_sv);
10992 msg = sv_2mortal(newSVpv(s, 0));
10993 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10994 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10996 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10998 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10999 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11000 Perl_sv_catpvf(aTHX_ msg,
11001 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11002 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11005 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11006 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11009 if (PL_error_count >= 10) {
11010 if (PL_in_eval && SvCUR(ERRSV))
11011 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11012 ERRSV, OutCopFILE(PL_curcop));
11014 Perl_croak(aTHX_ "%s has too many errors.\n",
11015 OutCopFILE(PL_curcop));
11018 PL_in_my_stash = NULL;
11022 #pragma segment Main
11026 S_swallow_bom(pTHX_ U8 *s)
11028 const STRLEN slen = SvCUR(PL_linestr);
11031 if (s[1] == 0xFE) {
11032 /* UTF-16 little-endian? (or UTF32-LE?) */
11033 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11034 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11035 #ifndef PERL_NO_UTF16_FILTER
11036 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11039 if (PL_bufend > (char*)s) {
11043 filter_add(utf16rev_textfilter, NULL);
11044 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11045 utf16_to_utf8_reversed(s, news,
11046 PL_bufend - (char*)s - 1,
11048 sv_setpvn(PL_linestr, (const char*)news, newlen);
11050 SvUTF8_on(PL_linestr);
11051 s = (U8*)SvPVX(PL_linestr);
11052 PL_bufend = SvPVX(PL_linestr) + newlen;
11055 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11060 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11061 #ifndef PERL_NO_UTF16_FILTER
11062 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11065 if (PL_bufend > (char *)s) {
11069 filter_add(utf16_textfilter, NULL);
11070 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11071 utf16_to_utf8(s, news,
11072 PL_bufend - (char*)s,
11074 sv_setpvn(PL_linestr, (const char*)news, newlen);
11076 SvUTF8_on(PL_linestr);
11077 s = (U8*)SvPVX(PL_linestr);
11078 PL_bufend = SvPVX(PL_linestr) + newlen;
11081 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11086 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11087 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11088 s += 3; /* UTF-8 */
11094 if (s[2] == 0xFE && s[3] == 0xFF) {
11095 /* UTF-32 big-endian */
11096 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11099 else if (s[2] == 0 && s[3] != 0) {
11102 * are a good indicator of UTF-16BE. */
11103 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11108 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11111 * are a good indicator of UTF-16LE. */
11112 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11121 * Restore a source filter.
11125 restore_rsfp(pTHX_ void *f)
11127 PerlIO * const fp = (PerlIO*)f;
11129 if (PL_rsfp == PerlIO_stdin())
11130 PerlIO_clearerr(PL_rsfp);
11131 else if (PL_rsfp && (PL_rsfp != fp))
11132 PerlIO_close(PL_rsfp);
11136 #ifndef PERL_NO_UTF16_FILTER
11138 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11140 const STRLEN old = SvCUR(sv);
11141 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11142 DEBUG_P(PerlIO_printf(Perl_debug_log,
11143 "utf16_textfilter(%p): %d %d (%d)\n",
11144 utf16_textfilter, idx, maxlen, (int) count));
11148 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11149 Copy(SvPVX_const(sv), tmps, old, char);
11150 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11151 SvCUR(sv) - old, &newlen);
11152 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11154 DEBUG_P({sv_dump(sv);});
11159 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11161 const STRLEN old = SvCUR(sv);
11162 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11163 DEBUG_P(PerlIO_printf(Perl_debug_log,
11164 "utf16rev_textfilter(%p): %d %d (%d)\n",
11165 utf16rev_textfilter, idx, maxlen, (int) count));
11169 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11170 Copy(SvPVX_const(sv), tmps, old, char);
11171 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11172 SvCUR(sv) - old, &newlen);
11173 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11175 DEBUG_P({ sv_dump(sv); });
11181 Returns a pointer to the next character after the parsed
11182 vstring, as well as updating the passed in sv.
11184 Function must be called like
11187 s = scan_vstring(s,sv);
11189 The sv should already be large enough to store the vstring
11190 passed in, for performance reasons.
11195 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11197 const char *pos = s;
11198 const char *start = s;
11199 if (*pos == 'v') pos++; /* get past 'v' */
11200 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11202 if ( *pos != '.') {
11203 /* this may not be a v-string if followed by => */
11204 const char *next = pos;
11205 while (next < PL_bufend && isSPACE(*next))
11207 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11208 /* return string not v-string */
11209 sv_setpvn(sv,(char *)s,pos-s);
11210 return (char *)pos;
11214 if (!isALPHA(*pos)) {
11215 U8 tmpbuf[UTF8_MAXBYTES+1];
11217 if (*s == 'v') s++; /* get past 'v' */
11219 sv_setpvn(sv, "", 0);
11225 /* this is atoi() that tolerates underscores */
11226 const char *end = pos;
11228 while (--end >= s) {
11233 rev += (*end - '0') * mult;
11235 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11236 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11237 "Integer overflow in decimal number");
11241 if (rev > 0x7FFFFFFF)
11242 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11244 /* Append native character for the rev point */
11245 tmpend = uvchr_to_utf8(tmpbuf, rev);
11246 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11247 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11249 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11255 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11259 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11267 * c-indentation-style: bsd
11268 * c-basic-offset: 4
11269 * indent-tabs-mode: t
11272 * ex: set ts=8 sts=4 sw=4 noet: