3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
40 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
42 # define CURMAD(slot,sv)
43 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
46 #define XFAKEBRACK 128
49 #ifdef USE_UTF8_SCRIPTS
50 # define UTF (!IN_BYTES)
52 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
55 /* In variables named $^X, these are the legal values for X.
56 * 1999-02-27 mjd-perl-patch@plover.com */
57 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
59 /* On MacOS, respect nonbreaking spaces */
60 #ifdef MACOS_TRADITIONAL
61 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
63 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
66 /* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
68 * can get by with a single comparison (if the compiler is smart enough).
71 /* #define LEX_NOTPARSING 11 is done in perl.h. */
73 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
74 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
79 /* at end of code, eg "$x" followed by: */
80 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
83 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85 #define LEX_INTERPCONST 2 /* NOT USED */
86 #define LEX_FORMLINE 1 /* expecting a format line */
87 #define LEX_KNOWNEXT 0 /* next token known; just return it */
91 static const char* const lex_state_names[] = {
110 #include "keywords.h"
112 /* CLINE is a macro that ensures PL_copline has a sane value */
117 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
120 # define SKIPSPACE0(s) skipspace0(s)
121 # define SKIPSPACE1(s) skipspace1(s)
122 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123 # define PEEKSPACE(s) skipspace2(s,0)
125 # define SKIPSPACE0(s) skipspace(s)
126 # define SKIPSPACE1(s) skipspace(s)
127 # define SKIPSPACE2(s,tsv) skipspace(s)
128 # define PEEKSPACE(s) skipspace(s)
132 * Convenience functions to return different tokens and prime the
133 * lexer for the next token. They all take an argument.
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
145 * FUN1 : not used, except for not, which isn't a UNIOP
146 * BOop : bitwise or or xor
148 * SHop : shift operator
149 * PWop : power operator
150 * PMop : pattern-matching operator
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
154 * Rop : relational operator <= != gt
156 * Also see LOP and lop() below.
159 #ifdef DEBUGGING /* Serve -DT. */
160 # define REPORT(retval) tokereport((I32)retval)
162 # define REPORT(retval) (retval)
165 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
186 /* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
191 #define UNI2(f,x) { \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
198 return REPORT( (int)FUNC1 ); \
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
202 #define UNI(f) UNI2(f,XTERM)
203 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
205 #define UNIBRACK(f) { \
208 PL_last_uni = PL_oldbufptr; \
210 return REPORT( (int)FUNC1 ); \
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
215 /* grandfather return to old style */
216 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
220 /* how to interpret the yylval associated with the token */
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
230 static struct debug_tokens {
232 enum token_type type;
234 } const debug_tokens[] =
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
301 { 0, TOKENTYPE_NONE, 0 }
304 /* dump the returned token in rv, plus any optional arg in yylval */
307 S_tokereport(pTHX_ I32 rv)
311 const char *name = NULL;
312 enum token_type type = TOKENTYPE_NONE;
313 const struct debug_tokens *p;
314 SV* const report = newSVpvs("<== ");
316 for (p = debug_tokens; p->token; p++) {
317 if (p->token == (int)rv) {
324 Perl_sv_catpv(aTHX_ report, name);
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
328 sv_catpvs(report, "EOF");
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
345 case TOKENTYPE_OPVAL:
347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
348 PL_op_name[yylval.opval->op_type]);
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
356 sv_catpvs(report, "(opval=null)");
359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
365 /* print the buffer with suitable escapes */
368 S_printbuf(pTHX_ const char* fmt, const char* s)
370 SV* const tmp = newSVpvs("");
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
385 S_ao(pTHX_ int toketype)
388 if (*PL_bufptr == '=') {
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
415 S_no_op(pTHX_ const char *what, char *s)
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
426 if (ckWARN_d(WARN_SYNTAX)) {
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
432 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
450 * Complain about missing quote/regexp/heredoc terminator.
451 * If it's called with NULL then it cauterizes the line buffer.
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
458 S_missingterm(pTHX_ char *s)
464 char * const nl = strrchr(s,'\n');
470 iscntrl(PL_multi_close)
472 PL_multi_close < 32 || PL_multi_close == 127
476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
481 *tmpbuf = (char)PL_multi_close;
485 q = strchr(s,'"') ? '\'' : '"';
486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
489 #define FEATURE_IS_ENABLED(name) \
490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
497 S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
500 HV * const hinthv = GvHV(PL_hintgv);
501 char he_name[32] = "feature_";
502 (void) strncpy(&he_name[8], name, 24);
504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
512 Perl_deprecate(pTHX_ const char *s)
514 if (ckWARN(WARN_DEPRECATED))
515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
519 Perl_deprecate_old(pTHX_ const char *s)
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
531 "Use of %s is deprecated", s);
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
539 #ifdef PERL_CR_FILTER
543 register const char *s = SvPVX_const(sv);
544 register const char * const e = s + SvCUR(sv);
545 /* outer loop optimized to do nothing if there are no CR-LFs */
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
552 if (*s == '\r' && s[1] == '\n')
563 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
566 if (count > 0 && !maxlen)
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
579 Perl_lex_start(pTHX_ SV *line)
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
590 SAVEVPTR(PL_lex_inpat);
591 SAVEI32(PL_lex_inwhat);
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
601 SAVEI32(PL_lasttoke);
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
614 SAVEI32(PL_curforce);
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
622 SAVEI32(PL_nexttoke);
625 SAVECOPLINE(PL_curcop);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
639 SAVEI32(PL_sublex_info.sub_inwhat);
640 SAVESPTR(PL_lex_repl);
642 SAVEINT(PL_lex_expect);
644 PL_lex_state = LEX_NORMAL;
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
651 *PL_lex_casestack = '\0';
663 PL_sublex_info.sub_inwhat = 0;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
667 s = SvPV_const(PL_linestr, len);
668 if (!len || s[len-1] != ';') {
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
671 sv_catpvs(PL_linestr, "\n;");
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
676 PL_last_lop = PL_last_uni = NULL;
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
690 PL_doextract = FALSE;
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
697 * increments the current line number in CopLINE(PL_curcop) and checks
698 * to see whether the line starts with a comment of the form
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
704 S_incline(pTHX_ char *s)
712 CopLINE_inc(PL_curcop);
715 while (SPACE_OR_TAB(*s))
717 if (strnEQ(s, "line", 4))
721 if (SPACE_OR_TAB(*s))
725 while (SPACE_OR_TAB(*s))
733 while (SPACE_OR_TAB(*s))
735 if (*s == '"' && (t = strchr(s+1, '"'))) {
740 for (t = s; !isSPACE(*t); t++) ;
743 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
745 if (*e != '\n' && *e != '\0')
746 return; /* false alarm */
752 const char * const cf = CopFILE(PL_curcop);
753 STRLEN tmplen = cf ? strlen(cf) : 0;
754 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
755 /* must copy *{"::_<(eval N)[oldfilename:L]"}
756 * to *{"::_<newfilename"} */
757 char smallbuf[256], smallbuf2[256];
758 char *tmpbuf, *tmpbuf2;
760 STRLEN tmplen2 = strlen(s);
761 if (tmplen + 3 < sizeof smallbuf)
764 Newx(tmpbuf, tmplen + 3, char);
765 if (tmplen2 + 3 < sizeof smallbuf2)
768 Newx(tmpbuf2, tmplen2 + 3, char);
769 tmpbuf[0] = tmpbuf2[0] = '_';
770 tmpbuf[1] = tmpbuf2[1] = '<';
771 memcpy(tmpbuf + 2, cf, ++tmplen);
772 memcpy(tmpbuf2 + 2, s, ++tmplen2);
774 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
776 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
778 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
779 /* adjust ${"::_<newfilename"} to store the new file name */
780 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
781 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
782 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
784 if (tmpbuf != smallbuf) Safefree(tmpbuf);
785 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
788 CopFILE_free(PL_curcop);
789 CopFILE_set(PL_curcop, s);
792 CopLINE_set(PL_curcop, atoi(n)-1);
796 /* skip space before PL_thistoken */
799 S_skipspace0(pTHX_ register char *s)
806 PL_thiswhite = newSVpvn("",0);
807 sv_catsv(PL_thiswhite, PL_skipwhite);
808 sv_free(PL_skipwhite);
811 PL_realtokenstart = s - SvPVX(PL_linestr);
815 /* skip space after PL_thistoken */
818 S_skipspace1(pTHX_ register char *s)
820 const char *start = s;
821 I32 startoff = start - SvPVX(PL_linestr);
826 start = SvPVX(PL_linestr) + startoff;
827 if (!PL_thistoken && PL_realtokenstart >= 0) {
828 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
829 PL_thistoken = newSVpvn(tstart, start - tstart);
831 PL_realtokenstart = -1;
834 PL_nextwhite = newSVpvn("",0);
835 sv_catsv(PL_nextwhite, PL_skipwhite);
836 sv_free(PL_skipwhite);
843 S_skipspace2(pTHX_ register char *s, SV **svp)
846 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
847 I32 startoff = start - SvPVX(PL_linestr);
849 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
850 if (!PL_madskills || !svp)
852 start = SvPVX(PL_linestr) + startoff;
853 if (!PL_thistoken && PL_realtokenstart >= 0) {
854 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
855 PL_thistoken = newSVpvn(tstart, start - tstart);
856 PL_realtokenstart = -1;
860 *svp = newSVpvn("",0);
861 sv_setsv(*svp, PL_skipwhite);
862 sv_free(PL_skipwhite);
872 * Called to gobble the appropriate amount and type of whitespace.
873 * Skips comments as well.
877 S_skipspace(pTHX_ register char *s)
882 int startoff = s - SvPVX(PL_linestr);
885 sv_free(PL_skipwhite);
890 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
891 while (s < PL_bufend && SPACE_OR_TAB(*s))
901 SSize_t oldprevlen, oldoldprevlen;
902 SSize_t oldloplen = 0, oldunilen = 0;
903 while (s < PL_bufend && isSPACE(*s)) {
904 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
909 if (s < PL_bufend && *s == '#') {
910 while (s < PL_bufend && *s != '\n')
914 if (PL_in_eval && !PL_rsfp) {
921 /* only continue to recharge the buffer if we're at the end
922 * of the buffer, we're not reading from a source filter, and
923 * we're in normal lexing mode
925 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
926 PL_lex_state == LEX_FORMLINE)
933 /* try to recharge the buffer */
935 curoff = s - SvPVX(PL_linestr);
938 if ((s = filter_gets(PL_linestr, PL_rsfp,
939 (prevlen = SvCUR(PL_linestr)))) == NULL)
942 if (PL_madskills && curoff != startoff) {
944 PL_skipwhite = newSVpvn("",0);
945 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
949 /* mustn't throw out old stuff yet if madpropping */
950 SvCUR(PL_linestr) = curoff;
951 s = SvPVX(PL_linestr) + curoff;
953 if (curoff && s[-1] == '\n')
957 /* end of file. Add on the -p or -n magic */
958 /* XXX these shouldn't really be added here, can't set PL_faketokens */
962 ";}continue{print or die qq(-p destination: $!\\n);}");
965 ";}continue{print or die qq(-p destination: $!\\n);}");
967 PL_minus_n = PL_minus_p = 0;
969 else if (PL_minus_n) {
971 sv_catpvn(PL_linestr, ";}", 2);
973 sv_setpvn(PL_linestr, ";}", 2);
979 sv_catpvn(PL_linestr,";", 1);
981 sv_setpvn(PL_linestr,";", 1);
984 /* reset variables for next time we lex */
985 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
991 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
992 PL_last_lop = PL_last_uni = NULL;
994 /* Close the filehandle. Could be from -P preprocessor,
995 * STDIN, or a regular file. If we were reading code from
996 * STDIN (because the commandline held no -e or filename)
997 * then we don't close it, we reset it so the code can
998 * read from STDIN too.
1001 if (PL_preprocess && !PL_in_eval)
1002 (void)PerlProc_pclose(PL_rsfp);
1003 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1004 PerlIO_clearerr(PL_rsfp);
1006 (void)PerlIO_close(PL_rsfp);
1011 /* not at end of file, so we only read another line */
1012 /* make corresponding updates to old pointers, for yyerror() */
1013 oldprevlen = PL_oldbufptr - PL_bufend;
1014 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1016 oldunilen = PL_last_uni - PL_bufend;
1018 oldloplen = PL_last_lop - PL_bufend;
1019 PL_linestart = PL_bufptr = s + prevlen;
1020 PL_bufend = s + SvCUR(PL_linestr);
1022 PL_oldbufptr = s + oldprevlen;
1023 PL_oldoldbufptr = s + oldoldprevlen;
1025 PL_last_uni = s + oldunilen;
1027 PL_last_lop = s + oldloplen;
1030 /* debugger active and we're not compiling the debugger code,
1031 * so store the line into the debugger's array of lines
1033 if (PERLDB_LINE && PL_curstash != PL_debstash) {
1034 SV * const sv = newSV(0);
1036 sv_upgrade(sv, SVt_PVMG);
1037 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
1040 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
1048 PL_skipwhite = newSVpvn("",0);
1049 curoff = s - SvPVX(PL_linestr);
1050 if (curoff - startoff)
1051 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1060 * Check the unary operators to ensure there's no ambiguity in how they're
1061 * used. An ambiguous piece of code would be:
1063 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1064 * the +5 is its argument.
1074 if (PL_oldoldbufptr != PL_last_uni)
1076 while (isSPACE(*PL_last_uni))
1078 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++)
1080 if ((t = strchr(s, '(')) && t < PL_bufptr)
1083 if (ckWARN_d(WARN_AMBIGUOUS)){
1084 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1085 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1086 (int)(s - PL_last_uni), PL_last_uni);
1091 * LOP : macro to build a list operator. Its behaviour has been replaced
1092 * with a subroutine, S_lop() for which LOP is just another name.
1095 #define LOP(f,x) return lop(f,x,s)
1099 * Build a list operator (or something that might be one). The rules:
1100 * - if we have a next token, then it's a list operator [why?]
1101 * - if the next thing is an opening paren, then it's a function
1102 * - else it's a list operator
1106 S_lop(pTHX_ I32 f, int x, char *s)
1113 PL_last_lop = PL_oldbufptr;
1114 PL_last_lop_op = (OPCODE)f;
1117 return REPORT(LSTOP);
1120 return REPORT(LSTOP);
1123 return REPORT(FUNC);
1126 return REPORT(FUNC);
1128 return REPORT(LSTOP);
1134 * Sets up for an eventual force_next(). start_force(0) basically does
1135 * an unshift, while start_force(-1) does a push. yylex removes items
1140 S_start_force(pTHX_ int where)
1144 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1145 where = PL_lasttoke;
1146 assert(PL_curforce < 0 || PL_curforce == where);
1147 if (PL_curforce != where) {
1148 for (i = PL_lasttoke; i > where; --i) {
1149 PL_nexttoke[i] = PL_nexttoke[i-1];
1153 if (PL_curforce < 0) /* in case of duplicate start_force() */
1154 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1155 PL_curforce = where;
1158 curmad('^', newSVpvn("",0));
1159 CURMAD('_', PL_nextwhite);
1164 S_curmad(pTHX_ char slot, SV *sv)
1170 if (PL_curforce < 0)
1171 where = &PL_thismad;
1173 where = &PL_nexttoke[PL_curforce].next_mad;
1176 sv_setpvn(sv, "", 0);
1179 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1181 else if (PL_encoding) {
1182 sv_recode_to_utf8(sv, PL_encoding);
1187 /* keep a slot open for the head of the list? */
1188 if (slot != '_' && *where && (*where)->mad_key == '^') {
1189 (*where)->mad_key = slot;
1190 sv_free((*where)->mad_val);
1191 (*where)->mad_val = (void*)sv;
1194 addmad(newMADsv(slot, sv), where, 0);
1197 # define start_force(where) /*EMPTY*/
1198 # define curmad(slot, sv) /*EMPTY*/
1203 * When the lexer realizes it knows the next token (for instance,
1204 * it is reordering tokens for the parser) then it can call S_force_next
1205 * to know what token to return the next time the lexer is called. Caller
1206 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1207 * and possibly PL_expect to ensure the lexer handles the token correctly.
1211 S_force_next(pTHX_ I32 type)
1215 if (PL_curforce < 0)
1216 start_force(PL_lasttoke);
1217 PL_nexttoke[PL_curforce].next_type = type;
1218 if (PL_lex_state != LEX_KNOWNEXT)
1219 PL_lex_defer = PL_lex_state;
1220 PL_lex_state = LEX_KNOWNEXT;
1221 PL_lex_expect = PL_expect;
1224 PL_nexttype[PL_nexttoke] = type;
1226 if (PL_lex_state != LEX_KNOWNEXT) {
1227 PL_lex_defer = PL_lex_state;
1228 PL_lex_expect = PL_expect;
1229 PL_lex_state = LEX_KNOWNEXT;
1235 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1238 SV * const sv = newSVpvn(start,len);
1239 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1246 * When the lexer knows the next thing is a word (for instance, it has
1247 * just seen -> and it knows that the next char is a word char, then
1248 * it calls S_force_word to stick the next word into the PL_next lookahead.
1251 * char *start : buffer position (must be within PL_linestr)
1252 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1253 * int check_keyword : if true, Perl checks to make sure the word isn't
1254 * a keyword (do this if the word is a label, e.g. goto FOO)
1255 * int allow_pack : if true, : characters will also be allowed (require,
1256 * use, etc. do this)
1257 * int allow_initial_tick : used by the "sub" lexer only.
1261 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1267 start = SKIPSPACE1(start);
1269 if (isIDFIRST_lazy_if(s,UTF) ||
1270 (allow_pack && *s == ':') ||
1271 (allow_initial_tick && *s == '\'') )
1273 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1274 if (check_keyword && keyword(PL_tokenbuf, len))
1276 start_force(PL_curforce);
1278 curmad('X', newSVpvn(start,s-start));
1279 if (token == METHOD) {
1284 PL_expect = XOPERATOR;
1287 NEXTVAL_NEXTTOKE.opval
1288 = (OP*)newSVOP(OP_CONST,0,
1289 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1290 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1298 * Called when the lexer wants $foo *foo &foo etc, but the program
1299 * text only contains the "foo" portion. The first argument is a pointer
1300 * to the "foo", and the second argument is the type symbol to prefix.
1301 * Forces the next token to be a "WORD".
1302 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1306 S_force_ident(pTHX_ register const char *s, int kind)
1310 const STRLEN len = strlen(s);
1311 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1312 start_force(PL_curforce);
1313 NEXTVAL_NEXTTOKE.opval = o;
1316 o->op_private = OPpCONST_ENTERED;
1317 /* XXX see note in pp_entereval() for why we forgo typo
1318 warnings if the symbol must be introduced in an eval.
1320 gv_fetchpvn_flags(s, len,
1321 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1323 kind == '$' ? SVt_PV :
1324 kind == '@' ? SVt_PVAV :
1325 kind == '%' ? SVt_PVHV :
1333 Perl_str_to_version(pTHX_ SV *sv)
1338 const char *start = SvPV_const(sv,len);
1339 const char * const end = start + len;
1340 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1341 while (start < end) {
1345 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1350 retval += ((NV)n)/nshift;
1359 * Forces the next token to be a version number.
1360 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1361 * and if "guessing" is TRUE, then no new token is created (and the caller
1362 * must use an alternative parsing method).
1366 S_force_version(pTHX_ char *s, int guessing)
1372 I32 startoff = s - SvPVX(PL_linestr);
1381 while (isDIGIT(*d) || *d == '_' || *d == '.')
1385 start_force(PL_curforce);
1386 curmad('X', newSVpvn(s,d-s));
1389 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1391 s = scan_num(s, &yylval);
1392 version = yylval.opval;
1393 ver = cSVOPx(version)->op_sv;
1394 if (SvPOK(ver) && !SvNIOK(ver)) {
1395 SvUPGRADE(ver, SVt_PVNV);
1396 SvNV_set(ver, str_to_version(ver));
1397 SvNOK_on(ver); /* hint that it is a version */
1400 else if (guessing) {
1403 sv_free(PL_nextwhite); /* let next token collect whitespace */
1405 s = SvPVX(PL_linestr) + startoff;
1413 if (PL_madskills && !version) {
1414 sv_free(PL_nextwhite); /* let next token collect whitespace */
1416 s = SvPVX(PL_linestr) + startoff;
1419 /* NOTE: The parser sees the package name and the VERSION swapped */
1420 start_force(PL_curforce);
1421 NEXTVAL_NEXTTOKE.opval = version;
1429 * Tokenize a quoted string passed in as an SV. It finds the next
1430 * chunk, up to end of string or a backslash. It may make a new
1431 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1436 S_tokeq(pTHX_ SV *sv)
1440 register char *send;
1448 s = SvPV_force(sv, len);
1449 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1452 while (s < send && *s != '\\')
1457 if ( PL_hints & HINT_NEW_STRING ) {
1458 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1464 if (s + 1 < send && (s[1] == '\\'))
1465 s++; /* all that, just for this */
1470 SvCUR_set(sv, d - SvPVX_const(sv));
1472 if ( PL_hints & HINT_NEW_STRING )
1473 return new_constant(NULL, 0, "q", sv, pv, "q");
1478 * Now come three functions related to double-quote context,
1479 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1480 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1481 * interact with PL_lex_state, and create fake ( ... ) argument lists
1482 * to handle functions and concatenation.
1483 * They assume that whoever calls them will be setting up a fake
1484 * join call, because each subthing puts a ',' after it. This lets
1487 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1489 * (I'm not sure whether the spurious commas at the end of lcfirst's
1490 * arguments and join's arguments are created or not).
1495 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1497 * Pattern matching will set PL_lex_op to the pattern-matching op to
1498 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1500 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1502 * Everything else becomes a FUNC.
1504 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1505 * had an OP_CONST or OP_READLINE). This just sets us up for a
1506 * call to S_sublex_push().
1510 S_sublex_start(pTHX)
1513 register const I32 op_type = yylval.ival;
1515 if (op_type == OP_NULL) {
1516 yylval.opval = PL_lex_op;
1520 if (op_type == OP_CONST || op_type == OP_READLINE) {
1521 SV *sv = tokeq(PL_lex_stuff);
1523 if (SvTYPE(sv) == SVt_PVIV) {
1524 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1526 const char * const p = SvPV_const(sv, len);
1527 SV * const nsv = newSVpvn(p, len);
1533 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1534 PL_lex_stuff = NULL;
1535 /* Allow <FH> // "foo" */
1536 if (op_type == OP_READLINE)
1537 PL_expect = XTERMORDORDOR;
1541 PL_sublex_info.super_state = PL_lex_state;
1542 PL_sublex_info.sub_inwhat = op_type;
1543 PL_sublex_info.sub_op = PL_lex_op;
1544 PL_lex_state = LEX_INTERPPUSH;
1548 yylval.opval = PL_lex_op;
1558 * Create a new scope to save the lexing state. The scope will be
1559 * ended in S_sublex_done. Returns a '(', starting the function arguments
1560 * to the uc, lc, etc. found before.
1561 * Sets PL_lex_state to LEX_INTERPCONCAT.
1570 PL_lex_state = PL_sublex_info.super_state;
1571 SAVEI32(PL_lex_dojoin);
1572 SAVEI32(PL_lex_brackets);
1573 SAVEI32(PL_lex_casemods);
1574 SAVEI32(PL_lex_starts);
1575 SAVEI32(PL_lex_state);
1576 SAVEVPTR(PL_lex_inpat);
1577 SAVEI32(PL_lex_inwhat);
1578 SAVECOPLINE(PL_curcop);
1579 SAVEPPTR(PL_bufptr);
1580 SAVEPPTR(PL_bufend);
1581 SAVEPPTR(PL_oldbufptr);
1582 SAVEPPTR(PL_oldoldbufptr);
1583 SAVEPPTR(PL_last_lop);
1584 SAVEPPTR(PL_last_uni);
1585 SAVEPPTR(PL_linestart);
1586 SAVESPTR(PL_linestr);
1587 SAVEGENERICPV(PL_lex_brackstack);
1588 SAVEGENERICPV(PL_lex_casestack);
1590 PL_linestr = PL_lex_stuff;
1591 PL_lex_stuff = NULL;
1593 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1594 = SvPVX(PL_linestr);
1595 PL_bufend += SvCUR(PL_linestr);
1596 PL_last_lop = PL_last_uni = NULL;
1597 SAVEFREESV(PL_linestr);
1599 PL_lex_dojoin = FALSE;
1600 PL_lex_brackets = 0;
1601 Newx(PL_lex_brackstack, 120, char);
1602 Newx(PL_lex_casestack, 12, char);
1603 PL_lex_casemods = 0;
1604 *PL_lex_casestack = '\0';
1606 PL_lex_state = LEX_INTERPCONCAT;
1607 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1609 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1610 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1611 PL_lex_inpat = PL_sublex_info.sub_op;
1613 PL_lex_inpat = NULL;
1620 * Restores lexer state after a S_sublex_push.
1627 if (!PL_lex_starts++) {
1628 SV * const sv = newSVpvs("");
1629 if (SvUTF8(PL_linestr))
1631 PL_expect = XOPERATOR;
1632 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1636 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1637 PL_lex_state = LEX_INTERPCASEMOD;
1641 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1642 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1643 PL_linestr = PL_lex_repl;
1645 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1646 PL_bufend += SvCUR(PL_linestr);
1647 PL_last_lop = PL_last_uni = NULL;
1648 SAVEFREESV(PL_linestr);
1649 PL_lex_dojoin = FALSE;
1650 PL_lex_brackets = 0;
1651 PL_lex_casemods = 0;
1652 *PL_lex_casestack = '\0';
1654 if (SvEVALED(PL_lex_repl)) {
1655 PL_lex_state = LEX_INTERPNORMAL;
1657 /* we don't clear PL_lex_repl here, so that we can check later
1658 whether this is an evalled subst; that means we rely on the
1659 logic to ensure sublex_done() is called again only via the
1660 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1663 PL_lex_state = LEX_INTERPCONCAT;
1673 PL_endwhite = newSVpvn("",0);
1674 sv_catsv(PL_endwhite, PL_thiswhite);
1678 sv_setpvn(PL_thistoken,"",0);
1680 PL_realtokenstart = -1;
1684 PL_bufend = SvPVX(PL_linestr);
1685 PL_bufend += SvCUR(PL_linestr);
1686 PL_expect = XOPERATOR;
1687 PL_sublex_info.sub_inwhat = 0;
1695 Extracts a pattern, double-quoted string, or transliteration. This
1698 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1699 processing a pattern (PL_lex_inpat is true), a transliteration
1700 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1702 Returns a pointer to the character scanned up to. Iff this is
1703 advanced from the start pointer supplied (ie if anything was
1704 successfully parsed), will leave an OP for the substring scanned
1705 in yylval. Caller must intuit reason for not parsing further
1706 by looking at the next characters herself.
1710 double-quoted style: \r and \n
1711 regexp special ones: \D \s
1713 backrefs: \1 (deprecated in substitution replacements)
1714 case and quoting: \U \Q \E
1715 stops on @ and $, but not for $ as tail anchor
1717 In transliterations:
1718 characters are VERY literal, except for - not at the start or end
1719 of the string, which indicates a range. scan_const expands the
1720 range to the full set of intermediate characters.
1722 In double-quoted strings:
1724 double-quoted style: \r and \n
1726 backrefs: \1 (deprecated)
1727 case and quoting: \U \Q \E
1730 scan_const does *not* construct ops to handle interpolated strings.
1731 It stops processing as soon as it finds an embedded $ or @ variable
1732 and leaves it to the caller to work out what's going on.
1734 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1736 $ in pattern could be $foo or could be tail anchor. Assumption:
1737 it's a tail anchor if $ is the last thing in the string, or if it's
1738 followed by one of ")| \n\t"
1740 \1 (backreferences) are turned into $1
1742 The structure of the code is
1743 while (there's a character to process) {
1744 handle transliteration ranges
1745 skip regexp comments
1746 skip # initiated comments in //x patterns
1747 check for embedded @foo
1748 check for embedded scalars
1750 leave intact backslashes from leave (below)
1751 deprecate \1 in strings and sub replacements
1752 handle string-changing backslashes \l \U \Q \E, etc.
1753 switch (what was escaped) {
1754 handle - in a transliteration (becomes a literal -)
1755 handle \132 octal characters
1756 handle 0x15 hex characters
1757 handle \cV (control V)
1758 handle printf backslashes (\f, \r, \n, etc)
1760 } (end if backslash)
1761 } (end while character to read)
1766 S_scan_const(pTHX_ char *start)
1769 register char *send = PL_bufend; /* end of the constant */
1770 SV *sv = newSV(send - start); /* sv for the constant */
1771 register char *s = start; /* start of the constant */
1772 register char *d = SvPVX(sv); /* destination for copies */
1773 bool dorange = FALSE; /* are we in a translit range? */
1774 bool didrange = FALSE; /* did we just finish a range? */
1775 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1776 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1779 UV literal_endpoint = 0;
1782 const char * const leaveit = /* set of acceptably-backslashed characters */
1784 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1787 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1788 /* If we are doing a trans and we know we want UTF8 set expectation */
1789 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1790 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1794 while (s < send || dorange) {
1795 /* get transliterations out of the way (they're most literal) */
1796 if (PL_lex_inwhat == OP_TRANS) {
1797 /* expand a range A-Z to the full set of characters. AIE! */
1799 I32 i; /* current expanded character */
1800 I32 min; /* first character in range */
1801 I32 max; /* last character in range */
1804 char * const c = (char*)utf8_hop((U8*)d, -1);
1808 *c = (char)UTF_TO_NATIVE(0xff);
1809 /* mark the range as done, and continue */
1815 i = d - SvPVX_const(sv); /* remember current offset */
1816 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1817 d = SvPVX(sv) + i; /* refresh d after realloc */
1818 d -= 2; /* eat the first char and the - */
1820 min = (U8)*d; /* first char in range */
1821 max = (U8)d[1]; /* last char in range */
1825 "Invalid range \"%c-%c\" in transliteration operator",
1826 (char)min, (char)max);
1830 if (literal_endpoint == 2 &&
1831 ((isLOWER(min) && isLOWER(max)) ||
1832 (isUPPER(min) && isUPPER(max)))) {
1834 for (i = min; i <= max; i++)
1836 *d++ = NATIVE_TO_NEED(has_utf8,i);
1838 for (i = min; i <= max; i++)
1840 *d++ = NATIVE_TO_NEED(has_utf8,i);
1845 for (i = min; i <= max; i++)
1848 /* mark the range as done, and continue */
1852 literal_endpoint = 0;
1857 /* range begins (ignore - as first or last char) */
1858 else if (*s == '-' && s+1 < send && s != start) {
1860 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1863 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1873 literal_endpoint = 0;
1878 /* if we get here, we're not doing a transliteration */
1880 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1881 except for the last char, which will be done separately. */
1882 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1884 while (s+1 < send && *s != ')')
1885 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1887 else if (s[2] == '{' /* This should match regcomp.c */
1888 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1891 char *regparse = s + (s[2] == '{' ? 3 : 4);
1894 while (count && (c = *regparse)) {
1895 if (c == '\\' && regparse[1])
1903 if (*regparse != ')')
1904 regparse--; /* Leave one char for continuation. */
1905 while (s < regparse)
1906 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1910 /* likewise skip #-initiated comments in //x patterns */
1911 else if (*s == '#' && PL_lex_inpat &&
1912 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1913 while (s+1 < send && *s != '\n')
1914 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1917 /* check for embedded arrays
1918 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1920 else if (*s == '@' && s[1]
1921 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1924 /* check for embedded scalars. only stop if we're sure it's a
1927 else if (*s == '$') {
1928 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1930 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1931 break; /* in regexp, $ might be tail anchor */
1934 /* End of else if chain - OP_TRANS rejoin rest */
1937 if (*s == '\\' && s+1 < send) {
1940 /* some backslashes we leave behind */
1941 if (*leaveit && *s && strchr(leaveit, *s)) {
1942 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1943 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1947 /* deprecate \1 in strings and substitution replacements */
1948 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1949 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1951 if (ckWARN(WARN_SYNTAX))
1952 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1957 /* string-change backslash escapes */
1958 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1963 /* if we get here, it's either a quoted -, or a digit */
1966 /* quoted - in transliterations */
1968 if (PL_lex_inwhat == OP_TRANS) {
1978 Perl_warner(aTHX_ packWARN(WARN_MISC),
1979 "Unrecognized escape \\%c passed through",
1981 /* default action is to copy the quoted character */
1982 goto default_action;
1985 /* \132 indicates an octal constant */
1986 case '0': case '1': case '2': case '3':
1987 case '4': case '5': case '6': case '7':
1991 uv = grok_oct(s, &len, &flags, NULL);
1994 goto NUM_ESCAPE_INSERT;
1996 /* \x24 indicates a hex constant */
2000 char* const e = strchr(s, '}');
2001 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2002 PERL_SCAN_DISALLOW_PREFIX;
2007 yyerror("Missing right brace on \\x{}");
2011 uv = grok_hex(s, &len, &flags, NULL);
2017 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2018 uv = grok_hex(s, &len, &flags, NULL);
2024 /* Insert oct or hex escaped character.
2025 * There will always enough room in sv since such
2026 * escapes will be longer than any UTF-8 sequence
2027 * they can end up as. */
2029 /* We need to map to chars to ASCII before doing the tests
2032 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
2033 if (!has_utf8 && uv > 255) {
2034 /* Might need to recode whatever we have
2035 * accumulated so far if it contains any
2038 * (Can't we keep track of that and avoid
2039 * this rescan? --jhi)
2043 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
2044 if (!NATIVE_IS_INVARIANT(*c)) {
2049 const STRLEN offset = d - SvPVX_const(sv);
2051 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2055 while (src >= (const U8 *)SvPVX_const(sv)) {
2056 if (!NATIVE_IS_INVARIANT(*src)) {
2057 const U8 ch = NATIVE_TO_ASCII(*src);
2058 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2059 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
2069 if (has_utf8 || uv > 255) {
2070 d = (char*)uvchr_to_utf8((U8*)d, uv);
2072 if (PL_lex_inwhat == OP_TRANS &&
2073 PL_sublex_info.sub_op) {
2074 PL_sublex_info.sub_op->op_private |=
2075 (PL_lex_repl ? OPpTRANS_FROM_UTF
2088 /* \N{LATIN SMALL LETTER A} is a named character */
2092 char* e = strchr(s, '}');
2098 yyerror("Missing right brace on \\N{}");
2102 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2104 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2105 PERL_SCAN_DISALLOW_PREFIX;
2108 uv = grok_hex(s, &len, &flags, NULL);
2110 goto NUM_ESCAPE_INSERT;
2112 res = newSVpvn(s + 1, e - s - 1);
2113 res = new_constant( NULL, 0, "charnames",
2114 res, NULL, "\\N{...}" );
2116 sv_utf8_upgrade(res);
2117 str = SvPV_const(res,len);
2118 #ifdef EBCDIC_NEVER_MIND
2119 /* charnames uses pack U and that has been
2120 * recently changed to do the below uni->native
2121 * mapping, so this would be redundant (and wrong,
2122 * the code point would be doubly converted).
2123 * But leave this in just in case the pack U change
2124 * gets revoked, but the semantics is still
2125 * desireable for charnames. --jhi */
2127 UV uv = utf8_to_uvchr((const U8*)str, 0);
2130 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2132 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2133 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2134 str = SvPV_const(res, len);
2138 if (!has_utf8 && SvUTF8(res)) {
2139 const char * const ostart = SvPVX_const(sv);
2140 SvCUR_set(sv, d - ostart);
2143 sv_utf8_upgrade(sv);
2144 /* this just broke our allocation above... */
2145 SvGROW(sv, (STRLEN)(send - start));
2146 d = SvPVX(sv) + SvCUR(sv);
2149 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2150 const char * const odest = SvPVX_const(sv);
2152 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
2153 d = SvPVX(sv) + (d - odest);
2155 Copy(str, d, len, char);
2162 yyerror("Missing braces on \\N{}");
2165 /* \c is a control character */
2174 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2177 yyerror("Missing control char name in \\c");
2181 /* printf-style backslashes, formfeeds, newlines, etc */
2183 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2186 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2189 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2192 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2195 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2198 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2201 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2207 } /* end if (backslash) */
2214 /* If we started with encoded form, or already know we want it
2215 and then encode the next character */
2216 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2218 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2219 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2222 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2223 const STRLEN off = d - SvPVX_const(sv);
2224 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2226 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2230 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2232 } /* while loop to process each character */
2234 /* terminate the string and set up the sv */
2236 SvCUR_set(sv, d - SvPVX_const(sv));
2237 if (SvCUR(sv) >= SvLEN(sv))
2238 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2241 if (PL_encoding && !has_utf8) {
2242 sv_recode_to_utf8(sv, PL_encoding);
2248 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2249 PL_sublex_info.sub_op->op_private |=
2250 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2254 /* shrink the sv if we allocated more than we used */
2255 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2256 SvPV_shrink_to_cur(sv);
2259 /* return the substring (via yylval) only if we parsed anything */
2260 if (s > PL_bufptr) {
2261 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2262 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
2264 ( PL_lex_inwhat == OP_TRANS
2266 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2269 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2276 * Returns TRUE if there's more to the expression (e.g., a subscript),
2279 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2281 * ->[ and ->{ return TRUE
2282 * { and [ outside a pattern are always subscripts, so return TRUE
2283 * if we're outside a pattern and it's not { or [, then return FALSE
2284 * if we're in a pattern and the first char is a {
2285 * {4,5} (any digits around the comma) returns FALSE
2286 * if we're in a pattern and the first char is a [
2288 * [SOMETHING] has a funky algorithm to decide whether it's a
2289 * character class or not. It has to deal with things like
2290 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2291 * anything else returns TRUE
2294 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2297 S_intuit_more(pTHX_ register char *s)
2300 if (PL_lex_brackets)
2302 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2304 if (*s != '{' && *s != '[')
2309 /* In a pattern, so maybe we have {n,m}. */
2326 /* On the other hand, maybe we have a character class */
2329 if (*s == ']' || *s == '^')
2332 /* this is terrifying, and it works */
2333 int weight = 2; /* let's weigh the evidence */
2335 unsigned char un_char = 255, last_un_char;
2336 const char * const send = strchr(s,']');
2337 char tmpbuf[sizeof PL_tokenbuf * 4];
2339 if (!send) /* has to be an expression */
2342 Zero(seen,256,char);
2345 else if (isDIGIT(*s)) {
2347 if (isDIGIT(s[1]) && s[2] == ']')
2353 for (; s < send; s++) {
2354 last_un_char = un_char;
2355 un_char = (unsigned char)*s;
2360 weight -= seen[un_char] * 10;
2361 if (isALNUM_lazy_if(s+1,UTF)) {
2363 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2364 len = (int)strlen(tmpbuf);
2365 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2370 else if (*s == '$' && s[1] &&
2371 strchr("[#!%*<>()-=",s[1])) {
2372 if (/*{*/ strchr("])} =",s[2]))
2381 if (strchr("wds]",s[1]))
2383 else if (seen['\''] || seen['"'])
2385 else if (strchr("rnftbxcav",s[1]))
2387 else if (isDIGIT(s[1])) {
2389 while (s[1] && isDIGIT(s[1]))
2399 if (strchr("aA01! ",last_un_char))
2401 if (strchr("zZ79~",s[1]))
2403 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2404 weight -= 5; /* cope with negative subscript */
2407 if (!isALNUM(last_un_char)
2408 && !(last_un_char == '$' || last_un_char == '@'
2409 || last_un_char == '&')
2410 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2415 if (keyword(tmpbuf, d - tmpbuf))
2418 if (un_char == last_un_char + 1)
2420 weight -= seen[un_char];
2425 if (weight >= 0) /* probably a character class */
2435 * Does all the checking to disambiguate
2437 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2438 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2440 * First argument is the stuff after the first token, e.g. "bar".
2442 * Not a method if bar is a filehandle.
2443 * Not a method if foo is a subroutine prototyped to take a filehandle.
2444 * Not a method if it's really "Foo $bar"
2445 * Method if it's "foo $bar"
2446 * Not a method if it's really "print foo $bar"
2447 * Method if it's really "foo package::" (interpreted as package->foo)
2448 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2449 * Not a method if bar is a filehandle or package, but is quoted with
2454 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2457 char *s = start + (*start == '$');
2458 char tmpbuf[sizeof PL_tokenbuf];
2466 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2470 const char *proto = SvPVX_const(cv);
2481 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2482 /* start is the beginning of the possible filehandle/object,
2483 * and s is the end of it
2484 * tmpbuf is a copy of it
2487 if (*start == '$') {
2488 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2491 len = start - SvPVX(PL_linestr);
2495 start = SvPVX(PL_linestr) + len;
2499 return *s == '(' ? FUNCMETH : METHOD;
2501 if (!keyword(tmpbuf, len)) {
2502 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2506 soff = s - SvPVX(PL_linestr);
2510 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2511 if (indirgv && GvCVu(indirgv))
2513 /* filehandle or package name makes it a method */
2514 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2516 soff = s - SvPVX(PL_linestr);
2519 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2520 return 0; /* no assumptions -- "=>" quotes bearword */
2522 start_force(PL_curforce);
2523 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2524 newSVpvn(tmpbuf,len));
2525 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2527 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2532 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2534 return *s == '(' ? FUNCMETH : METHOD;
2542 * Return a string of Perl code to load the debugger. If PERL5DB
2543 * is set, it will return the contents of that, otherwise a
2544 * compile-time require of perl5db.pl.
2552 const char * const pdb = PerlEnv_getenv("PERL5DB");
2556 SETERRNO(0,SS_NORMAL);
2557 return "BEGIN { require 'perl5db.pl' }";
2563 /* Encoded script support. filter_add() effectively inserts a
2564 * 'pre-processing' function into the current source input stream.
2565 * Note that the filter function only applies to the current source file
2566 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2568 * The datasv parameter (which may be NULL) can be used to pass
2569 * private data to this instance of the filter. The filter function
2570 * can recover the SV using the FILTER_DATA macro and use it to
2571 * store private buffers and state information.
2573 * The supplied datasv parameter is upgraded to a PVIO type
2574 * and the IoDIRP/IoANY field is used to store the function pointer,
2575 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2576 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2577 * private use must be set using malloc'd pointers.
2581 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2587 if (!PL_rsfp_filters)
2588 PL_rsfp_filters = newAV();
2591 SvUPGRADE(datasv, SVt_PVIO);
2592 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2593 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2594 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2595 IoANY(datasv), SvPV_nolen(datasv)));
2596 av_unshift(PL_rsfp_filters, 1);
2597 av_store(PL_rsfp_filters, 0, datasv) ;
2602 /* Delete most recently added instance of this filter function. */
2604 Perl_filter_del(pTHX_ filter_t funcp)
2610 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2612 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2614 /* if filter is on top of stack (usual case) just pop it off */
2615 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2616 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2617 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2618 IoANY(datasv) = (void *)NULL;
2619 sv_free(av_pop(PL_rsfp_filters));
2623 /* we need to search for the correct entry and clear it */
2624 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2628 /* Invoke the idxth filter function for the current rsfp. */
2629 /* maxlen 0 = read one text line */
2631 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2636 /* This API is bad. It should have been using unsigned int for maxlen.
2637 Not sure if we want to change the API, but if not we should sanity
2638 check the value here. */
2639 const unsigned int correct_length
2648 if (!PL_rsfp_filters)
2650 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2651 /* Provide a default input filter to make life easy. */
2652 /* Note that we append to the line. This is handy. */
2653 DEBUG_P(PerlIO_printf(Perl_debug_log,
2654 "filter_read %d: from rsfp\n", idx));
2655 if (correct_length) {
2658 const int old_len = SvCUR(buf_sv);
2660 /* ensure buf_sv is large enough */
2661 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2662 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2663 correct_length)) <= 0) {
2664 if (PerlIO_error(PL_rsfp))
2665 return -1; /* error */
2667 return 0 ; /* end of file */
2669 SvCUR_set(buf_sv, old_len + len) ;
2672 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2673 if (PerlIO_error(PL_rsfp))
2674 return -1; /* error */
2676 return 0 ; /* end of file */
2679 return SvCUR(buf_sv);
2681 /* Skip this filter slot if filter has been deleted */
2682 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2683 DEBUG_P(PerlIO_printf(Perl_debug_log,
2684 "filter_read %d: skipped (filter deleted)\n",
2686 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2688 /* Get function pointer hidden within datasv */
2689 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2690 DEBUG_P(PerlIO_printf(Perl_debug_log,
2691 "filter_read %d: via function %p (%s)\n",
2692 idx, datasv, SvPV_nolen_const(datasv)));
2693 /* Call function. The function is expected to */
2694 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2695 /* Return: <0:error, =0:eof, >0:not eof */
2696 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2700 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2703 #ifdef PERL_CR_FILTER
2704 if (!PL_rsfp_filters) {
2705 filter_add(S_cr_textfilter,NULL);
2708 if (PL_rsfp_filters) {
2710 SvCUR_set(sv, 0); /* start with empty line */
2711 if (FILTER_READ(0, sv, 0) > 0)
2712 return ( SvPVX(sv) ) ;
2717 return (sv_gets(sv, fp, append));
2721 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2726 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2730 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2731 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2733 return GvHV(gv); /* Foo:: */
2736 /* use constant CLASS => 'MyClass' */
2737 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
2739 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2740 pkgname = SvPV_nolen_const(sv);
2744 return gv_stashpv(pkgname, FALSE);
2750 * The intent of this yylex wrapper is to minimize the changes to the
2751 * tokener when we aren't interested in collecting madprops. It remains
2752 * to be seen how successful this strategy will be...
2759 char *s = PL_bufptr;
2761 /* make sure PL_thiswhite is initialized */
2765 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
2766 if (PL_pending_ident)
2767 return S_pending_ident(aTHX);
2769 /* previous token ate up our whitespace? */
2770 if (!PL_lasttoke && PL_nextwhite) {
2771 PL_thiswhite = PL_nextwhite;
2775 /* isolate the token, and figure out where it is without whitespace */
2776 PL_realtokenstart = -1;
2780 assert(PL_curforce < 0);
2782 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2783 if (!PL_thistoken) {
2784 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2785 PL_thistoken = newSVpvn("",0);
2787 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
2788 PL_thistoken = newSVpvn(tstart, s - tstart);
2791 if (PL_thismad) /* install head */
2792 CURMAD('X', PL_thistoken);
2795 /* last whitespace of a sublex? */
2796 if (optype == ')' && PL_endwhite) {
2797 CURMAD('X', PL_endwhite);
2802 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
2803 if (!PL_thiswhite && !PL_endwhite && !optype) {
2804 sv_free(PL_thistoken);
2809 /* put off final whitespace till peg */
2810 if (optype == ';' && !PL_rsfp) {
2811 PL_nextwhite = PL_thiswhite;
2814 else if (PL_thisopen) {
2815 CURMAD('q', PL_thisopen);
2817 sv_free(PL_thistoken);
2821 /* Store actual token text as madprop X */
2822 CURMAD('X', PL_thistoken);
2826 /* add preceding whitespace as madprop _ */
2827 CURMAD('_', PL_thiswhite);
2831 /* add quoted material as madprop = */
2832 CURMAD('=', PL_thisstuff);
2836 /* add terminating quote as madprop Q */
2837 CURMAD('Q', PL_thisclose);
2841 /* special processing based on optype */
2845 /* opval doesn't need a TOKEN since it can already store mp */
2856 append_madprops(PL_thismad, yylval.opval, 0);
2864 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2873 /* remember any fake bracket that lexer is about to discard */
2874 if (PL_lex_brackets == 1 &&
2875 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2878 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2881 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2882 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2885 break; /* don't bother looking for trailing comment */
2894 /* attach a trailing comment to its statement instead of next token */
2898 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2900 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2902 if (*s == '\n' || *s == '#') {
2903 while (s < PL_bufend && *s != '\n')
2907 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2908 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2925 /* Create new token struct. Note: opvals return early above. */
2926 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
2933 S_tokenize_use(pTHX_ int is_use, char *s) {
2935 if (PL_expect != XSTATE)
2936 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2937 is_use ? "use" : "no"));
2939 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2940 s = force_version(s, TRUE);
2941 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
2942 start_force(PL_curforce);
2943 NEXTVAL_NEXTTOKE.opval = NULL;
2946 else if (*s == 'v') {
2947 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2948 s = force_version(s, FALSE);
2952 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2953 s = force_version(s, FALSE);
2955 yylval.ival = is_use;
2959 static const char* const exp_name[] =
2960 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2961 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2968 Works out what to call the token just pulled out of the input
2969 stream. The yacc parser takes care of taking the ops we return and
2970 stitching them into a tree.
2976 if read an identifier
2977 if we're in a my declaration
2978 croak if they tried to say my($foo::bar)
2979 build the ops for a my() declaration
2980 if it's an access to a my() variable
2981 are we in a sort block?
2982 croak if my($a); $a <=> $b
2983 build ops for access to a my() variable
2984 if in a dq string, and they've said @foo and we can't find @foo
2986 build ops for a bareword
2987 if we already built the token before, use it.
2992 #pragma segment Perl_yylex
2998 register char *s = PL_bufptr;
3004 SV* tmp = newSVpvs("");
3005 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3006 (IV)CopLINE(PL_curcop),
3007 lex_state_names[PL_lex_state],
3008 exp_name[PL_expect],
3009 pv_display(tmp, s, strlen(s), 0, 60));
3012 /* check if there's an identifier for us to look at */
3013 if (PL_pending_ident)
3014 return REPORT(S_pending_ident(aTHX));
3016 /* no identifier pending identification */
3018 switch (PL_lex_state) {
3020 case LEX_NORMAL: /* Some compilers will produce faster */
3021 case LEX_INTERPNORMAL: /* code if we comment these out. */
3025 /* when we've already built the next token, just pull it out of the queue */
3029 yylval = PL_nexttoke[PL_lasttoke].next_val;
3031 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3032 PL_nexttoke[PL_lasttoke].next_mad = 0;
3033 if (PL_thismad && PL_thismad->mad_key == '_') {
3034 PL_thiswhite = (SV*)PL_thismad->mad_val;
3035 PL_thismad->mad_val = 0;
3036 mad_free(PL_thismad);
3041 PL_lex_state = PL_lex_defer;
3042 PL_expect = PL_lex_expect;
3043 PL_lex_defer = LEX_NORMAL;
3044 if (!PL_nexttoke[PL_lasttoke].next_type)
3049 yylval = PL_nextval[PL_nexttoke];
3051 PL_lex_state = PL_lex_defer;
3052 PL_expect = PL_lex_expect;
3053 PL_lex_defer = LEX_NORMAL;
3057 /* FIXME - can these be merged? */
3058 return(PL_nexttoke[PL_lasttoke].next_type);
3060 return REPORT(PL_nexttype[PL_nexttoke]);
3063 /* interpolated case modifiers like \L \U, including \Q and \E.
3064 when we get here, PL_bufptr is at the \
3066 case LEX_INTERPCASEMOD:
3068 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3069 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3071 /* handle \E or end of string */
3072 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3074 if (PL_lex_casemods) {
3075 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3076 PL_lex_casestack[PL_lex_casemods] = '\0';
3078 if (PL_bufptr != PL_bufend
3079 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3081 PL_lex_state = LEX_INTERPCONCAT;
3084 PL_thistoken = newSVpvn("\\E",2);
3090 while (PL_bufptr != PL_bufend &&
3091 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3093 PL_thiswhite = newSVpvn("",0);
3094 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3098 if (PL_bufptr != PL_bufend)
3101 PL_lex_state = LEX_INTERPCONCAT;
3105 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3106 "### Saw case modifier\n"); });
3108 if (s[1] == '\\' && s[2] == 'E') {
3111 PL_thiswhite = newSVpvn("",0);
3112 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3115 PL_lex_state = LEX_INTERPCONCAT;
3120 if (!PL_madskills) /* when just compiling don't need correct */
3121 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3122 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3123 if ((*s == 'L' || *s == 'U') &&
3124 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3125 PL_lex_casestack[--PL_lex_casemods] = '\0';
3128 if (PL_lex_casemods > 10)
3129 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3130 PL_lex_casestack[PL_lex_casemods++] = *s;
3131 PL_lex_casestack[PL_lex_casemods] = '\0';
3132 PL_lex_state = LEX_INTERPCONCAT;
3133 start_force(PL_curforce);
3134 NEXTVAL_NEXTTOKE.ival = 0;
3136 start_force(PL_curforce);
3138 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3140 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3142 NEXTVAL_NEXTTOKE.ival = OP_LC;
3144 NEXTVAL_NEXTTOKE.ival = OP_UC;
3146 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3148 Perl_croak(aTHX_ "panic: yylex");
3150 SV* const tmpsv = newSVpvn("",0);
3151 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3157 if (PL_lex_starts) {
3163 sv_free(PL_thistoken);
3164 PL_thistoken = newSVpvn("",0);
3167 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3168 if (PL_lex_casemods == 1 && PL_lex_inpat)
3177 case LEX_INTERPPUSH:
3178 return REPORT(sublex_push());
3180 case LEX_INTERPSTART:
3181 if (PL_bufptr == PL_bufend)
3182 return REPORT(sublex_done());
3183 DEBUG_T({ PerlIO_printf(Perl_debug_log,
3184 "### Interpolated variable\n"); });
3186 PL_lex_dojoin = (*PL_bufptr == '@');
3187 PL_lex_state = LEX_INTERPNORMAL;
3188 if (PL_lex_dojoin) {
3189 start_force(PL_curforce);
3190 NEXTVAL_NEXTTOKE.ival = 0;
3192 start_force(PL_curforce);
3193 force_ident("\"", '$');
3194 start_force(PL_curforce);
3195 NEXTVAL_NEXTTOKE.ival = 0;
3197 start_force(PL_curforce);
3198 NEXTVAL_NEXTTOKE.ival = 0;
3200 start_force(PL_curforce);
3201 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
3204 if (PL_lex_starts++) {
3209 sv_free(PL_thistoken);
3210 PL_thistoken = newSVpvn("",0);
3213 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3214 if (!PL_lex_casemods && PL_lex_inpat)
3221 case LEX_INTERPENDMAYBE:
3222 if (intuit_more(PL_bufptr)) {
3223 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
3229 if (PL_lex_dojoin) {
3230 PL_lex_dojoin = FALSE;
3231 PL_lex_state = LEX_INTERPCONCAT;
3235 sv_free(PL_thistoken);
3236 PL_thistoken = newSVpvn("",0);
3241 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3242 && SvEVALED(PL_lex_repl))
3244 if (PL_bufptr != PL_bufend)
3245 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3249 case LEX_INTERPCONCAT:
3251 if (PL_lex_brackets)
3252 Perl_croak(aTHX_ "panic: INTERPCONCAT");
3254 if (PL_bufptr == PL_bufend)
3255 return REPORT(sublex_done());
3257 if (SvIVX(PL_linestr) == '\'') {
3258 SV *sv = newSVsv(PL_linestr);
3261 else if ( PL_hints & HINT_NEW_RE )
3262 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
3263 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3267 s = scan_const(PL_bufptr);
3269 PL_lex_state = LEX_INTERPCASEMOD;
3271 PL_lex_state = LEX_INTERPSTART;
3274 if (s != PL_bufptr) {
3275 start_force(PL_curforce);
3277 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3279 NEXTVAL_NEXTTOKE = yylval;
3282 if (PL_lex_starts++) {
3286 sv_free(PL_thistoken);
3287 PL_thistoken = newSVpvn("",0);
3290 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3291 if (!PL_lex_casemods && PL_lex_inpat)
3304 PL_lex_state = LEX_NORMAL;
3305 s = scan_formline(PL_bufptr);
3306 if (!PL_lex_formbrack)
3312 PL_oldoldbufptr = PL_oldbufptr;
3318 sv_free(PL_thistoken);
3321 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
3325 if (isIDFIRST_lazy_if(s,UTF))
3327 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
3330 goto fake_eof; /* emulate EOF on ^D or ^Z */
3339 if (PL_lex_brackets) {
3340 yyerror(PL_lex_formbrack
3341 ? "Format not terminated"
3342 : "Missing right curly or square bracket");
3344 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3345 "### Tokener got EOF\n");
3349 if (s++ < PL_bufend)
3350 goto retry; /* ignore stray nulls */
3353 if (!PL_in_eval && !PL_preambled) {
3354 PL_preambled = TRUE;
3359 sv_setpv(PL_linestr,incl_perldb());
3360 if (SvCUR(PL_linestr))
3361 sv_catpvs(PL_linestr,";");
3363 while(AvFILLp(PL_preambleav) >= 0) {
3364 SV *tmpsv = av_shift(PL_preambleav);
3365 sv_catsv(PL_linestr, tmpsv);
3366 sv_catpvs(PL_linestr, ";");
3369 sv_free((SV*)PL_preambleav);
3370 PL_preambleav = NULL;
3372 if (PL_minus_n || PL_minus_p) {
3373 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3375 sv_catpvs(PL_linestr,"chomp;");
3378 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3379 || *PL_splitstr == '"')
3380 && strchr(PL_splitstr + 1, *PL_splitstr))
3381 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3383 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3384 bytes can be used as quoting characters. :-) */
3385 const char *splits = PL_splitstr;
3386 sv_catpvs(PL_linestr, "our @F=split(q\0");
3389 if (*splits == '\\')
3390 sv_catpvn(PL_linestr, splits, 1);
3391 sv_catpvn(PL_linestr, splits, 1);
3392 } while (*splits++);
3393 /* This loop will embed the trailing NUL of
3394 PL_linestr as the last thing it does before
3396 sv_catpvs(PL_linestr, ");");
3400 sv_catpvs(PL_linestr,"our @F=split(' ');");
3404 sv_catpvs(PL_linestr,"use feature ':5.10';");
3405 sv_catpvs(PL_linestr, "\n");
3406 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3407 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3408 PL_last_lop = PL_last_uni = NULL;
3409 if (PERLDB_LINE && PL_curstash != PL_debstash) {
3410 SV * const sv = newSV(0);
3412 sv_upgrade(sv, SVt_PVMG);
3413 sv_setsv(sv,PL_linestr);
3416 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
3421 bof = PL_rsfp ? TRUE : FALSE;
3422 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3425 PL_realtokenstart = -1;
3428 if (PL_preprocess && !PL_in_eval)
3429 (void)PerlProc_pclose(PL_rsfp);
3430 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3431 PerlIO_clearerr(PL_rsfp);
3433 (void)PerlIO_close(PL_rsfp);
3435 PL_doextract = FALSE;
3437 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3442 sv_setpv(PL_linestr,PL_minus_p
3443 ? ";}continue{print;}" : ";}");
3444 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3445 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3446 PL_last_lop = PL_last_uni = NULL;
3447 PL_minus_n = PL_minus_p = 0;
3450 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3451 PL_last_lop = PL_last_uni = NULL;
3452 sv_setpvn(PL_linestr,"",0);
3453 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3455 /* If it looks like the start of a BOM or raw UTF-16,
3456 * check if it in fact is. */
3462 #ifdef PERLIO_IS_STDIO
3463 # ifdef __GNU_LIBRARY__
3464 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3465 # define FTELL_FOR_PIPE_IS_BROKEN
3469 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3470 # define FTELL_FOR_PIPE_IS_BROKEN
3475 #ifdef FTELL_FOR_PIPE_IS_BROKEN
3476 /* This loses the possibility to detect the bof
3477 * situation on perl -P when the libc5 is being used.
3478 * Workaround? Maybe attach some extra state to PL_rsfp?
3481 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
3483 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3486 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3487 s = swallow_bom((U8*)s);
3491 /* Incest with pod. */
3494 sv_catsv(PL_thiswhite, PL_linestr);
3496 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3497 sv_setpvn(PL_linestr, "", 0);
3498 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3499 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3500 PL_last_lop = PL_last_uni = NULL;
3501 PL_doextract = FALSE;
3505 } while (PL_doextract);
3506 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3507 if (PERLDB_LINE && PL_curstash != PL_debstash) {
3508 SV * const sv = newSV(0);
3510 sv_upgrade(sv, SVt_PVMG);
3511 sv_setsv(sv,PL_linestr);
3514 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
3516 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3517 PL_last_lop = PL_last_uni = NULL;
3518 if (CopLINE(PL_curcop) == 1) {
3519 while (s < PL_bufend && isSPACE(*s))
3521 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3525 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3529 if (*s == '#' && *(s+1) == '!')
3531 #ifdef ALTERNATE_SHEBANG
3533 static char const as[] = ALTERNATE_SHEBANG;
3534 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3535 d = s + (sizeof(as) - 1);
3537 #endif /* ALTERNATE_SHEBANG */
3546 while (*d && !isSPACE(*d))
3550 #ifdef ARG_ZERO_IS_SCRIPT
3551 if (ipathend > ipath) {
3553 * HP-UX (at least) sets argv[0] to the script name,
3554 * which makes $^X incorrect. And Digital UNIX and Linux,
3555 * at least, set argv[0] to the basename of the Perl
3556 * interpreter. So, having found "#!", we'll set it right.
3558 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3560 assert(SvPOK(x) || SvGMAGICAL(x));
3561 if (sv_eq(x, CopFILESV(PL_curcop))) {
3562 sv_setpvn(x, ipath, ipathend - ipath);
3568 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3569 const char * const lstart = SvPV_const(x,llen);
3571 bstart += blen - llen;
3572 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3573 sv_setpvn(x, ipath, ipathend - ipath);
3578 TAINT_NOT; /* $^X is always tainted, but that's OK */
3580 #endif /* ARG_ZERO_IS_SCRIPT */
3585 d = instr(s,"perl -");
3587 d = instr(s,"perl");
3589 /* avoid getting into infinite loops when shebang
3590 * line contains "Perl" rather than "perl" */
3592 for (d = ipathend-4; d >= ipath; --d) {
3593 if ((*d == 'p' || *d == 'P')
3594 && !ibcmp(d, "perl", 4))
3604 #ifdef ALTERNATE_SHEBANG
3606 * If the ALTERNATE_SHEBANG on this system starts with a
3607 * character that can be part of a Perl expression, then if
3608 * we see it but not "perl", we're probably looking at the
3609 * start of Perl code, not a request to hand off to some
3610 * other interpreter. Similarly, if "perl" is there, but
3611 * not in the first 'word' of the line, we assume the line
3612 * contains the start of the Perl program.
3614 if (d && *s != '#') {
3615 const char *c = ipath;
3616 while (*c && !strchr("; \t\r\n\f\v#", *c))
3619 d = NULL; /* "perl" not in first word; ignore */
3621 *s = '#'; /* Don't try to parse shebang line */
3623 #endif /* ALTERNATE_SHEBANG */
3624 #ifndef MACOS_TRADITIONAL
3629 !instr(s,"indir") &&
3630 instr(PL_origargv[0],"perl"))
3637 while (s < PL_bufend && isSPACE(*s))
3639 if (s < PL_bufend) {
3640 Newxz(newargv,PL_origargc+3,char*);
3642 while (s < PL_bufend && !isSPACE(*s))
3645 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3648 newargv = PL_origargv;
3651 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3653 Perl_croak(aTHX_ "Can't exec %s", ipath);
3657 while (*d && !isSPACE(*d)) d++;
3658 while (SPACE_OR_TAB(*d)) d++;
3661 const bool switches_done = PL_doswitches;
3662 const U32 oldpdb = PL_perldb;
3663 const bool oldn = PL_minus_n;
3664 const bool oldp = PL_minus_p;
3667 if (*d == 'M' || *d == 'm' || *d == 'C') {
3668 const char * const m = d;
3669 while (*d && !isSPACE(*d))
3671 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3674 d = moreswitches(d);
3676 if (PL_doswitches && !switches_done) {
3677 int argc = PL_origargc;
3678 char **argv = PL_origargv;
3681 } while (argc && argv[0][0] == '-' && argv[0][1]);
3682 init_argv_symbols(argc,argv);
3684 if ((PERLDB_LINE && !oldpdb) ||
3685 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3686 /* if we have already added "LINE: while (<>) {",
3687 we must not do it again */
3689 sv_setpvn(PL_linestr, "", 0);
3690 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3691 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3692 PL_last_lop = PL_last_uni = NULL;
3693 PL_preambled = FALSE;
3695 (void)gv_fetchfile(PL_origfilename);
3702 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3704 PL_lex_state = LEX_FORMLINE;
3709 #ifdef PERL_STRICT_CR
3710 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3712 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3714 case ' ': case '\t': case '\f': case 013:
3715 #ifdef MACOS_TRADITIONAL
3719 PL_realtokenstart = -1;
3728 PL_realtokenstart = -1;
3732 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3733 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3734 /* handle eval qq[#line 1 "foo"\n ...] */
3735 CopLINE_dec(PL_curcop);
3738 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
3740 if (!PL_in_eval || PL_rsfp)
3745 while (d < PL_bufend && *d != '\n')
3749 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3750 Perl_croak(aTHX_ "panic: input overflow");
3753 PL_thiswhite = newSVpvn(s, d - s);
3758 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3760 PL_lex_state = LEX_FORMLINE;
3766 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
3767 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
3770 TOKEN(PEG); /* make sure any #! line is accessible */
3775 /* if (PL_madskills && PL_lex_formbrack) { */
3777 while (d < PL_bufend && *d != '\n')
3781 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3782 Perl_croak(aTHX_ "panic: input overflow");
3783 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
3785 PL_thiswhite = newSVpvn("",0);
3786 if (CopLINE(PL_curcop) == 1) {
3787 sv_setpvn(PL_thiswhite, "", 0);
3790 sv_catpvn(PL_thiswhite, s, d - s);
3804 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3812 while (s < PL_bufend && SPACE_OR_TAB(*s))
3815 if (strnEQ(s,"=>",2)) {
3816 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3817 DEBUG_T( { S_printbuf(aTHX_
3818 "### Saw unary minus before =>, forcing word %s\n", s);
3820 OPERATOR('-'); /* unary minus */
3822 PL_last_uni = PL_oldbufptr;
3824 case 'r': ftst = OP_FTEREAD; break;
3825 case 'w': ftst = OP_FTEWRITE; break;
3826 case 'x': ftst = OP_FTEEXEC; break;
3827 case 'o': ftst = OP_FTEOWNED; break;
3828 case 'R': ftst = OP_FTRREAD; break;
3829 case 'W': ftst = OP_FTRWRITE; break;
3830 case 'X': ftst = OP_FTREXEC; break;
3831 case 'O': ftst = OP_FTROWNED; break;
3832 case 'e': ftst = OP_FTIS; break;
3833 case 'z': ftst = OP_FTZERO; break;
3834 case 's': ftst = OP_FTSIZE; break;
3835 case 'f': ftst = OP_FTFILE; break;
3836 case 'd': ftst = OP_FTDIR; break;
3837 case 'l': ftst = OP_FTLINK; break;
3838 case 'p': ftst = OP_FTPIPE; break;
3839 case 'S': ftst = OP_FTSOCK; break;
3840 case 'u': ftst = OP_FTSUID; break;
3841 case 'g': ftst = OP_FTSGID; break;
3842 case 'k': ftst = OP_FTSVTX; break;
3843 case 'b': ftst = OP_FTBLK; break;
3844 case 'c': ftst = OP_FTCHR; break;
3845 case 't': ftst = OP_FTTTY; break;
3846 case 'T': ftst = OP_FTTEXT; break;
3847 case 'B': ftst = OP_FTBINARY; break;
3848 case 'M': case 'A': case 'C':
3849 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
3851 case 'M': ftst = OP_FTMTIME; break;
3852 case 'A': ftst = OP_FTATIME; break;
3853 case 'C': ftst = OP_FTCTIME; break;
3861 PL_last_lop_op = (OPCODE)ftst;
3862 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3863 "### Saw file test %c\n", (int)tmp);
3868 /* Assume it was a minus followed by a one-letter named
3869 * subroutine call (or a -bareword), then. */
3870 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3871 "### '-%c' looked like a file test but was not\n",
3878 const char tmp = *s++;
3881 if (PL_expect == XOPERATOR)
3886 else if (*s == '>') {
3889 if (isIDFIRST_lazy_if(s,UTF)) {
3890 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3898 if (PL_expect == XOPERATOR)
3901 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3903 OPERATOR('-'); /* unary minus */
3909 const char tmp = *s++;
3912 if (PL_expect == XOPERATOR)
3917 if (PL_expect == XOPERATOR)
3920 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3927 if (PL_expect != XOPERATOR) {
3928 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3929 PL_expect = XOPERATOR;
3930 force_ident(PL_tokenbuf, '*');
3943 if (PL_expect == XOPERATOR) {
3947 PL_tokenbuf[0] = '%';
3948 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3949 if (!PL_tokenbuf[1]) {
3952 PL_pending_ident = '%';
3963 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3964 && FEATURE_IS_ENABLED("~~"))
3971 const char tmp = *s++;
3977 goto just_a_word_zero_gv;
3980 switch (PL_expect) {
3986 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3988 PL_bufptr = s; /* update in case we back off */
3994 PL_expect = XTERMBLOCK;
3997 stuffstart = s - SvPVX(PL_linestr) - 1;
4001 while (isIDFIRST_lazy_if(s,UTF)) {
4003 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4004 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
4005 if (tmp < 0) tmp = -tmp;
4021 d = scan_str(d,TRUE,TRUE);
4023 /* MUST advance bufptr here to avoid bogus
4024 "at end of line" context messages from yyerror().
4026 PL_bufptr = s + len;
4027 yyerror("Unterminated attribute parameter in attribute list");
4030 return REPORT(0); /* EOF indicator */
4034 SV *sv = newSVpvn(s, len);
4035 sv_catsv(sv, PL_lex_stuff);
4036 attrs = append_elem(OP_LIST, attrs,
4037 newSVOP(OP_CONST, 0, sv));
4038 SvREFCNT_dec(PL_lex_stuff);
4039 PL_lex_stuff = NULL;
4042 if (len == 6 && strnEQ(s, "unique", len)) {
4043 if (PL_in_my == KEY_our) {
4045 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
4047 /* skip to avoid loading attributes.pm */
4049 deprecate(":unique");
4052 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4055 /* NOTE: any CV attrs applied here need to be part of
4056 the CVf_BUILTIN_ATTRS define in cv.h! */
4057 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
4058 CvLVALUE_on(PL_compcv);
4059 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
4060 CvLOCKED_on(PL_compcv);
4061 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
4062 CvMETHOD_on(PL_compcv);
4063 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
4064 CvASSERTION_on(PL_compcv);
4065 /* After we've set the flags, it could be argued that
4066 we don't need to do the attributes.pm-based setting
4067 process, and shouldn't bother appending recognized
4068 flags. To experiment with that, uncomment the
4069 following "else". (Note that's already been
4070 uncommented. That keeps the above-applied built-in
4071 attributes from being intercepted (and possibly
4072 rejected) by a package's attribute routines, but is
4073 justified by the performance win for the common case
4074 of applying only built-in attributes.) */
4076 attrs = append_elem(OP_LIST, attrs,
4077 newSVOP(OP_CONST, 0,
4081 if (*s == ':' && s[1] != ':')
4084 break; /* require real whitespace or :'s */
4085 /* XXX losing whitespace on sequential attributes here */
4089 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4090 if (*s != ';' && *s != '}' && *s != tmp
4091 && (tmp != '=' || *s != ')')) {
4092 const char q = ((*s == '\'') ? '"' : '\'');
4093 /* If here for an expression, and parsed no attrs, back
4095 if (tmp == '=' && !attrs) {
4099 /* MUST advance bufptr here to avoid bogus "at end of line"
4100 context messages from yyerror().
4104 ? Perl_form(aTHX_ "Invalid separator character "
4105 "%c%c%c in attribute list", q, *s, q)
4106 : "Unterminated attribute list" );
4114 start_force(PL_curforce);
4115 NEXTVAL_NEXTTOKE.opval = attrs;
4116 CURMAD('_', PL_nextwhite);
4121 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4122 (s - SvPVX(PL_linestr)) - stuffstart);
4130 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4131 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
4139 const char tmp = *s++;
4144 const char tmp = *s++;
4152 if (PL_lex_brackets <= 0)
4153 yyerror("Unmatched right square bracket");
4156 if (PL_lex_state == LEX_INTERPNORMAL) {
4157 if (PL_lex_brackets == 0) {
4158 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
4159 PL_lex_state = LEX_INTERPEND;
4166 if (PL_lex_brackets > 100) {
4167 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4169 switch (PL_expect) {
4171 if (PL_lex_formbrack) {
4175 if (PL_oldoldbufptr == PL_last_lop)
4176 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4178 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4179 OPERATOR(HASHBRACK);
4181 while (s < PL_bufend && SPACE_OR_TAB(*s))
4184 PL_tokenbuf[0] = '\0';
4185 if (d < PL_bufend && *d == '-') {
4186 PL_tokenbuf[0] = '-';
4188 while (d < PL_bufend && SPACE_OR_TAB(*d))
4191 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4192 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4194 while (d < PL_bufend && SPACE_OR_TAB(*d))
4197 const char minus = (PL_tokenbuf[0] == '-');
4198 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4206 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4211 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4216 if (PL_oldoldbufptr == PL_last_lop)
4217 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4219 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4222 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4224 /* This hack is to get the ${} in the message. */
4226 yyerror("syntax error");
4229 OPERATOR(HASHBRACK);
4231 /* This hack serves to disambiguate a pair of curlies
4232 * as being a block or an anon hash. Normally, expectation
4233 * determines that, but in cases where we're not in a
4234 * position to expect anything in particular (like inside
4235 * eval"") we have to resolve the ambiguity. This code
4236 * covers the case where the first term in the curlies is a
4237 * quoted string. Most other cases need to be explicitly
4238 * disambiguated by prepending a "+" before the opening
4239 * curly in order to force resolution as an anon hash.
4241 * XXX should probably propagate the outer expectation
4242 * into eval"" to rely less on this hack, but that could
4243 * potentially break current behavior of eval"".
4247 if (*s == '\'' || *s == '"' || *s == '`') {
4248 /* common case: get past first string, handling escapes */
4249 for (t++; t < PL_bufend && *t != *s;)
4250 if (*t++ == '\\' && (*t == '\\' || *t == *s))
4254 else if (*s == 'q') {
4257 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4260 /* skip q//-like construct */
4262 char open, close, term;
4265 while (t < PL_bufend && isSPACE(*t))
4267 /* check for q => */
4268 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4269 OPERATOR(HASHBRACK);
4273 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4277 for (t++; t < PL_bufend; t++) {
4278 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4280 else if (*t == open)
4284 for (t++; t < PL_bufend; t++) {
4285 if (*t == '\\' && t+1 < PL_bufend)
4287 else if (*t == close && --brackets <= 0)
4289 else if (*t == open)
4296 /* skip plain q word */
4297 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4300 else if (isALNUM_lazy_if(t,UTF)) {
4302 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4305 while (t < PL_bufend && isSPACE(*t))
4307 /* if comma follows first term, call it an anon hash */
4308 /* XXX it could be a comma expression with loop modifiers */
4309 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4310 || (*t == '=' && t[1] == '>')))
4311 OPERATOR(HASHBRACK);
4312 if (PL_expect == XREF)
4315 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4321 yylval.ival = CopLINE(PL_curcop);
4322 if (isSPACE(*s) || *s == '#')
4323 PL_copline = NOLINE; /* invalidate current command line number */
4328 if (PL_lex_brackets <= 0)
4329 yyerror("Unmatched right curly bracket");
4331 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4332 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4333 PL_lex_formbrack = 0;
4334 if (PL_lex_state == LEX_INTERPNORMAL) {
4335 if (PL_lex_brackets == 0) {
4336 if (PL_expect & XFAKEBRACK) {
4337 PL_expect &= XENUMMASK;
4338 PL_lex_state = LEX_INTERPEND;
4343 PL_thiswhite = newSVpvn("",0);
4344 sv_catpvn(PL_thiswhite,"}",1);
4347 return yylex(); /* ignore fake brackets */
4349 if (*s == '-' && s[1] == '>')
4350 PL_lex_state = LEX_INTERPENDMAYBE;
4351 else if (*s != '[' && *s != '{')
4352 PL_lex_state = LEX_INTERPEND;
4355 if (PL_expect & XFAKEBRACK) {
4356 PL_expect &= XENUMMASK;
4358 return yylex(); /* ignore fake brackets */
4360 start_force(PL_curforce);
4362 curmad('X', newSVpvn(s-1,1));
4363 CURMAD('_', PL_thiswhite);
4368 PL_thistoken = newSVpvn("",0);
4376 if (PL_expect == XOPERATOR) {
4377 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4378 && isIDFIRST_lazy_if(s,UTF))
4380 CopLINE_dec(PL_curcop);
4381 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4382 CopLINE_inc(PL_curcop);
4387 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4389 PL_expect = XOPERATOR;
4390 force_ident(PL_tokenbuf, '&');
4394 yylval.ival = (OPpENTERSUB_AMPER<<8);
4406 const char tmp = *s++;
4413 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4414 && strchr("+-*/%.^&|<",tmp))
4415 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4416 "Reversed %c= operator",(int)tmp);
4418 if (PL_expect == XSTATE && isALPHA(tmp) &&
4419 (s == PL_linestart+1 || s[-2] == '\n') )
4421 if (PL_in_eval && !PL_rsfp) {
4426 if (strnEQ(s,"=cut",4)) {
4442 PL_thiswhite = newSVpvn("",0);
4443 sv_catpvn(PL_thiswhite, PL_linestart,
4444 PL_bufend - PL_linestart);
4448 PL_doextract = TRUE;
4452 if (PL_lex_brackets < PL_lex_formbrack) {
4454 #ifdef PERL_STRICT_CR
4455 for (t = s; SPACE_OR_TAB(*t); t++) ;
4457 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
4459 if (*t == '\n' || *t == '#') {
4470 const char tmp = *s++;
4472 /* was this !=~ where !~ was meant?
4473 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4475 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4476 const char *t = s+1;
4478 while (t < PL_bufend && isSPACE(*t))
4481 if (*t == '/' || *t == '?' ||
4482 ((*t == 'm' || *t == 's' || *t == 'y')
4483 && !isALNUM(t[1])) ||
4484 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4485 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4486 "!=~ should be !~");
4496 if (PL_expect != XOPERATOR) {
4497 if (s[1] != '<' && !strchr(s,'>'))
4500 s = scan_heredoc(s);
4502 s = scan_inputsymbol(s);
4503 TERM(sublex_start());
4509 SHop(OP_LEFT_SHIFT);
4523 const char tmp = *s++;
4525 SHop(OP_RIGHT_SHIFT);
4526 else if (tmp == '=')
4535 if (PL_expect == XOPERATOR) {
4536 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4538 deprecate_old(commaless_variable_list);
4539 return REPORT(','); /* grandfather non-comma-format format */
4543 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4544 PL_tokenbuf[0] = '@';
4545 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4546 sizeof PL_tokenbuf - 1, FALSE);
4547 if (PL_expect == XOPERATOR)
4548 no_op("Array length", s);
4549 if (!PL_tokenbuf[1])
4551 PL_expect = XOPERATOR;
4552 PL_pending_ident = '#';
4556 PL_tokenbuf[0] = '$';
4557 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4558 sizeof PL_tokenbuf - 1, FALSE);
4559 if (PL_expect == XOPERATOR)
4561 if (!PL_tokenbuf[1]) {
4563 yyerror("Final $ should be \\$ or $name");
4567 /* This kludge not intended to be bulletproof. */
4568 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4569 yylval.opval = newSVOP(OP_CONST, 0,
4570 newSViv(CopARYBASE_get(&PL_compiling)));
4571 yylval.opval->op_private = OPpCONST_ARYBASE;
4577 const char tmp = *s;
4578 if (PL_lex_state == LEX_NORMAL)
4581 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4582 && intuit_more(s)) {
4584 PL_tokenbuf[0] = '@';
4585 if (ckWARN(WARN_SYNTAX)) {
4588 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
4591 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4592 while (t < PL_bufend && *t != ']')
4594 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4595 "Multidimensional syntax %.*s not supported",
4596 (int)((t - PL_bufptr) + 1), PL_bufptr);
4600 else if (*s == '{') {
4602 PL_tokenbuf[0] = '%';
4603 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
4604 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4606 char tmpbuf[sizeof PL_tokenbuf];
4607 for (t++; isSPACE(*t); t++) ;
4608 if (isIDFIRST_lazy_if(t,UTF)) {
4610 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4612 for (; isSPACE(*t); t++) ;
4613 if (*t == ';' && get_cv(tmpbuf, FALSE))
4614 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4615 "You need to quote \"%s\"",
4622 PL_expect = XOPERATOR;
4623 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4624 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4625 if (!islop || PL_last_lop_op == OP_GREPSTART)
4626 PL_expect = XOPERATOR;
4627 else if (strchr("$@\"'`q", *s))
4628 PL_expect = XTERM; /* e.g. print $fh "foo" */
4629 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4630 PL_expect = XTERM; /* e.g. print $fh &sub */
4631 else if (isIDFIRST_lazy_if(s,UTF)) {
4632 char tmpbuf[sizeof PL_tokenbuf];
4634 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4635 if ((t2 = keyword(tmpbuf, len))) {
4636 /* binary operators exclude handle interpretations */
4648 PL_expect = XTERM; /* e.g. print $fh length() */
4653 PL_expect = XTERM; /* e.g. print $fh subr() */
4656 else if (isDIGIT(*s))
4657 PL_expect = XTERM; /* e.g. print $fh 3 */
4658 else if (*s == '.' && isDIGIT(s[1]))
4659 PL_expect = XTERM; /* e.g. print $fh .3 */
4660 else if ((*s == '?' || *s == '-' || *s == '+')
4661 && !isSPACE(s[1]) && s[1] != '=')
4662 PL_expect = XTERM; /* e.g. print $fh -1 */
4663 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4665 PL_expect = XTERM; /* e.g. print $fh /.../
4666 XXX except DORDOR operator
4668 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4670 PL_expect = XTERM; /* print $fh <<"EOF" */
4673 PL_pending_ident = '$';
4677 if (PL_expect == XOPERATOR)
4679 PL_tokenbuf[0] = '@';
4680 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4681 if (!PL_tokenbuf[1]) {
4684 if (PL_lex_state == LEX_NORMAL)
4686 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4688 PL_tokenbuf[0] = '%';
4690 /* Warn about @ where they meant $. */
4691 if (*s == '[' || *s == '{') {
4692 if (ckWARN(WARN_SYNTAX)) {
4693 const char *t = s + 1;
4694 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4696 if (*t == '}' || *t == ']') {
4698 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4699 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4700 "Scalar value %.*s better written as $%.*s",
4701 (int)(t-PL_bufptr), PL_bufptr,
4702 (int)(t-PL_bufptr-1), PL_bufptr+1);
4707 PL_pending_ident = '@';
4710 case '/': /* may be division, defined-or, or pattern */
4711 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4715 case '?': /* may either be conditional or pattern */
4716 if(PL_expect == XOPERATOR) {
4724 /* A // operator. */
4734 /* Disable warning on "study /blah/" */
4735 if (PL_oldoldbufptr == PL_last_uni
4736 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4737 || memNE(PL_last_uni, "study", 5)
4738 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4741 s = scan_pat(s,OP_MATCH);
4742 TERM(sublex_start());
4746 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4747 #ifdef PERL_STRICT_CR
4750 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4752 && (s == PL_linestart || s[-1] == '\n') )
4754 PL_lex_formbrack = 0;
4758 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4764 yylval.ival = OPf_SPECIAL;
4770 if (PL_expect != XOPERATOR)
4775 case '0': case '1': case '2': case '3': case '4':
4776 case '5': case '6': case '7': case '8': case '9':
4777 s = scan_num(s, &yylval);
4778 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4779 if (PL_expect == XOPERATOR)
4784 s = scan_str(s,!!PL_madskills,FALSE);
4785 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4786 if (PL_expect == XOPERATOR) {
4787 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4789 deprecate_old(commaless_variable_list);
4790 return REPORT(','); /* grandfather non-comma-format format */
4797 yylval.ival = OP_CONST;
4798 TERM(sublex_start());
4801 s = scan_str(s,!!PL_madskills,FALSE);
4802 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4803 if (PL_expect == XOPERATOR) {
4804 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4806 deprecate_old(commaless_variable_list);
4807 return REPORT(','); /* grandfather non-comma-format format */
4814 yylval.ival = OP_CONST;
4815 /* FIXME. I think that this can be const if char *d is replaced by
4816 more localised variables. */
4817 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4818 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4819 yylval.ival = OP_STRINGIFY;
4823 TERM(sublex_start());
4826 s = scan_str(s,!!PL_madskills,FALSE);
4827 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4828 if (PL_expect == XOPERATOR)
4829 no_op("Backticks",s);
4832 yylval.ival = OP_BACKTICK;
4834 TERM(sublex_start());
4838 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4839 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4841 if (PL_expect == XOPERATOR)
4842 no_op("Backslash",s);
4846 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4847 char *start = s + 2;
4848 while (isDIGIT(*start) || *start == '_')
4850 if (*start == '.' && isDIGIT(start[1])) {
4851 s = scan_num(s, &yylval);
4854 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4855 else if (!isALPHA(*start) && (PL_expect == XTERM
4856 || PL_expect == XREF || PL_expect == XSTATE
4857 || PL_expect == XTERMORDORDOR)) {
4858 /* XXX Use gv_fetchpvn rather than stomping on a const string */
4859 const char c = *start;
4862 gv = gv_fetchpv(s, 0, SVt_PVCV);
4865 s = scan_num(s, &yylval);
4872 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4908 I32 orig_keyword = 0;
4913 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4915 /* Some keywords can be followed by any delimiter, including ':' */
4916 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4917 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4918 (PL_tokenbuf[0] == 'q' &&
4919 strchr("qwxr", PL_tokenbuf[1])))));
4921 /* x::* is just a word, unless x is "CORE" */
4922 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4926 while (d < PL_bufend && isSPACE(*d))
4927 d++; /* no comments skipped here, or s### is misparsed */
4929 /* Is this a label? */
4930 if (!tmp && PL_expect == XSTATE
4931 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4933 yylval.pval = savepv(PL_tokenbuf);
4938 /* Check for keywords */
4939 tmp = keyword(PL_tokenbuf, len);
4941 /* Is this a word before a => operator? */
4942 if (*d == '=' && d[1] == '>') {
4945 = (OP*)newSVOP(OP_CONST, 0,
4946 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4947 yylval.opval->op_private = OPpCONST_BARE;
4951 if (tmp < 0) { /* second-class keyword? */
4952 GV *ogv = NULL; /* override (winner) */
4953 GV *hgv = NULL; /* hidden (loser) */
4954 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4956 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
4959 if (GvIMPORTED_CV(gv))
4961 else if (! CvMETHOD(cv))
4965 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4966 (gv = *gvp) != (GV*)&PL_sv_undef &&
4967 GvCVu(gv) && GvIMPORTED_CV(gv))
4974 tmp = 0; /* overridden by import or by GLOBAL */
4977 && -tmp==KEY_lock /* XXX generalizable kludge */
4979 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
4981 tmp = 0; /* any sub overrides "weak" keyword */
4983 else { /* no override */
4985 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4986 Perl_warner(aTHX_ packWARN(WARN_MISC),
4987 "dump() better written as CORE::dump()");
4991 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4992 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4993 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4994 "Ambiguous call resolved as CORE::%s(), %s",
4995 GvENAME(hgv), "qualify as such or use &");
5002 default: /* not a keyword */
5003 /* Trade off - by using this evil construction we can pull the
5004 variable gv into the block labelled keylookup. If not, then
5005 we have to give it function scope so that the goto from the
5006 earlier ':' case doesn't bypass the initialisation. */
5008 just_a_word_zero_gv:
5016 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5019 SV *nextPL_nextwhite = 0;
5023 /* Get the rest if it looks like a package qualifier */
5025 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5027 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5030 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5031 *s == '\'' ? "'" : "::");
5036 if (PL_expect == XOPERATOR) {
5037 if (PL_bufptr == PL_linestart) {
5038 CopLINE_dec(PL_curcop);
5039 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
5040 CopLINE_inc(PL_curcop);
5043 no_op("Bareword",s);
5046 /* Look for a subroutine with this name in current package,
5047 unless name is "Foo::", in which case Foo is a bearword
5048 (and a package name). */
5050 if (len > 2 && !PL_madskills &&
5051 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5053 if (ckWARN(WARN_BAREWORD)
5054 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5055 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5056 "Bareword \"%s\" refers to nonexistent package",
5059 PL_tokenbuf[len] = '\0';
5065 /* Mustn't actually add anything to a symbol table.
5066 But also don't want to "initialise" any placeholder
5067 constants that might already be there into full
5068 blown PVGVs with attached PVCV. */
5069 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5070 GV_NOADD_NOINIT, SVt_PVCV);
5075 /* if we saw a global override before, get the right name */
5078 sv = newSVpvs("CORE::GLOBAL::");
5079 sv_catpv(sv,PL_tokenbuf);
5082 /* If len is 0, newSVpv does strlen(), which is correct.
5083 If len is non-zero, then it will be the true length,
5084 and so the scalar will be created correctly. */
5085 sv = newSVpv(PL_tokenbuf,len);
5088 if (PL_madskills && !PL_thistoken) {
5089 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5090 PL_thistoken = newSVpv(start,s - start);
5091 PL_realtokenstart = s - SvPVX(PL_linestr);
5095 /* Presume this is going to be a bareword of some sort. */
5098 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5099 yylval.opval->op_private = OPpCONST_BARE;
5100 /* UTF-8 package name? */
5101 if (UTF && !IN_BYTES &&
5102 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5105 /* And if "Foo::", then that's what it certainly is. */
5110 /* Do the explicit type check so that we don't need to force
5111 the initialisation of the symbol table to have a real GV.
5112 Beware - gv may not really be a PVGV, cv may not really be
5113 a PVCV, (because of the space optimisations that gv_init
5114 understands) But they're true if for this symbol there is
5115 respectively a typeglob and a subroutine.
5117 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5118 /* Real typeglob, so get the real subroutine: */
5120 /* A proxy for a subroutine in this package? */
5121 : SvOK(gv) ? (CV *) gv : NULL)
5124 /* See if it's the indirect object for a list operator. */
5126 if (PL_oldoldbufptr &&
5127 PL_oldoldbufptr < PL_bufptr &&
5128 (PL_oldoldbufptr == PL_last_lop
5129 || PL_oldoldbufptr == PL_last_uni) &&
5130 /* NO SKIPSPACE BEFORE HERE! */
5131 (PL_expect == XREF ||
5132 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5134 bool immediate_paren = *s == '(';
5136 /* (Now we can afford to cross potential line boundary.) */
5137 s = SKIPSPACE2(s,nextPL_nextwhite);
5139 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5142 /* Two barewords in a row may indicate method call. */
5144 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5145 (tmp = intuit_method(s, gv, cv)))
5148 /* If not a declared subroutine, it's an indirect object. */
5149 /* (But it's an indir obj regardless for sort.) */
5150 /* Also, if "_" follows a filetest operator, it's a bareword */
5153 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5155 (PL_last_lop_op != OP_MAPSTART &&
5156 PL_last_lop_op != OP_GREPSTART))))
5157 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5158 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5161 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5166 PL_expect = XOPERATOR;
5169 s = SKIPSPACE2(s,nextPL_nextwhite);
5170 PL_nextwhite = nextPL_nextwhite;
5175 /* Is this a word before a => operator? */
5176 if (*s == '=' && s[1] == '>' && !pkgname) {
5178 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
5179 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5180 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
5184 /* If followed by a paren, it's certainly a subroutine. */
5188 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
5189 if (*d == ')' && (sv = gv_const_sv(gv))) {
5193 char *par = SvPVX(PL_linestr) + PL_realtokenstart;
5194 sv_catpvn(PL_thistoken, par, s - par);
5196 sv_free(PL_nextwhite);
5206 PL_nextwhite = PL_thiswhite;
5209 start_force(PL_curforce);
5211 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5212 PL_expect = XOPERATOR;
5215 PL_nextwhite = nextPL_nextwhite;
5216 curmad('X', PL_thistoken);
5217 PL_thistoken = newSVpvn("",0);
5225 /* If followed by var or block, call it a method (unless sub) */
5227 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5228 PL_last_lop = PL_oldbufptr;
5229 PL_last_lop_op = OP_METHOD;
5233 /* If followed by a bareword, see if it looks like indir obj. */
5236 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5237 && (tmp = intuit_method(s, gv, cv)))
5240 /* Not a method, so call it a subroutine (if defined) */
5243 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5244 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5245 "Ambiguous use of -%s resolved as -&%s()",
5246 PL_tokenbuf, PL_tokenbuf);
5247 /* Check for a constant sub */
5248 if ((sv = gv_const_sv(gv))) {
5250 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
5251 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5252 yylval.opval->op_private = 0;
5256 /* Resolve to GV now. */
5257 if (SvTYPE(gv) != SVt_PVGV) {
5258 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5259 assert (SvTYPE(gv) == SVt_PVGV);
5260 /* cv must have been some sort of placeholder, so
5261 now needs replacing with a real code reference. */
5265 op_free(yylval.opval);
5266 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5267 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5268 PL_last_lop = PL_oldbufptr;
5269 PL_last_lop_op = OP_ENTERSUB;
5270 /* Is there a prototype? */
5277 const char *proto = SvPV_const((SV*)cv, protolen);
5280 if (*proto == '$' && proto[1] == '\0')
5282 while (*proto == ';')
5284 if (*proto == '&' && *s == '{') {
5285 sv_setpv(PL_subname, PL_curstash ?
5286 "__ANON__" : "__ANON__::__ANON__");
5293 PL_nextwhite = PL_thiswhite;
5296 start_force(PL_curforce);
5297 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5300 PL_nextwhite = nextPL_nextwhite;
5301 curmad('X', PL_thistoken);
5302 PL_thistoken = newSVpvn("",0);
5309 /* Guess harder when madskills require "best effort". */
5310 if (PL_madskills && (!gv || !GvCVu(gv))) {
5311 int probable_sub = 0;
5312 if (strchr("\"'`$@%0123456789!*+{[<", *s))
5314 else if (isALPHA(*s)) {
5318 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5319 if (!keyword(tmpbuf,tmplen))
5322 while (d < PL_bufend && isSPACE(*d))
5324 if (*d == '=' && d[1] == '>')
5329 gv = gv_fetchpv(PL_tokenbuf, TRUE, SVt_PVCV);
5330 op_free(yylval.opval);
5331 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5332 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5333 PL_last_lop = PL_oldbufptr;
5334 PL_last_lop_op = OP_ENTERSUB;
5335 PL_nextwhite = PL_thiswhite;
5337 start_force(PL_curforce);
5338 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5340 PL_nextwhite = nextPL_nextwhite;
5341 curmad('X', PL_thistoken);
5342 PL_thistoken = newSVpvn("",0);
5347 NEXTVAL_NEXTTOKE.opval = yylval.opval;
5354 /* Call it a bare word */
5356 if (PL_hints & HINT_STRICT_SUBS)
5357 yylval.opval->op_private |= OPpCONST_STRICT;
5360 if (lastchar != '-') {
5361 if (ckWARN(WARN_RESERVED)) {
5362 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
5363 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
5364 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5371 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5372 && ckWARN_d(WARN_AMBIGUOUS)) {
5373 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5374 "Operator or semicolon missing before %c%s",
5375 lastchar, PL_tokenbuf);
5376 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5377 "Ambiguous use of %c resolved as operator %c",
5378 lastchar, lastchar);
5384 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5385 newSVpv(CopFILE(PL_curcop),0));
5389 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5390 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5393 case KEY___PACKAGE__:
5394 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5396 ? newSVhek(HvNAME_HEK(PL_curstash))
5403 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5404 const char *pname = "main";
5405 if (PL_tokenbuf[2] == 'D')
5406 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5407 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5411 GvIOp(gv) = newIO();
5412 IoIFP(GvIOp(gv)) = PL_rsfp;
5413 #if defined(HAS_FCNTL) && defined(F_SETFD)
5415 const int fd = PerlIO_fileno(PL_rsfp);
5416 fcntl(fd,F_SETFD,fd >= 3);
5419 /* Mark this internal pseudo-handle as clean */
5420 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5422 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
5423 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5424 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5426 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5427 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5428 /* if the script was opened in binmode, we need to revert
5429 * it to text mode for compatibility; but only iff it has CRs
5430 * XXX this is a questionable hack at best. */
5431 if (PL_bufend-PL_bufptr > 2
5432 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5435 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5436 loc = PerlIO_tell(PL_rsfp);
5437 (void)PerlIO_seek(PL_rsfp, 0L, 0);
5440 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5442 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5443 #endif /* NETWARE */
5444 #ifdef PERLIO_IS_STDIO /* really? */
5445 # if defined(__BORLANDC__)
5446 /* XXX see note in do_binmode() */
5447 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5451 PerlIO_seek(PL_rsfp, loc, 0);
5455 #ifdef PERLIO_LAYERS
5458 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5459 else if (PL_encoding) {
5466 XPUSHs(PL_encoding);
5468 call_method("name", G_SCALAR);
5472 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5473 Perl_form(aTHX_ ":encoding(%"SVf")",
5482 if (PL_realtokenstart >= 0) {
5483 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5485 PL_endwhite = newSVpvn("",0);
5486 sv_catsv(PL_endwhite, PL_thiswhite);
5488 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5489 PL_realtokenstart = -1;
5491 while ((s = filter_gets(PL_endwhite, PL_rsfp,
5492 SvCUR(PL_endwhite))) != Nullch) ;
5506 if (PL_expect == XSTATE) {
5513 if (*s == ':' && s[1] == ':') {
5516 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5517 if (!(tmp = keyword(PL_tokenbuf, len)))
5518 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5521 else if (tmp == KEY_require || tmp == KEY_do)
5522 /* that's a way to remember we saw "CORE::" */
5535 LOP(OP_ACCEPT,XTERM);
5541 LOP(OP_ATAN2,XTERM);
5547 LOP(OP_BINMODE,XTERM);
5550 LOP(OP_BLESS,XTERM);
5559 /* When 'use switch' is in effect, continue has a dual
5560 life as a control operator. */
5562 if (!FEATURE_IS_ENABLED("switch"))
5565 /* We have to disambiguate the two senses of
5566 "continue". If the next token is a '{' then
5567 treat it as the start of a continue block;
5568 otherwise treat it as a control operator.
5580 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5597 if (!PL_cryptseen) {
5598 PL_cryptseen = TRUE;
5602 LOP(OP_CRYPT,XTERM);
5605 LOP(OP_CHMOD,XTERM);
5608 LOP(OP_CHOWN,XTERM);
5611 LOP(OP_CONNECT,XTERM);
5630 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5631 if (orig_keyword == KEY_do) {
5640 PL_hints |= HINT_BLOCK_SCOPE;
5650 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
5651 LOP(OP_DBMOPEN,XTERM);
5657 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5664 yylval.ival = CopLINE(PL_curcop);
5680 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
5681 UNIBRACK(OP_ENTEREVAL);
5699 case KEY_endhostent:
5705 case KEY_endservent:
5708 case KEY_endprotoent:
5719 yylval.ival = CopLINE(PL_curcop);
5721 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
5724 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
5727 if ((PL_bufend - p) >= 3 &&
5728 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
5730 else if ((PL_bufend - p) >= 4 &&
5731 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
5734 if (isIDFIRST_lazy_if(p,UTF)) {
5735 p = scan_ident(p, PL_bufend,
5736 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5740 Perl_croak(aTHX_ "Missing $ on loop variable");
5742 s = SvPVX(PL_linestr) + soff;
5748 LOP(OP_FORMLINE,XTERM);
5754 LOP(OP_FCNTL,XTERM);
5760 LOP(OP_FLOCK,XTERM);
5769 LOP(OP_GREPSTART, XREF);
5772 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5787 case KEY_getpriority:
5788 LOP(OP_GETPRIORITY,XTERM);
5790 case KEY_getprotobyname:
5793 case KEY_getprotobynumber:
5794 LOP(OP_GPBYNUMBER,XTERM);
5796 case KEY_getprotoent:
5808 case KEY_getpeername:
5809 UNI(OP_GETPEERNAME);
5811 case KEY_gethostbyname:
5814 case KEY_gethostbyaddr:
5815 LOP(OP_GHBYADDR,XTERM);
5817 case KEY_gethostent:
5820 case KEY_getnetbyname:
5823 case KEY_getnetbyaddr:
5824 LOP(OP_GNBYADDR,XTERM);
5829 case KEY_getservbyname:
5830 LOP(OP_GSBYNAME,XTERM);
5832 case KEY_getservbyport:
5833 LOP(OP_GSBYPORT,XTERM);
5835 case KEY_getservent:
5838 case KEY_getsockname:
5839 UNI(OP_GETSOCKNAME);
5841 case KEY_getsockopt:
5842 LOP(OP_GSOCKOPT,XTERM);
5857 yylval.ival = CopLINE(PL_curcop);
5868 yylval.ival = CopLINE(PL_curcop);
5872 LOP(OP_INDEX,XTERM);
5878 LOP(OP_IOCTL,XTERM);
5890 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5922 LOP(OP_LISTEN,XTERM);
5931 s = scan_pat(s,OP_MATCH);
5932 TERM(sublex_start());
5935 LOP(OP_MAPSTART, XREF);
5938 LOP(OP_MKDIR,XTERM);
5941 LOP(OP_MSGCTL,XTERM);
5944 LOP(OP_MSGGET,XTERM);
5947 LOP(OP_MSGRCV,XTERM);
5950 LOP(OP_MSGSND,XTERM);
5956 if (isIDFIRST_lazy_if(s,UTF)) {
5960 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5961 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5963 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5964 if (!PL_in_my_stash) {
5967 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5971 if (PL_madskills) { /* just add type to declarator token */
5972 sv_catsv(PL_thistoken, PL_nextwhite);
5974 sv_catpvn(PL_thistoken, start, s - start);
5982 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5989 s = tokenize_use(0, s);
5993 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6000 if (isIDFIRST_lazy_if(s,UTF)) {
6002 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
6003 for (t=d; *t && isSPACE(*t); t++) ;
6004 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6006 && !(t[0] == '=' && t[1] == '>')
6008 int parms_len = (int)(d-s);
6009 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6010 "Precedence problem: open %.*s should be open(%.*s)",
6011 parms_len, s, parms_len, s);
6017 yylval.ival = OP_OR;
6027 LOP(OP_OPEN_DIR,XTERM);
6030 checkcomma(s,PL_tokenbuf,"filehandle");
6034 checkcomma(s,PL_tokenbuf,"filehandle");
6053 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6057 LOP(OP_PIPE_OP,XTERM);
6060 s = scan_str(s,!!PL_madskills,FALSE);
6063 yylval.ival = OP_CONST;
6064 TERM(sublex_start());
6070 s = scan_str(s,!!PL_madskills,FALSE);
6073 PL_expect = XOPERATOR;
6075 if (SvCUR(PL_lex_stuff)) {
6078 d = SvPV_force(PL_lex_stuff, len);
6080 for (; isSPACE(*d) && len; --len, ++d)
6085 if (!warned && ckWARN(WARN_QW)) {
6086 for (; !isSPACE(*d) && len; --len, ++d) {
6088 Perl_warner(aTHX_ packWARN(WARN_QW),
6089 "Possible attempt to separate words with commas");
6092 else if (*d == '#') {
6093 Perl_warner(aTHX_ packWARN(WARN_QW),
6094 "Possible attempt to put comments in qw() list");
6100 for (; !isSPACE(*d) && len; --len, ++d)
6103 sv = newSVpvn(b, d-b);
6104 if (DO_UTF8(PL_lex_stuff))
6106 words = append_elem(OP_LIST, words,
6107 newSVOP(OP_CONST, 0, tokeq(sv)));
6111 start_force(PL_curforce);
6112 NEXTVAL_NEXTTOKE.opval = words;
6117 SvREFCNT_dec(PL_lex_stuff);
6118 PL_lex_stuff = NULL;
6124 s = scan_str(s,!!PL_madskills,FALSE);
6127 yylval.ival = OP_STRINGIFY;
6128 if (SvIVX(PL_lex_stuff) == '\'')
6129 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
6130 TERM(sublex_start());
6133 s = scan_pat(s,OP_QR);
6134 TERM(sublex_start());
6137 s = scan_str(s,!!PL_madskills,FALSE);
6140 yylval.ival = OP_BACKTICK;
6142 TERM(sublex_start());
6150 s = force_version(s, FALSE);
6152 else if (*s != 'v' || !isDIGIT(s[1])
6153 || (s = force_version(s, TRUE), *s == 'v'))
6155 *PL_tokenbuf = '\0';
6156 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6157 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6158 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
6160 yyerror("<> should be quotes");
6162 if (orig_keyword == KEY_require) {
6170 PL_last_uni = PL_oldbufptr;
6171 PL_last_lop_op = OP_REQUIRE;
6173 return REPORT( (int)REQUIRE );
6179 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6183 LOP(OP_RENAME,XTERM);
6192 LOP(OP_RINDEX,XTERM);
6202 UNIDOR(OP_READLINE);
6215 LOP(OP_REVERSE,XTERM);
6218 UNIDOR(OP_READLINK);
6226 TERM(sublex_start());
6228 TOKEN(1); /* force error */
6231 checkcomma(s,PL_tokenbuf,"filehandle");
6241 LOP(OP_SELECT,XTERM);
6247 LOP(OP_SEMCTL,XTERM);
6250 LOP(OP_SEMGET,XTERM);
6253 LOP(OP_SEMOP,XTERM);
6259 LOP(OP_SETPGRP,XTERM);
6261 case KEY_setpriority:
6262 LOP(OP_SETPRIORITY,XTERM);
6264 case KEY_sethostent:
6270 case KEY_setservent:
6273 case KEY_setprotoent:
6283 LOP(OP_SEEKDIR,XTERM);
6285 case KEY_setsockopt:
6286 LOP(OP_SSOCKOPT,XTERM);
6292 LOP(OP_SHMCTL,XTERM);
6295 LOP(OP_SHMGET,XTERM);
6298 LOP(OP_SHMREAD,XTERM);
6301 LOP(OP_SHMWRITE,XTERM);
6304 LOP(OP_SHUTDOWN,XTERM);
6313 LOP(OP_SOCKET,XTERM);
6315 case KEY_socketpair:
6316 LOP(OP_SOCKPAIR,XTERM);
6319 checkcomma(s,PL_tokenbuf,"subroutine name");
6321 if (*s == ';' || *s == ')') /* probably a close */
6322 Perl_croak(aTHX_ "sort is now a reserved word");
6324 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6328 LOP(OP_SPLIT,XTERM);
6331 LOP(OP_SPRINTF,XTERM);
6334 LOP(OP_SPLICE,XTERM);
6349 LOP(OP_SUBSTR,XTERM);
6355 char tmpbuf[sizeof PL_tokenbuf];
6356 SSize_t tboffset = 0;
6357 expectation attrful;
6358 bool have_name, have_proto, bad_proto;
6359 const int key = tmp;
6364 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6365 SV *subtoken = newSVpvn(tstart, s - tstart);
6369 s = SKIPSPACE2(s,tmpwhite);
6374 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6375 (*s == ':' && s[1] == ':'))
6382 attrful = XATTRBLOCK;
6383 /* remember buffer pos'n for later force_word */
6384 tboffset = s - PL_oldbufptr;
6385 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6388 nametoke = newSVpvn(s, d - s);
6390 if (strchr(tmpbuf, ':'))
6391 sv_setpv(PL_subname, tmpbuf);
6393 sv_setsv(PL_subname,PL_curstname);
6394 sv_catpvs(PL_subname,"::");
6395 sv_catpvn(PL_subname,tmpbuf,len);
6402 CURMAD('X', nametoke);
6403 CURMAD('_', tmpwhite);
6404 (void) force_word(PL_oldbufptr + tboffset, WORD,
6407 s = SKIPSPACE2(d,tmpwhite);
6414 Perl_croak(aTHX_ "Missing name in \"my sub\"");
6415 PL_expect = XTERMBLOCK;
6416 attrful = XATTRTERM;
6417 sv_setpvn(PL_subname,"?",1);
6421 if (key == KEY_format) {
6423 PL_lex_formbrack = PL_lex_brackets + 1;
6425 PL_thistoken = subtoken;
6429 (void) force_word(PL_oldbufptr + tboffset, WORD,
6435 /* Look for a prototype */
6439 s = scan_str(s,!!PL_madskills,FALSE);
6441 Perl_croak(aTHX_ "Prototype not terminated");
6442 /* strip spaces and check for bad characters */
6443 d = SvPVX(PL_lex_stuff);
6446 for (p = d; *p; ++p) {
6449 if (!strchr("$@%*;[]&\\", *p))
6454 if (bad_proto && ckWARN(WARN_SYNTAX))
6455 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6456 "Illegal character in prototype for %"SVf" : %s",
6458 SvCUR_set(PL_lex_stuff, tmp);
6463 CURMAD('q', PL_thisopen);
6464 CURMAD('_', tmpwhite);
6465 CURMAD('=', PL_thisstuff);
6466 CURMAD('Q', PL_thisclose);
6467 NEXTVAL_NEXTTOKE.opval =
6468 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6469 PL_lex_stuff = Nullsv;
6472 s = SKIPSPACE2(s,tmpwhite);
6480 if (*s == ':' && s[1] != ':')
6481 PL_expect = attrful;
6482 else if (*s != '{' && key == KEY_sub) {
6484 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6486 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
6493 curmad('^', newSVpvn("",0));
6494 CURMAD('_', tmpwhite);
6498 PL_thistoken = subtoken;
6501 NEXTVAL_NEXTTOKE.opval =
6502 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6503 PL_lex_stuff = NULL;
6508 sv_setpv(PL_subname,
6509 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
6513 (void) force_word(PL_oldbufptr + tboffset, WORD,
6523 LOP(OP_SYSTEM,XREF);
6526 LOP(OP_SYMLINK,XTERM);
6529 LOP(OP_SYSCALL,XTERM);
6532 LOP(OP_SYSOPEN,XTERM);
6535 LOP(OP_SYSSEEK,XTERM);
6538 LOP(OP_SYSREAD,XTERM);
6541 LOP(OP_SYSWRITE,XTERM);
6545 TERM(sublex_start());
6566 LOP(OP_TRUNCATE,XTERM);
6578 yylval.ival = CopLINE(PL_curcop);
6582 yylval.ival = CopLINE(PL_curcop);
6586 LOP(OP_UNLINK,XTERM);
6592 LOP(OP_UNPACK,XTERM);
6595 LOP(OP_UTIME,XTERM);
6601 LOP(OP_UNSHIFT,XTERM);
6604 s = tokenize_use(1, s);
6614 yylval.ival = CopLINE(PL_curcop);
6618 yylval.ival = CopLINE(PL_curcop);
6622 PL_hints |= HINT_BLOCK_SCOPE;
6629 LOP(OP_WAITPID,XTERM);
6638 ctl_l[0] = toCTRL('L');
6640 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
6643 /* Make sure $^L is defined */
6644 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
6649 if (PL_expect == XOPERATOR)
6655 yylval.ival = OP_XOR;
6660 TERM(sublex_start());
6665 #pragma segment Main
6669 S_pending_ident(pTHX)
6673 register I32 tmp = 0;
6674 /* pit holds the identifier we read and pending_ident is reset */
6675 char pit = PL_pending_ident;
6676 PL_pending_ident = 0;
6678 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
6679 DEBUG_T({ PerlIO_printf(Perl_debug_log,
6680 "### Pending identifier '%s'\n", PL_tokenbuf); });
6682 /* if we're in a my(), we can't allow dynamics here.
6683 $foo'bar has already been turned into $foo::bar, so
6684 just check for colons.
6686 if it's a legal name, the OP is a PADANY.
6689 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
6690 if (strchr(PL_tokenbuf,':'))
6691 yyerror(Perl_form(aTHX_ "No package name allowed for "
6692 "variable %s in \"our\"",
6694 tmp = allocmy(PL_tokenbuf);
6697 if (strchr(PL_tokenbuf,':'))
6698 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
6700 yylval.opval = newOP(OP_PADANY, 0);
6701 yylval.opval->op_targ = allocmy(PL_tokenbuf);
6707 build the ops for accesses to a my() variable.
6709 Deny my($a) or my($b) in a sort block, *if* $a or $b is
6710 then used in a comparison. This catches most, but not
6711 all cases. For instance, it catches
6712 sort { my($a); $a <=> $b }
6714 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
6715 (although why you'd do that is anyone's guess).
6718 if (!strchr(PL_tokenbuf,':')) {
6720 tmp = pad_findmy(PL_tokenbuf);
6721 if (tmp != NOT_IN_PAD) {
6722 /* might be an "our" variable" */
6723 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6724 /* build ops for a bareword */
6725 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
6726 HEK * const stashname = HvNAME_HEK(stash);
6727 SV * const sym = newSVhek(stashname);
6728 sv_catpvs(sym, "::");
6729 sv_catpv(sym, PL_tokenbuf+1);
6730 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
6731 yylval.opval->op_private = OPpCONST_ENTERED;
6734 ? (GV_ADDMULTI | GV_ADDINEVAL)
6737 ((PL_tokenbuf[0] == '$') ? SVt_PV
6738 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6743 /* if it's a sort block and they're naming $a or $b */
6744 if (PL_last_lop_op == OP_SORT &&
6745 PL_tokenbuf[0] == '$' &&
6746 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
6749 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
6750 d < PL_bufend && *d != '\n';
6753 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
6754 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
6760 yylval.opval = newOP(OP_PADANY, 0);
6761 yylval.opval->op_targ = tmp;
6767 Whine if they've said @foo in a doublequoted string,
6768 and @foo isn't a variable we can find in the symbol
6771 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
6772 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
6773 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
6774 && ckWARN(WARN_AMBIGUOUS))
6776 /* Downgraded from fatal to warning 20000522 mjd */
6777 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6778 "Possible unintended interpolation of %s in string",
6783 /* build ops for a bareword */
6784 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
6785 yylval.opval->op_private = OPpCONST_ENTERED;
6788 /* If the identifier refers to a stash, don't autovivify it.
6789 * Change 24660 had the side effect of causing symbol table
6790 * hashes to always be defined, even if they were freshly
6791 * created and the only reference in the entire program was
6792 * the single statement with the defined %foo::bar:: test.
6793 * It appears that all code in the wild doing this actually
6794 * wants to know whether sub-packages have been loaded, so
6795 * by avoiding auto-vivifying symbol tables, we ensure that
6796 * defined %foo::bar:: continues to be false, and the existing
6797 * tests still give the expected answers, even though what
6798 * they're actually testing has now changed subtly.
6800 (*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'
6802 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
6803 ((PL_tokenbuf[0] == '$') ? SVt_PV
6804 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
6810 * The following code was generated by perl_keyword.pl.
6814 Perl_keyword (pTHX_ const char *name, I32 len)
6819 case 1: /* 5 tokens of length 1 */
6851 case 2: /* 18 tokens of length 2 */
6997 case 3: /* 29 tokens of length 3 */
7001 if (name[1] == 'N' &&
7064 if (name[1] == 'i' &&
7086 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
7104 if (name[1] == 'o' &&
7113 if (name[1] == 'e' &&
7122 if (name[1] == 'n' &&
7131 if (name[1] == 'o' &&
7140 if (name[1] == 'a' &&
7149 if (name[1] == 'o' &&
7211 if (name[1] == 'e' &&
7225 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
7251 if (name[1] == 'i' &&
7260 if (name[1] == 's' &&
7269 if (name[1] == 'e' &&
7278 if (name[1] == 'o' &&
7290 case 4: /* 41 tokens of length 4 */
7294 if (name[1] == 'O' &&
7304 if (name[1] == 'N' &&
7314 if (name[1] == 'i' &&
7324 if (name[1] == 'h' &&
7334 if (name[1] == 'u' &&
7347 if (name[2] == 'c' &&
7356 if (name[2] == 's' &&
7365 if (name[2] == 'a' &&
7401 if (name[1] == 'o' &&
7414 if (name[2] == 't' &&
7423 if (name[2] == 'o' &&
7432 if (name[2] == 't' &&
7441 if (name[2] == 'e' &&
7454 if (name[1] == 'o' &&
7467 if (name[2] == 'y' &&
7476 if (name[2] == 'l' &&
7492 if (name[2] == 's' &&
7501 if (name[2] == 'n' &&
7510 if (name[2] == 'c' &&
7523 if (name[1] == 'e' &&
7533 if (name[1] == 'p' &&
7546 if (name[2] == 'c' &&
7555 if (name[2] == 'p' &&
7564 if (name[2] == 's' &&
7580 if (name[2] == 'n' &&
7650 if (name[2] == 'r' &&
7659 if (name[2] == 'r' &&
7668 if (name[2] == 'a' &&
7684 if (name[2] == 'l' &&
7746 if (name[2] == 'e' &&
7749 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
7762 case 5: /* 38 tokens of length 5 */
7766 if (name[1] == 'E' &&
7777 if (name[1] == 'H' &&
7791 if (name[2] == 'a' &&
7801 if (name[2] == 'a' &&
7818 if (name[2] == 'e' &&
7828 if (name[2] == 'e' &&
7832 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
7848 if (name[3] == 'i' &&
7857 if (name[3] == 'o' &&
7893 if (name[2] == 'o' &&
7903 if (name[2] == 'y' &&
7917 if (name[1] == 'l' &&
7931 if (name[2] == 'n' &&
7941 if (name[2] == 'o' &&
7955 if (name[1] == 'i' &&
7960 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
7969 if (name[2] == 'd' &&
7979 if (name[2] == 'c' &&
7996 if (name[2] == 'c' &&
8006 if (name[2] == 't' &&
8020 if (name[1] == 'k' &&
8031 if (name[1] == 'r' &&
8045 if (name[2] == 's' &&
8055 if (name[2] == 'd' &&
8072 if (name[2] == 'm' &&
8082 if (name[2] == 'i' &&
8092 if (name[2] == 'e' &&
8102 if (name[2] == 'l' &&
8112 if (name[2] == 'a' &&
8122 if (name[2] == 'u' &&
8136 if (name[1] == 'i' &&
8150 if (name[2] == 'a' &&
8163 if (name[3] == 'e' &&
8198 if (name[2] == 'i' &&
8215 if (name[2] == 'i' &&
8225 if (name[2] == 'i' &&
8242 case 6: /* 33 tokens of length 6 */
8246 if (name[1] == 'c' &&
8261 if (name[2] == 'l' &&
8272 if (name[2] == 'r' &&
8287 if (name[1] == 'e' &&
8302 if (name[2] == 's' &&
8307 if(ckWARN_d(WARN_SYNTAX))
8308 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8314 if (name[2] == 'i' &&
8332 if (name[2] == 'l' &&
8343 if (name[2] == 'r' &&
8358 if (name[1] == 'm' &&
8373 if (name[2] == 'n' &&
8384 if (name[2] == 's' &&
8399 if (name[1] == 's' &&
8405 if (name[4] == 't' &&
8414 if (name[4] == 'e' &&
8423 if (name[4] == 'c' &&
8432 if (name[4] == 'n' &&
8448 if (name[1] == 'r' &&
8466 if (name[3] == 'a' &&
8476 if (name[3] == 'u' &&
8490 if (name[2] == 'n' &&
8508 if (name[2] == 'a' &&
8522 if (name[3] == 'e' &&
8535 if (name[4] == 't' &&
8544 if (name[4] == 'e' &&
8566 if (name[4] == 't' &&
8575 if (name[4] == 'e' &&
8591 if (name[2] == 'c' &&
8602 if (name[2] == 'l' &&
8613 if (name[2] == 'b' &&
8624 if (name[2] == 's' &&
8647 if (name[4] == 's' &&
8656 if (name[4] == 'n' &&
8669 if (name[3] == 'a' &&
8686 if (name[1] == 'a' &&
8701 case 7: /* 29 tokens of length 7 */
8705 if (name[1] == 'E' &&
8718 if (name[1] == '_' &&
8731 if (name[1] == 'i' &&
8738 return -KEY_binmode;
8744 if (name[1] == 'o' &&
8751 return -KEY_connect;
8760 if (name[2] == 'm' &&
8766 return -KEY_dbmopen;
8777 if (name[4] == 'u' &&
8781 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
8787 if (name[4] == 'n' &&
8808 if (name[1] == 'o' &&
8821 if (name[1] == 'e' &&
8828 if (name[5] == 'r' &&
8831 return -KEY_getpgrp;
8837 if (name[5] == 'i' &&
8840 return -KEY_getppid;
8853 if (name[1] == 'c' &&
8860 return -KEY_lcfirst;
8866 if (name[1] == 'p' &&
8873 return -KEY_opendir;
8879 if (name[1] == 'a' &&
8897 if (name[3] == 'd' &&
8902 return -KEY_readdir;
8908 if (name[3] == 'u' &&
8919 if (name[3] == 'e' &&
8924 return -KEY_reverse;
8943 if (name[3] == 'k' &&
8948 return -KEY_seekdir;
8954 if (name[3] == 'p' &&
8959 return -KEY_setpgrp;
8969 if (name[2] == 'm' &&
8975 return -KEY_shmread;
8981 if (name[2] == 'r' &&
8987 return -KEY_sprintf;
8996 if (name[3] == 'l' &&
9001 return -KEY_symlink;
9010 if (name[4] == 'a' &&
9014 return -KEY_syscall;
9020 if (name[4] == 'p' &&
9024 return -KEY_sysopen;
9030 if (name[4] == 'e' &&
9034 return -KEY_sysread;
9040 if (name[4] == 'e' &&
9044 return -KEY_sysseek;
9062 if (name[1] == 'e' &&
9069 return -KEY_telldir;
9078 if (name[2] == 'f' &&
9084 return -KEY_ucfirst;
9090 if (name[2] == 's' &&
9096 return -KEY_unshift;
9106 if (name[1] == 'a' &&
9113 return -KEY_waitpid;
9122 case 8: /* 26 tokens of length 8 */
9126 if (name[1] == 'U' &&
9134 return KEY_AUTOLOAD;
9145 if (name[3] == 'A' &&
9151 return KEY___DATA__;
9157 if (name[3] == 'I' &&
9163 return -KEY___FILE__;
9169 if (name[3] == 'I' &&
9175 return -KEY___LINE__;
9191 if (name[2] == 'o' &&
9198 return -KEY_closedir;
9204 if (name[2] == 'n' &&
9211 return -KEY_continue;
9221 if (name[1] == 'b' &&
9229 return -KEY_dbmclose;
9235 if (name[1] == 'n' &&
9241 if (name[4] == 'r' &&
9246 return -KEY_endgrent;
9252 if (name[4] == 'w' &&
9257 return -KEY_endpwent;
9270 if (name[1] == 'o' &&
9278 return -KEY_formline;
9284 if (name[1] == 'e' &&
9295 if (name[6] == 'n' &&
9298 return -KEY_getgrent;
9304 if (name[6] == 'i' &&
9307 return -KEY_getgrgid;
9313 if (name[6] == 'a' &&
9316 return -KEY_getgrnam;
9329 if (name[4] == 'o' &&
9334 return -KEY_getlogin;
9345 if (name[6] == 'n' &&
9348 return -KEY_getpwent;
9354 if (name[6] == 'a' &&
9357 return -KEY_getpwnam;
9363 if (name[6] == 'i' &&
9366 return -KEY_getpwuid;
9386 if (name[1] == 'e' &&
9393 if (name[5] == 'i' &&
9400 return -KEY_readline;
9405 return -KEY_readlink;
9416 if (name[5] == 'i' &&
9420 return -KEY_readpipe;
9441 if (name[4] == 'r' &&
9446 return -KEY_setgrent;
9452 if (name[4] == 'w' &&
9457 return -KEY_setpwent;
9473 if (name[3] == 'w' &&
9479 return -KEY_shmwrite;
9485 if (name[3] == 't' &&
9491 return -KEY_shutdown;
9501 if (name[2] == 's' &&
9508 return -KEY_syswrite;
9518 if (name[1] == 'r' &&
9526 return -KEY_truncate;
9535 case 9: /* 8 tokens of length 9 */
9539 if (name[1] == 'n' &&
9548 return -KEY_endnetent;
9554 if (name[1] == 'e' &&
9563 return -KEY_getnetent;
9569 if (name[1] == 'o' &&
9578 return -KEY_localtime;
9584 if (name[1] == 'r' &&
9593 return KEY_prototype;
9599 if (name[1] == 'u' &&
9608 return -KEY_quotemeta;
9614 if (name[1] == 'e' &&
9623 return -KEY_rewinddir;
9629 if (name[1] == 'e' &&
9638 return -KEY_setnetent;
9644 if (name[1] == 'a' &&
9653 return -KEY_wantarray;
9662 case 10: /* 9 tokens of length 10 */
9666 if (name[1] == 'n' &&
9672 if (name[4] == 'o' &&
9679 return -KEY_endhostent;
9685 if (name[4] == 'e' &&
9692 return -KEY_endservent;
9705 if (name[1] == 'e' &&
9711 if (name[4] == 'o' &&
9718 return -KEY_gethostent;
9727 if (name[5] == 'r' &&
9733 return -KEY_getservent;
9739 if (name[5] == 'c' &&
9745 return -KEY_getsockopt;
9770 if (name[4] == 'o' &&
9777 return -KEY_sethostent;
9786 if (name[5] == 'r' &&
9792 return -KEY_setservent;
9798 if (name[5] == 'c' &&
9804 return -KEY_setsockopt;
9821 if (name[2] == 'c' &&
9830 return -KEY_socketpair;
9843 case 11: /* 8 tokens of length 11 */
9847 if (name[1] == '_' &&
9858 return -KEY___PACKAGE__;
9864 if (name[1] == 'n' &&
9875 return -KEY_endprotoent;
9881 if (name[1] == 'e' &&
9890 if (name[5] == 'e' &&
9897 return -KEY_getpeername;
9906 if (name[6] == 'o' &&
9912 return -KEY_getpriority;
9918 if (name[6] == 't' &&
9924 return -KEY_getprotoent;
9938 if (name[4] == 'o' &&
9946 return -KEY_getsockname;
9959 if (name[1] == 'e' &&
9967 if (name[6] == 'o' &&
9973 return -KEY_setpriority;
9979 if (name[6] == 't' &&
9985 return -KEY_setprotoent;
10001 case 12: /* 2 tokens of length 12 */
10002 if (name[0] == 'g' &&
10014 if (name[9] == 'd' &&
10017 { /* getnetbyaddr */
10018 return -KEY_getnetbyaddr;
10024 if (name[9] == 'a' &&
10027 { /* getnetbyname */
10028 return -KEY_getnetbyname;
10040 case 13: /* 4 tokens of length 13 */
10041 if (name[0] == 'g' &&
10048 if (name[4] == 'o' &&
10057 if (name[10] == 'd' &&
10060 { /* gethostbyaddr */
10061 return -KEY_gethostbyaddr;
10067 if (name[10] == 'a' &&
10070 { /* gethostbyname */
10071 return -KEY_gethostbyname;
10084 if (name[4] == 'e' &&
10093 if (name[10] == 'a' &&
10096 { /* getservbyname */
10097 return -KEY_getservbyname;
10103 if (name[10] == 'o' &&
10106 { /* getservbyport */
10107 return -KEY_getservbyport;
10126 case 14: /* 1 tokens of length 14 */
10127 if (name[0] == 'g' &&
10141 { /* getprotobyname */
10142 return -KEY_getprotobyname;
10147 case 16: /* 1 tokens of length 16 */
10148 if (name[0] == 'g' &&
10164 { /* getprotobynumber */
10165 return -KEY_getprotobynumber;
10179 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10183 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
10184 if (ckWARN(WARN_SYNTAX)) {
10187 for (w = s+2; *w && level; w++) {
10190 else if (*w == ')')
10193 while (isSPACE(*w))
10195 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
10196 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10197 "%s (...) interpreted as function",name);
10200 while (s < PL_bufend && isSPACE(*s))
10204 while (s < PL_bufend && isSPACE(*s))
10206 if (isIDFIRST_lazy_if(s,UTF)) {
10207 const char * const w = s++;
10208 while (isALNUM_lazy_if(s,UTF))
10210 while (s < PL_bufend && isSPACE(*s))
10214 if (keyword(w, s - w))
10217 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10218 if (gv && GvCVu(gv))
10220 Perl_croak(aTHX_ "No comma allowed after %s", what);
10225 /* Either returns sv, or mortalizes sv and returns a new SV*.
10226 Best used as sv=new_constant(..., sv, ...).
10227 If s, pv are NULL, calls subroutine with one argument,
10228 and type is used with error messages only. */
10231 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
10235 HV * const table = GvHV(PL_hintgv); /* ^H */
10239 const char *why1 = "", *why2 = "", *why3 = "";
10241 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10244 why2 = strEQ(key,"charnames")
10245 ? "(possibly a missing \"use charnames ...\")"
10247 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10248 (type ? type: "undef"), why2);
10250 /* This is convoluted and evil ("goto considered harmful")
10251 * but I do not understand the intricacies of all the different
10252 * failure modes of %^H in here. The goal here is to make
10253 * the most probable error message user-friendly. --jhi */
10258 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10259 (type ? type: "undef"), why1, why2, why3);
10261 yyerror(SvPVX_const(msg));
10265 cvp = hv_fetch(table, key, strlen(key), FALSE);
10266 if (!cvp || !SvOK(*cvp)) {
10269 why3 = "} is not defined";
10272 sv_2mortal(sv); /* Parent created it permanently */
10275 pv = sv_2mortal(newSVpvn(s, len));
10277 typesv = sv_2mortal(newSVpv(type, 0));
10279 typesv = &PL_sv_undef;
10281 PUSHSTACKi(PERLSI_OVERLOAD);
10293 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10297 /* Check the eval first */
10298 if (!PL_in_eval && SvTRUE(ERRSV)) {
10299 sv_catpvs(ERRSV, "Propagated");
10300 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10302 res = SvREFCNT_inc_simple(sv);
10306 SvREFCNT_inc_simple_void(res);
10315 why1 = "Call to &{$^H{";
10317 why3 = "}} did not return a defined value";
10325 /* Returns a NUL terminated string, with the length of the string written to
10329 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10332 register char *d = dest;
10333 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
10336 Perl_croak(aTHX_ ident_too_long);
10337 if (isALNUM(*s)) /* UTF handled below */
10339 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
10344 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
10348 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10349 char *t = s + UTF8SKIP(s);
10350 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10352 if (d + (t - s) > e)
10353 Perl_croak(aTHX_ ident_too_long);
10354 Copy(s, d, t - s, char);
10367 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10370 char *bracket = NULL;
10372 register char *d = dest;
10373 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
10378 while (isDIGIT(*s)) {
10380 Perl_croak(aTHX_ ident_too_long);
10387 Perl_croak(aTHX_ ident_too_long);
10388 if (isALNUM(*s)) /* UTF handled below */
10390 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10395 else if (*s == ':' && s[1] == ':') {
10399 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10400 char *t = s + UTF8SKIP(s);
10401 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10403 if (d + (t - s) > e)
10404 Perl_croak(aTHX_ ident_too_long);
10405 Copy(s, d, t - s, char);
10416 if (PL_lex_state != LEX_NORMAL)
10417 PL_lex_state = LEX_INTERPENDMAYBE;
10420 if (*s == '$' && s[1] &&
10421 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10434 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10439 if (isSPACE(s[-1])) {
10441 const char ch = *s++;
10442 if (!SPACE_OR_TAB(ch)) {
10448 if (isIDFIRST_lazy_if(d,UTF)) {
10452 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10453 end += UTF8SKIP(end);
10454 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10455 end += UTF8SKIP(end);
10457 Copy(s, d, end - s, char);
10462 while ((isALNUM(*s) || *s == ':') && d < e)
10465 Perl_croak(aTHX_ ident_too_long);
10468 while (s < send && SPACE_OR_TAB(*s)) s++;
10469 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10470 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
10471 const char *brack = *s == '[' ? "[...]" : "{...}";
10472 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10473 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10474 funny, dest, brack, funny, dest, brack);
10477 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10481 /* Handle extended ${^Foo} variables
10482 * 1999-02-27 mjd-perl-patch@plover.com */
10483 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10487 while (isALNUM(*s) && d < e) {
10491 Perl_croak(aTHX_ ident_too_long);
10496 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10497 PL_lex_state = LEX_INTERPEND;
10502 if (PL_lex_state == LEX_NORMAL) {
10503 if (ckWARN(WARN_AMBIGUOUS) &&
10504 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
10506 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10507 "Ambiguous use of %c{%s} resolved to %c%s",
10508 funny, dest, funny, dest);
10513 s = bracket; /* let the parser handle it */
10517 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10518 PL_lex_state = LEX_INTERPEND;
10523 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10525 PERL_UNUSED_CONTEXT;
10528 else if (ch == 'g')
10529 *pmfl |= PMf_GLOBAL;
10530 else if (ch == 'c')
10531 *pmfl |= PMf_CONTINUE;
10532 else if (ch == 'o')
10534 else if (ch == 'm')
10535 *pmfl |= PMf_MULTILINE;
10536 else if (ch == 's')
10537 *pmfl |= PMf_SINGLELINE;
10538 else if (ch == 'x')
10539 *pmfl |= PMf_EXTENDED;
10543 S_scan_pat(pTHX_ char *start, I32 type)
10547 char *s = scan_str(start,!!PL_madskills,FALSE);
10548 const char * const valid_flags = (type == OP_QR) ? "iomsx" : "iogcmsx";
10555 const char * const delimiter = skipspace(start);
10556 Perl_croak(aTHX_ *delimiter == '?'
10557 ? "Search pattern not terminated or ternary operator parsed as search pattern"
10558 : "Search pattern not terminated" );
10561 pm = (PMOP*)newPMOP(type, 0);
10562 if (PL_multi_open == '?')
10563 pm->op_pmflags |= PMf_ONCE;
10567 while (*s && strchr(valid_flags, *s))
10568 pmflag(&pm->op_pmflags,*s++);
10570 if (PL_madskills && modstart != s) {
10571 SV* tmptoken = newSVpvn(modstart, s - modstart);
10572 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
10575 /* issue a warning if /c is specified,but /g is not */
10576 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
10577 && ckWARN(WARN_REGEXP))
10579 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
10582 pm->op_pmpermflags = pm->op_pmflags;
10584 PL_lex_op = (OP*)pm;
10585 yylval.ival = OP_MATCH;
10590 S_scan_subst(pTHX_ char *start)
10601 yylval.ival = OP_NULL;
10603 s = scan_str(start,!!PL_madskills,FALSE);
10606 Perl_croak(aTHX_ "Substitution pattern not terminated");
10608 if (s[-1] == PL_multi_open)
10611 if (PL_madskills) {
10612 CURMAD('q', PL_thisopen);
10613 CURMAD('_', PL_thiswhite);
10614 CURMAD('E', PL_thisstuff);
10615 CURMAD('Q', PL_thisclose);
10616 PL_realtokenstart = s - SvPVX(PL_linestr);
10620 first_start = PL_multi_start;
10621 s = scan_str(s,!!PL_madskills,FALSE);
10623 if (PL_lex_stuff) {
10624 SvREFCNT_dec(PL_lex_stuff);
10625 PL_lex_stuff = NULL;
10627 Perl_croak(aTHX_ "Substitution replacement not terminated");
10629 PL_multi_start = first_start; /* so whole substitution is taken together */
10631 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10634 if (PL_madskills) {
10635 CURMAD('z', PL_thisopen);
10636 CURMAD('R', PL_thisstuff);
10637 CURMAD('Z', PL_thisclose);
10647 else if (strchr("iogcmsx", *s))
10648 pmflag(&pm->op_pmflags,*s++);
10654 if (PL_madskills) {
10656 curmad('m', newSVpvn(modstart, s - modstart));
10657 append_madprops(PL_thismad, (OP*)pm, 0);
10661 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
10662 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10666 SV * const repl = newSVpvs("");
10668 PL_sublex_info.super_bufptr = s;
10669 PL_sublex_info.super_bufend = PL_bufend;
10671 pm->op_pmflags |= PMf_EVAL;
10673 sv_catpv(repl, es ? "eval " : "do ");
10674 sv_catpvs(repl, "{");
10675 sv_catsv(repl, PL_lex_repl);
10676 sv_catpvs(repl, "}");
10678 SvREFCNT_dec(PL_lex_repl);
10679 PL_lex_repl = repl;
10682 pm->op_pmpermflags = pm->op_pmflags;
10683 PL_lex_op = (OP*)pm;
10684 yylval.ival = OP_SUBST;
10689 S_scan_trans(pTHX_ char *start)
10702 yylval.ival = OP_NULL;
10704 s = scan_str(start,!!PL_madskills,FALSE);
10706 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10708 if (s[-1] == PL_multi_open)
10711 if (PL_madskills) {
10712 CURMAD('q', PL_thisopen);
10713 CURMAD('_', PL_thiswhite);
10714 CURMAD('E', PL_thisstuff);
10715 CURMAD('Q', PL_thisclose);
10716 PL_realtokenstart = s - SvPVX(PL_linestr);
10720 s = scan_str(s,!!PL_madskills,FALSE);
10722 if (PL_lex_stuff) {
10723 SvREFCNT_dec(PL_lex_stuff);
10724 PL_lex_stuff = NULL;
10726 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10728 if (PL_madskills) {
10729 CURMAD('z', PL_thisopen);
10730 CURMAD('R', PL_thisstuff);
10731 CURMAD('Z', PL_thisclose);
10734 complement = del = squash = 0;
10741 complement = OPpTRANS_COMPLEMENT;
10744 del = OPpTRANS_DELETE;
10747 squash = OPpTRANS_SQUASH;
10756 Newx(tbl, complement&&!del?258:256, short);
10757 o = newPVOP(OP_TRANS, 0, (char*)tbl);
10758 o->op_private &= ~OPpTRANS_ALL;
10759 o->op_private |= del|squash|complement|
10760 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
10761 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
10764 yylval.ival = OP_TRANS;
10767 if (PL_madskills) {
10769 curmad('m', newSVpvn(modstart, s - modstart));
10770 append_madprops(PL_thismad, o, 0);
10779 S_scan_heredoc(pTHX_ register char *s)
10783 I32 op_type = OP_SCALAR;
10787 const char *found_newline;
10791 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
10793 I32 stuffstart = s - SvPVX(PL_linestr);
10796 PL_realtokenstart = -1;
10801 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10804 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
10805 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10808 s = delimcpy(d, e, s, PL_bufend, term, &len);
10818 if (!isALNUM_lazy_if(s,UTF))
10819 deprecate_old("bare << to mean <<\"\"");
10820 for (; isALNUM_lazy_if(s,UTF); s++) {
10825 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10826 Perl_croak(aTHX_ "Delimiter for here document is too long");
10829 len = d - PL_tokenbuf;
10832 if (PL_madskills) {
10833 tstart = PL_tokenbuf + !outer;
10834 PL_thisclose = newSVpvn(tstart, len - !outer);
10835 tstart = SvPVX(PL_linestr) + stuffstart;
10836 PL_thisopen = newSVpvn(tstart, s - tstart);
10837 stuffstart = s - SvPVX(PL_linestr);
10840 #ifndef PERL_STRICT_CR
10841 d = strchr(s, '\r');
10843 char * const olds = s;
10845 while (s < PL_bufend) {
10851 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10860 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10867 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
10868 herewas = newSVpvn(s,PL_bufend-s);
10872 herewas = newSVpvn(s-1,found_newline-s+1);
10875 herewas = newSVpvn(s,found_newline-s);
10879 if (PL_madskills) {
10880 tstart = SvPVX(PL_linestr) + stuffstart;
10882 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10884 PL_thisstuff = newSVpvn(tstart, s - tstart);
10887 s += SvCUR(herewas);
10890 stuffstart = s - SvPVX(PL_linestr);
10896 tmpstr = newSV(79);
10897 sv_upgrade(tmpstr, SVt_PVIV);
10898 if (term == '\'') {
10899 op_type = OP_CONST;
10900 SvIV_set(tmpstr, -1);
10902 else if (term == '`') {
10903 op_type = OP_BACKTICK;
10904 SvIV_set(tmpstr, '\\');
10908 PL_multi_start = CopLINE(PL_curcop);
10909 PL_multi_open = PL_multi_close = '<';
10910 term = *PL_tokenbuf;
10911 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
10912 char * const bufptr = PL_sublex_info.super_bufptr;
10913 char * const bufend = PL_sublex_info.super_bufend;
10914 char * const olds = s - SvCUR(herewas);
10915 s = strchr(bufptr, '\n');
10919 while (s < bufend &&
10920 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
10922 CopLINE_inc(PL_curcop);
10925 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10926 missingterm(PL_tokenbuf);
10928 sv_setpvn(herewas,bufptr,d-bufptr+1);
10929 sv_setpvn(tmpstr,d+1,s-d);
10931 sv_catpvn(herewas,s,bufend-s);
10932 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
10939 while (s < PL_bufend &&
10940 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
10942 CopLINE_inc(PL_curcop);
10944 if (s >= PL_bufend) {
10945 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10946 missingterm(PL_tokenbuf);
10948 sv_setpvn(tmpstr,d+1,s-d);
10950 if (PL_madskills) {
10952 sv_catpvn(PL_thisstuff, d + 1, s - d);
10954 PL_thisstuff = newSVpvn(d + 1, s - d);
10955 stuffstart = s - SvPVX(PL_linestr);
10959 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
10961 sv_catpvn(herewas,s,PL_bufend-s);
10962 sv_setsv(PL_linestr,herewas);
10963 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
10964 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10965 PL_last_lop = PL_last_uni = NULL;
10968 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
10969 while (s >= PL_bufend) { /* multiple line string? */
10971 if (PL_madskills) {
10972 tstart = SvPVX(PL_linestr) + stuffstart;
10974 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10976 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10980 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10981 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10982 missingterm(PL_tokenbuf);
10985 stuffstart = s - SvPVX(PL_linestr);
10987 CopLINE_inc(PL_curcop);
10988 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10989 PL_last_lop = PL_last_uni = NULL;
10990 #ifndef PERL_STRICT_CR
10991 if (PL_bufend - PL_linestart >= 2) {
10992 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10993 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10995 PL_bufend[-2] = '\n';
10997 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10999 else if (PL_bufend[-1] == '\r')
11000 PL_bufend[-1] = '\n';
11002 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11003 PL_bufend[-1] = '\n';
11005 if (PERLDB_LINE && PL_curstash != PL_debstash) {
11006 SV * const sv = newSV(0);
11008 sv_upgrade(sv, SVt_PVMG);
11009 sv_setsv(sv,PL_linestr);
11010 (void)SvIOK_on(sv);
11012 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
11014 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11015 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11016 *(SvPVX(PL_linestr) + off ) = ' ';
11017 sv_catsv(PL_linestr,herewas);
11018 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11019 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11023 sv_catsv(tmpstr,PL_linestr);
11028 PL_multi_end = CopLINE(PL_curcop);
11029 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11030 SvPV_shrink_to_cur(tmpstr);
11032 SvREFCNT_dec(herewas);
11034 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11036 else if (PL_encoding)
11037 sv_recode_to_utf8(tmpstr, PL_encoding);
11039 PL_lex_stuff = tmpstr;
11040 yylval.ival = op_type;
11044 /* scan_inputsymbol
11045 takes: current position in input buffer
11046 returns: new position in input buffer
11047 side-effects: yylval and lex_op are set.
11052 <FH> read from filehandle
11053 <pkg::FH> read from package qualified filehandle
11054 <pkg'FH> read from package qualified filehandle
11055 <$fh> read from filehandle in $fh
11056 <*.h> filename glob
11061 S_scan_inputsymbol(pTHX_ char *start)
11064 register char *s = start; /* current position in buffer */
11068 char *d = PL_tokenbuf; /* start of temp holding space */
11069 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11071 end = strchr(s, '\n');
11074 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11076 /* die if we didn't have space for the contents of the <>,
11077 or if it didn't end, or if we see a newline
11080 if (len >= sizeof PL_tokenbuf)
11081 Perl_croak(aTHX_ "Excessively long <> operator");
11083 Perl_croak(aTHX_ "Unterminated <> operator");
11088 Remember, only scalar variables are interpreted as filehandles by
11089 this code. Anything more complex (e.g., <$fh{$num}>) will be
11090 treated as a glob() call.
11091 This code makes use of the fact that except for the $ at the front,
11092 a scalar variable and a filehandle look the same.
11094 if (*d == '$' && d[1]) d++;
11096 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11097 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11100 /* If we've tried to read what we allow filehandles to look like, and
11101 there's still text left, then it must be a glob() and not a getline.
11102 Use scan_str to pull out the stuff between the <> and treat it
11103 as nothing more than a string.
11106 if (d - PL_tokenbuf != len) {
11107 yylval.ival = OP_GLOB;
11109 s = scan_str(start,!!PL_madskills,FALSE);
11111 Perl_croak(aTHX_ "Glob not terminated");
11115 bool readline_overriden = FALSE;
11118 /* we're in a filehandle read situation */
11121 /* turn <> into <ARGV> */
11123 Copy("ARGV",d,5,char);
11125 /* Check whether readline() is overriden */
11126 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11128 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11130 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11131 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
11132 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11133 readline_overriden = TRUE;
11135 /* if <$fh>, create the ops to turn the variable into a
11141 /* try to find it in the pad for this block, otherwise find
11142 add symbol table ops
11144 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
11145 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11146 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11147 HEK * const stashname = HvNAME_HEK(stash);
11148 SV * const sym = sv_2mortal(newSVhek(stashname));
11149 sv_catpvs(sym, "::");
11150 sv_catpv(sym, d+1);
11155 OP * const o = newOP(OP_PADSV, 0);
11157 PL_lex_op = readline_overriden
11158 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11159 append_elem(OP_LIST, o,
11160 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11161 : (OP*)newUNOP(OP_READLINE, 0, o);
11170 ? (GV_ADDMULTI | GV_ADDINEVAL)
11173 PL_lex_op = readline_overriden
11174 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11175 append_elem(OP_LIST,
11176 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11177 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11178 : (OP*)newUNOP(OP_READLINE, 0,
11179 newUNOP(OP_RV2SV, 0,
11180 newGVOP(OP_GV, 0, gv)));
11182 if (!readline_overriden)
11183 PL_lex_op->op_flags |= OPf_SPECIAL;
11184 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
11185 yylval.ival = OP_NULL;
11188 /* If it's none of the above, it must be a literal filehandle
11189 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11191 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11192 PL_lex_op = readline_overriden
11193 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11194 append_elem(OP_LIST,
11195 newGVOP(OP_GV, 0, gv),
11196 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11197 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11198 yylval.ival = OP_NULL;
11207 takes: start position in buffer
11208 keep_quoted preserve \ on the embedded delimiter(s)
11209 keep_delims preserve the delimiters around the string
11210 returns: position to continue reading from buffer
11211 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11212 updates the read buffer.
11214 This subroutine pulls a string out of the input. It is called for:
11215 q single quotes q(literal text)
11216 ' single quotes 'literal text'
11217 qq double quotes qq(interpolate $here please)
11218 " double quotes "interpolate $here please"
11219 qx backticks qx(/bin/ls -l)
11220 ` backticks `/bin/ls -l`
11221 qw quote words @EXPORT_OK = qw( func() $spam )
11222 m// regexp match m/this/
11223 s/// regexp substitute s/this/that/
11224 tr/// string transliterate tr/this/that/
11225 y/// string transliterate y/this/that/
11226 ($*@) sub prototypes sub foo ($)
11227 (stuff) sub attr parameters sub foo : attr(stuff)
11228 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11230 In most of these cases (all but <>, patterns and transliterate)
11231 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11232 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11233 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11236 It skips whitespace before the string starts, and treats the first
11237 character as the delimiter. If the delimiter is one of ([{< then
11238 the corresponding "close" character )]}> is used as the closing
11239 delimiter. It allows quoting of delimiters, and if the string has
11240 balanced delimiters ([{<>}]) it allows nesting.
11242 On success, the SV with the resulting string is put into lex_stuff or,
11243 if that is already non-NULL, into lex_repl. The second case occurs only
11244 when parsing the RHS of the special constructs s/// and tr/// (y///).
11245 For convenience, the terminating delimiter character is stuffed into
11250 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11253 SV *sv; /* scalar value: string */
11254 char *tmps; /* temp string, used for delimiter matching */
11255 register char *s = start; /* current position in the buffer */
11256 register char term; /* terminating character */
11257 register char *to; /* current position in the sv's data */
11258 I32 brackets = 1; /* bracket nesting level */
11259 bool has_utf8 = FALSE; /* is there any utf8 content? */
11260 I32 termcode; /* terminating char. code */
11261 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
11262 STRLEN termlen; /* length of terminating string */
11263 char *last = NULL; /* last position for nesting bracket */
11269 /* skip space before the delimiter */
11275 if (PL_realtokenstart >= 0) {
11276 stuffstart = PL_realtokenstart;
11277 PL_realtokenstart = -1;
11280 stuffstart = start - SvPVX(PL_linestr);
11282 /* mark where we are, in case we need to report errors */
11285 /* after skipping whitespace, the next character is the terminator */
11288 termcode = termstr[0] = term;
11292 termcode = utf8_to_uvchr((U8*)s, &termlen);
11293 Copy(s, termstr, termlen, U8);
11294 if (!UTF8_IS_INVARIANT(term))
11298 /* mark where we are */
11299 PL_multi_start = CopLINE(PL_curcop);
11300 PL_multi_open = term;
11302 /* find corresponding closing delimiter */
11303 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11304 termcode = termstr[0] = term = tmps[5];
11306 PL_multi_close = term;
11308 /* create a new SV to hold the contents. 79 is the SV's initial length.
11309 What a random number. */
11311 sv_upgrade(sv, SVt_PVIV);
11312 SvIV_set(sv, termcode);
11313 (void)SvPOK_only(sv); /* validate pointer */
11315 /* move past delimiter and try to read a complete string */
11317 sv_catpvn(sv, s, termlen);
11320 tstart = SvPVX(PL_linestr) + stuffstart;
11321 if (!PL_thisopen && !keep_delims) {
11322 PL_thisopen = newSVpvn(tstart, s - tstart);
11323 stuffstart = s - SvPVX(PL_linestr);
11327 if (PL_encoding && !UTF) {
11331 int offset = s - SvPVX_const(PL_linestr);
11332 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11333 &offset, (char*)termstr, termlen);
11334 const char * const ns = SvPVX_const(PL_linestr) + offset;
11335 char * const svlast = SvEND(sv) - 1;
11337 for (; s < ns; s++) {
11338 if (*s == '\n' && !PL_rsfp)
11339 CopLINE_inc(PL_curcop);
11342 goto read_more_line;
11344 /* handle quoted delimiters */
11345 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11347 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11349 if ((svlast-1 - t) % 2) {
11350 if (!keep_quoted) {
11351 *(svlast-1) = term;
11353 SvCUR_set(sv, SvCUR(sv) - 1);
11358 if (PL_multi_open == PL_multi_close) {
11366 for (t = w = last; t < svlast; w++, t++) {
11367 /* At here, all closes are "was quoted" one,
11368 so we don't check PL_multi_close. */
11370 if (!keep_quoted && *(t+1) == PL_multi_open)
11375 else if (*t == PL_multi_open)
11383 SvCUR_set(sv, w - SvPVX_const(sv));
11386 if (--brackets <= 0)
11391 if (!keep_delims) {
11392 SvCUR_set(sv, SvCUR(sv) - 1);
11398 /* extend sv if need be */
11399 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11400 /* set 'to' to the next character in the sv's string */
11401 to = SvPVX(sv)+SvCUR(sv);
11403 /* if open delimiter is the close delimiter read unbridle */
11404 if (PL_multi_open == PL_multi_close) {
11405 for (; s < PL_bufend; s++,to++) {
11406 /* embedded newlines increment the current line number */
11407 if (*s == '\n' && !PL_rsfp)
11408 CopLINE_inc(PL_curcop);
11409 /* handle quoted delimiters */
11410 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11411 if (!keep_quoted && s[1] == term)
11413 /* any other quotes are simply copied straight through */
11417 /* terminate when run out of buffer (the for() condition), or
11418 have found the terminator */
11419 else if (*s == term) {
11422 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11425 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11431 /* if the terminator isn't the same as the start character (e.g.,
11432 matched brackets), we have to allow more in the quoting, and
11433 be prepared for nested brackets.
11436 /* read until we run out of string, or we find the terminator */
11437 for (; s < PL_bufend; s++,to++) {
11438 /* embedded newlines increment the line count */
11439 if (*s == '\n' && !PL_rsfp)
11440 CopLINE_inc(PL_curcop);
11441 /* backslashes can escape the open or closing characters */
11442 if (*s == '\\' && s+1 < PL_bufend) {
11443 if (!keep_quoted &&
11444 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11449 /* allow nested opens and closes */
11450 else if (*s == PL_multi_close && --brackets <= 0)
11452 else if (*s == PL_multi_open)
11454 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11459 /* terminate the copied string and update the sv's end-of-string */
11461 SvCUR_set(sv, to - SvPVX_const(sv));
11464 * this next chunk reads more into the buffer if we're not done yet
11468 break; /* handle case where we are done yet :-) */
11470 #ifndef PERL_STRICT_CR
11471 if (to - SvPVX_const(sv) >= 2) {
11472 if ((to[-2] == '\r' && to[-1] == '\n') ||
11473 (to[-2] == '\n' && to[-1] == '\r'))
11477 SvCUR_set(sv, to - SvPVX_const(sv));
11479 else if (to[-1] == '\r')
11482 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11487 /* if we're out of file, or a read fails, bail and reset the current
11488 line marker so we can report where the unterminated string began
11491 if (PL_madskills) {
11492 char *tstart = SvPVX(PL_linestr) + stuffstart;
11494 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11496 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11500 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11502 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11508 /* we read a line, so increment our line counter */
11509 CopLINE_inc(PL_curcop);
11511 /* update debugger info */
11512 if (PERLDB_LINE && PL_curstash != PL_debstash) {
11513 SV * const line_sv = newSV(0);
11515 sv_upgrade(line_sv, SVt_PVMG);
11516 sv_setsv(line_sv,PL_linestr);
11517 (void)SvIOK_on(line_sv);
11518 SvIV_set(line_sv, 0);
11519 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), line_sv);
11522 /* having changed the buffer, we must update PL_bufend */
11523 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11524 PL_last_lop = PL_last_uni = NULL;
11527 /* at this point, we have successfully read the delimited string */
11529 if (!PL_encoding || UTF) {
11531 if (PL_madskills) {
11532 char *tstart = SvPVX(PL_linestr) + stuffstart;
11534 sv_catpvn(PL_thisstuff, tstart, s - tstart);
11536 PL_thisstuff = newSVpvn(tstart, s - tstart);
11537 if (!PL_thisclose && !keep_delims)
11538 PL_thisclose = newSVpvn(s,termlen);
11543 sv_catpvn(sv, s, termlen);
11548 if (PL_madskills) {
11549 char *tstart = SvPVX(PL_linestr) + stuffstart;
11551 sv_catpvn(PL_thisstuff, tstart, s - tstart - termlen);
11553 PL_thisstuff = newSVpvn(tstart, s - tstart - termlen);
11554 if (!PL_thisclose && !keep_delims)
11555 PL_thisclose = newSVpvn(s - termlen,termlen);
11559 if (has_utf8 || PL_encoding)
11562 PL_multi_end = CopLINE(PL_curcop);
11564 /* if we allocated too much space, give some back */
11565 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11566 SvLEN_set(sv, SvCUR(sv) + 1);
11567 SvPV_renew(sv, SvLEN(sv));
11570 /* decide whether this is the first or second quoted string we've read
11583 takes: pointer to position in buffer
11584 returns: pointer to new position in buffer
11585 side-effects: builds ops for the constant in yylval.op
11587 Read a number in any of the formats that Perl accepts:
11589 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11590 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11593 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11595 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11598 If it reads a number without a decimal point or an exponent, it will
11599 try converting the number to an integer and see if it can do so
11600 without loss of precision.
11604 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11607 register const char *s = start; /* current position in buffer */
11608 register char *d; /* destination in temp buffer */
11609 register char *e; /* end of temp buffer */
11610 NV nv; /* number read, as a double */
11611 SV *sv = NULL; /* place to put the converted number */
11612 bool floatit; /* boolean: int or float? */
11613 const char *lastub = NULL; /* position of last underbar */
11614 static char const number_too_long[] = "Number too long";
11616 /* We use the first character to decide what type of number this is */
11620 Perl_croak(aTHX_ "panic: scan_num");
11622 /* if it starts with a 0, it could be an octal number, a decimal in
11623 0.13 disguise, or a hexadecimal number, or a binary number. */
11627 u holds the "number so far"
11628 shift the power of 2 of the base
11629 (hex == 4, octal == 3, binary == 1)
11630 overflowed was the number more than we can hold?
11632 Shift is used when we add a digit. It also serves as an "are
11633 we in octal/hex/binary?" indicator to disallow hex characters
11634 when in octal mode.
11639 bool overflowed = FALSE;
11640 bool just_zero = TRUE; /* just plain 0 or binary number? */
11641 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11642 static const char* const bases[5] =
11643 { "", "binary", "", "octal", "hexadecimal" };
11644 static const char* const Bases[5] =
11645 { "", "Binary", "", "Octal", "Hexadecimal" };
11646 static const char* const maxima[5] =
11648 "0b11111111111111111111111111111111",
11652 const char *base, *Base, *max;
11654 /* check for hex */
11659 } else if (s[1] == 'b') {
11664 /* check for a decimal in disguise */
11665 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11667 /* so it must be octal */
11674 if (ckWARN(WARN_SYNTAX))
11675 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11676 "Misplaced _ in number");
11680 base = bases[shift];
11681 Base = Bases[shift];
11682 max = maxima[shift];
11684 /* read the rest of the number */
11686 /* x is used in the overflow test,
11687 b is the digit we're adding on. */
11692 /* if we don't mention it, we're done */
11696 /* _ are ignored -- but warned about if consecutive */
11698 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11699 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11700 "Misplaced _ in number");
11704 /* 8 and 9 are not octal */
11705 case '8': case '9':
11707 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11711 case '2': case '3': case '4':
11712 case '5': case '6': case '7':
11714 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11717 case '0': case '1':
11718 b = *s++ & 15; /* ASCII digit -> value of digit */
11722 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11723 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11724 /* make sure they said 0x */
11727 b = (*s++ & 7) + 9;
11729 /* Prepare to put the digit we have onto the end
11730 of the number so far. We check for overflows.
11736 x = u << shift; /* make room for the digit */
11738 if ((x >> shift) != u
11739 && !(PL_hints & HINT_NEW_BINARY)) {
11742 if (ckWARN_d(WARN_OVERFLOW))
11743 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11744 "Integer overflow in %s number",
11747 u = x | b; /* add the digit to the end */
11750 n *= nvshift[shift];
11751 /* If an NV has not enough bits in its
11752 * mantissa to represent an UV this summing of
11753 * small low-order numbers is a waste of time
11754 * (because the NV cannot preserve the
11755 * low-order bits anyway): we could just
11756 * remember when did we overflow and in the
11757 * end just multiply n by the right
11765 /* if we get here, we had success: make a scalar value from
11770 /* final misplaced underbar check */
11771 if (s[-1] == '_') {
11772 if (ckWARN(WARN_SYNTAX))
11773 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11778 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
11779 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
11780 "%s number > %s non-portable",
11786 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
11787 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
11788 "%s number > %s non-portable",
11793 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11794 sv = new_constant(start, s - start, "integer",
11796 else if (PL_hints & HINT_NEW_BINARY)
11797 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
11802 handle decimal numbers.
11803 we're also sent here when we read a 0 as the first digit
11805 case '1': case '2': case '3': case '4': case '5':
11806 case '6': case '7': case '8': case '9': case '.':
11809 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11812 /* read next group of digits and _ and copy into d */
11813 while (isDIGIT(*s) || *s == '_') {
11814 /* skip underscores, checking for misplaced ones
11818 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11819 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11820 "Misplaced _ in number");
11824 /* check for end of fixed-length buffer */
11826 Perl_croak(aTHX_ number_too_long);
11827 /* if we're ok, copy the character */
11832 /* final misplaced underbar check */
11833 if (lastub && s == lastub + 1) {
11834 if (ckWARN(WARN_SYNTAX))
11835 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11838 /* read a decimal portion if there is one. avoid
11839 3..5 being interpreted as the number 3. followed
11842 if (*s == '.' && s[1] != '.') {
11847 if (ckWARN(WARN_SYNTAX))
11848 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11849 "Misplaced _ in number");
11853 /* copy, ignoring underbars, until we run out of digits.
11855 for (; isDIGIT(*s) || *s == '_'; s++) {
11856 /* fixed length buffer check */
11858 Perl_croak(aTHX_ number_too_long);
11860 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
11861 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11862 "Misplaced _ in number");
11868 /* fractional part ending in underbar? */
11869 if (s[-1] == '_') {
11870 if (ckWARN(WARN_SYNTAX))
11871 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11872 "Misplaced _ in number");
11874 if (*s == '.' && isDIGIT(s[1])) {
11875 /* oops, it's really a v-string, but without the "v" */
11881 /* read exponent part, if present */
11882 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11886 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11887 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11889 /* stray preinitial _ */
11891 if (ckWARN(WARN_SYNTAX))
11892 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11893 "Misplaced _ in number");
11897 /* allow positive or negative exponent */
11898 if (*s == '+' || *s == '-')
11901 /* stray initial _ */
11903 if (ckWARN(WARN_SYNTAX))
11904 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11905 "Misplaced _ in number");
11909 /* read digits of exponent */
11910 while (isDIGIT(*s) || *s == '_') {
11913 Perl_croak(aTHX_ number_too_long);
11917 if (((lastub && s == lastub + 1) ||
11918 (!isDIGIT(s[1]) && s[1] != '_'))
11919 && ckWARN(WARN_SYNTAX))
11920 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11921 "Misplaced _ in number");
11928 /* make an sv from the string */
11932 We try to do an integer conversion first if no characters
11933 indicating "float" have been found.
11938 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11940 if (flags == IS_NUMBER_IN_UV) {
11942 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
11945 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11946 if (uv <= (UV) IV_MIN)
11947 sv_setiv(sv, -(IV)uv);
11954 /* terminate the string */
11956 nv = Atof(PL_tokenbuf);
11960 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
11961 (PL_hints & HINT_NEW_INTEGER) )
11962 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
11963 (floatit ? "float" : "integer"),
11967 /* if it starts with a v, it could be a v-string */
11970 sv = newSV(5); /* preallocate storage space */
11971 s = scan_vstring(s,sv);
11975 /* make the op for the constant and return */
11978 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11980 lvalp->opval = NULL;
11986 S_scan_formline(pTHX_ register char *s)
11989 register char *eol;
11991 SV * const stuff = newSVpvs("");
11992 bool needargs = FALSE;
11993 bool eofmt = FALSE;
11995 char *tokenstart = s;
11998 if (PL_madskills) {
11999 savewhite = PL_thiswhite;
12004 while (!needargs) {
12006 #ifdef PERL_STRICT_CR
12007 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
12009 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
12011 if (*t == '\n' || t == PL_bufend) {
12016 if (PL_in_eval && !PL_rsfp) {
12017 eol = (char *) memchr(s,'\n',PL_bufend-s);
12022 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12024 for (t = s; t < eol; t++) {
12025 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12027 goto enough; /* ~~ must be first line in formline */
12029 if (*t == '@' || *t == '^')
12033 sv_catpvn(stuff, s, eol-s);
12034 #ifndef PERL_STRICT_CR
12035 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12036 char *end = SvPVX(stuff) + SvCUR(stuff);
12039 SvCUR_set(stuff, SvCUR(stuff) - 1);
12049 if (PL_madskills) {
12051 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12053 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12056 s = filter_gets(PL_linestr, PL_rsfp, 0);
12058 tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12060 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12062 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12063 PL_last_lop = PL_last_uni = NULL;
12072 if (SvCUR(stuff)) {
12075 PL_lex_state = LEX_NORMAL;
12076 start_force(PL_curforce);
12077 NEXTVAL_NEXTTOKE.ival = 0;
12081 PL_lex_state = LEX_FORMLINE;
12083 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12085 else if (PL_encoding)
12086 sv_recode_to_utf8(stuff, PL_encoding);
12088 start_force(PL_curforce);
12089 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12091 start_force(PL_curforce);
12092 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12096 SvREFCNT_dec(stuff);
12098 PL_lex_formbrack = 0;
12102 if (PL_madskills) {
12104 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12106 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12107 PL_thiswhite = savewhite;
12119 PL_cshlen = strlen(PL_cshname);
12121 #if defined(USE_ITHREADS)
12122 PERL_UNUSED_CONTEXT;
12128 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12131 const I32 oldsavestack_ix = PL_savestack_ix;
12132 CV* const outsidecv = PL_compcv;
12135 assert(SvTYPE(PL_compcv) == SVt_PVCV);
12137 SAVEI32(PL_subline);
12138 save_item(PL_subname);
12139 SAVESPTR(PL_compcv);
12141 PL_compcv = (CV*)newSV(0);
12142 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
12143 CvFLAGS(PL_compcv) |= flags;
12145 PL_subline = CopLINE(PL_curcop);
12146 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12147 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
12148 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12150 return oldsavestack_ix;
12154 #pragma segment Perl_yylex
12157 Perl_yywarn(pTHX_ const char *s)
12160 PL_in_eval |= EVAL_WARNONLY;
12162 PL_in_eval &= ~EVAL_WARNONLY;
12167 Perl_yyerror(pTHX_ const char *s)
12170 const char *where = NULL;
12171 const char *context = NULL;
12175 if (!yychar || (yychar == ';' && !PL_rsfp))
12177 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12178 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12179 PL_oldbufptr != PL_bufptr) {
12182 The code below is removed for NetWare because it abends/crashes on NetWare
12183 when the script has error such as not having the closing quotes like:
12184 if ($var eq "value)
12185 Checking of white spaces is anyway done in NetWare code.
12188 while (isSPACE(*PL_oldoldbufptr))
12191 context = PL_oldoldbufptr;
12192 contlen = PL_bufptr - PL_oldoldbufptr;
12194 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12195 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12198 The code below is removed for NetWare because it abends/crashes on NetWare
12199 when the script has error such as not having the closing quotes like:
12200 if ($var eq "value)
12201 Checking of white spaces is anyway done in NetWare code.
12204 while (isSPACE(*PL_oldbufptr))
12207 context = PL_oldbufptr;
12208 contlen = PL_bufptr - PL_oldbufptr;
12210 else if (yychar > 255)
12211 where = "next token ???";
12212 else if (yychar == -2) { /* YYEMPTY */
12213 if (PL_lex_state == LEX_NORMAL ||
12214 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12215 where = "at end of line";
12216 else if (PL_lex_inpat)
12217 where = "within pattern";
12219 where = "within string";
12222 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
12224 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12225 else if (isPRINT_LC(yychar))
12226 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
12228 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12229 where = SvPVX_const(where_sv);
12231 msg = sv_2mortal(newSVpv(s, 0));
12232 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12233 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12235 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12237 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12238 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12239 Perl_sv_catpvf(aTHX_ msg,
12240 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12241 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12244 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
12245 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
12248 if (PL_error_count >= 10) {
12249 if (PL_in_eval && SvCUR(ERRSV))
12250 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12251 ERRSV, OutCopFILE(PL_curcop));
12253 Perl_croak(aTHX_ "%s has too many errors.\n",
12254 OutCopFILE(PL_curcop));
12257 PL_in_my_stash = NULL;
12261 #pragma segment Main
12265 S_swallow_bom(pTHX_ U8 *s)
12268 const STRLEN slen = SvCUR(PL_linestr);
12271 if (s[1] == 0xFE) {
12272 /* UTF-16 little-endian? (or UTF32-LE?) */
12273 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12274 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12275 #ifndef PERL_NO_UTF16_FILTER
12276 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12279 if (PL_bufend > (char*)s) {
12283 filter_add(utf16rev_textfilter, NULL);
12284 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12285 utf16_to_utf8_reversed(s, news,
12286 PL_bufend - (char*)s - 1,
12288 sv_setpvn(PL_linestr, (const char*)news, newlen);
12290 s = (U8*)SvPVX(PL_linestr);
12291 Copy(news, s, newlen, U8);
12295 SvUTF8_on(PL_linestr);
12296 s = (U8*)SvPVX(PL_linestr);
12298 /* FIXME - is this a general bug fix? */
12301 PL_bufend = SvPVX(PL_linestr) + newlen;
12304 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12309 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12310 #ifndef PERL_NO_UTF16_FILTER
12311 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12314 if (PL_bufend > (char *)s) {
12318 filter_add(utf16_textfilter, NULL);
12319 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12320 utf16_to_utf8(s, news,
12321 PL_bufend - (char*)s,
12323 sv_setpvn(PL_linestr, (const char*)news, newlen);
12325 SvUTF8_on(PL_linestr);
12326 s = (U8*)SvPVX(PL_linestr);
12327 PL_bufend = SvPVX(PL_linestr) + newlen;
12330 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12335 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12336 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12337 s += 3; /* UTF-8 */
12343 if (s[2] == 0xFE && s[3] == 0xFF) {
12344 /* UTF-32 big-endian */
12345 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12348 else if (s[2] == 0 && s[3] != 0) {
12351 * are a good indicator of UTF-16BE. */
12352 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12357 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12360 * are a good indicator of UTF-16LE. */
12361 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12370 * Restore a source filter.
12374 restore_rsfp(pTHX_ void *f)
12377 PerlIO * const fp = (PerlIO*)f;
12379 if (PL_rsfp == PerlIO_stdin())
12380 PerlIO_clearerr(PL_rsfp);
12381 else if (PL_rsfp && (PL_rsfp != fp))
12382 PerlIO_close(PL_rsfp);
12386 #ifndef PERL_NO_UTF16_FILTER
12388 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12391 const STRLEN old = SvCUR(sv);
12392 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12393 DEBUG_P(PerlIO_printf(Perl_debug_log,
12394 "utf16_textfilter(%p): %d %d (%d)\n",
12395 utf16_textfilter, idx, maxlen, (int) count));
12399 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12400 Copy(SvPVX_const(sv), tmps, old, char);
12401 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12402 SvCUR(sv) - old, &newlen);
12403 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12405 DEBUG_P({sv_dump(sv);});
12410 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12413 const STRLEN old = SvCUR(sv);
12414 const I32 count = FILTER_READ(idx+1, sv, maxlen);
12415 DEBUG_P(PerlIO_printf(Perl_debug_log,
12416 "utf16rev_textfilter(%p): %d %d (%d)\n",
12417 utf16rev_textfilter, idx, maxlen, (int) count));
12421 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12422 Copy(SvPVX_const(sv), tmps, old, char);
12423 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12424 SvCUR(sv) - old, &newlen);
12425 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12427 DEBUG_P({ sv_dump(sv); });
12433 Returns a pointer to the next character after the parsed
12434 vstring, as well as updating the passed in sv.
12436 Function must be called like
12439 s = scan_vstring(s,sv);
12441 The sv should already be large enough to store the vstring
12442 passed in, for performance reasons.
12447 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
12450 const char *pos = s;
12451 const char *start = s;
12452 if (*pos == 'v') pos++; /* get past 'v' */
12453 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12455 if ( *pos != '.') {
12456 /* this may not be a v-string if followed by => */
12457 const char *next = pos;
12458 while (next < PL_bufend && isSPACE(*next))
12460 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
12461 /* return string not v-string */
12462 sv_setpvn(sv,(char *)s,pos-s);
12463 return (char *)pos;
12467 if (!isALPHA(*pos)) {
12468 U8 tmpbuf[UTF8_MAXBYTES+1];
12471 s++; /* get past 'v' */
12473 sv_setpvn(sv, "", 0);
12476 /* this is atoi() that tolerates underscores */
12479 const char *end = pos;
12481 while (--end >= s) {
12483 const UV orev = rev;
12484 rev += (*end - '0') * mult;
12486 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12487 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12488 "Integer overflow in decimal number");
12492 if (rev > 0x7FFFFFFF)
12493 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12495 /* Append native character for the rev point */
12496 tmpend = uvchr_to_utf8(tmpbuf, rev);
12497 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12498 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12500 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
12506 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
12510 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12518 * c-indentation-style: bsd
12519 * c-basic-offset: 4
12520 * indent-tabs-mode: t
12523 * ex: set ts=8 sts=4 sw=4 noet: