3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define XFAKEBRACK 128
41 #ifdef USE_UTF8_SCRIPTS
42 # define UTF (!IN_BYTES)
44 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
47 /* In variables named $^X, these are the legal values for X.
48 * 1999-02-27 mjd-perl-patch@plover.com */
49 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51 /* On MacOS, respect nonbreaking spaces */
52 #ifdef MACOS_TRADITIONAL
53 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58 /* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
60 * can get by with a single comparison (if the compiler is smart enough).
63 /* #define LEX_NOTPARSING 11 is done in perl.h. */
65 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
66 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
71 /* at end of code, eg "$x" followed by: */
72 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
75 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77 #define LEX_INTERPCONST 2 /* NOT USED */
78 #define LEX_FORMLINE 1 /* expecting a format line */
79 #define LEX_KNOWNEXT 0 /* next token known; just return it */
83 static const char* const lex_state_names[] = {
102 #include "keywords.h"
104 /* CLINE is a macro that ensures PL_copline has a sane value */
109 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
112 * Convenience functions to return different tokens and prime the
113 * lexer for the next token. They all take an argument.
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
125 * FUN1 : not used, except for not, which isn't a UNIOP
126 * BOop : bitwise or or xor
128 * SHop : shift operator
129 * PWop : power operator
130 * PMop : pattern-matching operator
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
134 * Rop : relational operator <= != gt
136 * Also see LOP and lop() below.
139 #ifdef DEBUGGING /* Serve -DT. */
140 # define REPORT(retval) tokereport((I32)retval)
142 # define REPORT(retval) (retval)
145 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
166 /* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
171 #define UNI2(f,x) { \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
178 return REPORT( (int)FUNC1 ); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
182 #define UNI(f) UNI2(f,XTERM)
183 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
185 #define UNIBRACK(f) { \
188 PL_last_uni = PL_oldbufptr; \
190 return REPORT( (int)FUNC1 ); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
195 /* grandfather return to old style */
196 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
200 /* how to interpret the yylval associated with the token */
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
210 static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
281 /* dump the returned token in rv, plus any optional arg in yylval */
284 S_tokereport(pTHX_ I32 rv)
288 const char *name = Nullch;
289 enum token_type type = TOKENTYPE_NONE;
290 const struct debug_tokens *p;
291 SV* const report = newSVpvs("<== ");
293 for (p = debug_tokens; p->token; p++) {
294 if (p->token == (int)rv) {
301 Perl_sv_catpv(aTHX_ report, name);
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
305 sv_catpvs(report, "EOF");
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
322 case TOKENTYPE_OPVAL:
324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
325 PL_op_name[yylval.opval->op_type]);
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
333 sv_catpvs(report, "(opval=null)");
336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
342 /* print the buffer with suitable escapes */
345 S_printbuf(pTHX_ const char* fmt, const char* s)
347 SV* const tmp = newSVpvs("");
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
362 S_ao(pTHX_ int toketype)
365 if (*PL_bufptr == '=') {
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
392 S_no_op(pTHX_ const char *what, char *s)
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
403 if (ckWARN_d(WARN_SYNTAX)) {
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
434 S_missingterm(pTHX_ char *s)
440 char * const nl = strrchr(s,'\n');
446 iscntrl(PL_multi_close)
448 PL_multi_close < 32 || PL_multi_close == 127
452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
457 *tmpbuf = (char)PL_multi_close;
461 q = strchr(s,'"') ? '\'' : '"';
462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
465 #define FEATURE_IS_ENABLED(name) \
466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
473 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
476 HV * const hinthv = GvHV(PL_hintgv);
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
488 Perl_deprecate(pTHX_ const char *s)
490 if (ckWARN(WARN_DEPRECATED))
491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
495 Perl_deprecate_old(pTHX_ const char *s)
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
507 "Use of %s is deprecated", s);
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
515 #ifdef PERL_CR_FILTER
519 register const char *s = SvPVX_const(sv);
520 register const char * const e = s + SvCUR(sv);
521 /* outer loop optimized to do nothing if there are no CR-LFs */
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
528 if (*s == '\r' && s[1] == '\n')
539 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
542 if (count > 0 && !maxlen)
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
555 Perl_lex_start(pTHX_ SV *line)
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
566 SAVEVPTR(PL_lex_inpat);
567 SAVEI32(PL_lex_inwhat);
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
574 SAVEI32(PL_nexttoke);
576 SAVECOPLINE(PL_curcop);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
590 SAVEI32(PL_sublex_info.sub_inwhat);
591 SAVESPTR(PL_lex_repl);
593 SAVEINT(PL_lex_expect);
595 PL_lex_state = LEX_NORMAL;
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
602 *PL_lex_casestack = '\0';
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
610 PL_sublex_info.sub_inwhat = 0;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
614 s = SvPV_const(PL_linestr, len);
615 if (!len || s[len-1] != ';') {
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
618 sv_catpvs(PL_linestr, "\n;");
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
623 PL_last_lop = PL_last_uni = Nullch;
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
637 PL_doextract = FALSE;
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
644 * increments the current line number in CopLINE(PL_curcop) and checks
645 * to see whether the line starts with a comment of the form
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
651 S_incline(pTHX_ char *s)
659 CopLINE_inc(PL_curcop);
662 while (SPACE_OR_TAB(*s)) s++;
663 if (strnEQ(s, "line", 4))
667 if (SPACE_OR_TAB(*s))
671 while (SPACE_OR_TAB(*s)) s++;
677 while (SPACE_OR_TAB(*s))
679 if (*s == '"' && (t = strchr(s+1, '"'))) {
684 for (t = s; !isSPACE(*t); t++) ;
687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
696 const char * const cf = CopFILE(PL_curcop);
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
732 CopFILE_free(PL_curcop);
733 CopFILE_set(PL_curcop, s);
736 CopLINE_set(PL_curcop, atoi(n)-1);
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
746 S_skipspace(pTHX_ register char *s)
749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
750 while (s < PL_bufend && SPACE_OR_TAB(*s))
756 SSize_t oldprevlen, oldoldprevlen;
757 SSize_t oldloplen = 0, oldunilen = 0;
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
769 if (PL_in_eval && !PL_rsfp) {
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
784 /* try to recharge the buffer */
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
788 /* end of file. Add on the -p or -n magic */
791 ";}continue{print or die qq(-p destination: $!\\n);}");
792 PL_minus_n = PL_minus_p = 0;
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
799 sv_setpvn(PL_linestr,";", 1);
801 /* reset variables for next time we lex */
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
805 PL_last_lop = PL_last_uni = Nullch;
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
819 (void)PerlIO_close(PL_rsfp);
824 /* not at end of file, so we only read another line */
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
829 oldunilen = PL_last_uni - PL_bufend;
831 oldloplen = PL_last_lop - PL_bufend;
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
838 PL_last_uni = s + oldunilen;
840 PL_last_lop = s + oldloplen;
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
847 SV * const sv = NEWSV(85,0);
849 sv_upgrade(sv, SVt_PVMG);
850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
874 if (PL_oldoldbufptr != PL_last_uni)
876 while (isSPACE(*PL_last_uni))
878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
879 if ((t = strchr(s, '(')) && t < PL_bufptr)
881 if (ckWARN_d(WARN_AMBIGUOUS)){
884 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
885 "Warning: Use of \"%s\" without parentheses is ambiguous",
892 * LOP : macro to build a list operator. Its behaviour has been replaced
893 * with a subroutine, S_lop() for which LOP is just another name.
896 #define LOP(f,x) return lop(f,x,s)
900 * Build a list operator (or something that might be one). The rules:
901 * - if we have a next token, then it's a list operator [why?]
902 * - if the next thing is an opening paren, then it's a function
903 * - else it's a list operator
907 S_lop(pTHX_ I32 f, int x, char *s)
914 PL_last_lop = PL_oldbufptr;
915 PL_last_lop_op = (OPCODE)f;
917 return REPORT(LSTOP);
924 return REPORT(LSTOP);
929 * When the lexer realizes it knows the next token (for instance,
930 * it is reordering tokens for the parser) then it can call S_force_next
931 * to know what token to return the next time the lexer is called. Caller
932 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
933 * handles the token correctly.
937 S_force_next(pTHX_ I32 type)
940 PL_nexttype[PL_nexttoke] = type;
942 if (PL_lex_state != LEX_KNOWNEXT) {
943 PL_lex_defer = PL_lex_state;
944 PL_lex_expect = PL_expect;
945 PL_lex_state = LEX_KNOWNEXT;
950 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
953 SV * const sv = newSVpvn(start,len);
954 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
961 * When the lexer knows the next thing is a word (for instance, it has
962 * just seen -> and it knows that the next char is a word char, then
963 * it calls S_force_word to stick the next word into the PL_next lookahead.
966 * char *start : buffer position (must be within PL_linestr)
967 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
968 * int check_keyword : if true, Perl checks to make sure the word isn't
969 * a keyword (do this if the word is a label, e.g. goto FOO)
970 * int allow_pack : if true, : characters will also be allowed (require,
972 * int allow_initial_tick : used by the "sub" lexer only.
976 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
982 start = skipspace(start);
984 if (isIDFIRST_lazy_if(s,UTF) ||
985 (allow_pack && *s == ':') ||
986 (allow_initial_tick && *s == '\'') )
988 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
989 if (check_keyword && keyword(PL_tokenbuf, len))
991 if (token == METHOD) {
996 PL_expect = XOPERATOR;
999 PL_nextval[PL_nexttoke].opval
1000 = (OP*)newSVOP(OP_CONST,0,
1001 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1002 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
1010 * Called when the lexer wants $foo *foo &foo etc, but the program
1011 * text only contains the "foo" portion. The first argument is a pointer
1012 * to the "foo", and the second argument is the type symbol to prefix.
1013 * Forces the next token to be a "WORD".
1014 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1018 S_force_ident(pTHX_ register const char *s, int kind)
1022 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1023 PL_nextval[PL_nexttoke].opval = o;
1026 o->op_private = OPpCONST_ENTERED;
1027 /* XXX see note in pp_entereval() for why we forgo typo
1028 warnings if the symbol must be introduced in an eval.
1030 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1031 kind == '$' ? SVt_PV :
1032 kind == '@' ? SVt_PVAV :
1033 kind == '%' ? SVt_PVHV :
1041 Perl_str_to_version(pTHX_ SV *sv)
1046 const char *start = SvPV_const(sv,len);
1047 const char * const end = start + len;
1048 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1049 while (start < end) {
1053 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1058 retval += ((NV)n)/nshift;
1067 * Forces the next token to be a version number.
1068 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1069 * and if "guessing" is TRUE, then no new token is created (and the caller
1070 * must use an alternative parsing method).
1074 S_force_version(pTHX_ char *s, int guessing)
1077 OP *version = Nullop;
1086 while (isDIGIT(*d) || *d == '_' || *d == '.')
1088 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1090 s = scan_num(s, &yylval);
1091 version = yylval.opval;
1092 ver = cSVOPx(version)->op_sv;
1093 if (SvPOK(ver) && !SvNIOK(ver)) {
1094 SvUPGRADE(ver, SVt_PVNV);
1095 SvNV_set(ver, str_to_version(ver));
1096 SvNOK_on(ver); /* hint that it is a version */
1103 /* NOTE: The parser sees the package name and the VERSION swapped */
1104 PL_nextval[PL_nexttoke].opval = version;
1112 * Tokenize a quoted string passed in as an SV. It finds the next
1113 * chunk, up to end of string or a backslash. It may make a new
1114 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1119 S_tokeq(pTHX_ SV *sv)
1123 register char *send;
1131 s = SvPV_force(sv, len);
1132 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1135 while (s < send && *s != '\\')
1140 if ( PL_hints & HINT_NEW_STRING ) {
1141 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1147 if (s + 1 < send && (s[1] == '\\'))
1148 s++; /* all that, just for this */
1153 SvCUR_set(sv, d - SvPVX_const(sv));
1155 if ( PL_hints & HINT_NEW_STRING )
1156 return new_constant(NULL, 0, "q", sv, pv, "q");
1161 * Now come three functions related to double-quote context,
1162 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1163 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1164 * interact with PL_lex_state, and create fake ( ... ) argument lists
1165 * to handle functions and concatenation.
1166 * They assume that whoever calls them will be setting up a fake
1167 * join call, because each subthing puts a ',' after it. This lets
1170 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1172 * (I'm not sure whether the spurious commas at the end of lcfirst's
1173 * arguments and join's arguments are created or not).
1178 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1180 * Pattern matching will set PL_lex_op to the pattern-matching op to
1181 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1183 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1185 * Everything else becomes a FUNC.
1187 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1188 * had an OP_CONST or OP_READLINE). This just sets us up for a
1189 * call to S_sublex_push().
1193 S_sublex_start(pTHX)
1196 register const I32 op_type = yylval.ival;
1198 if (op_type == OP_NULL) {
1199 yylval.opval = PL_lex_op;
1203 if (op_type == OP_CONST || op_type == OP_READLINE) {
1204 SV *sv = tokeq(PL_lex_stuff);
1206 if (SvTYPE(sv) == SVt_PVIV) {
1207 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1209 const char *p = SvPV_const(sv, len);
1210 SV * const nsv = newSVpvn(p, len);
1216 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1217 PL_lex_stuff = Nullsv;
1218 /* Allow <FH> // "foo" */
1219 if (op_type == OP_READLINE)
1220 PL_expect = XTERMORDORDOR;
1224 PL_sublex_info.super_state = PL_lex_state;
1225 PL_sublex_info.sub_inwhat = op_type;
1226 PL_sublex_info.sub_op = PL_lex_op;
1227 PL_lex_state = LEX_INTERPPUSH;
1231 yylval.opval = PL_lex_op;
1241 * Create a new scope to save the lexing state. The scope will be
1242 * ended in S_sublex_done. Returns a '(', starting the function arguments
1243 * to the uc, lc, etc. found before.
1244 * Sets PL_lex_state to LEX_INTERPCONCAT.
1253 PL_lex_state = PL_sublex_info.super_state;
1254 SAVEI32(PL_lex_dojoin);
1255 SAVEI32(PL_lex_brackets);
1256 SAVEI32(PL_lex_casemods);
1257 SAVEI32(PL_lex_starts);
1258 SAVEI32(PL_lex_state);
1259 SAVEVPTR(PL_lex_inpat);
1260 SAVEI32(PL_lex_inwhat);
1261 SAVECOPLINE(PL_curcop);
1262 SAVEPPTR(PL_bufptr);
1263 SAVEPPTR(PL_bufend);
1264 SAVEPPTR(PL_oldbufptr);
1265 SAVEPPTR(PL_oldoldbufptr);
1266 SAVEPPTR(PL_last_lop);
1267 SAVEPPTR(PL_last_uni);
1268 SAVEPPTR(PL_linestart);
1269 SAVESPTR(PL_linestr);
1270 SAVEGENERICPV(PL_lex_brackstack);
1271 SAVEGENERICPV(PL_lex_casestack);
1273 PL_linestr = PL_lex_stuff;
1274 PL_lex_stuff = Nullsv;
1276 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1277 = SvPVX(PL_linestr);
1278 PL_bufend += SvCUR(PL_linestr);
1279 PL_last_lop = PL_last_uni = Nullch;
1280 SAVEFREESV(PL_linestr);
1282 PL_lex_dojoin = FALSE;
1283 PL_lex_brackets = 0;
1284 Newx(PL_lex_brackstack, 120, char);
1285 Newx(PL_lex_casestack, 12, char);
1286 PL_lex_casemods = 0;
1287 *PL_lex_casestack = '\0';
1289 PL_lex_state = LEX_INTERPCONCAT;
1290 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1292 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1293 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1294 PL_lex_inpat = PL_sublex_info.sub_op;
1296 PL_lex_inpat = Nullop;
1303 * Restores lexer state after a S_sublex_push.
1310 if (!PL_lex_starts++) {
1311 SV * const sv = newSVpvs("");
1312 if (SvUTF8(PL_linestr))
1314 PL_expect = XOPERATOR;
1315 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1319 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1320 PL_lex_state = LEX_INTERPCASEMOD;
1324 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1325 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1326 PL_linestr = PL_lex_repl;
1328 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1329 PL_bufend += SvCUR(PL_linestr);
1330 PL_last_lop = PL_last_uni = Nullch;
1331 SAVEFREESV(PL_linestr);
1332 PL_lex_dojoin = FALSE;
1333 PL_lex_brackets = 0;
1334 PL_lex_casemods = 0;
1335 *PL_lex_casestack = '\0';
1337 if (SvEVALED(PL_lex_repl)) {
1338 PL_lex_state = LEX_INTERPNORMAL;
1340 /* we don't clear PL_lex_repl here, so that we can check later
1341 whether this is an evalled subst; that means we rely on the
1342 logic to ensure sublex_done() is called again only via the
1343 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1346 PL_lex_state = LEX_INTERPCONCAT;
1347 PL_lex_repl = Nullsv;
1353 PL_bufend = SvPVX(PL_linestr);
1354 PL_bufend += SvCUR(PL_linestr);
1355 PL_expect = XOPERATOR;
1356 PL_sublex_info.sub_inwhat = 0;
1364 Extracts a pattern, double-quoted string, or transliteration. This
1367 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1368 processing a pattern (PL_lex_inpat is true), a transliteration
1369 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1371 Returns a pointer to the character scanned up to. Iff this is
1372 advanced from the start pointer supplied (ie if anything was
1373 successfully parsed), will leave an OP for the substring scanned
1374 in yylval. Caller must intuit reason for not parsing further
1375 by looking at the next characters herself.
1379 double-quoted style: \r and \n
1380 regexp special ones: \D \s
1382 backrefs: \1 (deprecated in substitution replacements)
1383 case and quoting: \U \Q \E
1384 stops on @ and $, but not for $ as tail anchor
1386 In transliterations:
1387 characters are VERY literal, except for - not at the start or end
1388 of the string, which indicates a range. scan_const expands the
1389 range to the full set of intermediate characters.
1391 In double-quoted strings:
1393 double-quoted style: \r and \n
1395 backrefs: \1 (deprecated)
1396 case and quoting: \U \Q \E
1399 scan_const does *not* construct ops to handle interpolated strings.
1400 It stops processing as soon as it finds an embedded $ or @ variable
1401 and leaves it to the caller to work out what's going on.
1403 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1405 $ in pattern could be $foo or could be tail anchor. Assumption:
1406 it's a tail anchor if $ is the last thing in the string, or if it's
1407 followed by one of ")| \n\t"
1409 \1 (backreferences) are turned into $1
1411 The structure of the code is
1412 while (there's a character to process) {
1413 handle transliteration ranges
1414 skip regexp comments
1415 skip # initiated comments in //x patterns
1416 check for embedded @foo
1417 check for embedded scalars
1419 leave intact backslashes from leave (below)
1420 deprecate \1 in strings and sub replacements
1421 handle string-changing backslashes \l \U \Q \E, etc.
1422 switch (what was escaped) {
1423 handle - in a transliteration (becomes a literal -)
1424 handle \132 octal characters
1425 handle 0x15 hex characters
1426 handle \cV (control V)
1427 handle printf backslashes (\f, \r, \n, etc)
1429 } (end if backslash)
1430 } (end while character to read)
1435 S_scan_const(pTHX_ char *start)
1438 register char *send = PL_bufend; /* end of the constant */
1439 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1440 register char *s = start; /* start of the constant */
1441 register char *d = SvPVX(sv); /* destination for copies */
1442 bool dorange = FALSE; /* are we in a translit range? */
1443 bool didrange = FALSE; /* did we just finish a range? */
1444 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1445 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1448 UV literal_endpoint = 0;
1451 const char *leaveit = /* set of acceptably-backslashed characters */
1453 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1456 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1457 /* If we are doing a trans and we know we want UTF8 set expectation */
1458 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1459 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1463 while (s < send || dorange) {
1464 /* get transliterations out of the way (they're most literal) */
1465 if (PL_lex_inwhat == OP_TRANS) {
1466 /* expand a range A-Z to the full set of characters. AIE! */
1468 I32 i; /* current expanded character */
1469 I32 min; /* first character in range */
1470 I32 max; /* last character in range */
1473 char * const c = (char*)utf8_hop((U8*)d, -1);
1477 *c = (char)UTF_TO_NATIVE(0xff);
1478 /* mark the range as done, and continue */
1484 i = d - SvPVX_const(sv); /* remember current offset */
1485 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1486 d = SvPVX(sv) + i; /* refresh d after realloc */
1487 d -= 2; /* eat the first char and the - */
1489 min = (U8)*d; /* first char in range */
1490 max = (U8)d[1]; /* last char in range */
1494 "Invalid range \"%c-%c\" in transliteration operator",
1495 (char)min, (char)max);
1499 if (literal_endpoint == 2 &&
1500 ((isLOWER(min) && isLOWER(max)) ||
1501 (isUPPER(min) && isUPPER(max)))) {
1503 for (i = min; i <= max; i++)
1505 *d++ = NATIVE_TO_NEED(has_utf8,i);
1507 for (i = min; i <= max; i++)
1509 *d++ = NATIVE_TO_NEED(has_utf8,i);
1514 for (i = min; i <= max; i++)
1517 /* mark the range as done, and continue */
1521 literal_endpoint = 0;
1526 /* range begins (ignore - as first or last char) */
1527 else if (*s == '-' && s+1 < send && s != start) {
1529 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1532 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1542 literal_endpoint = 0;
1547 /* if we get here, we're not doing a transliteration */
1549 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1550 except for the last char, which will be done separately. */
1551 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1553 while (s+1 < send && *s != ')')
1554 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1556 else if (s[2] == '{' /* This should match regcomp.c */
1557 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1560 char *regparse = s + (s[2] == '{' ? 3 : 4);
1563 while (count && (c = *regparse)) {
1564 if (c == '\\' && regparse[1])
1572 if (*regparse != ')')
1573 regparse--; /* Leave one char for continuation. */
1574 while (s < regparse)
1575 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1579 /* likewise skip #-initiated comments in //x patterns */
1580 else if (*s == '#' && PL_lex_inpat &&
1581 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1582 while (s+1 < send && *s != '\n')
1583 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1586 /* check for embedded arrays
1587 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1589 else if (*s == '@' && s[1]
1590 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1593 /* check for embedded scalars. only stop if we're sure it's a
1596 else if (*s == '$') {
1597 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1599 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1600 break; /* in regexp, $ might be tail anchor */
1603 /* End of else if chain - OP_TRANS rejoin rest */
1606 if (*s == '\\' && s+1 < send) {
1609 /* some backslashes we leave behind */
1610 if (*leaveit && *s && strchr(leaveit, *s)) {
1611 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1612 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1616 /* deprecate \1 in strings and substitution replacements */
1617 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1618 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1620 if (ckWARN(WARN_SYNTAX))
1621 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1626 /* string-change backslash escapes */
1627 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1632 /* if we get here, it's either a quoted -, or a digit */
1635 /* quoted - in transliterations */
1637 if (PL_lex_inwhat == OP_TRANS) {
1647 Perl_warner(aTHX_ packWARN(WARN_MISC),
1648 "Unrecognized escape \\%c passed through",
1650 /* default action is to copy the quoted character */
1651 goto default_action;
1654 /* \132 indicates an octal constant */
1655 case '0': case '1': case '2': case '3':
1656 case '4': case '5': case '6': case '7':
1660 uv = grok_oct(s, &len, &flags, NULL);
1663 goto NUM_ESCAPE_INSERT;
1665 /* \x24 indicates a hex constant */
1669 char* const e = strchr(s, '}');
1670 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1671 PERL_SCAN_DISALLOW_PREFIX;
1676 yyerror("Missing right brace on \\x{}");
1680 uv = grok_hex(s, &len, &flags, NULL);
1686 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1687 uv = grok_hex(s, &len, &flags, NULL);
1693 /* Insert oct or hex escaped character.
1694 * There will always enough room in sv since such
1695 * escapes will be longer than any UTF-8 sequence
1696 * they can end up as. */
1698 /* We need to map to chars to ASCII before doing the tests
1701 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1702 if (!has_utf8 && uv > 255) {
1703 /* Might need to recode whatever we have
1704 * accumulated so far if it contains any
1707 * (Can't we keep track of that and avoid
1708 * this rescan? --jhi)
1712 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1713 if (!NATIVE_IS_INVARIANT(*c)) {
1718 const STRLEN offset = d - SvPVX_const(sv);
1720 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1724 while (src >= (const U8 *)SvPVX_const(sv)) {
1725 if (!NATIVE_IS_INVARIANT(*src)) {
1726 const U8 ch = NATIVE_TO_ASCII(*src);
1727 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1728 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1738 if (has_utf8 || uv > 255) {
1739 d = (char*)uvchr_to_utf8((U8*)d, uv);
1741 if (PL_lex_inwhat == OP_TRANS &&
1742 PL_sublex_info.sub_op) {
1743 PL_sublex_info.sub_op->op_private |=
1744 (PL_lex_repl ? OPpTRANS_FROM_UTF
1757 /* \N{LATIN SMALL LETTER A} is a named character */
1761 char* e = strchr(s, '}');
1767 yyerror("Missing right brace on \\N{}");
1771 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1773 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1774 PERL_SCAN_DISALLOW_PREFIX;
1777 uv = grok_hex(s, &len, &flags, NULL);
1779 goto NUM_ESCAPE_INSERT;
1781 res = newSVpvn(s + 1, e - s - 1);
1782 res = new_constant( Nullch, 0, "charnames",
1783 res, Nullsv, "\\N{...}" );
1785 sv_utf8_upgrade(res);
1786 str = SvPV_const(res,len);
1787 #ifdef EBCDIC_NEVER_MIND
1788 /* charnames uses pack U and that has been
1789 * recently changed to do the below uni->native
1790 * mapping, so this would be redundant (and wrong,
1791 * the code point would be doubly converted).
1792 * But leave this in just in case the pack U change
1793 * gets revoked, but the semantics is still
1794 * desireable for charnames. --jhi */
1796 UV uv = utf8_to_uvchr((const U8*)str, 0);
1799 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1801 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1802 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1803 str = SvPV_const(res, len);
1807 if (!has_utf8 && SvUTF8(res)) {
1808 const char * const ostart = SvPVX_const(sv);
1809 SvCUR_set(sv, d - ostart);
1812 sv_utf8_upgrade(sv);
1813 /* this just broke our allocation above... */
1814 SvGROW(sv, (STRLEN)(send - start));
1815 d = SvPVX(sv) + SvCUR(sv);
1818 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1819 const char * const odest = SvPVX_const(sv);
1821 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1822 d = SvPVX(sv) + (d - odest);
1824 Copy(str, d, len, char);
1831 yyerror("Missing braces on \\N{}");
1834 /* \c is a control character */
1843 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1846 yyerror("Missing control char name in \\c");
1850 /* printf-style backslashes, formfeeds, newlines, etc */
1852 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1855 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1858 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1861 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1864 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1867 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1870 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1876 } /* end if (backslash) */
1883 /* If we started with encoded form, or already know we want it
1884 and then encode the next character */
1885 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1887 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1888 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1891 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1892 const STRLEN off = d - SvPVX_const(sv);
1893 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1895 d = (char*)uvchr_to_utf8((U8*)d, uv);
1899 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1901 } /* while loop to process each character */
1903 /* terminate the string and set up the sv */
1905 SvCUR_set(sv, d - SvPVX_const(sv));
1906 if (SvCUR(sv) >= SvLEN(sv))
1907 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1910 if (PL_encoding && !has_utf8) {
1911 sv_recode_to_utf8(sv, PL_encoding);
1917 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1918 PL_sublex_info.sub_op->op_private |=
1919 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1923 /* shrink the sv if we allocated more than we used */
1924 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1925 SvPV_shrink_to_cur(sv);
1928 /* return the substring (via yylval) only if we parsed anything */
1929 if (s > PL_bufptr) {
1930 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1931 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1933 ( PL_lex_inwhat == OP_TRANS
1935 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1938 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1945 * Returns TRUE if there's more to the expression (e.g., a subscript),
1948 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1950 * ->[ and ->{ return TRUE
1951 * { and [ outside a pattern are always subscripts, so return TRUE
1952 * if we're outside a pattern and it's not { or [, then return FALSE
1953 * if we're in a pattern and the first char is a {
1954 * {4,5} (any digits around the comma) returns FALSE
1955 * if we're in a pattern and the first char is a [
1957 * [SOMETHING] has a funky algorithm to decide whether it's a
1958 * character class or not. It has to deal with things like
1959 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1960 * anything else returns TRUE
1963 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1966 S_intuit_more(pTHX_ register char *s)
1969 if (PL_lex_brackets)
1971 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1973 if (*s != '{' && *s != '[')
1978 /* In a pattern, so maybe we have {n,m}. */
1995 /* On the other hand, maybe we have a character class */
1998 if (*s == ']' || *s == '^')
2001 /* this is terrifying, and it works */
2002 int weight = 2; /* let's weigh the evidence */
2004 unsigned char un_char = 255, last_un_char;
2005 const char * const send = strchr(s,']');
2006 char tmpbuf[sizeof PL_tokenbuf * 4];
2008 if (!send) /* has to be an expression */
2011 Zero(seen,256,char);
2014 else if (isDIGIT(*s)) {
2016 if (isDIGIT(s[1]) && s[2] == ']')
2022 for (; s < send; s++) {
2023 last_un_char = un_char;
2024 un_char = (unsigned char)*s;
2029 weight -= seen[un_char] * 10;
2030 if (isALNUM_lazy_if(s+1,UTF)) {
2031 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2032 if ((int)strlen(tmpbuf) > 1
2033 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2038 else if (*s == '$' && s[1] &&
2039 strchr("[#!%*<>()-=",s[1])) {
2040 if (/*{*/ strchr("])} =",s[2]))
2049 if (strchr("wds]",s[1]))
2051 else if (seen['\''] || seen['"'])
2053 else if (strchr("rnftbxcav",s[1]))
2055 else if (isDIGIT(s[1])) {
2057 while (s[1] && isDIGIT(s[1]))
2067 if (strchr("aA01! ",last_un_char))
2069 if (strchr("zZ79~",s[1]))
2071 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2072 weight -= 5; /* cope with negative subscript */
2075 if (!isALNUM(last_un_char)
2076 && !(last_un_char == '$' || last_un_char == '@'
2077 || last_un_char == '&')
2078 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2083 if (keyword(tmpbuf, d - tmpbuf))
2086 if (un_char == last_un_char + 1)
2088 weight -= seen[un_char];
2093 if (weight >= 0) /* probably a character class */
2103 * Does all the checking to disambiguate
2105 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2106 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2108 * First argument is the stuff after the first token, e.g. "bar".
2110 * Not a method if bar is a filehandle.
2111 * Not a method if foo is a subroutine prototyped to take a filehandle.
2112 * Not a method if it's really "Foo $bar"
2113 * Method if it's "foo $bar"
2114 * Not a method if it's really "print foo $bar"
2115 * Method if it's really "foo package::" (interpreted as package->foo)
2116 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2117 * Not a method if bar is a filehandle or package, but is quoted with
2122 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2125 char *s = start + (*start == '$');
2126 char tmpbuf[sizeof PL_tokenbuf];
2131 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2135 const char *proto = SvPVX_const(cv);
2146 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2147 /* start is the beginning of the possible filehandle/object,
2148 * and s is the end of it
2149 * tmpbuf is a copy of it
2152 if (*start == '$') {
2153 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2158 return *s == '(' ? FUNCMETH : METHOD;
2160 if (!keyword(tmpbuf, len)) {
2161 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2166 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2167 if (indirgv && GvCVu(indirgv))
2169 /* filehandle or package name makes it a method */
2170 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2172 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2173 return 0; /* no assumptions -- "=>" quotes bearword */
2175 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2176 newSVpvn(tmpbuf,len));
2177 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2181 return *s == '(' ? FUNCMETH : METHOD;
2189 * Return a string of Perl code to load the debugger. If PERL5DB
2190 * is set, it will return the contents of that, otherwise a
2191 * compile-time require of perl5db.pl.
2199 const char * const pdb = PerlEnv_getenv("PERL5DB");
2203 SETERRNO(0,SS_NORMAL);
2204 return "BEGIN { require 'perl5db.pl' }";
2210 /* Encoded script support. filter_add() effectively inserts a
2211 * 'pre-processing' function into the current source input stream.
2212 * Note that the filter function only applies to the current source file
2213 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2215 * The datasv parameter (which may be NULL) can be used to pass
2216 * private data to this instance of the filter. The filter function
2217 * can recover the SV using the FILTER_DATA macro and use it to
2218 * store private buffers and state information.
2220 * The supplied datasv parameter is upgraded to a PVIO type
2221 * and the IoDIRP/IoANY field is used to store the function pointer,
2222 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2223 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2224 * private use must be set using malloc'd pointers.
2228 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2234 if (!PL_rsfp_filters)
2235 PL_rsfp_filters = newAV();
2237 datasv = NEWSV(255,0);
2238 SvUPGRADE(datasv, SVt_PVIO);
2239 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2240 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2241 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2242 IoANY(datasv), SvPV_nolen(datasv)));
2243 av_unshift(PL_rsfp_filters, 1);
2244 av_store(PL_rsfp_filters, 0, datasv) ;
2249 /* Delete most recently added instance of this filter function. */
2251 Perl_filter_del(pTHX_ filter_t funcp)
2257 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2259 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2261 /* if filter is on top of stack (usual case) just pop it off */
2262 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2263 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2264 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2265 IoANY(datasv) = (void *)NULL;
2266 sv_free(av_pop(PL_rsfp_filters));
2270 /* we need to search for the correct entry and clear it */
2271 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2275 /* Invoke the idxth filter function for the current rsfp. */
2276 /* maxlen 0 = read one text line */
2278 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2284 if (!PL_rsfp_filters)
2286 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2287 /* Provide a default input filter to make life easy. */
2288 /* Note that we append to the line. This is handy. */
2289 DEBUG_P(PerlIO_printf(Perl_debug_log,
2290 "filter_read %d: from rsfp\n", idx));
2294 const int old_len = SvCUR(buf_sv);
2296 /* ensure buf_sv is large enough */
2297 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2298 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2299 if (PerlIO_error(PL_rsfp))
2300 return -1; /* error */
2302 return 0 ; /* end of file */
2304 SvCUR_set(buf_sv, old_len + len) ;
2307 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2308 if (PerlIO_error(PL_rsfp))
2309 return -1; /* error */
2311 return 0 ; /* end of file */
2314 return SvCUR(buf_sv);
2316 /* Skip this filter slot if filter has been deleted */
2317 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2318 DEBUG_P(PerlIO_printf(Perl_debug_log,
2319 "filter_read %d: skipped (filter deleted)\n",
2321 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2323 /* Get function pointer hidden within datasv */
2324 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2325 DEBUG_P(PerlIO_printf(Perl_debug_log,
2326 "filter_read %d: via function %p (%s)\n",
2327 idx, datasv, SvPV_nolen_const(datasv)));
2328 /* Call function. The function is expected to */
2329 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2330 /* Return: <0:error, =0:eof, >0:not eof */
2331 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2335 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2338 #ifdef PERL_CR_FILTER
2339 if (!PL_rsfp_filters) {
2340 filter_add(S_cr_textfilter,NULL);
2343 if (PL_rsfp_filters) {
2345 SvCUR_set(sv, 0); /* start with empty line */
2346 if (FILTER_READ(0, sv, 0) > 0)
2347 return ( SvPVX(sv) ) ;
2352 return (sv_gets(sv, fp, append));
2356 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2361 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2365 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2366 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2368 return GvHV(gv); /* Foo:: */
2371 /* use constant CLASS => 'MyClass' */
2372 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2374 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2375 pkgname = SvPV_nolen_const(sv);
2379 return gv_stashpv(pkgname, FALSE);
2383 S_tokenize_use(pTHX_ int is_use, char *s) {
2385 if (PL_expect != XSTATE)
2386 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2387 is_use ? "use" : "no"));
2389 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2390 s = force_version(s, TRUE);
2391 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2392 PL_nextval[PL_nexttoke].opval = Nullop;
2395 else if (*s == 'v') {
2396 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2397 s = force_version(s, FALSE);
2401 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2402 s = force_version(s, FALSE);
2404 yylval.ival = is_use;
2408 static const char* const exp_name[] =
2409 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2410 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2417 Works out what to call the token just pulled out of the input
2418 stream. The yacc parser takes care of taking the ops we return and
2419 stitching them into a tree.
2425 if read an identifier
2426 if we're in a my declaration
2427 croak if they tried to say my($foo::bar)
2428 build the ops for a my() declaration
2429 if it's an access to a my() variable
2430 are we in a sort block?
2431 croak if my($a); $a <=> $b
2432 build ops for access to a my() variable
2433 if in a dq string, and they've said @foo and we can't find @foo
2435 build ops for a bareword
2436 if we already built the token before, use it.
2441 #pragma segment Perl_yylex
2447 register char *s = PL_bufptr;
2453 SV* tmp = newSVpvs("");
2454 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2455 (IV)CopLINE(PL_curcop),
2456 lex_state_names[PL_lex_state],
2457 exp_name[PL_expect],
2458 pv_display(tmp, s, strlen(s), 0, 60));
2461 /* check if there's an identifier for us to look at */
2462 if (PL_pending_ident)
2463 return REPORT(S_pending_ident(aTHX));
2465 /* no identifier pending identification */
2467 switch (PL_lex_state) {
2469 case LEX_NORMAL: /* Some compilers will produce faster */
2470 case LEX_INTERPNORMAL: /* code if we comment these out. */
2474 /* when we've already built the next token, just pull it out of the queue */
2477 yylval = PL_nextval[PL_nexttoke];
2479 PL_lex_state = PL_lex_defer;
2480 PL_expect = PL_lex_expect;
2481 PL_lex_defer = LEX_NORMAL;
2483 return REPORT(PL_nexttype[PL_nexttoke]);
2485 /* interpolated case modifiers like \L \U, including \Q and \E.
2486 when we get here, PL_bufptr is at the \
2488 case LEX_INTERPCASEMOD:
2490 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2491 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2493 /* handle \E or end of string */
2494 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2496 if (PL_lex_casemods) {
2497 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2498 PL_lex_casestack[PL_lex_casemods] = '\0';
2500 if (PL_bufptr != PL_bufend
2501 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2503 PL_lex_state = LEX_INTERPCONCAT;
2507 if (PL_bufptr != PL_bufend)
2509 PL_lex_state = LEX_INTERPCONCAT;
2513 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2514 "### Saw case modifier\n"); });
2516 if (s[1] == '\\' && s[2] == 'E') {
2518 PL_lex_state = LEX_INTERPCONCAT;
2523 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2524 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2525 if ((*s == 'L' || *s == 'U') &&
2526 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2527 PL_lex_casestack[--PL_lex_casemods] = '\0';
2530 if (PL_lex_casemods > 10)
2531 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2532 PL_lex_casestack[PL_lex_casemods++] = *s;
2533 PL_lex_casestack[PL_lex_casemods] = '\0';
2534 PL_lex_state = LEX_INTERPCONCAT;
2535 PL_nextval[PL_nexttoke].ival = 0;
2538 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2540 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2542 PL_nextval[PL_nexttoke].ival = OP_LC;
2544 PL_nextval[PL_nexttoke].ival = OP_UC;
2546 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2548 Perl_croak(aTHX_ "panic: yylex");
2552 if (PL_lex_starts) {
2555 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2556 if (PL_lex_casemods == 1 && PL_lex_inpat)
2565 case LEX_INTERPPUSH:
2566 return REPORT(sublex_push());
2568 case LEX_INTERPSTART:
2569 if (PL_bufptr == PL_bufend)
2570 return REPORT(sublex_done());
2571 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2572 "### Interpolated variable\n"); });
2574 PL_lex_dojoin = (*PL_bufptr == '@');
2575 PL_lex_state = LEX_INTERPNORMAL;
2576 if (PL_lex_dojoin) {
2577 PL_nextval[PL_nexttoke].ival = 0;
2579 force_ident("\"", '$');
2580 PL_nextval[PL_nexttoke].ival = 0;
2582 PL_nextval[PL_nexttoke].ival = 0;
2584 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2587 if (PL_lex_starts++) {
2589 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2590 if (!PL_lex_casemods && PL_lex_inpat)
2597 case LEX_INTERPENDMAYBE:
2598 if (intuit_more(PL_bufptr)) {
2599 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2605 if (PL_lex_dojoin) {
2606 PL_lex_dojoin = FALSE;
2607 PL_lex_state = LEX_INTERPCONCAT;
2610 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2611 && SvEVALED(PL_lex_repl))
2613 if (PL_bufptr != PL_bufend)
2614 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2615 PL_lex_repl = Nullsv;
2618 case LEX_INTERPCONCAT:
2620 if (PL_lex_brackets)
2621 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2623 if (PL_bufptr == PL_bufend)
2624 return REPORT(sublex_done());
2626 if (SvIVX(PL_linestr) == '\'') {
2627 SV *sv = newSVsv(PL_linestr);
2630 else if ( PL_hints & HINT_NEW_RE )
2631 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2632 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2636 s = scan_const(PL_bufptr);
2638 PL_lex_state = LEX_INTERPCASEMOD;
2640 PL_lex_state = LEX_INTERPSTART;
2643 if (s != PL_bufptr) {
2644 PL_nextval[PL_nexttoke] = yylval;
2647 if (PL_lex_starts++) {
2648 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2649 if (!PL_lex_casemods && PL_lex_inpat)
2662 PL_lex_state = LEX_NORMAL;
2663 s = scan_formline(PL_bufptr);
2664 if (!PL_lex_formbrack)
2670 PL_oldoldbufptr = PL_oldbufptr;
2676 if (isIDFIRST_lazy_if(s,UTF))
2678 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2681 goto fake_eof; /* emulate EOF on ^D or ^Z */
2686 if (PL_lex_brackets) {
2687 yyerror(PL_lex_formbrack
2688 ? "Format not terminated"
2689 : "Missing right curly or square bracket");
2691 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2692 "### Tokener got EOF\n");
2696 if (s++ < PL_bufend)
2697 goto retry; /* ignore stray nulls */
2700 if (!PL_in_eval && !PL_preambled) {
2701 PL_preambled = TRUE;
2702 sv_setpv(PL_linestr,incl_perldb());
2703 if (SvCUR(PL_linestr))
2704 sv_catpvs(PL_linestr,";");
2706 while(AvFILLp(PL_preambleav) >= 0) {
2707 SV *tmpsv = av_shift(PL_preambleav);
2708 sv_catsv(PL_linestr, tmpsv);
2709 sv_catpvs(PL_linestr, ";");
2712 sv_free((SV*)PL_preambleav);
2713 PL_preambleav = NULL;
2715 if (PL_minus_n || PL_minus_p) {
2716 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2718 sv_catpvs(PL_linestr,"chomp;");
2721 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2722 || *PL_splitstr == '"')
2723 && strchr(PL_splitstr + 1, *PL_splitstr))
2724 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2726 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2727 bytes can be used as quoting characters. :-) */
2728 const char *splits = PL_splitstr;
2729 sv_catpvs(PL_linestr, "our @F=split(q\0");
2732 if (*splits == '\\')
2733 sv_catpvn(PL_linestr, splits, 1);
2734 sv_catpvn(PL_linestr, splits, 1);
2735 } while (*splits++);
2736 /* This loop will embed the trailing NUL of
2737 PL_linestr as the last thing it does before
2739 sv_catpvs(PL_linestr, ");");
2743 sv_catpvs(PL_linestr,"our @F=split(' ');");
2747 sv_catpvs(PL_linestr,"use feature ':5.10';");
2748 sv_catpvs(PL_linestr, "\n");
2749 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2750 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2751 PL_last_lop = PL_last_uni = Nullch;
2752 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2753 SV * const sv = NEWSV(85,0);
2755 sv_upgrade(sv, SVt_PVMG);
2756 sv_setsv(sv,PL_linestr);
2759 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2764 bof = PL_rsfp ? TRUE : FALSE;
2765 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2768 if (PL_preprocess && !PL_in_eval)
2769 (void)PerlProc_pclose(PL_rsfp);
2770 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2771 PerlIO_clearerr(PL_rsfp);
2773 (void)PerlIO_close(PL_rsfp);
2775 PL_doextract = FALSE;
2777 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2778 sv_setpv(PL_linestr,PL_minus_p
2779 ? ";}continue{print;}" : ";}");
2780 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2781 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2782 PL_last_lop = PL_last_uni = Nullch;
2783 PL_minus_n = PL_minus_p = 0;
2786 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2787 PL_last_lop = PL_last_uni = Nullch;
2788 sv_setpvn(PL_linestr,"",0);
2789 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2791 /* If it looks like the start of a BOM or raw UTF-16,
2792 * check if it in fact is. */
2798 #ifdef PERLIO_IS_STDIO
2799 # ifdef __GNU_LIBRARY__
2800 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2801 # define FTELL_FOR_PIPE_IS_BROKEN
2805 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2806 # define FTELL_FOR_PIPE_IS_BROKEN
2811 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2812 /* This loses the possibility to detect the bof
2813 * situation on perl -P when the libc5 is being used.
2814 * Workaround? Maybe attach some extra state to PL_rsfp?
2817 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2819 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2822 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2823 s = swallow_bom((U8*)s);
2827 /* Incest with pod. */
2828 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2829 sv_setpvn(PL_linestr, "", 0);
2830 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2831 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2832 PL_last_lop = PL_last_uni = Nullch;
2833 PL_doextract = FALSE;
2837 } while (PL_doextract);
2838 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2839 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2840 SV * const sv = NEWSV(85,0);
2842 sv_upgrade(sv, SVt_PVMG);
2843 sv_setsv(sv,PL_linestr);
2846 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2848 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2849 PL_last_lop = PL_last_uni = Nullch;
2850 if (CopLINE(PL_curcop) == 1) {
2851 while (s < PL_bufend && isSPACE(*s))
2853 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2857 if (*s == '#' && *(s+1) == '!')
2859 #ifdef ALTERNATE_SHEBANG
2861 static char const as[] = ALTERNATE_SHEBANG;
2862 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2863 d = s + (sizeof(as) - 1);
2865 #endif /* ALTERNATE_SHEBANG */
2874 while (*d && !isSPACE(*d))
2878 #ifdef ARG_ZERO_IS_SCRIPT
2879 if (ipathend > ipath) {
2881 * HP-UX (at least) sets argv[0] to the script name,
2882 * which makes $^X incorrect. And Digital UNIX and Linux,
2883 * at least, set argv[0] to the basename of the Perl
2884 * interpreter. So, having found "#!", we'll set it right.
2887 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2888 assert(SvPOK(x) || SvGMAGICAL(x));
2889 if (sv_eq(x, CopFILESV(PL_curcop))) {
2890 sv_setpvn(x, ipath, ipathend - ipath);
2896 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2897 const char * const lstart = SvPV_const(x,llen);
2899 bstart += blen - llen;
2900 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2901 sv_setpvn(x, ipath, ipathend - ipath);
2906 TAINT_NOT; /* $^X is always tainted, but that's OK */
2908 #endif /* ARG_ZERO_IS_SCRIPT */
2913 d = instr(s,"perl -");
2915 d = instr(s,"perl");
2917 /* avoid getting into infinite loops when shebang
2918 * line contains "Perl" rather than "perl" */
2920 for (d = ipathend-4; d >= ipath; --d) {
2921 if ((*d == 'p' || *d == 'P')
2922 && !ibcmp(d, "perl", 4))
2932 #ifdef ALTERNATE_SHEBANG
2934 * If the ALTERNATE_SHEBANG on this system starts with a
2935 * character that can be part of a Perl expression, then if
2936 * we see it but not "perl", we're probably looking at the
2937 * start of Perl code, not a request to hand off to some
2938 * other interpreter. Similarly, if "perl" is there, but
2939 * not in the first 'word' of the line, we assume the line
2940 * contains the start of the Perl program.
2942 if (d && *s != '#') {
2943 const char *c = ipath;
2944 while (*c && !strchr("; \t\r\n\f\v#", *c))
2947 d = Nullch; /* "perl" not in first word; ignore */
2949 *s = '#'; /* Don't try to parse shebang line */
2951 #endif /* ALTERNATE_SHEBANG */
2952 #ifndef MACOS_TRADITIONAL
2957 !instr(s,"indir") &&
2958 instr(PL_origargv[0],"perl"))
2965 while (s < PL_bufend && isSPACE(*s))
2967 if (s < PL_bufend) {
2968 Newxz(newargv,PL_origargc+3,char*);
2970 while (s < PL_bufend && !isSPACE(*s))
2973 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2976 newargv = PL_origargv;
2979 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2981 Perl_croak(aTHX_ "Can't exec %s", ipath);
2985 const U32 oldpdb = PL_perldb;
2986 const bool oldn = PL_minus_n;
2987 const bool oldp = PL_minus_p;
2989 while (*d && !isSPACE(*d)) d++;
2990 while (SPACE_OR_TAB(*d)) d++;
2993 const bool switches_done = PL_doswitches;
2995 if (*d == 'M' || *d == 'm' || *d == 'C') {
2996 const char * const m = d;
2997 while (*d && !isSPACE(*d)) d++;
2998 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3001 d = moreswitches(d);
3003 if (PL_doswitches && !switches_done) {
3004 int argc = PL_origargc;
3005 char **argv = PL_origargv;
3008 } while (argc && argv[0][0] == '-' && argv[0][1]);
3009 init_argv_symbols(argc,argv);
3011 if ((PERLDB_LINE && !oldpdb) ||
3012 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3013 /* if we have already added "LINE: while (<>) {",
3014 we must not do it again */
3016 sv_setpvn(PL_linestr, "", 0);
3017 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3018 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3019 PL_last_lop = PL_last_uni = Nullch;
3020 PL_preambled = FALSE;
3022 (void)gv_fetchfile(PL_origfilename);
3029 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3031 PL_lex_state = LEX_FORMLINE;
3036 #ifdef PERL_STRICT_CR
3037 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3039 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3041 case ' ': case '\t': case '\f': case 013:
3042 #ifdef MACOS_TRADITIONAL
3049 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3050 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3051 /* handle eval qq[#line 1 "foo"\n ...] */
3052 CopLINE_dec(PL_curcop);
3056 while (s < d && *s != '\n')
3060 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3061 Perl_croak(aTHX_ "panic: input overflow");
3063 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3065 PL_lex_state = LEX_FORMLINE;
3075 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3083 while (s < PL_bufend && SPACE_OR_TAB(*s))
3086 if (strnEQ(s,"=>",2)) {
3087 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3088 DEBUG_T( { S_printbuf(aTHX_
3089 "### Saw unary minus before =>, forcing word %s\n", s);
3091 OPERATOR('-'); /* unary minus */
3093 PL_last_uni = PL_oldbufptr;
3095 case 'r': ftst = OP_FTEREAD; break;
3096 case 'w': ftst = OP_FTEWRITE; break;
3097 case 'x': ftst = OP_FTEEXEC; break;
3098 case 'o': ftst = OP_FTEOWNED; break;
3099 case 'R': ftst = OP_FTRREAD; break;
3100 case 'W': ftst = OP_FTRWRITE; break;
3101 case 'X': ftst = OP_FTREXEC; break;
3102 case 'O': ftst = OP_FTROWNED; break;
3103 case 'e': ftst = OP_FTIS; break;
3104 case 'z': ftst = OP_FTZERO; break;
3105 case 's': ftst = OP_FTSIZE; break;
3106 case 'f': ftst = OP_FTFILE; break;
3107 case 'd': ftst = OP_FTDIR; break;
3108 case 'l': ftst = OP_FTLINK; break;
3109 case 'p': ftst = OP_FTPIPE; break;
3110 case 'S': ftst = OP_FTSOCK; break;
3111 case 'u': ftst = OP_FTSUID; break;
3112 case 'g': ftst = OP_FTSGID; break;
3113 case 'k': ftst = OP_FTSVTX; break;
3114 case 'b': ftst = OP_FTBLK; break;
3115 case 'c': ftst = OP_FTCHR; break;
3116 case 't': ftst = OP_FTTTY; break;
3117 case 'T': ftst = OP_FTTEXT; break;
3118 case 'B': ftst = OP_FTBINARY; break;
3119 case 'M': case 'A': case 'C':
3120 gv_fetchpv("\024",GV_ADD, SVt_PV);
3122 case 'M': ftst = OP_FTMTIME; break;
3123 case 'A': ftst = OP_FTATIME; break;
3124 case 'C': ftst = OP_FTCTIME; break;
3132 PL_last_lop_op = (OPCODE)ftst;
3133 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3134 "### Saw file test %c\n", (int)tmp);
3139 /* Assume it was a minus followed by a one-letter named
3140 * subroutine call (or a -bareword), then. */
3141 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3142 "### '-%c' looked like a file test but was not\n",
3149 const char tmp = *s++;
3152 if (PL_expect == XOPERATOR)
3157 else if (*s == '>') {
3160 if (isIDFIRST_lazy_if(s,UTF)) {
3161 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3169 if (PL_expect == XOPERATOR)
3172 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3174 OPERATOR('-'); /* unary minus */
3180 const char tmp = *s++;
3183 if (PL_expect == XOPERATOR)
3188 if (PL_expect == XOPERATOR)
3191 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3198 if (PL_expect != XOPERATOR) {
3199 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3200 PL_expect = XOPERATOR;
3201 force_ident(PL_tokenbuf, '*');
3214 if (PL_expect == XOPERATOR) {
3218 PL_tokenbuf[0] = '%';
3219 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3220 if (!PL_tokenbuf[1]) {
3223 PL_pending_ident = '%';
3234 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3235 && FEATURE_IS_ENABLED("~~"))
3242 const char tmp = *s++;
3248 goto just_a_word_zero_gv;
3251 switch (PL_expect) {
3254 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3256 PL_bufptr = s; /* update in case we back off */
3262 PL_expect = XTERMBLOCK;
3266 while (isIDFIRST_lazy_if(s,UTF)) {
3268 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3269 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3270 if (tmp < 0) tmp = -tmp;
3286 d = scan_str(d,TRUE,TRUE);
3288 /* MUST advance bufptr here to avoid bogus
3289 "at end of line" context messages from yyerror().
3291 PL_bufptr = s + len;
3292 yyerror("Unterminated attribute parameter in attribute list");
3295 return REPORT(0); /* EOF indicator */
3299 SV *sv = newSVpvn(s, len);
3300 sv_catsv(sv, PL_lex_stuff);
3301 attrs = append_elem(OP_LIST, attrs,
3302 newSVOP(OP_CONST, 0, sv));
3303 SvREFCNT_dec(PL_lex_stuff);
3304 PL_lex_stuff = Nullsv;
3307 if (len == 6 && strnEQ(s, "unique", len)) {
3308 if (PL_in_my == KEY_our)
3310 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3312 ; /* skip to avoid loading attributes.pm */
3315 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3318 /* NOTE: any CV attrs applied here need to be part of
3319 the CVf_BUILTIN_ATTRS define in cv.h! */
3320 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3321 CvLVALUE_on(PL_compcv);
3322 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3323 CvLOCKED_on(PL_compcv);
3324 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3325 CvMETHOD_on(PL_compcv);
3326 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3327 CvASSERTION_on(PL_compcv);
3328 /* After we've set the flags, it could be argued that
3329 we don't need to do the attributes.pm-based setting
3330 process, and shouldn't bother appending recognized
3331 flags. To experiment with that, uncomment the
3332 following "else". (Note that's already been
3333 uncommented. That keeps the above-applied built-in
3334 attributes from being intercepted (and possibly
3335 rejected) by a package's attribute routines, but is
3336 justified by the performance win for the common case
3337 of applying only built-in attributes.) */
3339 attrs = append_elem(OP_LIST, attrs,
3340 newSVOP(OP_CONST, 0,
3344 if (*s == ':' && s[1] != ':')
3347 break; /* require real whitespace or :'s */
3351 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3352 if (*s != ';' && *s != '}' && *s != tmp
3353 && (tmp != '=' || *s != ')')) {
3354 const char q = ((*s == '\'') ? '"' : '\'');
3355 /* If here for an expression, and parsed no attrs, back
3357 if (tmp == '=' && !attrs) {
3361 /* MUST advance bufptr here to avoid bogus "at end of line"
3362 context messages from yyerror().
3366 ? Perl_form(aTHX_ "Invalid separator character "
3367 "%c%c%c in attribute list", q, *s, q)
3368 : "Unterminated attribute list" );
3376 PL_nextval[PL_nexttoke].opval = attrs;
3384 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3385 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3393 const char tmp = *s++;
3398 const char tmp = *s++;
3406 if (PL_lex_brackets <= 0)
3407 yyerror("Unmatched right square bracket");
3410 if (PL_lex_state == LEX_INTERPNORMAL) {
3411 if (PL_lex_brackets == 0) {
3412 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3413 PL_lex_state = LEX_INTERPEND;
3420 if (PL_lex_brackets > 100) {
3421 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3423 switch (PL_expect) {
3425 if (PL_lex_formbrack) {
3429 if (PL_oldoldbufptr == PL_last_lop)
3430 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3432 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3433 OPERATOR(HASHBRACK);
3435 while (s < PL_bufend && SPACE_OR_TAB(*s))
3438 PL_tokenbuf[0] = '\0';
3439 if (d < PL_bufend && *d == '-') {
3440 PL_tokenbuf[0] = '-';
3442 while (d < PL_bufend && SPACE_OR_TAB(*d))
3445 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3446 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3448 while (d < PL_bufend && SPACE_OR_TAB(*d))
3451 const char minus = (PL_tokenbuf[0] == '-');
3452 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3460 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3465 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3470 if (PL_oldoldbufptr == PL_last_lop)
3471 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3473 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3476 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3478 /* This hack is to get the ${} in the message. */
3480 yyerror("syntax error");
3483 OPERATOR(HASHBRACK);
3485 /* This hack serves to disambiguate a pair of curlies
3486 * as being a block or an anon hash. Normally, expectation
3487 * determines that, but in cases where we're not in a
3488 * position to expect anything in particular (like inside
3489 * eval"") we have to resolve the ambiguity. This code
3490 * covers the case where the first term in the curlies is a
3491 * quoted string. Most other cases need to be explicitly
3492 * disambiguated by prepending a "+" before the opening
3493 * curly in order to force resolution as an anon hash.
3495 * XXX should probably propagate the outer expectation
3496 * into eval"" to rely less on this hack, but that could
3497 * potentially break current behavior of eval"".
3501 if (*s == '\'' || *s == '"' || *s == '`') {
3502 /* common case: get past first string, handling escapes */
3503 for (t++; t < PL_bufend && *t != *s;)
3504 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3508 else if (*s == 'q') {
3511 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3514 /* skip q//-like construct */
3516 char open, close, term;
3519 while (t < PL_bufend && isSPACE(*t))
3521 /* check for q => */
3522 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3523 OPERATOR(HASHBRACK);
3527 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3531 for (t++; t < PL_bufend; t++) {
3532 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3534 else if (*t == open)
3538 for (t++; t < PL_bufend; t++) {
3539 if (*t == '\\' && t+1 < PL_bufend)
3541 else if (*t == close && --brackets <= 0)
3543 else if (*t == open)
3550 /* skip plain q word */
3551 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3554 else if (isALNUM_lazy_if(t,UTF)) {
3556 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3559 while (t < PL_bufend && isSPACE(*t))
3561 /* if comma follows first term, call it an anon hash */
3562 /* XXX it could be a comma expression with loop modifiers */
3563 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3564 || (*t == '=' && t[1] == '>')))
3565 OPERATOR(HASHBRACK);
3566 if (PL_expect == XREF)
3569 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3575 yylval.ival = CopLINE(PL_curcop);
3576 if (isSPACE(*s) || *s == '#')
3577 PL_copline = NOLINE; /* invalidate current command line number */
3582 if (PL_lex_brackets <= 0)
3583 yyerror("Unmatched right curly bracket");
3585 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3586 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3587 PL_lex_formbrack = 0;
3588 if (PL_lex_state == LEX_INTERPNORMAL) {
3589 if (PL_lex_brackets == 0) {
3590 if (PL_expect & XFAKEBRACK) {
3591 PL_expect &= XENUMMASK;
3592 PL_lex_state = LEX_INTERPEND;
3594 return yylex(); /* ignore fake brackets */
3596 if (*s == '-' && s[1] == '>')
3597 PL_lex_state = LEX_INTERPENDMAYBE;
3598 else if (*s != '[' && *s != '{')
3599 PL_lex_state = LEX_INTERPEND;
3602 if (PL_expect & XFAKEBRACK) {
3603 PL_expect &= XENUMMASK;
3605 return yylex(); /* ignore fake brackets */
3614 if (PL_expect == XOPERATOR) {
3615 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3616 && isIDFIRST_lazy_if(s,UTF))
3618 CopLINE_dec(PL_curcop);
3619 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3620 CopLINE_inc(PL_curcop);
3625 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3627 PL_expect = XOPERATOR;
3628 force_ident(PL_tokenbuf, '&');
3632 yylval.ival = (OPpENTERSUB_AMPER<<8);
3644 const char tmp = *s++;
3651 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3652 && strchr("+-*/%.^&|<",tmp))
3653 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3654 "Reversed %c= operator",(int)tmp);
3656 if (PL_expect == XSTATE && isALPHA(tmp) &&
3657 (s == PL_linestart+1 || s[-2] == '\n') )
3659 if (PL_in_eval && !PL_rsfp) {
3664 if (strnEQ(s,"=cut",4)) {
3678 PL_doextract = TRUE;
3682 if (PL_lex_brackets < PL_lex_formbrack) {
3684 #ifdef PERL_STRICT_CR
3685 for (t = s; SPACE_OR_TAB(*t); t++) ;
3687 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3689 if (*t == '\n' || *t == '#') {
3700 const char tmp = *s++;
3702 /* was this !=~ where !~ was meant?
3703 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3705 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3706 const char *t = s+1;
3708 while (t < PL_bufend && isSPACE(*t))
3711 if (*t == '/' || *t == '?' ||
3712 ((*t == 'm' || *t == 's' || *t == 'y')
3713 && !isALNUM(t[1])) ||
3714 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3715 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3716 "!=~ should be !~");
3726 if (PL_expect != XOPERATOR) {
3727 if (s[1] != '<' && !strchr(s,'>'))
3730 s = scan_heredoc(s);
3732 s = scan_inputsymbol(s);
3733 TERM(sublex_start());
3739 SHop(OP_LEFT_SHIFT);
3753 const char tmp = *s++;
3755 SHop(OP_RIGHT_SHIFT);
3765 if (PL_expect == XOPERATOR) {
3766 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3768 deprecate_old(commaless_variable_list);
3769 return REPORT(','); /* grandfather non-comma-format format */
3773 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3774 PL_tokenbuf[0] = '@';
3775 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3776 sizeof PL_tokenbuf - 1, FALSE);
3777 if (PL_expect == XOPERATOR)
3778 no_op("Array length", s);
3779 if (!PL_tokenbuf[1])
3781 PL_expect = XOPERATOR;
3782 PL_pending_ident = '#';
3786 PL_tokenbuf[0] = '$';
3787 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3788 sizeof PL_tokenbuf - 1, FALSE);
3789 if (PL_expect == XOPERATOR)
3791 if (!PL_tokenbuf[1]) {
3793 yyerror("Final $ should be \\$ or $name");
3797 /* This kludge not intended to be bulletproof. */
3798 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3799 yylval.opval = newSVOP(OP_CONST, 0,
3800 newSViv(PL_compiling.cop_arybase));
3801 yylval.opval->op_private = OPpCONST_ARYBASE;
3807 const char tmp = *s;
3808 if (PL_lex_state == LEX_NORMAL)
3811 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3812 && intuit_more(s)) {
3814 PL_tokenbuf[0] = '@';
3815 if (ckWARN(WARN_SYNTAX)) {
3818 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3821 PL_bufptr = skipspace(PL_bufptr);
3822 while (t < PL_bufend && *t != ']')
3824 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3825 "Multidimensional syntax %.*s not supported",
3826 (int)((t - PL_bufptr) + 1), PL_bufptr);
3830 else if (*s == '{') {
3832 PL_tokenbuf[0] = '%';
3833 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3834 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3836 char tmpbuf[sizeof PL_tokenbuf];
3837 for (t++; isSPACE(*t); t++) ;
3838 if (isIDFIRST_lazy_if(t,UTF)) {
3840 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3842 for (; isSPACE(*t); t++) ;
3843 if (*t == ';' && get_cv(tmpbuf, FALSE))
3844 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3845 "You need to quote \"%s\"",
3852 PL_expect = XOPERATOR;
3853 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3854 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3855 if (!islop || PL_last_lop_op == OP_GREPSTART)
3856 PL_expect = XOPERATOR;
3857 else if (strchr("$@\"'`q", *s))
3858 PL_expect = XTERM; /* e.g. print $fh "foo" */
3859 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3860 PL_expect = XTERM; /* e.g. print $fh &sub */
3861 else if (isIDFIRST_lazy_if(s,UTF)) {
3862 char tmpbuf[sizeof PL_tokenbuf];
3864 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3865 if ((t2 = keyword(tmpbuf, len))) {
3866 /* binary operators exclude handle interpretations */
3878 PL_expect = XTERM; /* e.g. print $fh length() */
3883 PL_expect = XTERM; /* e.g. print $fh subr() */
3886 else if (isDIGIT(*s))
3887 PL_expect = XTERM; /* e.g. print $fh 3 */
3888 else if (*s == '.' && isDIGIT(s[1]))
3889 PL_expect = XTERM; /* e.g. print $fh .3 */
3890 else if ((*s == '?' || *s == '-' || *s == '+')
3891 && !isSPACE(s[1]) && s[1] != '=')
3892 PL_expect = XTERM; /* e.g. print $fh -1 */
3893 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3895 PL_expect = XTERM; /* e.g. print $fh /.../
3896 XXX except DORDOR operator
3898 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3900 PL_expect = XTERM; /* print $fh <<"EOF" */
3903 PL_pending_ident = '$';
3907 if (PL_expect == XOPERATOR)
3909 PL_tokenbuf[0] = '@';
3910 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3911 if (!PL_tokenbuf[1]) {
3914 if (PL_lex_state == LEX_NORMAL)
3916 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3918 PL_tokenbuf[0] = '%';
3920 /* Warn about @ where they meant $. */
3921 if (*s == '[' || *s == '{') {
3922 if (ckWARN(WARN_SYNTAX)) {
3923 const char *t = s + 1;
3924 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3926 if (*t == '}' || *t == ']') {
3928 PL_bufptr = skipspace(PL_bufptr);
3929 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3930 "Scalar value %.*s better written as $%.*s",
3931 (int)(t-PL_bufptr), PL_bufptr,
3932 (int)(t-PL_bufptr-1), PL_bufptr+1);
3937 PL_pending_ident = '@';
3940 case '/': /* may be division, defined-or, or pattern */
3941 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3945 case '?': /* may either be conditional or pattern */
3946 if(PL_expect == XOPERATOR) {
3954 /* A // operator. */
3964 /* Disable warning on "study /blah/" */
3965 if (PL_oldoldbufptr == PL_last_uni
3966 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3967 || memNE(PL_last_uni, "study", 5)
3968 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3971 s = scan_pat(s,OP_MATCH);
3972 TERM(sublex_start());
3976 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3977 #ifdef PERL_STRICT_CR
3980 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3982 && (s == PL_linestart || s[-1] == '\n') )
3984 PL_lex_formbrack = 0;
3988 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3994 yylval.ival = OPf_SPECIAL;
4000 if (PL_expect != XOPERATOR)
4005 case '0': case '1': case '2': case '3': case '4':
4006 case '5': case '6': case '7': case '8': case '9':
4007 s = scan_num(s, &yylval);
4008 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4009 if (PL_expect == XOPERATOR)
4014 s = scan_str(s,FALSE,FALSE);
4015 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4016 if (PL_expect == XOPERATOR) {
4017 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4019 deprecate_old(commaless_variable_list);
4020 return REPORT(','); /* grandfather non-comma-format format */
4026 missingterm((char*)0);
4027 yylval.ival = OP_CONST;
4028 TERM(sublex_start());
4031 s = scan_str(s,FALSE,FALSE);
4032 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4033 if (PL_expect == XOPERATOR) {
4034 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4036 deprecate_old(commaless_variable_list);
4037 return REPORT(','); /* grandfather non-comma-format format */
4043 missingterm((char*)0);
4044 yylval.ival = OP_CONST;
4045 /* FIXME. I think that this can be const if char *d is replaced by
4046 more localised variables. */
4047 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4048 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4049 yylval.ival = OP_STRINGIFY;
4053 TERM(sublex_start());
4056 s = scan_str(s,FALSE,FALSE);
4057 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4058 if (PL_expect == XOPERATOR)
4059 no_op("Backticks",s);
4061 missingterm((char*)0);
4062 yylval.ival = OP_BACKTICK;
4064 TERM(sublex_start());
4068 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4069 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4071 if (PL_expect == XOPERATOR)
4072 no_op("Backslash",s);
4076 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4077 char *start = s + 2;
4078 while (isDIGIT(*start) || *start == '_')
4080 if (*start == '.' && isDIGIT(start[1])) {
4081 s = scan_num(s, &yylval);
4084 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4085 else if (!isALPHA(*start) && (PL_expect == XTERM
4086 || PL_expect == XREF || PL_expect == XSTATE
4087 || PL_expect == XTERMORDORDOR)) {
4088 const char c = *start;
4091 gv = gv_fetchpv(s, 0, SVt_PVCV);
4094 s = scan_num(s, &yylval);
4101 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4137 I32 orig_keyword = 0;
4142 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4144 /* Some keywords can be followed by any delimiter, including ':' */
4145 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4146 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4147 (PL_tokenbuf[0] == 'q' &&
4148 strchr("qwxr", PL_tokenbuf[1])))));
4150 /* x::* is just a word, unless x is "CORE" */
4151 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4155 while (d < PL_bufend && isSPACE(*d))
4156 d++; /* no comments skipped here, or s### is misparsed */
4158 /* Is this a label? */
4159 if (!tmp && PL_expect == XSTATE
4160 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4162 yylval.pval = savepv(PL_tokenbuf);
4167 /* Check for keywords */
4168 tmp = keyword(PL_tokenbuf, len);
4170 /* Is this a word before a => operator? */
4171 if (*d == '=' && d[1] == '>') {
4174 = (OP*)newSVOP(OP_CONST, 0,
4175 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4176 yylval.opval->op_private = OPpCONST_BARE;
4180 if (tmp < 0) { /* second-class keyword? */
4181 GV *ogv = NULL; /* override (winner) */
4182 GV *hgv = NULL; /* hidden (loser) */
4183 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4185 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4188 if (GvIMPORTED_CV(gv))
4190 else if (! CvMETHOD(cv))
4194 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4195 (gv = *gvp) != (GV*)&PL_sv_undef &&
4196 GvCVu(gv) && GvIMPORTED_CV(gv))
4203 tmp = 0; /* overridden by import or by GLOBAL */
4206 && -tmp==KEY_lock /* XXX generalizable kludge */
4208 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4210 tmp = 0; /* any sub overrides "weak" keyword */
4212 else { /* no override */
4214 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4215 Perl_warner(aTHX_ packWARN(WARN_MISC),
4216 "dump() better written as CORE::dump()");
4220 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4221 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4222 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4223 "Ambiguous call resolved as CORE::%s(), %s",
4224 GvENAME(hgv), "qualify as such or use &");
4231 default: /* not a keyword */
4232 /* Trade off - by using this evil construction we can pull the
4233 variable gv into the block labelled keylookup. If not, then
4234 we have to give it function scope so that the goto from the
4235 earlier ':' case doesn't bypass the initialisation. */
4237 just_a_word_zero_gv:
4244 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4247 /* Get the rest if it looks like a package qualifier */
4249 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4251 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4254 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4255 *s == '\'' ? "'" : "::");
4260 if (PL_expect == XOPERATOR) {
4261 if (PL_bufptr == PL_linestart) {
4262 CopLINE_dec(PL_curcop);
4263 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4264 CopLINE_inc(PL_curcop);
4267 no_op("Bareword",s);
4270 /* Look for a subroutine with this name in current package,
4271 unless name is "Foo::", in which case Foo is a bearword
4272 (and a package name). */
4275 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4277 if (ckWARN(WARN_BAREWORD)
4278 && ! gv_fetchpv(PL_tokenbuf, 0, SVt_PVHV))
4279 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4280 "Bareword \"%s\" refers to nonexistent package",
4283 PL_tokenbuf[len] = '\0';
4290 /* Mustn't actually add anything to a symbol table.
4291 But also don't want to "initialise" any placeholder
4292 constants that might already be there into full
4293 blown PVGVs with attached PVCV. */
4294 gv = gv_fetchpv(PL_tokenbuf, GV_NOADD_NOINIT,
4299 /* if we saw a global override before, get the right name */
4302 sv = newSVpvs("CORE::GLOBAL::");
4303 sv_catpv(sv,PL_tokenbuf);
4306 /* If len is 0, newSVpv does strlen(), which is correct.
4307 If len is non-zero, then it will be the true length,
4308 and so the scalar will be created correctly. */
4309 sv = newSVpv(PL_tokenbuf,len);
4312 /* Presume this is going to be a bareword of some sort. */
4315 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4316 yylval.opval->op_private = OPpCONST_BARE;
4317 /* UTF-8 package name? */
4318 if (UTF && !IN_BYTES &&
4319 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4322 /* And if "Foo::", then that's what it certainly is. */
4327 /* Do the explicit type check so that we don't need to force
4328 the initialisation of the symbol table to have a real GV.
4329 Beware - gv may not really be a PVGV, cv may not really be
4330 a PVCV, (because of the space optimisations that gv_init
4331 understands) But they're true if for this symbol there is
4332 respectively a typeglob and a subroutine.
4334 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4335 /* Real typeglob, so get the real subroutine: */
4337 /* A proxy for a subroutine in this package? */
4338 : SvOK(gv) ? (CV *) gv : NULL)
4341 /* See if it's the indirect object for a list operator. */
4343 if (PL_oldoldbufptr &&
4344 PL_oldoldbufptr < PL_bufptr &&
4345 (PL_oldoldbufptr == PL_last_lop
4346 || PL_oldoldbufptr == PL_last_uni) &&
4347 /* NO SKIPSPACE BEFORE HERE! */
4348 (PL_expect == XREF ||
4349 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4351 bool immediate_paren = *s == '(';
4353 /* (Now we can afford to cross potential line boundary.) */
4356 /* Two barewords in a row may indicate method call. */
4358 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4359 (tmp = intuit_method(s, gv, cv)))
4362 /* If not a declared subroutine, it's an indirect object. */
4363 /* (But it's an indir obj regardless for sort.) */
4364 /* Also, if "_" follows a filetest operator, it's a bareword */
4367 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4369 (PL_last_lop_op != OP_MAPSTART &&
4370 PL_last_lop_op != OP_GREPSTART))))
4371 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4372 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4375 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4380 PL_expect = XOPERATOR;
4383 /* Is this a word before a => operator? */
4384 if (*s == '=' && s[1] == '>' && !pkgname) {
4386 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4387 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4388 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4392 /* If followed by a paren, it's certainly a subroutine. */
4396 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4397 if (*d == ')' && (sv = gv_const_sv(gv))) {
4402 PL_nextval[PL_nexttoke].opval = yylval.opval;
4403 PL_expect = XOPERATOR;
4409 /* If followed by var or block, call it a method (unless sub) */
4411 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4412 PL_last_lop = PL_oldbufptr;
4413 PL_last_lop_op = OP_METHOD;
4417 /* If followed by a bareword, see if it looks like indir obj. */
4420 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4421 && (tmp = intuit_method(s, gv, cv)))
4424 /* Not a method, so call it a subroutine (if defined) */
4427 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4428 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4429 "Ambiguous use of -%s resolved as -&%s()",
4430 PL_tokenbuf, PL_tokenbuf);
4431 /* Check for a constant sub */
4432 if ((sv = gv_const_sv(gv))) {
4434 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4435 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4436 yylval.opval->op_private = 0;
4440 /* Resolve to GV now. */
4441 if (SvTYPE(gv) != SVt_PVGV) {
4442 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4443 assert (SvTYPE(gv) == SVt_PVGV);
4444 /* cv must have been some sort of placeholder, so
4445 now needs replacing with a real code reference. */
4449 op_free(yylval.opval);
4450 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4451 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4452 PL_last_lop = PL_oldbufptr;
4453 PL_last_lop_op = OP_ENTERSUB;
4454 /* Is there a prototype? */
4457 const char *proto = SvPV_const((SV*)cv, len);
4460 if (*proto == '$' && proto[1] == '\0')
4462 while (*proto == ';')
4464 if (*proto == '&' && *s == '{') {
4465 sv_setpv(PL_subname, PL_curstash ?
4466 "__ANON__" : "__ANON__::__ANON__");
4470 PL_nextval[PL_nexttoke].opval = yylval.opval;
4476 /* Call it a bare word */
4478 if (PL_hints & HINT_STRICT_SUBS)
4479 yylval.opval->op_private |= OPpCONST_STRICT;
4482 if (lastchar != '-') {
4483 if (ckWARN(WARN_RESERVED)) {
4484 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4485 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4486 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4493 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4494 && ckWARN_d(WARN_AMBIGUOUS)) {
4495 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4496 "Operator or semicolon missing before %c%s",
4497 lastchar, PL_tokenbuf);
4498 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4499 "Ambiguous use of %c resolved as operator %c",
4500 lastchar, lastchar);
4506 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4507 newSVpv(CopFILE(PL_curcop),0));
4511 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4512 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4515 case KEY___PACKAGE__:
4516 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4518 ? newSVhek(HvNAME_HEK(PL_curstash))
4525 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4526 const char *pname = "main";
4527 if (PL_tokenbuf[2] == 'D')
4528 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4529 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4533 GvIOp(gv) = newIO();
4534 IoIFP(GvIOp(gv)) = PL_rsfp;
4535 #if defined(HAS_FCNTL) && defined(F_SETFD)
4537 const int fd = PerlIO_fileno(PL_rsfp);
4538 fcntl(fd,F_SETFD,fd >= 3);
4541 /* Mark this internal pseudo-handle as clean */
4542 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4544 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4545 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4546 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4548 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4549 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4550 /* if the script was opened in binmode, we need to revert
4551 * it to text mode for compatibility; but only iff it has CRs
4552 * XXX this is a questionable hack at best. */
4553 if (PL_bufend-PL_bufptr > 2
4554 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4557 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4558 loc = PerlIO_tell(PL_rsfp);
4559 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4562 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4564 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4565 #endif /* NETWARE */
4566 #ifdef PERLIO_IS_STDIO /* really? */
4567 # if defined(__BORLANDC__)
4568 /* XXX see note in do_binmode() */
4569 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4573 PerlIO_seek(PL_rsfp, loc, 0);
4577 #ifdef PERLIO_LAYERS
4580 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4581 else if (PL_encoding) {
4588 XPUSHs(PL_encoding);
4590 call_method("name", G_SCALAR);
4594 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4595 Perl_form(aTHX_ ":encoding(%"SVf")",
4613 if (PL_expect == XSTATE) {
4620 if (*s == ':' && s[1] == ':') {
4623 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4624 if (!(tmp = keyword(PL_tokenbuf, len)))
4625 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4628 else if (tmp == KEY_require || tmp == KEY_do)
4629 /* that's a way to remember we saw "CORE::" */
4642 LOP(OP_ACCEPT,XTERM);
4648 LOP(OP_ATAN2,XTERM);
4654 LOP(OP_BINMODE,XTERM);
4657 LOP(OP_BLESS,XTERM);
4666 /* When 'use switch' is in effect, continue has a dual
4667 life as a control operator. */
4669 if (!FEATURE_IS_ENABLED("switch"))
4672 /* We have to disambiguate the two senses of
4673 "continue". If the next token is a '{' then
4674 treat it as the start of a continue block;
4675 otherwise treat it as a control operator.
4686 (void)gv_fetchpv("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4703 if (!PL_cryptseen) {
4704 PL_cryptseen = TRUE;
4708 LOP(OP_CRYPT,XTERM);
4711 LOP(OP_CHMOD,XTERM);
4714 LOP(OP_CHOWN,XTERM);
4717 LOP(OP_CONNECT,XTERM);
4736 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4737 if (orig_keyword == KEY_do) {
4746 PL_hints |= HINT_BLOCK_SCOPE;
4756 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4757 LOP(OP_DBMOPEN,XTERM);
4763 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4770 yylval.ival = CopLINE(PL_curcop);
4784 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4785 UNIBRACK(OP_ENTEREVAL);
4803 case KEY_endhostent:
4809 case KEY_endservent:
4812 case KEY_endprotoent:
4823 yylval.ival = CopLINE(PL_curcop);
4825 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4827 if ((PL_bufend - p) >= 3 &&
4828 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4830 else if ((PL_bufend - p) >= 4 &&
4831 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4834 if (isIDFIRST_lazy_if(p,UTF)) {
4835 p = scan_ident(p, PL_bufend,
4836 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4840 Perl_croak(aTHX_ "Missing $ on loop variable");
4845 LOP(OP_FORMLINE,XTERM);
4851 LOP(OP_FCNTL,XTERM);
4857 LOP(OP_FLOCK,XTERM);
4866 LOP(OP_GREPSTART, XREF);
4869 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4884 case KEY_getpriority:
4885 LOP(OP_GETPRIORITY,XTERM);
4887 case KEY_getprotobyname:
4890 case KEY_getprotobynumber:
4891 LOP(OP_GPBYNUMBER,XTERM);
4893 case KEY_getprotoent:
4905 case KEY_getpeername:
4906 UNI(OP_GETPEERNAME);
4908 case KEY_gethostbyname:
4911 case KEY_gethostbyaddr:
4912 LOP(OP_GHBYADDR,XTERM);
4914 case KEY_gethostent:
4917 case KEY_getnetbyname:
4920 case KEY_getnetbyaddr:
4921 LOP(OP_GNBYADDR,XTERM);
4926 case KEY_getservbyname:
4927 LOP(OP_GSBYNAME,XTERM);
4929 case KEY_getservbyport:
4930 LOP(OP_GSBYPORT,XTERM);
4932 case KEY_getservent:
4935 case KEY_getsockname:
4936 UNI(OP_GETSOCKNAME);
4938 case KEY_getsockopt:
4939 LOP(OP_GSOCKOPT,XTERM);
4954 yylval.ival = CopLINE(PL_curcop);
4965 yylval.ival = CopLINE(PL_curcop);
4969 LOP(OP_INDEX,XTERM);
4975 LOP(OP_IOCTL,XTERM);
4987 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5019 LOP(OP_LISTEN,XTERM);
5028 s = scan_pat(s,OP_MATCH);
5029 TERM(sublex_start());
5032 LOP(OP_MAPSTART, XREF);
5035 LOP(OP_MKDIR,XTERM);
5038 LOP(OP_MSGCTL,XTERM);
5041 LOP(OP_MSGGET,XTERM);
5044 LOP(OP_MSGRCV,XTERM);
5047 LOP(OP_MSGSND,XTERM);
5053 if (isIDFIRST_lazy_if(s,UTF)) {
5054 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5055 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5057 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5058 if (!PL_in_my_stash) {
5061 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5069 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5076 s = tokenize_use(0, s);
5080 if (*s == '(' || (s = skipspace(s), *s == '('))
5087 if (isIDFIRST_lazy_if(s,UTF)) {
5089 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5090 for (t=d; *t && isSPACE(*t); t++) ;
5091 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5093 && !(t[0] == '=' && t[1] == '>')
5095 int len = (int)(d-s);
5096 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5097 "Precedence problem: open %.*s should be open(%.*s)",
5104 yylval.ival = OP_OR;
5114 LOP(OP_OPEN_DIR,XTERM);
5117 checkcomma(s,PL_tokenbuf,"filehandle");
5121 checkcomma(s,PL_tokenbuf,"filehandle");
5140 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5144 LOP(OP_PIPE_OP,XTERM);
5147 s = scan_str(s,FALSE,FALSE);
5149 missingterm((char*)0);
5150 yylval.ival = OP_CONST;
5151 TERM(sublex_start());
5157 s = scan_str(s,FALSE,FALSE);
5159 missingterm((char*)0);
5160 PL_expect = XOPERATOR;
5162 if (SvCUR(PL_lex_stuff)) {
5165 d = SvPV_force(PL_lex_stuff, len);
5168 for (; isSPACE(*d) && len; --len, ++d) ;
5171 if (!warned && ckWARN(WARN_QW)) {
5172 for (; !isSPACE(*d) && len; --len, ++d) {
5174 Perl_warner(aTHX_ packWARN(WARN_QW),
5175 "Possible attempt to separate words with commas");
5178 else if (*d == '#') {
5179 Perl_warner(aTHX_ packWARN(WARN_QW),
5180 "Possible attempt to put comments in qw() list");
5186 for (; !isSPACE(*d) && len; --len, ++d) ;
5188 sv = newSVpvn(b, d-b);
5189 if (DO_UTF8(PL_lex_stuff))
5191 words = append_elem(OP_LIST, words,
5192 newSVOP(OP_CONST, 0, tokeq(sv)));
5196 PL_nextval[PL_nexttoke].opval = words;
5201 SvREFCNT_dec(PL_lex_stuff);
5202 PL_lex_stuff = Nullsv;
5208 s = scan_str(s,FALSE,FALSE);
5210 missingterm((char*)0);
5211 yylval.ival = OP_STRINGIFY;
5212 if (SvIVX(PL_lex_stuff) == '\'')
5213 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5214 TERM(sublex_start());
5217 s = scan_pat(s,OP_QR);
5218 TERM(sublex_start());
5221 s = scan_str(s,FALSE,FALSE);
5223 missingterm((char*)0);
5224 yylval.ival = OP_BACKTICK;
5226 TERM(sublex_start());
5234 s = force_version(s, FALSE);
5236 else if (*s != 'v' || !isDIGIT(s[1])
5237 || (s = force_version(s, TRUE), *s == 'v'))
5239 *PL_tokenbuf = '\0';
5240 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5241 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5242 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5244 yyerror("<> should be quotes");
5246 if (orig_keyword == KEY_require) {
5254 PL_last_uni = PL_oldbufptr;
5255 PL_last_lop_op = OP_REQUIRE;
5257 return REPORT( (int)REQUIRE );
5263 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5267 LOP(OP_RENAME,XTERM);
5276 LOP(OP_RINDEX,XTERM);
5286 UNIDOR(OP_READLINE);
5299 LOP(OP_REVERSE,XTERM);
5302 UNIDOR(OP_READLINK);
5310 TERM(sublex_start());
5312 TOKEN(1); /* force error */
5315 checkcomma(s,PL_tokenbuf,"filehandle");
5325 LOP(OP_SELECT,XTERM);
5331 LOP(OP_SEMCTL,XTERM);
5334 LOP(OP_SEMGET,XTERM);
5337 LOP(OP_SEMOP,XTERM);
5343 LOP(OP_SETPGRP,XTERM);
5345 case KEY_setpriority:
5346 LOP(OP_SETPRIORITY,XTERM);
5348 case KEY_sethostent:
5354 case KEY_setservent:
5357 case KEY_setprotoent:
5367 LOP(OP_SEEKDIR,XTERM);
5369 case KEY_setsockopt:
5370 LOP(OP_SSOCKOPT,XTERM);
5376 LOP(OP_SHMCTL,XTERM);
5379 LOP(OP_SHMGET,XTERM);
5382 LOP(OP_SHMREAD,XTERM);
5385 LOP(OP_SHMWRITE,XTERM);
5388 LOP(OP_SHUTDOWN,XTERM);
5397 LOP(OP_SOCKET,XTERM);
5399 case KEY_socketpair:
5400 LOP(OP_SOCKPAIR,XTERM);
5403 checkcomma(s,PL_tokenbuf,"subroutine name");
5405 if (*s == ';' || *s == ')') /* probably a close */
5406 Perl_croak(aTHX_ "sort is now a reserved word");
5408 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5412 LOP(OP_SPLIT,XTERM);
5415 LOP(OP_SPRINTF,XTERM);
5418 LOP(OP_SPLICE,XTERM);
5433 LOP(OP_SUBSTR,XTERM);
5439 char tmpbuf[sizeof PL_tokenbuf];
5440 SSize_t tboffset = 0;
5441 expectation attrful;
5442 bool have_name, have_proto, bad_proto;
5443 const int key = tmp;
5447 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5448 (*s == ':' && s[1] == ':'))
5451 attrful = XATTRBLOCK;
5452 /* remember buffer pos'n for later force_word */
5453 tboffset = s - PL_oldbufptr;
5454 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5455 if (strchr(tmpbuf, ':'))
5456 sv_setpv(PL_subname, tmpbuf);
5458 sv_setsv(PL_subname,PL_curstname);
5459 sv_catpvs(PL_subname,"::");
5460 sv_catpvn(PL_subname,tmpbuf,len);
5467 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5468 PL_expect = XTERMBLOCK;
5469 attrful = XATTRTERM;
5470 sv_setpvn(PL_subname,"?",1);
5474 if (key == KEY_format) {
5476 PL_lex_formbrack = PL_lex_brackets + 1;
5478 (void) force_word(PL_oldbufptr + tboffset, WORD,
5483 /* Look for a prototype */
5487 s = scan_str(s,FALSE,FALSE);
5489 Perl_croak(aTHX_ "Prototype not terminated");
5490 /* strip spaces and check for bad characters */
5491 d = SvPVX(PL_lex_stuff);
5494 for (p = d; *p; ++p) {
5497 if (!strchr("$@%*;[]&\\", *p))
5502 if (bad_proto && ckWARN(WARN_SYNTAX))
5503 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5504 "Illegal character in prototype for %"SVf" : %s",
5506 SvCUR_set(PL_lex_stuff, tmp);
5514 if (*s == ':' && s[1] != ':')
5515 PL_expect = attrful;
5516 else if (*s != '{' && key == KEY_sub) {
5518 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5520 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5524 PL_nextval[PL_nexttoke].opval =
5525 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5526 PL_lex_stuff = Nullsv;
5530 sv_setpv(PL_subname,
5531 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5534 (void) force_word(PL_oldbufptr + tboffset, WORD,
5543 LOP(OP_SYSTEM,XREF);
5546 LOP(OP_SYMLINK,XTERM);
5549 LOP(OP_SYSCALL,XTERM);
5552 LOP(OP_SYSOPEN,XTERM);
5555 LOP(OP_SYSSEEK,XTERM);
5558 LOP(OP_SYSREAD,XTERM);
5561 LOP(OP_SYSWRITE,XTERM);
5565 TERM(sublex_start());
5586 LOP(OP_TRUNCATE,XTERM);
5598 yylval.ival = CopLINE(PL_curcop);
5602 yylval.ival = CopLINE(PL_curcop);
5606 LOP(OP_UNLINK,XTERM);
5612 LOP(OP_UNPACK,XTERM);
5615 LOP(OP_UTIME,XTERM);
5621 LOP(OP_UNSHIFT,XTERM);
5624 s = tokenize_use(1, s);
5634 yylval.ival = CopLINE(PL_curcop);
5638 yylval.ival = CopLINE(PL_curcop);
5642 PL_hints |= HINT_BLOCK_SCOPE;
5649 LOP(OP_WAITPID,XTERM);
5658 ctl_l[0] = toCTRL('L');
5660 gv_fetchpv(ctl_l, GV_ADD, SVt_PV);
5663 gv_fetchpv("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5668 if (PL_expect == XOPERATOR)
5674 yylval.ival = OP_XOR;
5679 TERM(sublex_start());
5684 #pragma segment Main
5688 S_pending_ident(pTHX)
5692 register I32 tmp = 0;
5693 /* pit holds the identifier we read and pending_ident is reset */
5694 char pit = PL_pending_ident;
5695 PL_pending_ident = 0;
5697 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5698 "### Pending identifier '%s'\n", PL_tokenbuf); });
5700 /* if we're in a my(), we can't allow dynamics here.
5701 $foo'bar has already been turned into $foo::bar, so
5702 just check for colons.
5704 if it's a legal name, the OP is a PADANY.
5707 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5708 if (strchr(PL_tokenbuf,':'))
5709 yyerror(Perl_form(aTHX_ "No package name allowed for "
5710 "variable %s in \"our\"",
5712 tmp = allocmy(PL_tokenbuf);
5715 if (strchr(PL_tokenbuf,':'))
5716 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5718 yylval.opval = newOP(OP_PADANY, 0);
5719 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5725 build the ops for accesses to a my() variable.
5727 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5728 then used in a comparison. This catches most, but not
5729 all cases. For instance, it catches
5730 sort { my($a); $a <=> $b }
5732 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5733 (although why you'd do that is anyone's guess).
5736 if (!strchr(PL_tokenbuf,':')) {
5738 tmp = pad_findmy(PL_tokenbuf);
5739 if (tmp != NOT_IN_PAD) {
5740 /* might be an "our" variable" */
5741 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5742 /* build ops for a bareword */
5743 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5744 HEK * const stashname = HvNAME_HEK(stash);
5745 SV * const sym = newSVhek(stashname);
5746 sv_catpvs(sym, "::");
5747 sv_catpv(sym, PL_tokenbuf+1);
5748 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5749 yylval.opval->op_private = OPpCONST_ENTERED;
5752 ? (GV_ADDMULTI | GV_ADDINEVAL)
5755 ((PL_tokenbuf[0] == '$') ? SVt_PV
5756 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5761 /* if it's a sort block and they're naming $a or $b */
5762 if (PL_last_lop_op == OP_SORT &&
5763 PL_tokenbuf[0] == '$' &&
5764 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5767 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5768 d < PL_bufend && *d != '\n';
5771 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5772 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5778 yylval.opval = newOP(OP_PADANY, 0);
5779 yylval.opval->op_targ = tmp;
5785 Whine if they've said @foo in a doublequoted string,
5786 and @foo isn't a variable we can find in the symbol
5789 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5790 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5791 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5792 && ckWARN(WARN_AMBIGUOUS))
5794 /* Downgraded from fatal to warning 20000522 mjd */
5795 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5796 "Possible unintended interpolation of %s in string",
5801 /* build ops for a bareword */
5802 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5803 yylval.opval->op_private = OPpCONST_ENTERED;
5807 ? (GV_ADDMULTI | GV_ADDINEVAL)
5808 /* If the identifier refers to a stash, don't autovivify it.
5809 * Change 24660 had the side effect of causing symbol table
5810 * hashes to always be defined, even if they were freshly
5811 * created and the only reference in the entire program was
5812 * the single statement with the defined %foo::bar:: test.
5813 * It appears that all code in the wild doing this actually
5814 * wants to know whether sub-packages have been loaded, so
5815 * by avoiding auto-vivifying symbol tables, we ensure that
5816 * defined %foo::bar:: continues to be false, and the existing
5817 * tests still give the expected answers, even though what
5818 * they're actually testing has now changed subtly.
5820 : !(*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'),
5821 ((PL_tokenbuf[0] == '$') ? SVt_PV
5822 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5828 * The following code was generated by perl_keyword.pl.
5832 Perl_keyword (pTHX_ const char *name, I32 len)
5837 case 1: /* 5 tokens of length 1 */
5869 case 2: /* 18 tokens of length 2 */
6015 case 3: /* 29 tokens of length 3 */
6019 if (name[1] == 'N' &&
6082 if (name[1] == 'i' &&
6104 return (FEATURE_IS_ENABLED("err") ? -KEY_err : 0);
6122 if (name[1] == 'o' &&
6131 if (name[1] == 'e' &&
6140 if (name[1] == 'n' &&
6149 if (name[1] == 'o' &&
6158 if (name[1] == 'a' &&
6167 if (name[1] == 'o' &&
6229 if (name[1] == 'e' &&
6243 return (FEATURE_IS_ENABLED("say") ? -KEY_say : 0);
6269 if (name[1] == 'i' &&
6278 if (name[1] == 's' &&
6287 if (name[1] == 'e' &&
6296 if (name[1] == 'o' &&
6308 case 4: /* 41 tokens of length 4 */
6312 if (name[1] == 'O' &&
6322 if (name[1] == 'N' &&
6332 if (name[1] == 'i' &&
6342 if (name[1] == 'h' &&
6352 if (name[1] == 'u' &&
6365 if (name[2] == 'c' &&
6374 if (name[2] == 's' &&
6383 if (name[2] == 'a' &&
6419 if (name[1] == 'o' &&
6432 if (name[2] == 't' &&
6441 if (name[2] == 'o' &&
6450 if (name[2] == 't' &&
6459 if (name[2] == 'e' &&
6472 if (name[1] == 'o' &&
6485 if (name[2] == 'y' &&
6494 if (name[2] == 'l' &&
6510 if (name[2] == 's' &&
6519 if (name[2] == 'n' &&
6528 if (name[2] == 'c' &&
6541 if (name[1] == 'e' &&
6551 if (name[1] == 'p' &&
6564 if (name[2] == 'c' &&
6573 if (name[2] == 'p' &&
6582 if (name[2] == 's' &&
6598 if (name[2] == 'n' &&
6668 if (name[2] == 'r' &&
6677 if (name[2] == 'r' &&
6686 if (name[2] == 'a' &&
6702 if (name[2] == 'l' &&
6764 if (name[2] == 'e' &&
6767 return (FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
6780 case 5: /* 38 tokens of length 5 */
6784 if (name[1] == 'E' &&
6795 if (name[1] == 'H' &&
6809 if (name[2] == 'a' &&
6819 if (name[2] == 'a' &&
6836 if (name[2] == 'e' &&
6846 if (name[2] == 'e' &&
6850 return (FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
6866 if (name[3] == 'i' &&
6875 if (name[3] == 'o' &&
6911 if (name[2] == 'o' &&
6921 if (name[2] == 'y' &&
6935 if (name[1] == 'l' &&
6949 if (name[2] == 'n' &&
6959 if (name[2] == 'o' &&
6973 if (name[1] == 'i' &&
6978 return (FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
6987 if (name[2] == 'd' &&
6997 if (name[2] == 'c' &&
7014 if (name[2] == 'c' &&
7024 if (name[2] == 't' &&
7038 if (name[1] == 'k' &&
7049 if (name[1] == 'r' &&
7063 if (name[2] == 's' &&
7073 if (name[2] == 'd' &&
7090 if (name[2] == 'm' &&
7100 if (name[2] == 'i' &&
7110 if (name[2] == 'e' &&
7120 if (name[2] == 'l' &&
7130 if (name[2] == 'a' &&
7140 if (name[2] == 'u' &&
7154 if (name[1] == 'i' &&
7168 if (name[2] == 'a' &&
7181 if (name[3] == 'e' &&
7216 if (name[2] == 'i' &&
7233 if (name[2] == 'i' &&
7243 if (name[2] == 'i' &&
7260 case 6: /* 33 tokens of length 6 */
7264 if (name[1] == 'c' &&
7279 if (name[2] == 'l' &&
7290 if (name[2] == 'r' &&
7305 if (name[1] == 'e' &&
7320 if (name[2] == 's' &&
7325 if(ckWARN_d(WARN_SYNTAX))
7326 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7332 if (name[2] == 'i' &&
7350 if (name[2] == 'l' &&
7361 if (name[2] == 'r' &&
7376 if (name[1] == 'm' &&
7391 if (name[2] == 'n' &&
7402 if (name[2] == 's' &&
7417 if (name[1] == 's' &&
7423 if (name[4] == 't' &&
7432 if (name[4] == 'e' &&
7441 if (name[4] == 'c' &&
7450 if (name[4] == 'n' &&
7466 if (name[1] == 'r' &&
7484 if (name[3] == 'a' &&
7494 if (name[3] == 'u' &&
7508 if (name[2] == 'n' &&
7526 if (name[2] == 'a' &&
7540 if (name[3] == 'e' &&
7553 if (name[4] == 't' &&
7562 if (name[4] == 'e' &&
7584 if (name[4] == 't' &&
7593 if (name[4] == 'e' &&
7609 if (name[2] == 'c' &&
7620 if (name[2] == 'l' &&
7631 if (name[2] == 'b' &&
7642 if (name[2] == 's' &&
7665 if (name[4] == 's' &&
7674 if (name[4] == 'n' &&
7687 if (name[3] == 'a' &&
7704 if (name[1] == 'a' &&
7719 case 7: /* 29 tokens of length 7 */
7723 if (name[1] == 'E' &&
7736 if (name[1] == '_' &&
7749 if (name[1] == 'i' &&
7756 return -KEY_binmode;
7762 if (name[1] == 'o' &&
7769 return -KEY_connect;
7778 if (name[2] == 'm' &&
7784 return -KEY_dbmopen;
7795 if (name[4] == 'u' &&
7799 return (FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
7805 if (name[4] == 'n' &&
7826 if (name[1] == 'o' &&
7839 if (name[1] == 'e' &&
7846 if (name[5] == 'r' &&
7849 return -KEY_getpgrp;
7855 if (name[5] == 'i' &&
7858 return -KEY_getppid;
7871 if (name[1] == 'c' &&
7878 return -KEY_lcfirst;
7884 if (name[1] == 'p' &&
7891 return -KEY_opendir;
7897 if (name[1] == 'a' &&
7915 if (name[3] == 'd' &&
7920 return -KEY_readdir;
7926 if (name[3] == 'u' &&
7937 if (name[3] == 'e' &&
7942 return -KEY_reverse;
7961 if (name[3] == 'k' &&
7966 return -KEY_seekdir;
7972 if (name[3] == 'p' &&
7977 return -KEY_setpgrp;
7987 if (name[2] == 'm' &&
7993 return -KEY_shmread;
7999 if (name[2] == 'r' &&
8005 return -KEY_sprintf;
8014 if (name[3] == 'l' &&
8019 return -KEY_symlink;
8028 if (name[4] == 'a' &&
8032 return -KEY_syscall;
8038 if (name[4] == 'p' &&
8042 return -KEY_sysopen;
8048 if (name[4] == 'e' &&
8052 return -KEY_sysread;
8058 if (name[4] == 'e' &&
8062 return -KEY_sysseek;
8080 if (name[1] == 'e' &&
8087 return -KEY_telldir;
8096 if (name[2] == 'f' &&
8102 return -KEY_ucfirst;
8108 if (name[2] == 's' &&
8114 return -KEY_unshift;
8124 if (name[1] == 'a' &&
8131 return -KEY_waitpid;
8140 case 8: /* 26 tokens of length 8 */
8144 if (name[1] == 'U' &&
8152 return KEY_AUTOLOAD;
8163 if (name[3] == 'A' &&
8169 return KEY___DATA__;
8175 if (name[3] == 'I' &&
8181 return -KEY___FILE__;
8187 if (name[3] == 'I' &&
8193 return -KEY___LINE__;
8209 if (name[2] == 'o' &&
8216 return -KEY_closedir;
8222 if (name[2] == 'n' &&
8229 return -KEY_continue;
8239 if (name[1] == 'b' &&
8247 return -KEY_dbmclose;
8253 if (name[1] == 'n' &&
8259 if (name[4] == 'r' &&
8264 return -KEY_endgrent;
8270 if (name[4] == 'w' &&
8275 return -KEY_endpwent;
8288 if (name[1] == 'o' &&
8296 return -KEY_formline;
8302 if (name[1] == 'e' &&
8313 if (name[6] == 'n' &&
8316 return -KEY_getgrent;
8322 if (name[6] == 'i' &&
8325 return -KEY_getgrgid;
8331 if (name[6] == 'a' &&
8334 return -KEY_getgrnam;
8347 if (name[4] == 'o' &&
8352 return -KEY_getlogin;
8363 if (name[6] == 'n' &&
8366 return -KEY_getpwent;
8372 if (name[6] == 'a' &&
8375 return -KEY_getpwnam;
8381 if (name[6] == 'i' &&
8384 return -KEY_getpwuid;
8404 if (name[1] == 'e' &&
8411 if (name[5] == 'i' &&
8418 return -KEY_readline;
8423 return -KEY_readlink;
8434 if (name[5] == 'i' &&
8438 return -KEY_readpipe;
8459 if (name[4] == 'r' &&
8464 return -KEY_setgrent;
8470 if (name[4] == 'w' &&
8475 return -KEY_setpwent;
8491 if (name[3] == 'w' &&
8497 return -KEY_shmwrite;
8503 if (name[3] == 't' &&
8509 return -KEY_shutdown;
8519 if (name[2] == 's' &&
8526 return -KEY_syswrite;
8536 if (name[1] == 'r' &&
8544 return -KEY_truncate;
8553 case 9: /* 8 tokens of length 9 */
8557 if (name[1] == 'n' &&
8566 return -KEY_endnetent;
8572 if (name[1] == 'e' &&
8581 return -KEY_getnetent;
8587 if (name[1] == 'o' &&
8596 return -KEY_localtime;
8602 if (name[1] == 'r' &&
8611 return KEY_prototype;
8617 if (name[1] == 'u' &&
8626 return -KEY_quotemeta;
8632 if (name[1] == 'e' &&
8641 return -KEY_rewinddir;
8647 if (name[1] == 'e' &&
8656 return -KEY_setnetent;
8662 if (name[1] == 'a' &&
8671 return -KEY_wantarray;
8680 case 10: /* 9 tokens of length 10 */
8684 if (name[1] == 'n' &&
8690 if (name[4] == 'o' &&
8697 return -KEY_endhostent;
8703 if (name[4] == 'e' &&
8710 return -KEY_endservent;
8723 if (name[1] == 'e' &&
8729 if (name[4] == 'o' &&
8736 return -KEY_gethostent;
8745 if (name[5] == 'r' &&
8751 return -KEY_getservent;
8757 if (name[5] == 'c' &&
8763 return -KEY_getsockopt;
8788 if (name[4] == 'o' &&
8795 return -KEY_sethostent;
8804 if (name[5] == 'r' &&
8810 return -KEY_setservent;
8816 if (name[5] == 'c' &&
8822 return -KEY_setsockopt;
8839 if (name[2] == 'c' &&
8848 return -KEY_socketpair;
8861 case 11: /* 8 tokens of length 11 */
8865 if (name[1] == '_' &&
8876 return -KEY___PACKAGE__;
8882 if (name[1] == 'n' &&
8893 return -KEY_endprotoent;
8899 if (name[1] == 'e' &&
8908 if (name[5] == 'e' &&
8915 return -KEY_getpeername;
8924 if (name[6] == 'o' &&
8930 return -KEY_getpriority;
8936 if (name[6] == 't' &&
8942 return -KEY_getprotoent;
8956 if (name[4] == 'o' &&
8964 return -KEY_getsockname;
8977 if (name[1] == 'e' &&
8985 if (name[6] == 'o' &&
8991 return -KEY_setpriority;
8997 if (name[6] == 't' &&
9003 return -KEY_setprotoent;
9019 case 12: /* 2 tokens of length 12 */
9020 if (name[0] == 'g' &&
9032 if (name[9] == 'd' &&
9035 { /* getnetbyaddr */
9036 return -KEY_getnetbyaddr;
9042 if (name[9] == 'a' &&
9045 { /* getnetbyname */
9046 return -KEY_getnetbyname;
9058 case 13: /* 4 tokens of length 13 */
9059 if (name[0] == 'g' &&
9066 if (name[4] == 'o' &&
9075 if (name[10] == 'd' &&
9078 { /* gethostbyaddr */
9079 return -KEY_gethostbyaddr;
9085 if (name[10] == 'a' &&
9088 { /* gethostbyname */
9089 return -KEY_gethostbyname;
9102 if (name[4] == 'e' &&
9111 if (name[10] == 'a' &&
9114 { /* getservbyname */
9115 return -KEY_getservbyname;
9121 if (name[10] == 'o' &&
9124 { /* getservbyport */
9125 return -KEY_getservbyport;
9144 case 14: /* 1 tokens of length 14 */
9145 if (name[0] == 'g' &&
9159 { /* getprotobyname */
9160 return -KEY_getprotobyname;
9165 case 16: /* 1 tokens of length 16 */
9166 if (name[0] == 'g' &&
9182 { /* getprotobynumber */
9183 return -KEY_getprotobynumber;
9197 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9202 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9203 if (ckWARN(WARN_SYNTAX)) {
9205 for (w = s+2; *w && level; w++) {
9212 for (; *w && isSPACE(*w); w++) ;
9213 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9214 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9215 "%s (...) interpreted as function",name);
9218 while (s < PL_bufend && isSPACE(*s))
9222 while (s < PL_bufend && isSPACE(*s))
9224 if (isIDFIRST_lazy_if(s,UTF)) {
9226 while (isALNUM_lazy_if(s,UTF))
9228 while (s < PL_bufend && isSPACE(*s))
9232 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9233 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9237 Perl_croak(aTHX_ "No comma allowed after %s", what);
9242 /* Either returns sv, or mortalizes sv and returns a new SV*.
9243 Best used as sv=new_constant(..., sv, ...).
9244 If s, pv are NULL, calls subroutine with one argument,
9245 and type is used with error messages only. */
9248 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9252 HV * const table = GvHV(PL_hintgv); /* ^H */
9256 const char *why1 = "", *why2 = "", *why3 = "";
9258 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9261 why2 = strEQ(key,"charnames")
9262 ? "(possibly a missing \"use charnames ...\")"
9264 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9265 (type ? type: "undef"), why2);
9267 /* This is convoluted and evil ("goto considered harmful")
9268 * but I do not understand the intricacies of all the different
9269 * failure modes of %^H in here. The goal here is to make
9270 * the most probable error message user-friendly. --jhi */
9275 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9276 (type ? type: "undef"), why1, why2, why3);
9278 yyerror(SvPVX_const(msg));
9282 cvp = hv_fetch(table, key, strlen(key), FALSE);
9283 if (!cvp || !SvOK(*cvp)) {
9286 why3 = "} is not defined";
9289 sv_2mortal(sv); /* Parent created it permanently */
9292 pv = sv_2mortal(newSVpvn(s, len));
9294 typesv = sv_2mortal(newSVpv(type, 0));
9296 typesv = &PL_sv_undef;
9298 PUSHSTACKi(PERLSI_OVERLOAD);
9310 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9314 /* Check the eval first */
9315 if (!PL_in_eval && SvTRUE(ERRSV)) {
9316 sv_catpvs(ERRSV, "Propagated");
9317 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9319 res = SvREFCNT_inc(sv);
9323 (void)SvREFCNT_inc(res);
9332 why1 = "Call to &{$^H{";
9334 why3 = "}} did not return a defined value";
9342 /* Returns a NUL terminated string, with the length of the string written to
9346 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9349 register char *d = dest;
9350 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9353 Perl_croak(aTHX_ ident_too_long);
9354 if (isALNUM(*s)) /* UTF handled below */
9356 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9361 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9365 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9366 char *t = s + UTF8SKIP(s);
9367 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9369 if (d + (t - s) > e)
9370 Perl_croak(aTHX_ ident_too_long);
9371 Copy(s, d, t - s, char);
9384 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9389 char *bracket = Nullch;
9395 e = d + destlen - 3; /* two-character token, ending NUL */
9397 while (isDIGIT(*s)) {
9399 Perl_croak(aTHX_ ident_too_long);
9406 Perl_croak(aTHX_ ident_too_long);
9407 if (isALNUM(*s)) /* UTF handled below */
9409 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9414 else if (*s == ':' && s[1] == ':') {
9418 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9419 char *t = s + UTF8SKIP(s);
9420 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9422 if (d + (t - s) > e)
9423 Perl_croak(aTHX_ ident_too_long);
9424 Copy(s, d, t - s, char);
9435 if (PL_lex_state != LEX_NORMAL)
9436 PL_lex_state = LEX_INTERPENDMAYBE;
9439 if (*s == '$' && s[1] &&
9440 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9453 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9458 if (isSPACE(s[-1])) {
9460 const char ch = *s++;
9461 if (!SPACE_OR_TAB(ch)) {
9467 if (isIDFIRST_lazy_if(d,UTF)) {
9471 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9473 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9476 Copy(s, d, e - s, char);
9481 while ((isALNUM(*s) || *s == ':') && d < e)
9484 Perl_croak(aTHX_ ident_too_long);
9487 while (s < send && SPACE_OR_TAB(*s)) s++;
9488 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9489 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9490 const char *brack = *s == '[' ? "[...]" : "{...}";
9491 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9492 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9493 funny, dest, brack, funny, dest, brack);
9496 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9500 /* Handle extended ${^Foo} variables
9501 * 1999-02-27 mjd-perl-patch@plover.com */
9502 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9506 while (isALNUM(*s) && d < e) {
9510 Perl_croak(aTHX_ ident_too_long);
9515 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9516 PL_lex_state = LEX_INTERPEND;
9521 if (PL_lex_state == LEX_NORMAL) {
9522 if (ckWARN(WARN_AMBIGUOUS) &&
9523 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9525 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9526 "Ambiguous use of %c{%s} resolved to %c%s",
9527 funny, dest, funny, dest);
9532 s = bracket; /* let the parser handle it */
9536 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9537 PL_lex_state = LEX_INTERPEND;
9542 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9547 *pmfl |= PMf_GLOBAL;
9549 *pmfl |= PMf_CONTINUE;
9553 *pmfl |= PMf_MULTILINE;
9555 *pmfl |= PMf_SINGLELINE;
9557 *pmfl |= PMf_EXTENDED;
9561 S_scan_pat(pTHX_ char *start, I32 type)
9565 char *s = scan_str(start,FALSE,FALSE);
9568 char * const delimiter = skipspace(start);
9569 Perl_croak(aTHX_ *delimiter == '?'
9570 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9571 : "Search pattern not terminated" );
9574 pm = (PMOP*)newPMOP(type, 0);
9575 if (PL_multi_open == '?')
9576 pm->op_pmflags |= PMf_ONCE;
9578 while (*s && strchr("iomsx", *s))
9579 pmflag(&pm->op_pmflags,*s++);
9582 while (*s && strchr("iogcmsx", *s))
9583 pmflag(&pm->op_pmflags,*s++);
9585 /* issue a warning if /c is specified,but /g is not */
9586 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9587 && ckWARN(WARN_REGEXP))
9589 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9592 pm->op_pmpermflags = pm->op_pmflags;
9594 PL_lex_op = (OP*)pm;
9595 yylval.ival = OP_MATCH;
9600 S_scan_subst(pTHX_ char *start)
9608 yylval.ival = OP_NULL;
9610 s = scan_str(start,FALSE,FALSE);
9613 Perl_croak(aTHX_ "Substitution pattern not terminated");
9615 if (s[-1] == PL_multi_open)
9618 first_start = PL_multi_start;
9619 s = scan_str(s,FALSE,FALSE);
9622 SvREFCNT_dec(PL_lex_stuff);
9623 PL_lex_stuff = Nullsv;
9625 Perl_croak(aTHX_ "Substitution replacement not terminated");
9627 PL_multi_start = first_start; /* so whole substitution is taken together */
9629 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9635 else if (strchr("iogcmsx", *s))
9636 pmflag(&pm->op_pmflags,*s++);
9641 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9642 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9647 PL_sublex_info.super_bufptr = s;
9648 PL_sublex_info.super_bufend = PL_bufend;
9650 pm->op_pmflags |= PMf_EVAL;
9651 repl = newSVpvs("");
9653 sv_catpv(repl, es ? "eval " : "do ");
9654 sv_catpvs(repl, "{ ");
9655 sv_catsv(repl, PL_lex_repl);
9656 sv_catpvs(repl, " }");
9658 SvREFCNT_dec(PL_lex_repl);
9662 pm->op_pmpermflags = pm->op_pmflags;
9663 PL_lex_op = (OP*)pm;
9664 yylval.ival = OP_SUBST;
9669 S_scan_trans(pTHX_ char *start)
9679 yylval.ival = OP_NULL;
9681 s = scan_str(start,FALSE,FALSE);
9683 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9684 if (s[-1] == PL_multi_open)
9687 s = scan_str(s,FALSE,FALSE);
9690 SvREFCNT_dec(PL_lex_stuff);
9691 PL_lex_stuff = Nullsv;
9693 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9696 complement = del = squash = 0;
9700 complement = OPpTRANS_COMPLEMENT;
9703 del = OPpTRANS_DELETE;
9706 squash = OPpTRANS_SQUASH;
9715 Newx(tbl, complement&&!del?258:256, short);
9716 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9717 o->op_private &= ~OPpTRANS_ALL;
9718 o->op_private |= del|squash|complement|
9719 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9720 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9723 yylval.ival = OP_TRANS;
9728 S_scan_heredoc(pTHX_ register char *s)
9732 I32 op_type = OP_SCALAR;
9736 const char *found_newline;
9740 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9744 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9747 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9748 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9751 s = delimcpy(d, e, s, PL_bufend, term, &len);
9761 if (!isALNUM_lazy_if(s,UTF))
9762 deprecate_old("bare << to mean <<\"\"");
9763 for (; isALNUM_lazy_if(s,UTF); s++) {
9768 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9769 Perl_croak(aTHX_ "Delimiter for here document is too long");
9772 len = d - PL_tokenbuf;
9773 #ifndef PERL_STRICT_CR
9774 d = strchr(s, '\r');
9776 char * const olds = s;
9778 while (s < PL_bufend) {
9784 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9793 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9797 if ( outer || !(found_newline = memchr(s, '\n', PL_bufend - s)) ) {
9798 herewas = newSVpvn(s,PL_bufend-s);
9802 herewas = newSVpvn(s,found_newline-s);
9804 s += SvCUR(herewas);
9806 tmpstr = NEWSV(87,79);
9807 sv_upgrade(tmpstr, SVt_PVIV);
9810 SvIV_set(tmpstr, -1);
9812 else if (term == '`') {
9813 op_type = OP_BACKTICK;
9814 SvIV_set(tmpstr, '\\');
9818 PL_multi_start = CopLINE(PL_curcop);
9819 PL_multi_open = PL_multi_close = '<';
9820 term = *PL_tokenbuf;
9821 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9822 char *bufptr = PL_sublex_info.super_bufptr;
9823 char *bufend = PL_sublex_info.super_bufend;
9824 char * const olds = s - SvCUR(herewas);
9825 s = strchr(bufptr, '\n');
9829 while (s < bufend &&
9830 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9832 CopLINE_inc(PL_curcop);
9835 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9836 missingterm(PL_tokenbuf);
9838 sv_setpvn(herewas,bufptr,d-bufptr+1);
9839 sv_setpvn(tmpstr,d+1,s-d);
9841 sv_catpvn(herewas,s,bufend-s);
9842 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9849 while (s < PL_bufend &&
9850 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9852 CopLINE_inc(PL_curcop);
9854 if (s >= PL_bufend) {
9855 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9856 missingterm(PL_tokenbuf);
9858 sv_setpvn(tmpstr,d+1,s-d);
9860 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9862 sv_catpvn(herewas,s,PL_bufend-s);
9863 sv_setsv(PL_linestr,herewas);
9864 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9865 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9866 PL_last_lop = PL_last_uni = Nullch;
9869 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9870 while (s >= PL_bufend) { /* multiple line string? */
9872 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9873 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9874 missingterm(PL_tokenbuf);
9876 CopLINE_inc(PL_curcop);
9877 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9878 PL_last_lop = PL_last_uni = Nullch;
9879 #ifndef PERL_STRICT_CR
9880 if (PL_bufend - PL_linestart >= 2) {
9881 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9882 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9884 PL_bufend[-2] = '\n';
9886 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9888 else if (PL_bufend[-1] == '\r')
9889 PL_bufend[-1] = '\n';
9891 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9892 PL_bufend[-1] = '\n';
9894 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9895 SV *sv = NEWSV(88,0);
9897 sv_upgrade(sv, SVt_PVMG);
9898 sv_setsv(sv,PL_linestr);
9901 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9903 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9904 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9905 *(SvPVX(PL_linestr) + off ) = ' ';
9906 sv_catsv(PL_linestr,herewas);
9907 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9908 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9912 sv_catsv(tmpstr,PL_linestr);
9917 PL_multi_end = CopLINE(PL_curcop);
9918 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9919 SvPV_shrink_to_cur(tmpstr);
9921 SvREFCNT_dec(herewas);
9923 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9925 else if (PL_encoding)
9926 sv_recode_to_utf8(tmpstr, PL_encoding);
9928 PL_lex_stuff = tmpstr;
9929 yylval.ival = op_type;
9934 takes: current position in input buffer
9935 returns: new position in input buffer
9936 side-effects: yylval and lex_op are set.
9941 <FH> read from filehandle
9942 <pkg::FH> read from package qualified filehandle
9943 <pkg'FH> read from package qualified filehandle
9944 <$fh> read from filehandle in $fh
9950 S_scan_inputsymbol(pTHX_ char *start)
9953 register char *s = start; /* current position in buffer */
9959 d = PL_tokenbuf; /* start of temp holding space */
9960 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9961 end = strchr(s, '\n');
9964 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9966 /* die if we didn't have space for the contents of the <>,
9967 or if it didn't end, or if we see a newline
9970 if (len >= sizeof PL_tokenbuf)
9971 Perl_croak(aTHX_ "Excessively long <> operator");
9973 Perl_croak(aTHX_ "Unterminated <> operator");
9978 Remember, only scalar variables are interpreted as filehandles by
9979 this code. Anything more complex (e.g., <$fh{$num}>) will be
9980 treated as a glob() call.
9981 This code makes use of the fact that except for the $ at the front,
9982 a scalar variable and a filehandle look the same.
9984 if (*d == '$' && d[1]) d++;
9986 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9987 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9990 /* If we've tried to read what we allow filehandles to look like, and
9991 there's still text left, then it must be a glob() and not a getline.
9992 Use scan_str to pull out the stuff between the <> and treat it
9993 as nothing more than a string.
9996 if (d - PL_tokenbuf != len) {
9997 yylval.ival = OP_GLOB;
9999 s = scan_str(start,FALSE,FALSE);
10001 Perl_croak(aTHX_ "Glob not terminated");
10005 bool readline_overriden = FALSE;
10006 GV *gv_readline = Nullgv;
10008 /* we're in a filehandle read situation */
10011 /* turn <> into <ARGV> */
10013 Copy("ARGV",d,5,char);
10015 /* Check whether readline() is overriden */
10016 if (((gv_readline = gv_fetchpv("readline", 0, SVt_PVCV))
10017 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10019 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
10020 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
10021 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10022 readline_overriden = TRUE;
10024 /* if <$fh>, create the ops to turn the variable into a
10030 /* try to find it in the pad for this block, otherwise find
10031 add symbol table ops
10033 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10034 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
10035 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
10036 HEK *stashname = HvNAME_HEK(stash);
10037 SV *sym = sv_2mortal(newSVhek(stashname));
10038 sv_catpvs(sym, "::");
10039 sv_catpv(sym, d+1);
10044 OP *o = newOP(OP_PADSV, 0);
10046 PL_lex_op = readline_overriden
10047 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10048 append_elem(OP_LIST, o,
10049 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10050 : (OP*)newUNOP(OP_READLINE, 0, o);
10059 ? (GV_ADDMULTI | GV_ADDINEVAL)
10062 PL_lex_op = readline_overriden
10063 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10064 append_elem(OP_LIST,
10065 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10066 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10067 : (OP*)newUNOP(OP_READLINE, 0,
10068 newUNOP(OP_RV2SV, 0,
10069 newGVOP(OP_GV, 0, gv)));
10071 if (!readline_overriden)
10072 PL_lex_op->op_flags |= OPf_SPECIAL;
10073 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10074 yylval.ival = OP_NULL;
10077 /* If it's none of the above, it must be a literal filehandle
10078 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10080 GV *gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10081 PL_lex_op = readline_overriden
10082 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10083 append_elem(OP_LIST,
10084 newGVOP(OP_GV, 0, gv),
10085 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10086 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10087 yylval.ival = OP_NULL;
10096 takes: start position in buffer
10097 keep_quoted preserve \ on the embedded delimiter(s)
10098 keep_delims preserve the delimiters around the string
10099 returns: position to continue reading from buffer
10100 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10101 updates the read buffer.
10103 This subroutine pulls a string out of the input. It is called for:
10104 q single quotes q(literal text)
10105 ' single quotes 'literal text'
10106 qq double quotes qq(interpolate $here please)
10107 " double quotes "interpolate $here please"
10108 qx backticks qx(/bin/ls -l)
10109 ` backticks `/bin/ls -l`
10110 qw quote words @EXPORT_OK = qw( func() $spam )
10111 m// regexp match m/this/
10112 s/// regexp substitute s/this/that/
10113 tr/// string transliterate tr/this/that/
10114 y/// string transliterate y/this/that/
10115 ($*@) sub prototypes sub foo ($)
10116 (stuff) sub attr parameters sub foo : attr(stuff)
10117 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10119 In most of these cases (all but <>, patterns and transliterate)
10120 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10121 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10122 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10125 It skips whitespace before the string starts, and treats the first
10126 character as the delimiter. If the delimiter is one of ([{< then
10127 the corresponding "close" character )]}> is used as the closing
10128 delimiter. It allows quoting of delimiters, and if the string has
10129 balanced delimiters ([{<>}]) it allows nesting.
10131 On success, the SV with the resulting string is put into lex_stuff or,
10132 if that is already non-NULL, into lex_repl. The second case occurs only
10133 when parsing the RHS of the special constructs s/// and tr/// (y///).
10134 For convenience, the terminating delimiter character is stuffed into
10139 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10142 SV *sv; /* scalar value: string */
10143 char *tmps; /* temp string, used for delimiter matching */
10144 register char *s = start; /* current position in the buffer */
10145 register char term; /* terminating character */
10146 register char *to; /* current position in the sv's data */
10147 I32 brackets = 1; /* bracket nesting level */
10148 bool has_utf8 = FALSE; /* is there any utf8 content? */
10149 I32 termcode; /* terminating char. code */
10150 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10151 STRLEN termlen; /* length of terminating string */
10152 char *last = NULL; /* last position for nesting bracket */
10154 /* skip space before the delimiter */
10158 /* mark where we are, in case we need to report errors */
10161 /* after skipping whitespace, the next character is the terminator */
10164 termcode = termstr[0] = term;
10168 termcode = utf8_to_uvchr((U8*)s, &termlen);
10169 Copy(s, termstr, termlen, U8);
10170 if (!UTF8_IS_INVARIANT(term))
10174 /* mark where we are */
10175 PL_multi_start = CopLINE(PL_curcop);
10176 PL_multi_open = term;
10178 /* find corresponding closing delimiter */
10179 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10180 termcode = termstr[0] = term = tmps[5];
10182 PL_multi_close = term;
10184 /* create a new SV to hold the contents. 87 is leak category, I'm
10185 assuming. 79 is the SV's initial length. What a random number. */
10187 sv_upgrade(sv, SVt_PVIV);
10188 SvIV_set(sv, termcode);
10189 (void)SvPOK_only(sv); /* validate pointer */
10191 /* move past delimiter and try to read a complete string */
10193 sv_catpvn(sv, s, termlen);
10196 if (PL_encoding && !UTF) {
10200 int offset = s - SvPVX_const(PL_linestr);
10201 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10202 &offset, (char*)termstr, termlen);
10203 const char *ns = SvPVX_const(PL_linestr) + offset;
10204 char *svlast = SvEND(sv) - 1;
10206 for (; s < ns; s++) {
10207 if (*s == '\n' && !PL_rsfp)
10208 CopLINE_inc(PL_curcop);
10211 goto read_more_line;
10213 /* handle quoted delimiters */
10214 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10216 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10218 if ((svlast-1 - t) % 2) {
10219 if (!keep_quoted) {
10220 *(svlast-1) = term;
10222 SvCUR_set(sv, SvCUR(sv) - 1);
10227 if (PL_multi_open == PL_multi_close) {
10235 for (t = w = last; t < svlast; w++, t++) {
10236 /* At here, all closes are "was quoted" one,
10237 so we don't check PL_multi_close. */
10239 if (!keep_quoted && *(t+1) == PL_multi_open)
10244 else if (*t == PL_multi_open)
10252 SvCUR_set(sv, w - SvPVX_const(sv));
10255 if (--brackets <= 0)
10260 if (!keep_delims) {
10261 SvCUR_set(sv, SvCUR(sv) - 1);
10267 /* extend sv if need be */
10268 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10269 /* set 'to' to the next character in the sv's string */
10270 to = SvPVX(sv)+SvCUR(sv);
10272 /* if open delimiter is the close delimiter read unbridle */
10273 if (PL_multi_open == PL_multi_close) {
10274 for (; s < PL_bufend; s++,to++) {
10275 /* embedded newlines increment the current line number */
10276 if (*s == '\n' && !PL_rsfp)
10277 CopLINE_inc(PL_curcop);
10278 /* handle quoted delimiters */
10279 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10280 if (!keep_quoted && s[1] == term)
10282 /* any other quotes are simply copied straight through */
10286 /* terminate when run out of buffer (the for() condition), or
10287 have found the terminator */
10288 else if (*s == term) {
10291 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10294 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10300 /* if the terminator isn't the same as the start character (e.g.,
10301 matched brackets), we have to allow more in the quoting, and
10302 be prepared for nested brackets.
10305 /* read until we run out of string, or we find the terminator */
10306 for (; s < PL_bufend; s++,to++) {
10307 /* embedded newlines increment the line count */
10308 if (*s == '\n' && !PL_rsfp)
10309 CopLINE_inc(PL_curcop);
10310 /* backslashes can escape the open or closing characters */
10311 if (*s == '\\' && s+1 < PL_bufend) {
10312 if (!keep_quoted &&
10313 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10318 /* allow nested opens and closes */
10319 else if (*s == PL_multi_close && --brackets <= 0)
10321 else if (*s == PL_multi_open)
10323 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10328 /* terminate the copied string and update the sv's end-of-string */
10330 SvCUR_set(sv, to - SvPVX_const(sv));
10333 * this next chunk reads more into the buffer if we're not done yet
10337 break; /* handle case where we are done yet :-) */
10339 #ifndef PERL_STRICT_CR
10340 if (to - SvPVX_const(sv) >= 2) {
10341 if ((to[-2] == '\r' && to[-1] == '\n') ||
10342 (to[-2] == '\n' && to[-1] == '\r'))
10346 SvCUR_set(sv, to - SvPVX_const(sv));
10348 else if (to[-1] == '\r')
10351 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10356 /* if we're out of file, or a read fails, bail and reset the current
10357 line marker so we can report where the unterminated string began
10360 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10362 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10365 /* we read a line, so increment our line counter */
10366 CopLINE_inc(PL_curcop);
10368 /* update debugger info */
10369 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10370 SV * const sv = NEWSV(88,0);
10372 sv_upgrade(sv, SVt_PVMG);
10373 sv_setsv(sv,PL_linestr);
10374 (void)SvIOK_on(sv);
10376 av_store(CopFILEAVx(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10379 /* having changed the buffer, we must update PL_bufend */
10380 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10381 PL_last_lop = PL_last_uni = Nullch;
10384 /* at this point, we have successfully read the delimited string */
10386 if (!PL_encoding || UTF) {
10388 sv_catpvn(sv, s, termlen);
10391 if (has_utf8 || PL_encoding)
10394 PL_multi_end = CopLINE(PL_curcop);
10396 /* if we allocated too much space, give some back */
10397 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10398 SvLEN_set(sv, SvCUR(sv) + 1);
10399 SvPV_renew(sv, SvLEN(sv));
10402 /* decide whether this is the first or second quoted string we've read
10415 takes: pointer to position in buffer
10416 returns: pointer to new position in buffer
10417 side-effects: builds ops for the constant in yylval.op
10419 Read a number in any of the formats that Perl accepts:
10421 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10422 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10425 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10427 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10430 If it reads a number without a decimal point or an exponent, it will
10431 try converting the number to an integer and see if it can do so
10432 without loss of precision.
10436 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10439 register const char *s = start; /* current position in buffer */
10440 register char *d; /* destination in temp buffer */
10441 register char *e; /* end of temp buffer */
10442 NV nv; /* number read, as a double */
10443 SV *sv = Nullsv; /* place to put the converted number */
10444 bool floatit; /* boolean: int or float? */
10445 const char *lastub = NULL; /* position of last underbar */
10446 static char const number_too_long[] = "Number too long";
10448 /* We use the first character to decide what type of number this is */
10452 Perl_croak(aTHX_ "panic: scan_num");
10454 /* if it starts with a 0, it could be an octal number, a decimal in
10455 0.13 disguise, or a hexadecimal number, or a binary number. */
10459 u holds the "number so far"
10460 shift the power of 2 of the base
10461 (hex == 4, octal == 3, binary == 1)
10462 overflowed was the number more than we can hold?
10464 Shift is used when we add a digit. It also serves as an "are
10465 we in octal/hex/binary?" indicator to disallow hex characters
10466 when in octal mode.
10471 bool overflowed = FALSE;
10472 bool just_zero = TRUE; /* just plain 0 or binary number? */
10473 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10474 static const char* const bases[5] =
10475 { "", "binary", "", "octal", "hexadecimal" };
10476 static const char* const Bases[5] =
10477 { "", "Binary", "", "Octal", "Hexadecimal" };
10478 static const char* const maxima[5] =
10480 "0b11111111111111111111111111111111",
10484 const char *base, *Base, *max;
10486 /* check for hex */
10491 } else if (s[1] == 'b') {
10496 /* check for a decimal in disguise */
10497 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10499 /* so it must be octal */
10506 if (ckWARN(WARN_SYNTAX))
10507 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10508 "Misplaced _ in number");
10512 base = bases[shift];
10513 Base = Bases[shift];
10514 max = maxima[shift];
10516 /* read the rest of the number */
10518 /* x is used in the overflow test,
10519 b is the digit we're adding on. */
10524 /* if we don't mention it, we're done */
10528 /* _ are ignored -- but warned about if consecutive */
10530 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10531 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10532 "Misplaced _ in number");
10536 /* 8 and 9 are not octal */
10537 case '8': case '9':
10539 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10543 case '2': case '3': case '4':
10544 case '5': case '6': case '7':
10546 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10549 case '0': case '1':
10550 b = *s++ & 15; /* ASCII digit -> value of digit */
10554 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10555 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10556 /* make sure they said 0x */
10559 b = (*s++ & 7) + 9;
10561 /* Prepare to put the digit we have onto the end
10562 of the number so far. We check for overflows.
10568 x = u << shift; /* make room for the digit */
10570 if ((x >> shift) != u
10571 && !(PL_hints & HINT_NEW_BINARY)) {
10574 if (ckWARN_d(WARN_OVERFLOW))
10575 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10576 "Integer overflow in %s number",
10579 u = x | b; /* add the digit to the end */
10582 n *= nvshift[shift];
10583 /* If an NV has not enough bits in its
10584 * mantissa to represent an UV this summing of
10585 * small low-order numbers is a waste of time
10586 * (because the NV cannot preserve the
10587 * low-order bits anyway): we could just
10588 * remember when did we overflow and in the
10589 * end just multiply n by the right
10597 /* if we get here, we had success: make a scalar value from
10602 /* final misplaced underbar check */
10603 if (s[-1] == '_') {
10604 if (ckWARN(WARN_SYNTAX))
10605 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10610 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10611 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10612 "%s number > %s non-portable",
10618 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10619 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10620 "%s number > %s non-portable",
10625 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10626 sv = new_constant(start, s - start, "integer",
10628 else if (PL_hints & HINT_NEW_BINARY)
10629 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10634 handle decimal numbers.
10635 we're also sent here when we read a 0 as the first digit
10637 case '1': case '2': case '3': case '4': case '5':
10638 case '6': case '7': case '8': case '9': case '.':
10641 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10644 /* read next group of digits and _ and copy into d */
10645 while (isDIGIT(*s) || *s == '_') {
10646 /* skip underscores, checking for misplaced ones
10650 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10651 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10652 "Misplaced _ in number");
10656 /* check for end of fixed-length buffer */
10658 Perl_croak(aTHX_ number_too_long);
10659 /* if we're ok, copy the character */
10664 /* final misplaced underbar check */
10665 if (lastub && s == lastub + 1) {
10666 if (ckWARN(WARN_SYNTAX))
10667 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10670 /* read a decimal portion if there is one. avoid
10671 3..5 being interpreted as the number 3. followed
10674 if (*s == '.' && s[1] != '.') {
10679 if (ckWARN(WARN_SYNTAX))
10680 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10681 "Misplaced _ in number");
10685 /* copy, ignoring underbars, until we run out of digits.
10687 for (; isDIGIT(*s) || *s == '_'; s++) {
10688 /* fixed length buffer check */
10690 Perl_croak(aTHX_ number_too_long);
10692 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10693 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10694 "Misplaced _ in number");
10700 /* fractional part ending in underbar? */
10701 if (s[-1] == '_') {
10702 if (ckWARN(WARN_SYNTAX))
10703 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10704 "Misplaced _ in number");
10706 if (*s == '.' && isDIGIT(s[1])) {
10707 /* oops, it's really a v-string, but without the "v" */
10713 /* read exponent part, if present */
10714 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10718 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10719 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10721 /* stray preinitial _ */
10723 if (ckWARN(WARN_SYNTAX))
10724 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10725 "Misplaced _ in number");
10729 /* allow positive or negative exponent */
10730 if (*s == '+' || *s == '-')
10733 /* stray initial _ */
10735 if (ckWARN(WARN_SYNTAX))
10736 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10737 "Misplaced _ in number");
10741 /* read digits of exponent */
10742 while (isDIGIT(*s) || *s == '_') {
10745 Perl_croak(aTHX_ number_too_long);
10749 if (((lastub && s == lastub + 1) ||
10750 (!isDIGIT(s[1]) && s[1] != '_'))
10751 && ckWARN(WARN_SYNTAX))
10752 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10753 "Misplaced _ in number");
10760 /* make an sv from the string */
10764 We try to do an integer conversion first if no characters
10765 indicating "float" have been found.
10770 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10772 if (flags == IS_NUMBER_IN_UV) {
10774 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10777 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10778 if (uv <= (UV) IV_MIN)
10779 sv_setiv(sv, -(IV)uv);
10786 /* terminate the string */
10788 nv = Atof(PL_tokenbuf);
10792 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10793 (PL_hints & HINT_NEW_INTEGER) )
10794 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10795 (floatit ? "float" : "integer"),
10799 /* if it starts with a v, it could be a v-string */
10802 sv = NEWSV(92,5); /* preallocate storage space */
10803 s = scan_vstring(s,sv);
10807 /* make the op for the constant and return */
10810 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10812 lvalp->opval = Nullop;
10818 S_scan_formline(pTHX_ register char *s)
10821 register char *eol;
10823 SV *stuff = newSVpvs("");
10824 bool needargs = FALSE;
10825 bool eofmt = FALSE;
10827 while (!needargs) {
10829 #ifdef PERL_STRICT_CR
10830 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10832 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10834 if (*t == '\n' || t == PL_bufend) {
10839 if (PL_in_eval && !PL_rsfp) {
10840 eol = (char *) memchr(s,'\n',PL_bufend-s);
10845 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10847 for (t = s; t < eol; t++) {
10848 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10850 goto enough; /* ~~ must be first line in formline */
10852 if (*t == '@' || *t == '^')
10856 sv_catpvn(stuff, s, eol-s);
10857 #ifndef PERL_STRICT_CR
10858 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10859 char *end = SvPVX(stuff) + SvCUR(stuff);
10862 SvCUR_set(stuff, SvCUR(stuff) - 1);
10871 s = filter_gets(PL_linestr, PL_rsfp, 0);
10872 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10873 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10874 PL_last_lop = PL_last_uni = Nullch;
10883 if (SvCUR(stuff)) {
10886 PL_lex_state = LEX_NORMAL;
10887 PL_nextval[PL_nexttoke].ival = 0;
10891 PL_lex_state = LEX_FORMLINE;
10893 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10895 else if (PL_encoding)
10896 sv_recode_to_utf8(stuff, PL_encoding);
10898 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10900 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10904 SvREFCNT_dec(stuff);
10906 PL_lex_formbrack = 0;
10918 PL_cshlen = strlen(PL_cshname);
10923 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10926 const I32 oldsavestack_ix = PL_savestack_ix;
10927 CV* outsidecv = PL_compcv;
10930 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10932 SAVEI32(PL_subline);
10933 save_item(PL_subname);
10934 SAVESPTR(PL_compcv);
10936 PL_compcv = (CV*)NEWSV(1104,0);
10937 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10938 CvFLAGS(PL_compcv) |= flags;
10940 PL_subline = CopLINE(PL_curcop);
10941 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10942 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10943 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10945 return oldsavestack_ix;
10949 #pragma segment Perl_yylex
10952 Perl_yywarn(pTHX_ const char *s)
10955 PL_in_eval |= EVAL_WARNONLY;
10957 PL_in_eval &= ~EVAL_WARNONLY;
10962 Perl_yyerror(pTHX_ const char *s)
10965 const char *where = NULL;
10966 const char *context = NULL;
10970 if (!yychar || (yychar == ';' && !PL_rsfp))
10972 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10973 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10974 PL_oldbufptr != PL_bufptr) {
10977 The code below is removed for NetWare because it abends/crashes on NetWare
10978 when the script has error such as not having the closing quotes like:
10979 if ($var eq "value)
10980 Checking of white spaces is anyway done in NetWare code.
10983 while (isSPACE(*PL_oldoldbufptr))
10986 context = PL_oldoldbufptr;
10987 contlen = PL_bufptr - PL_oldoldbufptr;
10989 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10990 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10993 The code below is removed for NetWare because it abends/crashes on NetWare
10994 when the script has error such as not having the closing quotes like:
10995 if ($var eq "value)
10996 Checking of white spaces is anyway done in NetWare code.
10999 while (isSPACE(*PL_oldbufptr))
11002 context = PL_oldbufptr;
11003 contlen = PL_bufptr - PL_oldbufptr;
11005 else if (yychar > 255)
11006 where = "next token ???";
11007 else if (yychar == -2) { /* YYEMPTY */
11008 if (PL_lex_state == LEX_NORMAL ||
11009 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11010 where = "at end of line";
11011 else if (PL_lex_inpat)
11012 where = "within pattern";
11014 where = "within string";
11017 SV *where_sv = sv_2mortal(newSVpvs("next char "));
11019 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11020 else if (isPRINT_LC(yychar))
11021 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
11023 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11024 where = SvPVX_const(where_sv);
11026 msg = sv_2mortal(newSVpv(s, 0));
11027 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11028 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11030 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
11032 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
11033 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11034 Perl_sv_catpvf(aTHX_ msg,
11035 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11036 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11039 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11040 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11043 if (PL_error_count >= 10) {
11044 if (PL_in_eval && SvCUR(ERRSV))
11045 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11046 ERRSV, OutCopFILE(PL_curcop));
11048 Perl_croak(aTHX_ "%s has too many errors.\n",
11049 OutCopFILE(PL_curcop));
11052 PL_in_my_stash = NULL;
11056 #pragma segment Main
11060 S_swallow_bom(pTHX_ U8 *s)
11063 const STRLEN slen = SvCUR(PL_linestr);
11066 if (s[1] == 0xFE) {
11067 /* UTF-16 little-endian? (or UTF32-LE?) */
11068 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11069 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11070 #ifndef PERL_NO_UTF16_FILTER
11071 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11074 if (PL_bufend > (char*)s) {
11078 filter_add(utf16rev_textfilter, NULL);
11079 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11080 utf16_to_utf8_reversed(s, news,
11081 PL_bufend - (char*)s - 1,
11083 sv_setpvn(PL_linestr, (const char*)news, newlen);
11085 SvUTF8_on(PL_linestr);
11086 s = (U8*)SvPVX(PL_linestr);
11087 PL_bufend = SvPVX(PL_linestr) + newlen;
11090 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11095 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11096 #ifndef PERL_NO_UTF16_FILTER
11097 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11100 if (PL_bufend > (char *)s) {
11104 filter_add(utf16_textfilter, NULL);
11105 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11106 utf16_to_utf8(s, news,
11107 PL_bufend - (char*)s,
11109 sv_setpvn(PL_linestr, (const char*)news, newlen);
11111 SvUTF8_on(PL_linestr);
11112 s = (U8*)SvPVX(PL_linestr);
11113 PL_bufend = SvPVX(PL_linestr) + newlen;
11116 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11121 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11122 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11123 s += 3; /* UTF-8 */
11129 if (s[2] == 0xFE && s[3] == 0xFF) {
11130 /* UTF-32 big-endian */
11131 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11134 else if (s[2] == 0 && s[3] != 0) {
11137 * are a good indicator of UTF-16BE. */
11138 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11143 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11146 * are a good indicator of UTF-16LE. */
11147 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11156 * Restore a source filter.
11160 restore_rsfp(pTHX_ void *f)
11163 PerlIO * const fp = (PerlIO*)f;
11165 if (PL_rsfp == PerlIO_stdin())
11166 PerlIO_clearerr(PL_rsfp);
11167 else if (PL_rsfp && (PL_rsfp != fp))
11168 PerlIO_close(PL_rsfp);
11172 #ifndef PERL_NO_UTF16_FILTER
11174 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11177 const STRLEN old = SvCUR(sv);
11178 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11179 DEBUG_P(PerlIO_printf(Perl_debug_log,
11180 "utf16_textfilter(%p): %d %d (%d)\n",
11181 utf16_textfilter, idx, maxlen, (int) count));
11185 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11186 Copy(SvPVX_const(sv), tmps, old, char);
11187 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11188 SvCUR(sv) - old, &newlen);
11189 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11191 DEBUG_P({sv_dump(sv);});
11196 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11199 const STRLEN old = SvCUR(sv);
11200 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11201 DEBUG_P(PerlIO_printf(Perl_debug_log,
11202 "utf16rev_textfilter(%p): %d %d (%d)\n",
11203 utf16rev_textfilter, idx, maxlen, (int) count));
11207 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11208 Copy(SvPVX_const(sv), tmps, old, char);
11209 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11210 SvCUR(sv) - old, &newlen);
11211 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11213 DEBUG_P({ sv_dump(sv); });
11219 Returns a pointer to the next character after the parsed
11220 vstring, as well as updating the passed in sv.
11222 Function must be called like
11225 s = scan_vstring(s,sv);
11227 The sv should already be large enough to store the vstring
11228 passed in, for performance reasons.
11233 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11236 const char *pos = s;
11237 const char *start = s;
11238 if (*pos == 'v') pos++; /* get past 'v' */
11239 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11241 if ( *pos != '.') {
11242 /* this may not be a v-string if followed by => */
11243 const char *next = pos;
11244 while (next < PL_bufend && isSPACE(*next))
11246 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11247 /* return string not v-string */
11248 sv_setpvn(sv,(char *)s,pos-s);
11249 return (char *)pos;
11253 if (!isALPHA(*pos)) {
11254 U8 tmpbuf[UTF8_MAXBYTES+1];
11256 if (*s == 'v') s++; /* get past 'v' */
11258 sv_setpvn(sv, "", 0);
11264 /* this is atoi() that tolerates underscores */
11265 const char *end = pos;
11267 while (--end >= s) {
11272 rev += (*end - '0') * mult;
11274 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11275 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11276 "Integer overflow in decimal number");
11280 if (rev > 0x7FFFFFFF)
11281 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11283 /* Append native character for the rev point */
11284 tmpend = uvchr_to_utf8(tmpbuf, rev);
11285 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11286 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11288 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11294 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11298 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11306 * c-indentation-style: bsd
11307 * c-basic-offset: 4
11308 * indent-tabs-mode: t
11311 * ex: set ts=8 sts=4 sw=4 noet: