3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
31 static void restore_rsfp(pTHX_ void *f);
32 #ifndef PERL_NO_UTF16_FILTER
33 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
34 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
37 #define XFAKEBRACK 128
40 #ifdef USE_UTF8_SCRIPTS
41 # define UTF (!IN_BYTES)
43 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
46 /* In variables named $^X, these are the legal values for X.
47 * 1999-02-27 mjd-perl-patch@plover.com */
48 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50 /* On MacOS, respect nonbreaking spaces */
51 #ifdef MACOS_TRADITIONAL
52 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57 /* LEX_* are values for PL_lex_state, the state of the lexer.
58 * They are arranged oddly so that the guard on the switch statement
59 * can get by with a single comparison (if the compiler is smart enough).
62 /* #define LEX_NOTPARSING 11 is done in perl.h. */
64 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
65 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
66 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
67 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
68 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
70 /* at end of code, eg "$x" followed by: */
71 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
72 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
74 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
75 string or after \E, $foo, etc */
76 #define LEX_INTERPCONST 2 /* NOT USED */
77 #define LEX_FORMLINE 1 /* expecting a format line */
78 #define LEX_KNOWNEXT 0 /* next token known; just return it */
82 static const char* const lex_state_names[] = {
101 #include "keywords.h"
103 /* CLINE is a macro that ensures PL_copline has a sane value */
108 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
111 * Convenience functions to return different tokens and prime the
112 * lexer for the next token. They all take an argument.
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
124 * FUN1 : not used, except for not, which isn't a UNIOP
125 * BOop : bitwise or or xor
127 * SHop : shift operator
128 * PWop : power operator
129 * PMop : pattern-matching operator
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
135 * Also see LOP and lop() below.
138 #ifdef DEBUGGING /* Serve -DT. */
139 # define REPORT(retval) tokereport((I32)retval)
141 # define REPORT(retval) (retval)
144 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
165 /* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
170 #define UNI2(f,x) { \
174 PL_last_uni = PL_oldbufptr; \
175 PL_last_lop_op = f; \
177 return REPORT( (int)FUNC1 ); \
179 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 #define UNI(f) UNI2(f,XTERM)
182 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
184 #define UNIBRACK(f) { \
187 PL_last_uni = PL_oldbufptr; \
189 return REPORT( (int)FUNC1 ); \
191 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
194 /* grandfather return to old style */
195 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
199 /* how to interpret the yylval associated with the token */
203 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
209 static struct debug_tokens { const int token, type; const char *name; }
210 const debug_tokens[] =
212 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
213 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
214 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
215 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
216 { ARROW, TOKENTYPE_NONE, "ARROW" },
217 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
218 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
219 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
220 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
221 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
222 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
223 { DO, TOKENTYPE_NONE, "DO" },
224 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
225 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
226 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
227 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
228 { ELSE, TOKENTYPE_NONE, "ELSE" },
229 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
230 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
231 { FOR, TOKENTYPE_IVAL, "FOR" },
232 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
233 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
234 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
235 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
236 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
237 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
238 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
239 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
240 { IF, TOKENTYPE_IVAL, "IF" },
241 { LABEL, TOKENTYPE_PVAL, "LABEL" },
242 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
243 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
244 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
245 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
246 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
247 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
248 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
249 { MY, TOKENTYPE_IVAL, "MY" },
250 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
251 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
252 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
253 { OROP, TOKENTYPE_IVAL, "OROP" },
254 { OROR, TOKENTYPE_NONE, "OROR" },
255 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
256 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
257 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
258 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
259 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
260 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
261 { PREINC, TOKENTYPE_NONE, "PREINC" },
262 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
263 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
264 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
265 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
266 { SUB, TOKENTYPE_NONE, "SUB" },
267 { THING, TOKENTYPE_OPVAL, "THING" },
268 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
269 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
270 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
271 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
272 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
273 { USE, TOKENTYPE_IVAL, "USE" },
274 { WHEN, TOKENTYPE_IVAL, "WHEN" },
275 { WHILE, TOKENTYPE_IVAL, "WHILE" },
276 { WORD, TOKENTYPE_OPVAL, "WORD" },
277 { 0, TOKENTYPE_NONE, 0 }
280 /* dump the returned token in rv, plus any optional arg in yylval */
283 S_tokereport(pTHX_ I32 rv)
286 const char *name = Nullch;
287 enum token_type type = TOKENTYPE_NONE;
288 const struct debug_tokens *p;
289 SV* const report = newSVpvn("<== ", 4);
291 for (p = debug_tokens; p->token; p++) {
292 if (p->token == (int)rv) {
299 Perl_sv_catpv(aTHX_ report, name);
300 else if ((char)rv > ' ' && (char)rv < '~')
301 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
303 Perl_sv_catpv(aTHX_ report, "EOF");
305 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
308 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
311 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
313 case TOKENTYPE_OPNUM:
314 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
315 PL_op_name[yylval.ival]);
318 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
320 case TOKENTYPE_OPVAL:
322 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
323 PL_op_name[yylval.opval->op_type]);
324 if (yylval.opval->op_type == OP_CONST) {
325 Perl_sv_catpvf(aTHX_ report, " %s",
326 SvPEEK(cSVOPx_sv(yylval.opval)));
331 Perl_sv_catpv(aTHX_ report, "(opval=null)");
334 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
340 /* print the buffer with suitable escapes */
343 S_printbuf(pTHX_ const char* fmt, const char* s)
345 SV* const tmp = newSVpvn("", 0);
346 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
355 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
356 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
360 S_ao(pTHX_ int toketype)
362 if (*PL_bufptr == '=') {
364 if (toketype == ANDAND)
365 yylval.ival = OP_ANDASSIGN;
366 else if (toketype == OROR)
367 yylval.ival = OP_ORASSIGN;
368 else if (toketype == DORDOR)
369 yylval.ival = OP_DORASSIGN;
377 * When Perl expects an operator and finds something else, no_op
378 * prints the warning. It always prints "<something> found where
379 * operator expected. It prints "Missing semicolon on previous line?"
380 * if the surprise occurs at the start of the line. "do you need to
381 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
382 * where the compiler doesn't know if foo is a method call or a function.
383 * It prints "Missing operator before end of line" if there's nothing
384 * after the missing operator, or "... before <...>" if there is something
385 * after the missing operator.
389 S_no_op(pTHX_ const char *what, char *s)
391 char * const oldbp = PL_bufptr;
392 const bool is_first = (PL_oldbufptr == PL_linestart);
398 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
399 if (ckWARN_d(WARN_SYNTAX)) {
401 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
402 "\t(Missing semicolon on previous line?)\n");
403 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
405 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
406 if (t < PL_bufptr && isSPACE(*t))
407 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
408 "\t(Do you need to predeclare %.*s?)\n",
409 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
413 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
414 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
422 * Complain about missing quote/regexp/heredoc terminator.
423 * If it's called with (char *)NULL then it cauterizes the line buffer.
424 * If we're in a delimited string and the delimiter is a control
425 * character, it's reformatted into a two-char sequence like ^C.
430 S_missingterm(pTHX_ char *s)
435 char * const nl = strrchr(s,'\n');
441 iscntrl(PL_multi_close)
443 PL_multi_close < 32 || PL_multi_close == 127
447 tmpbuf[1] = (char)toCTRL(PL_multi_close);
452 *tmpbuf = (char)PL_multi_close;
456 q = strchr(s,'"') ? '\'' : '"';
457 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
460 #define FEATURE_IS_ENABLED(name, namelen) \
461 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
462 && feature_is_enabled(name, namelen) )
464 * S_feature_is_enabled
465 * Check whether the named feature is enabled.
468 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
470 HV * const hinthv = GvHV(PL_hintgv);
471 char he_name[32] = "feature_";
472 (void) strncpy(&he_name[8], name, 24);
474 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
482 Perl_deprecate(pTHX_ const char *s)
484 if (ckWARN(WARN_DEPRECATED))
485 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
489 Perl_deprecate_old(pTHX_ const char *s)
491 /* This function should NOT be called for any new deprecated warnings */
492 /* Use Perl_deprecate instead */
494 /* It is here to maintain backward compatibility with the pre-5.8 */
495 /* warnings category hierarchy. The "deprecated" category used to */
496 /* live under the "syntax" category. It is now a top-level category */
497 /* in its own right. */
499 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
500 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
501 "Use of %s is deprecated", s);
506 * Deprecate a comma-less variable list.
512 deprecate_old("comma-less variable list");
516 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
517 * utf16-to-utf8-reversed.
520 #ifdef PERL_CR_FILTER
524 register const char *s = SvPVX_const(sv);
525 register const char * const e = s + SvCUR(sv);
526 /* outer loop optimized to do nothing if there are no CR-LFs */
528 if (*s++ == '\r' && *s == '\n') {
529 /* hit a CR-LF, need to copy the rest */
530 register char *d = s - 1;
533 if (*s == '\r' && s[1] == '\n')
544 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
546 const I32 count = FILTER_READ(idx+1, sv, maxlen);
547 if (count > 0 && !maxlen)
555 * Initialize variables. Uses the Perl save_stack to save its state (for
556 * recursive calls to the parser).
560 Perl_lex_start(pTHX_ SV *line)
565 SAVEI32(PL_lex_dojoin);
566 SAVEI32(PL_lex_brackets);
567 SAVEI32(PL_lex_casemods);
568 SAVEI32(PL_lex_starts);
569 SAVEI32(PL_lex_state);
570 SAVEVPTR(PL_lex_inpat);
571 SAVEI32(PL_lex_inwhat);
572 if (PL_lex_state == LEX_KNOWNEXT) {
573 I32 toke = PL_nexttoke;
574 while (--toke >= 0) {
575 SAVEI32(PL_nexttype[toke]);
576 SAVEVPTR(PL_nextval[toke]);
578 SAVEI32(PL_nexttoke);
580 SAVECOPLINE(PL_curcop);
583 SAVEPPTR(PL_oldbufptr);
584 SAVEPPTR(PL_oldoldbufptr);
585 SAVEPPTR(PL_last_lop);
586 SAVEPPTR(PL_last_uni);
587 SAVEPPTR(PL_linestart);
588 SAVESPTR(PL_linestr);
589 SAVEGENERICPV(PL_lex_brackstack);
590 SAVEGENERICPV(PL_lex_casestack);
591 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
592 SAVESPTR(PL_lex_stuff);
593 SAVEI32(PL_lex_defer);
594 SAVEI32(PL_sublex_info.sub_inwhat);
595 SAVESPTR(PL_lex_repl);
597 SAVEINT(PL_lex_expect);
599 PL_lex_state = LEX_NORMAL;
603 Newx(PL_lex_brackstack, 120, char);
604 Newx(PL_lex_casestack, 12, char);
606 *PL_lex_casestack = '\0';
609 PL_lex_stuff = Nullsv;
610 PL_lex_repl = Nullsv;
614 PL_sublex_info.sub_inwhat = 0;
616 if (SvREADONLY(PL_linestr))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
618 s = SvPV_const(PL_linestr, len);
619 if (!len || s[len-1] != ';') {
620 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
621 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
622 sv_catpvn(PL_linestr, "\n;", 2);
624 SvTEMP_off(PL_linestr);
625 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
626 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
627 PL_last_lop = PL_last_uni = Nullch;
633 * Finalizer for lexing operations. Must be called when the parser is
634 * done with the lexer.
640 PL_doextract = FALSE;
645 * This subroutine has nothing to do with tilting, whether at windmills
646 * or pinball tables. Its name is short for "increment line". It
647 * increments the current line number in CopLINE(PL_curcop) and checks
648 * to see whether the line starts with a comment of the form
649 * # line 500 "foo.pm"
650 * If so, it sets the current line number and file to the values in the comment.
654 S_incline(pTHX_ char *s)
661 CopLINE_inc(PL_curcop);
664 while (SPACE_OR_TAB(*s)) s++;
665 if (strnEQ(s, "line", 4))
669 if (SPACE_OR_TAB(*s))
673 while (SPACE_OR_TAB(*s)) s++;
679 while (SPACE_OR_TAB(*s))
681 if (*s == '"' && (t = strchr(s+1, '"'))) {
686 for (t = s; !isSPACE(*t); t++) ;
689 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
691 if (*e != '\n' && *e != '\0')
692 return; /* false alarm */
698 const char * const cf = CopFILE(PL_curcop);
699 STRLEN tmplen = cf ? strlen(cf) : 0;
700 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
701 /* must copy *{"::_<(eval N)[oldfilename:L]"}
702 * to *{"::_<newfilename"} */
703 char smallbuf[256], smallbuf2[256];
704 char *tmpbuf, *tmpbuf2;
706 STRLEN tmplen2 = strlen(s);
707 if (tmplen + 3 < sizeof smallbuf)
710 Newx(tmpbuf, tmplen + 3, char);
711 if (tmplen2 + 3 < sizeof smallbuf2)
714 Newx(tmpbuf2, tmplen2 + 3, char);
715 tmpbuf[0] = tmpbuf2[0] = '_';
716 tmpbuf[1] = tmpbuf2[1] = '<';
717 memcpy(tmpbuf + 2, cf, ++tmplen);
718 memcpy(tmpbuf2 + 2, s, ++tmplen2);
720 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
722 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
724 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
725 /* adjust ${"::_<newfilename"} to store the new file name */
726 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
727 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
728 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
730 if (tmpbuf != smallbuf) Safefree(tmpbuf);
731 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
734 CopFILE_free(PL_curcop);
735 CopFILE_set(PL_curcop, s);
738 CopLINE_set(PL_curcop, atoi(n)-1);
743 * Called to gobble the appropriate amount and type of whitespace.
744 * Skips comments as well.
748 S_skipspace(pTHX_ register char *s)
750 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
751 while (s < PL_bufend && SPACE_OR_TAB(*s))
757 SSize_t oldprevlen, oldoldprevlen;
758 SSize_t oldloplen = 0, oldunilen = 0;
759 while (s < PL_bufend && isSPACE(*s)) {
760 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
765 if (s < PL_bufend && *s == '#') {
766 while (s < PL_bufend && *s != '\n')
770 if (PL_in_eval && !PL_rsfp) {
777 /* only continue to recharge the buffer if we're at the end
778 * of the buffer, we're not reading from a source filter, and
779 * we're in normal lexing mode
781 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
782 PL_lex_state == LEX_FORMLINE)
785 /* try to recharge the buffer */
786 if ((s = filter_gets(PL_linestr, PL_rsfp,
787 (prevlen = SvCUR(PL_linestr)))) == Nullch)
789 /* end of file. Add on the -p or -n magic */
792 ";}continue{print or die qq(-p destination: $!\\n);}");
793 PL_minus_n = PL_minus_p = 0;
795 else if (PL_minus_n) {
796 sv_setpvn(PL_linestr, ";}", 2);
800 sv_setpvn(PL_linestr,";", 1);
802 /* reset variables for next time we lex */
803 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
805 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
806 PL_last_lop = PL_last_uni = Nullch;
808 /* Close the filehandle. Could be from -P preprocessor,
809 * STDIN, or a regular file. If we were reading code from
810 * STDIN (because the commandline held no -e or filename)
811 * then we don't close it, we reset it so the code can
812 * read from STDIN too.
815 if (PL_preprocess && !PL_in_eval)
816 (void)PerlProc_pclose(PL_rsfp);
817 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
818 PerlIO_clearerr(PL_rsfp);
820 (void)PerlIO_close(PL_rsfp);
825 /* not at end of file, so we only read another line */
826 /* make corresponding updates to old pointers, for yyerror() */
827 oldprevlen = PL_oldbufptr - PL_bufend;
828 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
830 oldunilen = PL_last_uni - PL_bufend;
832 oldloplen = PL_last_lop - PL_bufend;
833 PL_linestart = PL_bufptr = s + prevlen;
834 PL_bufend = s + SvCUR(PL_linestr);
836 PL_oldbufptr = s + oldprevlen;
837 PL_oldoldbufptr = s + oldoldprevlen;
839 PL_last_uni = s + oldunilen;
841 PL_last_lop = s + oldloplen;
844 /* debugger active and we're not compiling the debugger code,
845 * so store the line into the debugger's array of lines
847 if (PERLDB_LINE && PL_curstash != PL_debstash) {
848 SV * const sv = NEWSV(85,0);
850 sv_upgrade(sv, SVt_PVMG);
851 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
854 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
861 * Check the unary operators to ensure there's no ambiguity in how they're
862 * used. An ambiguous piece of code would be:
864 * This doesn't mean rand() + 5. Because rand() is a unary operator,
865 * the +5 is its argument.
874 if (PL_oldoldbufptr != PL_last_uni)
876 while (isSPACE(*PL_last_uni))
878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
879 if ((t = strchr(s, '(')) && t < PL_bufptr)
881 if (ckWARN_d(WARN_AMBIGUOUS)){
884 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
885 "Warning: Use of \"%s\" without parentheses is ambiguous",
892 * LOP : macro to build a list operator. Its behaviour has been replaced
893 * with a subroutine, S_lop() for which LOP is just another name.
896 #define LOP(f,x) return lop(f,x,s)
900 * Build a list operator (or something that might be one). The rules:
901 * - if we have a next token, then it's a list operator [why?]
902 * - if the next thing is an opening paren, then it's a function
903 * - else it's a list operator
907 S_lop(pTHX_ I32 f, int x, char *s)
913 PL_last_lop = PL_oldbufptr;
914 PL_last_lop_op = (OPCODE)f;
916 return REPORT(LSTOP);
923 return REPORT(LSTOP);
928 * When the lexer realizes it knows the next token (for instance,
929 * it is reordering tokens for the parser) then it can call S_force_next
930 * to know what token to return the next time the lexer is called. Caller
931 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
932 * handles the token correctly.
936 S_force_next(pTHX_ I32 type)
938 PL_nexttype[PL_nexttoke] = type;
940 if (PL_lex_state != LEX_KNOWNEXT) {
941 PL_lex_defer = PL_lex_state;
942 PL_lex_expect = PL_expect;
943 PL_lex_state = LEX_KNOWNEXT;
948 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
950 SV * const sv = newSVpvn(start,len);
951 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
958 * When the lexer knows the next thing is a word (for instance, it has
959 * just seen -> and it knows that the next char is a word char, then
960 * it calls S_force_word to stick the next word into the PL_next lookahead.
963 * char *start : buffer position (must be within PL_linestr)
964 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
965 * int check_keyword : if true, Perl checks to make sure the word isn't
966 * a keyword (do this if the word is a label, e.g. goto FOO)
967 * int allow_pack : if true, : characters will also be allowed (require,
969 * int allow_initial_tick : used by the "sub" lexer only.
973 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
978 start = skipspace(start);
980 if (isIDFIRST_lazy_if(s,UTF) ||
981 (allow_pack && *s == ':') ||
982 (allow_initial_tick && *s == '\'') )
984 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
985 if (check_keyword && keyword(PL_tokenbuf, len))
987 if (token == METHOD) {
992 PL_expect = XOPERATOR;
995 PL_nextval[PL_nexttoke].opval
996 = (OP*)newSVOP(OP_CONST,0,
997 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
998 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
1006 * Called when the lexer wants $foo *foo &foo etc, but the program
1007 * text only contains the "foo" portion. The first argument is a pointer
1008 * to the "foo", and the second argument is the type symbol to prefix.
1009 * Forces the next token to be a "WORD".
1010 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1014 S_force_ident(pTHX_ register const char *s, int kind)
1017 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
1018 PL_nextval[PL_nexttoke].opval = o;
1021 o->op_private = OPpCONST_ENTERED;
1022 /* XXX see note in pp_entereval() for why we forgo typo
1023 warnings if the symbol must be introduced in an eval.
1025 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
1026 kind == '$' ? SVt_PV :
1027 kind == '@' ? SVt_PVAV :
1028 kind == '%' ? SVt_PVHV :
1036 Perl_str_to_version(pTHX_ SV *sv)
1041 const char *start = SvPV_const(sv,len);
1042 const char * const end = start + len;
1043 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1044 while (start < end) {
1048 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1053 retval += ((NV)n)/nshift;
1062 * Forces the next token to be a version number.
1063 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1064 * and if "guessing" is TRUE, then no new token is created (and the caller
1065 * must use an alternative parsing method).
1069 S_force_version(pTHX_ char *s, int guessing)
1071 OP *version = Nullop;
1080 while (isDIGIT(*d) || *d == '_' || *d == '.')
1082 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1084 s = scan_num(s, &yylval);
1085 version = yylval.opval;
1086 ver = cSVOPx(version)->op_sv;
1087 if (SvPOK(ver) && !SvNIOK(ver)) {
1088 SvUPGRADE(ver, SVt_PVNV);
1089 SvNV_set(ver, str_to_version(ver));
1090 SvNOK_on(ver); /* hint that it is a version */
1097 /* NOTE: The parser sees the package name and the VERSION swapped */
1098 PL_nextval[PL_nexttoke].opval = version;
1106 * Tokenize a quoted string passed in as an SV. It finds the next
1107 * chunk, up to end of string or a backslash. It may make a new
1108 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1113 S_tokeq(pTHX_ SV *sv)
1116 register char *send;
1124 s = SvPV_force(sv, len);
1125 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1128 while (s < send && *s != '\\')
1133 if ( PL_hints & HINT_NEW_STRING ) {
1134 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1140 if (s + 1 < send && (s[1] == '\\'))
1141 s++; /* all that, just for this */
1146 SvCUR_set(sv, d - SvPVX_const(sv));
1148 if ( PL_hints & HINT_NEW_STRING )
1149 return new_constant(NULL, 0, "q", sv, pv, "q");
1154 * Now come three functions related to double-quote context,
1155 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1156 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1157 * interact with PL_lex_state, and create fake ( ... ) argument lists
1158 * to handle functions and concatenation.
1159 * They assume that whoever calls them will be setting up a fake
1160 * join call, because each subthing puts a ',' after it. This lets
1163 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1165 * (I'm not sure whether the spurious commas at the end of lcfirst's
1166 * arguments and join's arguments are created or not).
1171 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1173 * Pattern matching will set PL_lex_op to the pattern-matching op to
1174 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1176 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1178 * Everything else becomes a FUNC.
1180 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1181 * had an OP_CONST or OP_READLINE). This just sets us up for a
1182 * call to S_sublex_push().
1186 S_sublex_start(pTHX)
1188 register const I32 op_type = yylval.ival;
1190 if (op_type == OP_NULL) {
1191 yylval.opval = PL_lex_op;
1195 if (op_type == OP_CONST || op_type == OP_READLINE) {
1196 SV *sv = tokeq(PL_lex_stuff);
1198 if (SvTYPE(sv) == SVt_PVIV) {
1199 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1201 const char *p = SvPV_const(sv, len);
1202 SV * const nsv = newSVpvn(p, len);
1208 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1209 PL_lex_stuff = Nullsv;
1210 /* Allow <FH> // "foo" */
1211 if (op_type == OP_READLINE)
1212 PL_expect = XTERMORDORDOR;
1216 PL_sublex_info.super_state = PL_lex_state;
1217 PL_sublex_info.sub_inwhat = op_type;
1218 PL_sublex_info.sub_op = PL_lex_op;
1219 PL_lex_state = LEX_INTERPPUSH;
1223 yylval.opval = PL_lex_op;
1233 * Create a new scope to save the lexing state. The scope will be
1234 * ended in S_sublex_done. Returns a '(', starting the function arguments
1235 * to the uc, lc, etc. found before.
1236 * Sets PL_lex_state to LEX_INTERPCONCAT.
1245 PL_lex_state = PL_sublex_info.super_state;
1246 SAVEI32(PL_lex_dojoin);
1247 SAVEI32(PL_lex_brackets);
1248 SAVEI32(PL_lex_casemods);
1249 SAVEI32(PL_lex_starts);
1250 SAVEI32(PL_lex_state);
1251 SAVEVPTR(PL_lex_inpat);
1252 SAVEI32(PL_lex_inwhat);
1253 SAVECOPLINE(PL_curcop);
1254 SAVEPPTR(PL_bufptr);
1255 SAVEPPTR(PL_bufend);
1256 SAVEPPTR(PL_oldbufptr);
1257 SAVEPPTR(PL_oldoldbufptr);
1258 SAVEPPTR(PL_last_lop);
1259 SAVEPPTR(PL_last_uni);
1260 SAVEPPTR(PL_linestart);
1261 SAVESPTR(PL_linestr);
1262 SAVEGENERICPV(PL_lex_brackstack);
1263 SAVEGENERICPV(PL_lex_casestack);
1265 PL_linestr = PL_lex_stuff;
1266 PL_lex_stuff = Nullsv;
1268 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1269 = SvPVX(PL_linestr);
1270 PL_bufend += SvCUR(PL_linestr);
1271 PL_last_lop = PL_last_uni = Nullch;
1272 SAVEFREESV(PL_linestr);
1274 PL_lex_dojoin = FALSE;
1275 PL_lex_brackets = 0;
1276 Newx(PL_lex_brackstack, 120, char);
1277 Newx(PL_lex_casestack, 12, char);
1278 PL_lex_casemods = 0;
1279 *PL_lex_casestack = '\0';
1281 PL_lex_state = LEX_INTERPCONCAT;
1282 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1284 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1285 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1286 PL_lex_inpat = PL_sublex_info.sub_op;
1288 PL_lex_inpat = Nullop;
1295 * Restores lexer state after a S_sublex_push.
1302 if (!PL_lex_starts++) {
1303 SV * const sv = newSVpvn("",0);
1304 if (SvUTF8(PL_linestr))
1306 PL_expect = XOPERATOR;
1307 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1311 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1312 PL_lex_state = LEX_INTERPCASEMOD;
1316 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1317 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1318 PL_linestr = PL_lex_repl;
1320 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1321 PL_bufend += SvCUR(PL_linestr);
1322 PL_last_lop = PL_last_uni = Nullch;
1323 SAVEFREESV(PL_linestr);
1324 PL_lex_dojoin = FALSE;
1325 PL_lex_brackets = 0;
1326 PL_lex_casemods = 0;
1327 *PL_lex_casestack = '\0';
1329 if (SvEVALED(PL_lex_repl)) {
1330 PL_lex_state = LEX_INTERPNORMAL;
1332 /* we don't clear PL_lex_repl here, so that we can check later
1333 whether this is an evalled subst; that means we rely on the
1334 logic to ensure sublex_done() is called again only via the
1335 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1338 PL_lex_state = LEX_INTERPCONCAT;
1339 PL_lex_repl = Nullsv;
1345 PL_bufend = SvPVX(PL_linestr);
1346 PL_bufend += SvCUR(PL_linestr);
1347 PL_expect = XOPERATOR;
1348 PL_sublex_info.sub_inwhat = 0;
1356 Extracts a pattern, double-quoted string, or transliteration. This
1359 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1360 processing a pattern (PL_lex_inpat is true), a transliteration
1361 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1363 Returns a pointer to the character scanned up to. Iff this is
1364 advanced from the start pointer supplied (ie if anything was
1365 successfully parsed), will leave an OP for the substring scanned
1366 in yylval. Caller must intuit reason for not parsing further
1367 by looking at the next characters herself.
1371 double-quoted style: \r and \n
1372 regexp special ones: \D \s
1374 backrefs: \1 (deprecated in substitution replacements)
1375 case and quoting: \U \Q \E
1376 stops on @ and $, but not for $ as tail anchor
1378 In transliterations:
1379 characters are VERY literal, except for - not at the start or end
1380 of the string, which indicates a range. scan_const expands the
1381 range to the full set of intermediate characters.
1383 In double-quoted strings:
1385 double-quoted style: \r and \n
1387 backrefs: \1 (deprecated)
1388 case and quoting: \U \Q \E
1391 scan_const does *not* construct ops to handle interpolated strings.
1392 It stops processing as soon as it finds an embedded $ or @ variable
1393 and leaves it to the caller to work out what's going on.
1395 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1397 $ in pattern could be $foo or could be tail anchor. Assumption:
1398 it's a tail anchor if $ is the last thing in the string, or if it's
1399 followed by one of ")| \n\t"
1401 \1 (backreferences) are turned into $1
1403 The structure of the code is
1404 while (there's a character to process) {
1405 handle transliteration ranges
1406 skip regexp comments
1407 skip # initiated comments in //x patterns
1408 check for embedded @foo
1409 check for embedded scalars
1411 leave intact backslashes from leave (below)
1412 deprecate \1 in strings and sub replacements
1413 handle string-changing backslashes \l \U \Q \E, etc.
1414 switch (what was escaped) {
1415 handle - in a transliteration (becomes a literal -)
1416 handle \132 octal characters
1417 handle 0x15 hex characters
1418 handle \cV (control V)
1419 handle printf backslashes (\f, \r, \n, etc)
1421 } (end if backslash)
1422 } (end while character to read)
1427 S_scan_const(pTHX_ char *start)
1429 register char *send = PL_bufend; /* end of the constant */
1430 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1431 register char *s = start; /* start of the constant */
1432 register char *d = SvPVX(sv); /* destination for copies */
1433 bool dorange = FALSE; /* are we in a translit range? */
1434 bool didrange = FALSE; /* did we just finish a range? */
1435 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1436 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1439 UV literal_endpoint = 0;
1442 const char *leaveit = /* set of acceptably-backslashed characters */
1444 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1447 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1448 /* If we are doing a trans and we know we want UTF8 set expectation */
1449 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1450 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1454 while (s < send || dorange) {
1455 /* get transliterations out of the way (they're most literal) */
1456 if (PL_lex_inwhat == OP_TRANS) {
1457 /* expand a range A-Z to the full set of characters. AIE! */
1459 I32 i; /* current expanded character */
1460 I32 min; /* first character in range */
1461 I32 max; /* last character in range */
1464 char * const c = (char*)utf8_hop((U8*)d, -1);
1468 *c = (char)UTF_TO_NATIVE(0xff);
1469 /* mark the range as done, and continue */
1475 i = d - SvPVX_const(sv); /* remember current offset */
1476 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1477 d = SvPVX(sv) + i; /* refresh d after realloc */
1478 d -= 2; /* eat the first char and the - */
1480 min = (U8)*d; /* first char in range */
1481 max = (U8)d[1]; /* last char in range */
1485 "Invalid range \"%c-%c\" in transliteration operator",
1486 (char)min, (char)max);
1490 if (literal_endpoint == 2 &&
1491 ((isLOWER(min) && isLOWER(max)) ||
1492 (isUPPER(min) && isUPPER(max)))) {
1494 for (i = min; i <= max; i++)
1496 *d++ = NATIVE_TO_NEED(has_utf8,i);
1498 for (i = min; i <= max; i++)
1500 *d++ = NATIVE_TO_NEED(has_utf8,i);
1505 for (i = min; i <= max; i++)
1508 /* mark the range as done, and continue */
1512 literal_endpoint = 0;
1517 /* range begins (ignore - as first or last char) */
1518 else if (*s == '-' && s+1 < send && s != start) {
1520 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1523 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1533 literal_endpoint = 0;
1538 /* if we get here, we're not doing a transliteration */
1540 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1541 except for the last char, which will be done separately. */
1542 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1544 while (s+1 < send && *s != ')')
1545 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1547 else if (s[2] == '{' /* This should match regcomp.c */
1548 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1551 char *regparse = s + (s[2] == '{' ? 3 : 4);
1554 while (count && (c = *regparse)) {
1555 if (c == '\\' && regparse[1])
1563 if (*regparse != ')')
1564 regparse--; /* Leave one char for continuation. */
1565 while (s < regparse)
1566 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1570 /* likewise skip #-initiated comments in //x patterns */
1571 else if (*s == '#' && PL_lex_inpat &&
1572 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1573 while (s+1 < send && *s != '\n')
1574 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1577 /* check for embedded arrays
1578 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1580 else if (*s == '@' && s[1]
1581 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1584 /* check for embedded scalars. only stop if we're sure it's a
1587 else if (*s == '$') {
1588 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1590 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1591 break; /* in regexp, $ might be tail anchor */
1594 /* End of else if chain - OP_TRANS rejoin rest */
1597 if (*s == '\\' && s+1 < send) {
1600 /* some backslashes we leave behind */
1601 if (*leaveit && *s && strchr(leaveit, *s)) {
1602 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1603 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1607 /* deprecate \1 in strings and substitution replacements */
1608 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1609 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1611 if (ckWARN(WARN_SYNTAX))
1612 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1617 /* string-change backslash escapes */
1618 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1623 /* if we get here, it's either a quoted -, or a digit */
1626 /* quoted - in transliterations */
1628 if (PL_lex_inwhat == OP_TRANS) {
1638 Perl_warner(aTHX_ packWARN(WARN_MISC),
1639 "Unrecognized escape \\%c passed through",
1641 /* default action is to copy the quoted character */
1642 goto default_action;
1645 /* \132 indicates an octal constant */
1646 case '0': case '1': case '2': case '3':
1647 case '4': case '5': case '6': case '7':
1651 uv = grok_oct(s, &len, &flags, NULL);
1654 goto NUM_ESCAPE_INSERT;
1656 /* \x24 indicates a hex constant */
1660 char* const e = strchr(s, '}');
1661 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1662 PERL_SCAN_DISALLOW_PREFIX;
1667 yyerror("Missing right brace on \\x{}");
1671 uv = grok_hex(s, &len, &flags, NULL);
1677 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1678 uv = grok_hex(s, &len, &flags, NULL);
1684 /* Insert oct or hex escaped character.
1685 * There will always enough room in sv since such
1686 * escapes will be longer than any UTF-8 sequence
1687 * they can end up as. */
1689 /* We need to map to chars to ASCII before doing the tests
1692 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1693 if (!has_utf8 && uv > 255) {
1694 /* Might need to recode whatever we have
1695 * accumulated so far if it contains any
1698 * (Can't we keep track of that and avoid
1699 * this rescan? --jhi)
1703 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1704 if (!NATIVE_IS_INVARIANT(*c)) {
1709 const STRLEN offset = d - SvPVX_const(sv);
1711 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1715 while (src >= (const U8 *)SvPVX_const(sv)) {
1716 if (!NATIVE_IS_INVARIANT(*src)) {
1717 const U8 ch = NATIVE_TO_ASCII(*src);
1718 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1719 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1729 if (has_utf8 || uv > 255) {
1730 d = (char*)uvchr_to_utf8((U8*)d, uv);
1732 if (PL_lex_inwhat == OP_TRANS &&
1733 PL_sublex_info.sub_op) {
1734 PL_sublex_info.sub_op->op_private |=
1735 (PL_lex_repl ? OPpTRANS_FROM_UTF
1748 /* \N{LATIN SMALL LETTER A} is a named character */
1752 char* e = strchr(s, '}');
1758 yyerror("Missing right brace on \\N{}");
1762 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1764 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1765 PERL_SCAN_DISALLOW_PREFIX;
1768 uv = grok_hex(s, &len, &flags, NULL);
1770 goto NUM_ESCAPE_INSERT;
1772 res = newSVpvn(s + 1, e - s - 1);
1773 res = new_constant( Nullch, 0, "charnames",
1774 res, Nullsv, "\\N{...}" );
1776 sv_utf8_upgrade(res);
1777 str = SvPV_const(res,len);
1778 #ifdef EBCDIC_NEVER_MIND
1779 /* charnames uses pack U and that has been
1780 * recently changed to do the below uni->native
1781 * mapping, so this would be redundant (and wrong,
1782 * the code point would be doubly converted).
1783 * But leave this in just in case the pack U change
1784 * gets revoked, but the semantics is still
1785 * desireable for charnames. --jhi */
1787 UV uv = utf8_to_uvchr((const U8*)str, 0);
1790 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1792 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1793 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1794 str = SvPV_const(res, len);
1798 if (!has_utf8 && SvUTF8(res)) {
1799 const char * const ostart = SvPVX_const(sv);
1800 SvCUR_set(sv, d - ostart);
1803 sv_utf8_upgrade(sv);
1804 /* this just broke our allocation above... */
1805 SvGROW(sv, (STRLEN)(send - start));
1806 d = SvPVX(sv) + SvCUR(sv);
1809 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1810 const char * const odest = SvPVX_const(sv);
1812 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1813 d = SvPVX(sv) + (d - odest);
1815 Copy(str, d, len, char);
1822 yyerror("Missing braces on \\N{}");
1825 /* \c is a control character */
1834 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1837 yyerror("Missing control char name in \\c");
1841 /* printf-style backslashes, formfeeds, newlines, etc */
1843 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1846 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1849 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1852 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1855 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1858 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1861 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1867 } /* end if (backslash) */
1874 /* If we started with encoded form, or already know we want it
1875 and then encode the next character */
1876 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1878 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1879 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1882 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1883 const STRLEN off = d - SvPVX_const(sv);
1884 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1886 d = (char*)uvchr_to_utf8((U8*)d, uv);
1890 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1892 } /* while loop to process each character */
1894 /* terminate the string and set up the sv */
1896 SvCUR_set(sv, d - SvPVX_const(sv));
1897 if (SvCUR(sv) >= SvLEN(sv))
1898 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
1901 if (PL_encoding && !has_utf8) {
1902 sv_recode_to_utf8(sv, PL_encoding);
1908 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1909 PL_sublex_info.sub_op->op_private |=
1910 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1914 /* shrink the sv if we allocated more than we used */
1915 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1916 SvPV_shrink_to_cur(sv);
1919 /* return the substring (via yylval) only if we parsed anything */
1920 if (s > PL_bufptr) {
1921 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1922 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1924 ( PL_lex_inwhat == OP_TRANS
1926 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1929 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1936 * Returns TRUE if there's more to the expression (e.g., a subscript),
1939 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1941 * ->[ and ->{ return TRUE
1942 * { and [ outside a pattern are always subscripts, so return TRUE
1943 * if we're outside a pattern and it's not { or [, then return FALSE
1944 * if we're in a pattern and the first char is a {
1945 * {4,5} (any digits around the comma) returns FALSE
1946 * if we're in a pattern and the first char is a [
1948 * [SOMETHING] has a funky algorithm to decide whether it's a
1949 * character class or not. It has to deal with things like
1950 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1951 * anything else returns TRUE
1954 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1957 S_intuit_more(pTHX_ register char *s)
1959 if (PL_lex_brackets)
1961 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1963 if (*s != '{' && *s != '[')
1968 /* In a pattern, so maybe we have {n,m}. */
1985 /* On the other hand, maybe we have a character class */
1988 if (*s == ']' || *s == '^')
1991 /* this is terrifying, and it works */
1992 int weight = 2; /* let's weigh the evidence */
1994 unsigned char un_char = 255, last_un_char;
1995 const char * const send = strchr(s,']');
1996 char tmpbuf[sizeof PL_tokenbuf * 4];
1998 if (!send) /* has to be an expression */
2001 Zero(seen,256,char);
2004 else if (isDIGIT(*s)) {
2006 if (isDIGIT(s[1]) && s[2] == ']')
2012 for (; s < send; s++) {
2013 last_un_char = un_char;
2014 un_char = (unsigned char)*s;
2019 weight -= seen[un_char] * 10;
2020 if (isALNUM_lazy_if(s+1,UTF)) {
2021 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2022 if ((int)strlen(tmpbuf) > 1
2023 && gv_fetchpv(tmpbuf, 0, SVt_PV))
2028 else if (*s == '$' && s[1] &&
2029 strchr("[#!%*<>()-=",s[1])) {
2030 if (/*{*/ strchr("])} =",s[2]))
2039 if (strchr("wds]",s[1]))
2041 else if (seen['\''] || seen['"'])
2043 else if (strchr("rnftbxcav",s[1]))
2045 else if (isDIGIT(s[1])) {
2047 while (s[1] && isDIGIT(s[1]))
2057 if (strchr("aA01! ",last_un_char))
2059 if (strchr("zZ79~",s[1]))
2061 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2062 weight -= 5; /* cope with negative subscript */
2065 if (!isALNUM(last_un_char)
2066 && !(last_un_char == '$' || last_un_char == '@'
2067 || last_un_char == '&')
2068 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2073 if (keyword(tmpbuf, d - tmpbuf))
2076 if (un_char == last_un_char + 1)
2078 weight -= seen[un_char];
2083 if (weight >= 0) /* probably a character class */
2093 * Does all the checking to disambiguate
2095 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2096 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2098 * First argument is the stuff after the first token, e.g. "bar".
2100 * Not a method if bar is a filehandle.
2101 * Not a method if foo is a subroutine prototyped to take a filehandle.
2102 * Not a method if it's really "Foo $bar"
2103 * Method if it's "foo $bar"
2104 * Not a method if it's really "print foo $bar"
2105 * Method if it's really "foo package::" (interpreted as package->foo)
2106 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2107 * Not a method if bar is a filehandle or package, but is quoted with
2112 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2114 char *s = start + (*start == '$');
2115 char tmpbuf[sizeof PL_tokenbuf];
2120 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2124 const char *proto = SvPVX_const(cv);
2135 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2136 /* start is the beginning of the possible filehandle/object,
2137 * and s is the end of it
2138 * tmpbuf is a copy of it
2141 if (*start == '$') {
2142 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2147 return *s == '(' ? FUNCMETH : METHOD;
2149 if (!keyword(tmpbuf, len)) {
2150 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2155 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
2156 if (indirgv && GvCVu(indirgv))
2158 /* filehandle or package name makes it a method */
2159 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2161 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2162 return 0; /* no assumptions -- "=>" quotes bearword */
2164 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
2165 newSVpvn(tmpbuf,len));
2166 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2170 return *s == '(' ? FUNCMETH : METHOD;
2178 * Return a string of Perl code to load the debugger. If PERL5DB
2179 * is set, it will return the contents of that, otherwise a
2180 * compile-time require of perl5db.pl.
2187 const char * const pdb = PerlEnv_getenv("PERL5DB");
2191 SETERRNO(0,SS_NORMAL);
2192 return "BEGIN { require 'perl5db.pl' }";
2198 /* Encoded script support. filter_add() effectively inserts a
2199 * 'pre-processing' function into the current source input stream.
2200 * Note that the filter function only applies to the current source file
2201 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2203 * The datasv parameter (which may be NULL) can be used to pass
2204 * private data to this instance of the filter. The filter function
2205 * can recover the SV using the FILTER_DATA macro and use it to
2206 * store private buffers and state information.
2208 * The supplied datasv parameter is upgraded to a PVIO type
2209 * and the IoDIRP/IoANY field is used to store the function pointer,
2210 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2211 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2212 * private use must be set using malloc'd pointers.
2216 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2221 if (!PL_rsfp_filters)
2222 PL_rsfp_filters = newAV();
2224 datasv = NEWSV(255,0);
2225 SvUPGRADE(datasv, SVt_PVIO);
2226 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2227 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2228 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2229 IoANY(datasv), SvPV_nolen(datasv)));
2230 av_unshift(PL_rsfp_filters, 1);
2231 av_store(PL_rsfp_filters, 0, datasv) ;
2236 /* Delete most recently added instance of this filter function. */
2238 Perl_filter_del(pTHX_ filter_t funcp)
2243 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2245 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2247 /* if filter is on top of stack (usual case) just pop it off */
2248 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2249 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2250 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2251 IoANY(datasv) = (void *)NULL;
2252 sv_free(av_pop(PL_rsfp_filters));
2256 /* we need to search for the correct entry and clear it */
2257 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2261 /* Invoke the idxth filter function for the current rsfp. */
2262 /* maxlen 0 = read one text line */
2264 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2269 if (!PL_rsfp_filters)
2271 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2272 /* Provide a default input filter to make life easy. */
2273 /* Note that we append to the line. This is handy. */
2274 DEBUG_P(PerlIO_printf(Perl_debug_log,
2275 "filter_read %d: from rsfp\n", idx));
2279 const int old_len = SvCUR(buf_sv);
2281 /* ensure buf_sv is large enough */
2282 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2283 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2284 if (PerlIO_error(PL_rsfp))
2285 return -1; /* error */
2287 return 0 ; /* end of file */
2289 SvCUR_set(buf_sv, old_len + len) ;
2292 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2293 if (PerlIO_error(PL_rsfp))
2294 return -1; /* error */
2296 return 0 ; /* end of file */
2299 return SvCUR(buf_sv);
2301 /* Skip this filter slot if filter has been deleted */
2302 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2303 DEBUG_P(PerlIO_printf(Perl_debug_log,
2304 "filter_read %d: skipped (filter deleted)\n",
2306 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2308 /* Get function pointer hidden within datasv */
2309 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2310 DEBUG_P(PerlIO_printf(Perl_debug_log,
2311 "filter_read %d: via function %p (%s)\n",
2312 idx, datasv, SvPV_nolen_const(datasv)));
2313 /* Call function. The function is expected to */
2314 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2315 /* Return: <0:error, =0:eof, >0:not eof */
2316 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2320 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2322 #ifdef PERL_CR_FILTER
2323 if (!PL_rsfp_filters) {
2324 filter_add(S_cr_textfilter,NULL);
2327 if (PL_rsfp_filters) {
2329 SvCUR_set(sv, 0); /* start with empty line */
2330 if (FILTER_READ(0, sv, 0) > 0)
2331 return ( SvPVX(sv) ) ;
2336 return (sv_gets(sv, fp, append));
2340 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2344 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2348 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2349 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
2351 return GvHV(gv); /* Foo:: */
2354 /* use constant CLASS => 'MyClass' */
2355 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
2357 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2358 pkgname = SvPV_nolen_const(sv);
2362 return gv_stashpv(pkgname, FALSE);
2366 S_tokenize_use(pTHX_ int is_use, char *s) {
2367 if (PL_expect != XSTATE)
2368 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2369 is_use ? "use" : "no"));
2371 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2372 s = force_version(s, TRUE);
2373 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2374 PL_nextval[PL_nexttoke].opval = Nullop;
2377 else if (*s == 'v') {
2378 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2379 s = force_version(s, FALSE);
2383 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2384 s = force_version(s, FALSE);
2386 yylval.ival = is_use;
2390 static const char* const exp_name[] =
2391 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2392 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2399 Works out what to call the token just pulled out of the input
2400 stream. The yacc parser takes care of taking the ops we return and
2401 stitching them into a tree.
2407 if read an identifier
2408 if we're in a my declaration
2409 croak if they tried to say my($foo::bar)
2410 build the ops for a my() declaration
2411 if it's an access to a my() variable
2412 are we in a sort block?
2413 croak if my($a); $a <=> $b
2414 build ops for access to a my() variable
2415 if in a dq string, and they've said @foo and we can't find @foo
2417 build ops for a bareword
2418 if we already built the token before, use it.
2423 #pragma segment Perl_yylex
2428 register char *s = PL_bufptr;
2434 SV* tmp = newSVpvn("", 0);
2435 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2436 (IV)CopLINE(PL_curcop),
2437 lex_state_names[PL_lex_state],
2438 exp_name[PL_expect],
2439 pv_display(tmp, s, strlen(s), 0, 60));
2442 /* check if there's an identifier for us to look at */
2443 if (PL_pending_ident)
2444 return REPORT(S_pending_ident(aTHX));
2446 /* no identifier pending identification */
2448 switch (PL_lex_state) {
2450 case LEX_NORMAL: /* Some compilers will produce faster */
2451 case LEX_INTERPNORMAL: /* code if we comment these out. */
2455 /* when we've already built the next token, just pull it out of the queue */
2458 yylval = PL_nextval[PL_nexttoke];
2460 PL_lex_state = PL_lex_defer;
2461 PL_expect = PL_lex_expect;
2462 PL_lex_defer = LEX_NORMAL;
2464 return REPORT(PL_nexttype[PL_nexttoke]);
2466 /* interpolated case modifiers like \L \U, including \Q and \E.
2467 when we get here, PL_bufptr is at the \
2469 case LEX_INTERPCASEMOD:
2471 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2472 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2474 /* handle \E or end of string */
2475 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2477 if (PL_lex_casemods) {
2478 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2479 PL_lex_casestack[PL_lex_casemods] = '\0';
2481 if (PL_bufptr != PL_bufend
2482 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2484 PL_lex_state = LEX_INTERPCONCAT;
2488 if (PL_bufptr != PL_bufend)
2490 PL_lex_state = LEX_INTERPCONCAT;
2494 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2495 "### Saw case modifier\n"); });
2497 if (s[1] == '\\' && s[2] == 'E') {
2499 PL_lex_state = LEX_INTERPCONCAT;
2504 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2505 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2506 if ((*s == 'L' || *s == 'U') &&
2507 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2508 PL_lex_casestack[--PL_lex_casemods] = '\0';
2511 if (PL_lex_casemods > 10)
2512 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2513 PL_lex_casestack[PL_lex_casemods++] = *s;
2514 PL_lex_casestack[PL_lex_casemods] = '\0';
2515 PL_lex_state = LEX_INTERPCONCAT;
2516 PL_nextval[PL_nexttoke].ival = 0;
2519 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2521 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2523 PL_nextval[PL_nexttoke].ival = OP_LC;
2525 PL_nextval[PL_nexttoke].ival = OP_UC;
2527 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2529 Perl_croak(aTHX_ "panic: yylex");
2533 if (PL_lex_starts) {
2536 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2537 if (PL_lex_casemods == 1 && PL_lex_inpat)
2546 case LEX_INTERPPUSH:
2547 return REPORT(sublex_push());
2549 case LEX_INTERPSTART:
2550 if (PL_bufptr == PL_bufend)
2551 return REPORT(sublex_done());
2552 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2553 "### Interpolated variable\n"); });
2555 PL_lex_dojoin = (*PL_bufptr == '@');
2556 PL_lex_state = LEX_INTERPNORMAL;
2557 if (PL_lex_dojoin) {
2558 PL_nextval[PL_nexttoke].ival = 0;
2560 force_ident("\"", '$');
2561 PL_nextval[PL_nexttoke].ival = 0;
2563 PL_nextval[PL_nexttoke].ival = 0;
2565 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2568 if (PL_lex_starts++) {
2570 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2571 if (!PL_lex_casemods && PL_lex_inpat)
2578 case LEX_INTERPENDMAYBE:
2579 if (intuit_more(PL_bufptr)) {
2580 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2586 if (PL_lex_dojoin) {
2587 PL_lex_dojoin = FALSE;
2588 PL_lex_state = LEX_INTERPCONCAT;
2591 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2592 && SvEVALED(PL_lex_repl))
2594 if (PL_bufptr != PL_bufend)
2595 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2596 PL_lex_repl = Nullsv;
2599 case LEX_INTERPCONCAT:
2601 if (PL_lex_brackets)
2602 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2604 if (PL_bufptr == PL_bufend)
2605 return REPORT(sublex_done());
2607 if (SvIVX(PL_linestr) == '\'') {
2608 SV *sv = newSVsv(PL_linestr);
2611 else if ( PL_hints & HINT_NEW_RE )
2612 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2613 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2617 s = scan_const(PL_bufptr);
2619 PL_lex_state = LEX_INTERPCASEMOD;
2621 PL_lex_state = LEX_INTERPSTART;
2624 if (s != PL_bufptr) {
2625 PL_nextval[PL_nexttoke] = yylval;
2628 if (PL_lex_starts++) {
2629 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2630 if (!PL_lex_casemods && PL_lex_inpat)
2643 PL_lex_state = LEX_NORMAL;
2644 s = scan_formline(PL_bufptr);
2645 if (!PL_lex_formbrack)
2651 PL_oldoldbufptr = PL_oldbufptr;
2657 if (isIDFIRST_lazy_if(s,UTF))
2659 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2662 goto fake_eof; /* emulate EOF on ^D or ^Z */
2667 if (PL_lex_brackets) {
2668 yyerror(PL_lex_formbrack
2669 ? "Format not terminated"
2670 : "Missing right curly or square bracket");
2672 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2673 "### Tokener got EOF\n");
2677 if (s++ < PL_bufend)
2678 goto retry; /* ignore stray nulls */
2681 if (!PL_in_eval && !PL_preambled) {
2682 PL_preambled = TRUE;
2683 sv_setpv(PL_linestr,incl_perldb());
2684 if (SvCUR(PL_linestr))
2685 sv_catpvn(PL_linestr,";", 1);
2687 while(AvFILLp(PL_preambleav) >= 0) {
2688 SV *tmpsv = av_shift(PL_preambleav);
2689 sv_catsv(PL_linestr, tmpsv);
2690 sv_catpvn(PL_linestr, ";", 1);
2693 sv_free((SV*)PL_preambleav);
2694 PL_preambleav = NULL;
2696 if (PL_minus_n || PL_minus_p) {
2697 sv_catpv(PL_linestr, "LINE: while (<>) {");
2699 sv_catpv(PL_linestr,"chomp;");
2702 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2703 || *PL_splitstr == '"')
2704 && strchr(PL_splitstr + 1, *PL_splitstr))
2705 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2707 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2708 bytes can be used as quoting characters. :-) */
2709 /* The count here deliberately includes the NUL
2710 that terminates the C string constant. This
2711 embeds the opening NUL into the string. */
2712 const char *splits = PL_splitstr;
2713 sv_catpvn(PL_linestr, "our @F=split(q", 15);
2716 if (*splits == '\\')
2717 sv_catpvn(PL_linestr, splits, 1);
2718 sv_catpvn(PL_linestr, splits, 1);
2719 } while (*splits++);
2720 /* This loop will embed the trailing NUL of
2721 PL_linestr as the last thing it does before
2723 sv_catpvn(PL_linestr, ");", 2);
2727 sv_catpv(PL_linestr,"our @F=split(' ');");
2731 sv_catpv(PL_linestr,"use feature ':5.10';");
2732 sv_catpvn(PL_linestr, "\n", 1);
2733 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2734 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2735 PL_last_lop = PL_last_uni = Nullch;
2736 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2737 SV * const sv = NEWSV(85,0);
2739 sv_upgrade(sv, SVt_PVMG);
2740 sv_setsv(sv,PL_linestr);
2743 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2748 bof = PL_rsfp ? TRUE : FALSE;
2749 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2752 if (PL_preprocess && !PL_in_eval)
2753 (void)PerlProc_pclose(PL_rsfp);
2754 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2755 PerlIO_clearerr(PL_rsfp);
2757 (void)PerlIO_close(PL_rsfp);
2759 PL_doextract = FALSE;
2761 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2762 sv_setpv(PL_linestr,PL_minus_p
2763 ? ";}continue{print;}" : ";}");
2764 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2765 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2766 PL_last_lop = PL_last_uni = Nullch;
2767 PL_minus_n = PL_minus_p = 0;
2770 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2771 PL_last_lop = PL_last_uni = Nullch;
2772 sv_setpvn(PL_linestr,"",0);
2773 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2775 /* If it looks like the start of a BOM or raw UTF-16,
2776 * check if it in fact is. */
2782 #ifdef PERLIO_IS_STDIO
2783 # ifdef __GNU_LIBRARY__
2784 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2785 # define FTELL_FOR_PIPE_IS_BROKEN
2789 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2790 # define FTELL_FOR_PIPE_IS_BROKEN
2795 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2796 /* This loses the possibility to detect the bof
2797 * situation on perl -P when the libc5 is being used.
2798 * Workaround? Maybe attach some extra state to PL_rsfp?
2801 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2803 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2806 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2807 s = swallow_bom((U8*)s);
2811 /* Incest with pod. */
2812 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2813 sv_setpvn(PL_linestr, "", 0);
2814 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2815 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2816 PL_last_lop = PL_last_uni = Nullch;
2817 PL_doextract = FALSE;
2821 } while (PL_doextract);
2822 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2823 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2824 SV * const sv = NEWSV(85,0);
2826 sv_upgrade(sv, SVt_PVMG);
2827 sv_setsv(sv,PL_linestr);
2830 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2832 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2833 PL_last_lop = PL_last_uni = Nullch;
2834 if (CopLINE(PL_curcop) == 1) {
2835 while (s < PL_bufend && isSPACE(*s))
2837 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2841 if (*s == '#' && *(s+1) == '!')
2843 #ifdef ALTERNATE_SHEBANG
2845 static char const as[] = ALTERNATE_SHEBANG;
2846 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2847 d = s + (sizeof(as) - 1);
2849 #endif /* ALTERNATE_SHEBANG */
2858 while (*d && !isSPACE(*d))
2862 #ifdef ARG_ZERO_IS_SCRIPT
2863 if (ipathend > ipath) {
2865 * HP-UX (at least) sets argv[0] to the script name,
2866 * which makes $^X incorrect. And Digital UNIX and Linux,
2867 * at least, set argv[0] to the basename of the Perl
2868 * interpreter. So, having found "#!", we'll set it right.
2871 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
2872 assert(SvPOK(x) || SvGMAGICAL(x));
2873 if (sv_eq(x, CopFILESV(PL_curcop))) {
2874 sv_setpvn(x, ipath, ipathend - ipath);
2880 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
2881 const char * const lstart = SvPV_const(x,llen);
2883 bstart += blen - llen;
2884 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2885 sv_setpvn(x, ipath, ipathend - ipath);
2890 TAINT_NOT; /* $^X is always tainted, but that's OK */
2892 #endif /* ARG_ZERO_IS_SCRIPT */
2897 d = instr(s,"perl -");
2899 d = instr(s,"perl");
2901 /* avoid getting into infinite loops when shebang
2902 * line contains "Perl" rather than "perl" */
2904 for (d = ipathend-4; d >= ipath; --d) {
2905 if ((*d == 'p' || *d == 'P')
2906 && !ibcmp(d, "perl", 4))
2916 #ifdef ALTERNATE_SHEBANG
2918 * If the ALTERNATE_SHEBANG on this system starts with a
2919 * character that can be part of a Perl expression, then if
2920 * we see it but not "perl", we're probably looking at the
2921 * start of Perl code, not a request to hand off to some
2922 * other interpreter. Similarly, if "perl" is there, but
2923 * not in the first 'word' of the line, we assume the line
2924 * contains the start of the Perl program.
2926 if (d && *s != '#') {
2927 const char *c = ipath;
2928 while (*c && !strchr("; \t\r\n\f\v#", *c))
2931 d = Nullch; /* "perl" not in first word; ignore */
2933 *s = '#'; /* Don't try to parse shebang line */
2935 #endif /* ALTERNATE_SHEBANG */
2936 #ifndef MACOS_TRADITIONAL
2941 !instr(s,"indir") &&
2942 instr(PL_origargv[0],"perl"))
2949 while (s < PL_bufend && isSPACE(*s))
2951 if (s < PL_bufend) {
2952 Newxz(newargv,PL_origargc+3,char*);
2954 while (s < PL_bufend && !isSPACE(*s))
2957 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2960 newargv = PL_origargv;
2963 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2965 Perl_croak(aTHX_ "Can't exec %s", ipath);
2969 const U32 oldpdb = PL_perldb;
2970 const bool oldn = PL_minus_n;
2971 const bool oldp = PL_minus_p;
2973 while (*d && !isSPACE(*d)) d++;
2974 while (SPACE_OR_TAB(*d)) d++;
2977 const bool switches_done = PL_doswitches;
2979 if (*d == 'M' || *d == 'm' || *d == 'C') {
2980 const char * const m = d;
2981 while (*d && !isSPACE(*d)) d++;
2982 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2985 d = moreswitches(d);
2987 if (PL_doswitches && !switches_done) {
2988 int argc = PL_origargc;
2989 char **argv = PL_origargv;
2992 } while (argc && argv[0][0] == '-' && argv[0][1]);
2993 init_argv_symbols(argc,argv);
2995 if ((PERLDB_LINE && !oldpdb) ||
2996 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2997 /* if we have already added "LINE: while (<>) {",
2998 we must not do it again */
3000 sv_setpvn(PL_linestr, "", 0);
3001 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3002 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3003 PL_last_lop = PL_last_uni = Nullch;
3004 PL_preambled = FALSE;
3006 (void)gv_fetchfile(PL_origfilename);
3009 if (PL_doswitches && !switches_done) {
3010 int argc = PL_origargc;
3011 char **argv = PL_origargv;
3014 } while (argc && argv[0][0] == '-' && argv[0][1]);
3015 init_argv_symbols(argc,argv);
3021 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3023 PL_lex_state = LEX_FORMLINE;
3028 #ifdef PERL_STRICT_CR
3029 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3031 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3033 case ' ': case '\t': case '\f': case 013:
3034 #ifdef MACOS_TRADITIONAL
3041 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3042 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3043 /* handle eval qq[#line 1 "foo"\n ...] */
3044 CopLINE_dec(PL_curcop);
3048 while (s < d && *s != '\n')
3052 else if (s > d) /* Found by Ilya: feed random input to Perl. */
3053 Perl_croak(aTHX_ "panic: input overflow");
3055 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3057 PL_lex_state = LEX_FORMLINE;
3067 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3075 while (s < PL_bufend && SPACE_OR_TAB(*s))
3078 if (strnEQ(s,"=>",2)) {
3079 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3080 DEBUG_T( { S_printbuf(aTHX_
3081 "### Saw unary minus before =>, forcing word %s\n", s);
3083 OPERATOR('-'); /* unary minus */
3085 PL_last_uni = PL_oldbufptr;
3087 case 'r': ftst = OP_FTEREAD; break;
3088 case 'w': ftst = OP_FTEWRITE; break;
3089 case 'x': ftst = OP_FTEEXEC; break;
3090 case 'o': ftst = OP_FTEOWNED; break;
3091 case 'R': ftst = OP_FTRREAD; break;
3092 case 'W': ftst = OP_FTRWRITE; break;
3093 case 'X': ftst = OP_FTREXEC; break;
3094 case 'O': ftst = OP_FTROWNED; break;
3095 case 'e': ftst = OP_FTIS; break;
3096 case 'z': ftst = OP_FTZERO; break;
3097 case 's': ftst = OP_FTSIZE; break;
3098 case 'f': ftst = OP_FTFILE; break;
3099 case 'd': ftst = OP_FTDIR; break;
3100 case 'l': ftst = OP_FTLINK; break;
3101 case 'p': ftst = OP_FTPIPE; break;
3102 case 'S': ftst = OP_FTSOCK; break;
3103 case 'u': ftst = OP_FTSUID; break;
3104 case 'g': ftst = OP_FTSGID; break;
3105 case 'k': ftst = OP_FTSVTX; break;
3106 case 'b': ftst = OP_FTBLK; break;
3107 case 'c': ftst = OP_FTCHR; break;
3108 case 't': ftst = OP_FTTTY; break;
3109 case 'T': ftst = OP_FTTEXT; break;
3110 case 'B': ftst = OP_FTBINARY; break;
3111 case 'M': case 'A': case 'C':
3112 gv_fetchpv("\024",GV_ADD, SVt_PV);
3114 case 'M': ftst = OP_FTMTIME; break;
3115 case 'A': ftst = OP_FTATIME; break;
3116 case 'C': ftst = OP_FTCTIME; break;
3124 PL_last_lop_op = (OPCODE)ftst;
3125 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3126 "### Saw file test %c\n", (int)tmp);
3131 /* Assume it was a minus followed by a one-letter named
3132 * subroutine call (or a -bareword), then. */
3133 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3134 "### '-%c' looked like a file test but was not\n",
3141 const char tmp = *s++;
3144 if (PL_expect == XOPERATOR)
3149 else if (*s == '>') {
3152 if (isIDFIRST_lazy_if(s,UTF)) {
3153 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3161 if (PL_expect == XOPERATOR)
3164 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3166 OPERATOR('-'); /* unary minus */
3172 const char tmp = *s++;
3175 if (PL_expect == XOPERATOR)
3180 if (PL_expect == XOPERATOR)
3183 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3190 if (PL_expect != XOPERATOR) {
3191 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3192 PL_expect = XOPERATOR;
3193 force_ident(PL_tokenbuf, '*');
3206 if (PL_expect == XOPERATOR) {
3210 PL_tokenbuf[0] = '%';
3211 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3212 if (!PL_tokenbuf[1]) {
3215 PL_pending_ident = '%';
3226 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3227 && FEATURE_IS_ENABLED("~~", 2))
3234 const char tmp = *s++;
3240 goto just_a_word_zero_gv;
3243 switch (PL_expect) {
3246 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3248 PL_bufptr = s; /* update in case we back off */
3254 PL_expect = XTERMBLOCK;
3258 while (isIDFIRST_lazy_if(s,UTF)) {
3260 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3261 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3262 if (tmp < 0) tmp = -tmp;
3278 d = scan_str(d,TRUE,TRUE);
3280 /* MUST advance bufptr here to avoid bogus
3281 "at end of line" context messages from yyerror().
3283 PL_bufptr = s + len;
3284 yyerror("Unterminated attribute parameter in attribute list");
3287 return REPORT(0); /* EOF indicator */
3291 SV *sv = newSVpvn(s, len);
3292 sv_catsv(sv, PL_lex_stuff);
3293 attrs = append_elem(OP_LIST, attrs,
3294 newSVOP(OP_CONST, 0, sv));
3295 SvREFCNT_dec(PL_lex_stuff);
3296 PL_lex_stuff = Nullsv;
3299 if (len == 6 && strnEQ(s, "unique", len)) {
3300 if (PL_in_my == KEY_our)
3302 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3304 ; /* skip to avoid loading attributes.pm */
3307 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3310 /* NOTE: any CV attrs applied here need to be part of
3311 the CVf_BUILTIN_ATTRS define in cv.h! */
3312 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3313 CvLVALUE_on(PL_compcv);
3314 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3315 CvLOCKED_on(PL_compcv);
3316 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3317 CvMETHOD_on(PL_compcv);
3318 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3319 CvASSERTION_on(PL_compcv);
3320 /* After we've set the flags, it could be argued that
3321 we don't need to do the attributes.pm-based setting
3322 process, and shouldn't bother appending recognized
3323 flags. To experiment with that, uncomment the
3324 following "else". (Note that's already been
3325 uncommented. That keeps the above-applied built-in
3326 attributes from being intercepted (and possibly
3327 rejected) by a package's attribute routines, but is
3328 justified by the performance win for the common case
3329 of applying only built-in attributes.) */
3331 attrs = append_elem(OP_LIST, attrs,
3332 newSVOP(OP_CONST, 0,
3336 if (*s == ':' && s[1] != ':')
3339 break; /* require real whitespace or :'s */
3343 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3344 if (*s != ';' && *s != '}' && *s != tmp
3345 && (tmp != '=' || *s != ')')) {
3346 const char q = ((*s == '\'') ? '"' : '\'');
3347 /* If here for an expression, and parsed no attrs, back
3349 if (tmp == '=' && !attrs) {
3353 /* MUST advance bufptr here to avoid bogus "at end of line"
3354 context messages from yyerror().
3358 ? Perl_form(aTHX_ "Invalid separator character "
3359 "%c%c%c in attribute list", q, *s, q)
3360 : "Unterminated attribute list" );
3368 PL_nextval[PL_nexttoke].opval = attrs;
3376 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3377 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3385 const char tmp = *s++;
3390 const char tmp = *s++;
3398 if (PL_lex_brackets <= 0)
3399 yyerror("Unmatched right square bracket");
3402 if (PL_lex_state == LEX_INTERPNORMAL) {
3403 if (PL_lex_brackets == 0) {
3404 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3405 PL_lex_state = LEX_INTERPEND;
3412 if (PL_lex_brackets > 100) {
3413 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3415 switch (PL_expect) {
3417 if (PL_lex_formbrack) {
3421 if (PL_oldoldbufptr == PL_last_lop)
3422 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3424 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3425 OPERATOR(HASHBRACK);
3427 while (s < PL_bufend && SPACE_OR_TAB(*s))
3430 PL_tokenbuf[0] = '\0';
3431 if (d < PL_bufend && *d == '-') {
3432 PL_tokenbuf[0] = '-';
3434 while (d < PL_bufend && SPACE_OR_TAB(*d))
3437 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3438 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3440 while (d < PL_bufend && SPACE_OR_TAB(*d))
3443 const char minus = (PL_tokenbuf[0] == '-');
3444 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3452 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3457 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3462 if (PL_oldoldbufptr == PL_last_lop)
3463 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3465 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3468 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3470 /* This hack is to get the ${} in the message. */
3472 yyerror("syntax error");
3475 OPERATOR(HASHBRACK);
3477 /* This hack serves to disambiguate a pair of curlies
3478 * as being a block or an anon hash. Normally, expectation
3479 * determines that, but in cases where we're not in a
3480 * position to expect anything in particular (like inside
3481 * eval"") we have to resolve the ambiguity. This code
3482 * covers the case where the first term in the curlies is a
3483 * quoted string. Most other cases need to be explicitly
3484 * disambiguated by prepending a "+" before the opening
3485 * curly in order to force resolution as an anon hash.
3487 * XXX should probably propagate the outer expectation
3488 * into eval"" to rely less on this hack, but that could
3489 * potentially break current behavior of eval"".
3493 if (*s == '\'' || *s == '"' || *s == '`') {
3494 /* common case: get past first string, handling escapes */
3495 for (t++; t < PL_bufend && *t != *s;)
3496 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3500 else if (*s == 'q') {
3503 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3506 /* skip q//-like construct */
3508 char open, close, term;
3511 while (t < PL_bufend && isSPACE(*t))
3513 /* check for q => */
3514 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3515 OPERATOR(HASHBRACK);
3519 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3523 for (t++; t < PL_bufend; t++) {
3524 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3526 else if (*t == open)
3530 for (t++; t < PL_bufend; t++) {
3531 if (*t == '\\' && t+1 < PL_bufend)
3533 else if (*t == close && --brackets <= 0)
3535 else if (*t == open)
3542 /* skip plain q word */
3543 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3546 else if (isALNUM_lazy_if(t,UTF)) {
3548 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3551 while (t < PL_bufend && isSPACE(*t))
3553 /* if comma follows first term, call it an anon hash */
3554 /* XXX it could be a comma expression with loop modifiers */
3555 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3556 || (*t == '=' && t[1] == '>')))
3557 OPERATOR(HASHBRACK);
3558 if (PL_expect == XREF)
3561 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3567 yylval.ival = CopLINE(PL_curcop);
3568 if (isSPACE(*s) || *s == '#')
3569 PL_copline = NOLINE; /* invalidate current command line number */
3574 if (PL_lex_brackets <= 0)
3575 yyerror("Unmatched right curly bracket");
3577 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3578 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3579 PL_lex_formbrack = 0;
3580 if (PL_lex_state == LEX_INTERPNORMAL) {
3581 if (PL_lex_brackets == 0) {
3582 if (PL_expect & XFAKEBRACK) {
3583 PL_expect &= XENUMMASK;
3584 PL_lex_state = LEX_INTERPEND;
3586 return yylex(); /* ignore fake brackets */
3588 if (*s == '-' && s[1] == '>')
3589 PL_lex_state = LEX_INTERPENDMAYBE;
3590 else if (*s != '[' && *s != '{')
3591 PL_lex_state = LEX_INTERPEND;
3594 if (PL_expect & XFAKEBRACK) {
3595 PL_expect &= XENUMMASK;
3597 return yylex(); /* ignore fake brackets */
3606 if (PL_expect == XOPERATOR) {
3607 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3608 && isIDFIRST_lazy_if(s,UTF))
3610 CopLINE_dec(PL_curcop);
3611 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3612 CopLINE_inc(PL_curcop);
3617 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3619 PL_expect = XOPERATOR;
3620 force_ident(PL_tokenbuf, '&');
3624 yylval.ival = (OPpENTERSUB_AMPER<<8);
3636 const char tmp = *s++;
3643 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3644 && strchr("+-*/%.^&|<",tmp))
3645 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3646 "Reversed %c= operator",(int)tmp);
3648 if (PL_expect == XSTATE && isALPHA(tmp) &&
3649 (s == PL_linestart+1 || s[-2] == '\n') )
3651 if (PL_in_eval && !PL_rsfp) {
3656 if (strnEQ(s,"=cut",4)) {
3670 PL_doextract = TRUE;
3674 if (PL_lex_brackets < PL_lex_formbrack) {
3676 #ifdef PERL_STRICT_CR
3677 for (t = s; SPACE_OR_TAB(*t); t++) ;
3679 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3681 if (*t == '\n' || *t == '#') {
3692 const char tmp = *s++;
3694 /* was this !=~ where !~ was meant?
3695 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3697 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3698 const char *t = s+1;
3700 while (t < PL_bufend && isSPACE(*t))
3703 if (*t == '/' || *t == '?' ||
3704 ((*t == 'm' || *t == 's' || *t == 'y')
3705 && !isALNUM(t[1])) ||
3706 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3707 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3708 "!=~ should be !~");
3718 if (PL_expect != XOPERATOR) {
3719 if (s[1] != '<' && !strchr(s,'>'))
3722 s = scan_heredoc(s);
3724 s = scan_inputsymbol(s);
3725 TERM(sublex_start());
3731 SHop(OP_LEFT_SHIFT);
3745 const char tmp = *s++;
3747 SHop(OP_RIGHT_SHIFT);
3757 if (PL_expect == XOPERATOR) {
3758 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3761 return REPORT(','); /* grandfather non-comma-format format */
3765 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3766 PL_tokenbuf[0] = '@';
3767 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3768 sizeof PL_tokenbuf - 1, FALSE);
3769 if (PL_expect == XOPERATOR)
3770 no_op("Array length", s);
3771 if (!PL_tokenbuf[1])
3773 PL_expect = XOPERATOR;
3774 PL_pending_ident = '#';
3778 PL_tokenbuf[0] = '$';
3779 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3780 sizeof PL_tokenbuf - 1, FALSE);
3781 if (PL_expect == XOPERATOR)
3783 if (!PL_tokenbuf[1]) {
3785 yyerror("Final $ should be \\$ or $name");
3789 /* This kludge not intended to be bulletproof. */
3790 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3791 yylval.opval = newSVOP(OP_CONST, 0,
3792 newSViv(PL_compiling.cop_arybase));
3793 yylval.opval->op_private = OPpCONST_ARYBASE;
3799 const char tmp = *s;
3800 if (PL_lex_state == LEX_NORMAL)
3803 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3804 && intuit_more(s)) {
3806 PL_tokenbuf[0] = '@';
3807 if (ckWARN(WARN_SYNTAX)) {
3810 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3813 PL_bufptr = skipspace(PL_bufptr);
3814 while (t < PL_bufend && *t != ']')
3816 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3817 "Multidimensional syntax %.*s not supported",
3818 (t - PL_bufptr) + 1, PL_bufptr);
3822 else if (*s == '{') {
3824 PL_tokenbuf[0] = '%';
3825 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3826 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3828 char tmpbuf[sizeof PL_tokenbuf];
3829 for (t++; isSPACE(*t); t++) ;
3830 if (isIDFIRST_lazy_if(t,UTF)) {
3832 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3834 for (; isSPACE(*t); t++) ;
3835 if (*t == ';' && get_cv(tmpbuf, FALSE))
3836 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3837 "You need to quote \"%s\"",
3844 PL_expect = XOPERATOR;
3845 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3846 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3847 if (!islop || PL_last_lop_op == OP_GREPSTART)
3848 PL_expect = XOPERATOR;
3849 else if (strchr("$@\"'`q", *s))
3850 PL_expect = XTERM; /* e.g. print $fh "foo" */
3851 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3852 PL_expect = XTERM; /* e.g. print $fh &sub */
3853 else if (isIDFIRST_lazy_if(s,UTF)) {
3854 char tmpbuf[sizeof PL_tokenbuf];
3856 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3857 if ((t2 = keyword(tmpbuf, len))) {
3858 /* binary operators exclude handle interpretations */
3870 PL_expect = XTERM; /* e.g. print $fh length() */
3875 PL_expect = XTERM; /* e.g. print $fh subr() */
3878 else if (isDIGIT(*s))
3879 PL_expect = XTERM; /* e.g. print $fh 3 */
3880 else if (*s == '.' && isDIGIT(s[1]))
3881 PL_expect = XTERM; /* e.g. print $fh .3 */
3882 else if ((*s == '?' || *s == '-' || *s == '+')
3883 && !isSPACE(s[1]) && s[1] != '=')
3884 PL_expect = XTERM; /* e.g. print $fh -1 */
3885 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
3887 PL_expect = XTERM; /* e.g. print $fh /.../
3888 XXX except DORDOR operator
3890 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3892 PL_expect = XTERM; /* print $fh <<"EOF" */
3895 PL_pending_ident = '$';
3899 if (PL_expect == XOPERATOR)
3901 PL_tokenbuf[0] = '@';
3902 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3903 if (!PL_tokenbuf[1]) {
3906 if (PL_lex_state == LEX_NORMAL)
3908 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3910 PL_tokenbuf[0] = '%';
3912 /* Warn about @ where they meant $. */
3913 if (*s == '[' || *s == '{') {
3914 if (ckWARN(WARN_SYNTAX)) {
3915 const char *t = s + 1;
3916 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3918 if (*t == '}' || *t == ']') {
3920 PL_bufptr = skipspace(PL_bufptr);
3921 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3922 "Scalar value %.*s better written as $%.*s",
3923 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3928 PL_pending_ident = '@';
3931 case '/': /* may be division, defined-or, or pattern */
3932 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3936 case '?': /* may either be conditional or pattern */
3937 if(PL_expect == XOPERATOR) {
3945 /* A // operator. */
3955 /* Disable warning on "study /blah/" */
3956 if (PL_oldoldbufptr == PL_last_uni
3957 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3958 || memNE(PL_last_uni, "study", 5)
3959 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3962 s = scan_pat(s,OP_MATCH);
3963 TERM(sublex_start());
3967 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3968 #ifdef PERL_STRICT_CR
3971 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3973 && (s == PL_linestart || s[-1] == '\n') )
3975 PL_lex_formbrack = 0;
3979 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3985 yylval.ival = OPf_SPECIAL;
3991 if (PL_expect != XOPERATOR)
3996 case '0': case '1': case '2': case '3': case '4':
3997 case '5': case '6': case '7': case '8': case '9':
3998 s = scan_num(s, &yylval);
3999 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4000 if (PL_expect == XOPERATOR)
4005 s = scan_str(s,FALSE,FALSE);
4006 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4007 if (PL_expect == XOPERATOR) {
4008 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4011 return REPORT(','); /* grandfather non-comma-format format */
4017 missingterm((char*)0);
4018 yylval.ival = OP_CONST;
4019 TERM(sublex_start());
4022 s = scan_str(s,FALSE,FALSE);
4023 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4024 if (PL_expect == XOPERATOR) {
4025 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4028 return REPORT(','); /* grandfather non-comma-format format */
4034 missingterm((char*)0);
4035 yylval.ival = OP_CONST;
4036 /* FIXME. I think that this can be const if char *d is replaced by
4037 more localised variables. */
4038 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4039 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4040 yylval.ival = OP_STRINGIFY;
4044 TERM(sublex_start());
4047 s = scan_str(s,FALSE,FALSE);
4048 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4049 if (PL_expect == XOPERATOR)
4050 no_op("Backticks",s);
4052 missingterm((char*)0);
4053 yylval.ival = OP_BACKTICK;
4055 TERM(sublex_start());
4059 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4060 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4062 if (PL_expect == XOPERATOR)
4063 no_op("Backslash",s);
4067 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4068 char *start = s + 2;
4069 while (isDIGIT(*start) || *start == '_')
4071 if (*start == '.' && isDIGIT(start[1])) {
4072 s = scan_num(s, &yylval);
4075 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4076 else if (!isALPHA(*start) && (PL_expect == XTERM
4077 || PL_expect == XREF || PL_expect == XSTATE
4078 || PL_expect == XTERMORDORDOR)) {
4079 const char c = *start;
4082 gv = gv_fetchpv(s, 0, SVt_PVCV);
4085 s = scan_num(s, &yylval);
4092 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4128 I32 orig_keyword = 0;
4133 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4135 /* Some keywords can be followed by any delimiter, including ':' */
4136 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4137 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4138 (PL_tokenbuf[0] == 'q' &&
4139 strchr("qwxr", PL_tokenbuf[1])))));
4141 /* x::* is just a word, unless x is "CORE" */
4142 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4146 while (d < PL_bufend && isSPACE(*d))
4147 d++; /* no comments skipped here, or s### is misparsed */
4149 /* Is this a label? */
4150 if (!tmp && PL_expect == XSTATE
4151 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4153 yylval.pval = savepv(PL_tokenbuf);
4158 /* Check for keywords */
4159 tmp = keyword(PL_tokenbuf, len);
4161 /* Is this a word before a => operator? */
4162 if (*d == '=' && d[1] == '>') {
4165 = (OP*)newSVOP(OP_CONST, 0,
4166 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4167 yylval.opval->op_private = OPpCONST_BARE;
4171 if (tmp < 0) { /* second-class keyword? */
4172 GV *ogv = Nullgv; /* override (winner) */
4173 GV *hgv = Nullgv; /* hidden (loser) */
4174 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4176 if ((gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV)) &&
4179 if (GvIMPORTED_CV(gv))
4181 else if (! CvMETHOD(cv))
4185 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4186 (gv = *gvp) != (GV*)&PL_sv_undef &&
4187 GvCVu(gv) && GvIMPORTED_CV(gv))
4194 tmp = 0; /* overridden by import or by GLOBAL */
4197 && -tmp==KEY_lock /* XXX generalizable kludge */
4199 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
4201 tmp = 0; /* any sub overrides "weak" keyword */
4203 else { /* no override */
4205 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4206 Perl_warner(aTHX_ packWARN(WARN_MISC),
4207 "dump() better written as CORE::dump()");
4211 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4212 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4213 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4214 "Ambiguous call resolved as CORE::%s(), %s",
4215 GvENAME(hgv), "qualify as such or use &");
4222 default: /* not a keyword */
4223 /* Trade off - by using this evil construction we can pull the
4224 variable gv into the block labelled keylookup. If not, then
4225 we have to give it function scope so that the goto from the
4226 earlier ':' case doesn't bypass the initialisation. */
4228 just_a_word_zero_gv:
4235 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4238 /* Get the rest if it looks like a package qualifier */
4240 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4242 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4245 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4246 *s == '\'' ? "'" : "::");
4251 if (PL_expect == XOPERATOR) {
4252 if (PL_bufptr == PL_linestart) {
4253 CopLINE_dec(PL_curcop);
4254 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4255 CopLINE_inc(PL_curcop);
4258 no_op("Bareword",s);
4261 /* Look for a subroutine with this name in current package,
4262 unless name is "Foo::", in which case Foo is a bearword
4263 (and a package name). */
4266 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4268 if (ckWARN(WARN_BAREWORD)
4269 && ! gv_fetchpv(PL_tokenbuf, 0, SVt_PVHV))
4270 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4271 "Bareword \"%s\" refers to nonexistent package",
4274 PL_tokenbuf[len] = '\0';
4281 /* Mustn't actually add anything to a symbol table.
4282 But also don't want to "initialise" any placeholder
4283 constants that might already be there into full
4284 blown PVGVs with attached PVCV. */
4285 gv = gv_fetchpv(PL_tokenbuf, GV_NOADD_NOINIT,
4290 /* if we saw a global override before, get the right name */
4293 sv = newSVpvn("CORE::GLOBAL::",14);
4294 sv_catpv(sv,PL_tokenbuf);
4297 /* If len is 0, newSVpv does strlen(), which is correct.
4298 If len is non-zero, then it will be the true length,
4299 and so the scalar will be created correctly. */
4300 sv = newSVpv(PL_tokenbuf,len);
4303 /* Presume this is going to be a bareword of some sort. */
4306 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4307 yylval.opval->op_private = OPpCONST_BARE;
4308 /* UTF-8 package name? */
4309 if (UTF && !IN_BYTES &&
4310 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4313 /* And if "Foo::", then that's what it certainly is. */
4318 /* Do the explicit type check so that we don't need to force
4319 the initialisation of the symbol table to have a real GV.
4320 Beware - gv may not really be a PVGV, cv may not really be
4321 a PVCV, (because of the space optimisations that gv_init
4322 understands) But they're true if for this symbol there is
4323 respectively a typeglob and a subroutine.
4325 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4326 /* Real typeglob, so get the real subroutine: */
4328 /* A proxy for a subroutine in this package? */
4329 : SvOK(gv) ? (CV *) gv : NULL)
4332 /* See if it's the indirect object for a list operator. */
4334 if (PL_oldoldbufptr &&
4335 PL_oldoldbufptr < PL_bufptr &&
4336 (PL_oldoldbufptr == PL_last_lop
4337 || PL_oldoldbufptr == PL_last_uni) &&
4338 /* NO SKIPSPACE BEFORE HERE! */
4339 (PL_expect == XREF ||
4340 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4342 bool immediate_paren = *s == '(';
4344 /* (Now we can afford to cross potential line boundary.) */
4347 /* Two barewords in a row may indicate method call. */
4349 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4350 (tmp = intuit_method(s, gv, cv)))
4353 /* If not a declared subroutine, it's an indirect object. */
4354 /* (But it's an indir obj regardless for sort.) */
4355 /* Also, if "_" follows a filetest operator, it's a bareword */
4358 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4360 (PL_last_lop_op != OP_MAPSTART &&
4361 PL_last_lop_op != OP_GREPSTART))))
4362 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
4363 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
4366 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4371 PL_expect = XOPERATOR;
4374 /* Is this a word before a => operator? */
4375 if (*s == '=' && s[1] == '>' && !pkgname) {
4377 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4378 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4379 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4383 /* If followed by a paren, it's certainly a subroutine. */
4387 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
4388 if (*d == ')' && (sv = gv_const_sv(gv))) {
4393 PL_nextval[PL_nexttoke].opval = yylval.opval;
4394 PL_expect = XOPERATOR;
4400 /* If followed by var or block, call it a method (unless sub) */
4402 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4403 PL_last_lop = PL_oldbufptr;
4404 PL_last_lop_op = OP_METHOD;
4408 /* If followed by a bareword, see if it looks like indir obj. */
4411 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4412 && (tmp = intuit_method(s, gv, cv)))
4415 /* Not a method, so call it a subroutine (if defined) */
4418 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4419 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4420 "Ambiguous use of -%s resolved as -&%s()",
4421 PL_tokenbuf, PL_tokenbuf);
4422 /* Check for a constant sub */
4423 if ((sv = gv_const_sv(gv))) {
4425 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4426 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4427 yylval.opval->op_private = 0;
4431 /* Resolve to GV now. */
4432 if (SvTYPE(gv) != SVt_PVGV) {
4433 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4434 assert (SvTYPE(gv) == SVt_PVGV);
4435 /* cv must have been some sort of placeholder, so
4436 now needs replacing with a real code reference. */
4440 op_free(yylval.opval);
4441 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4442 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4443 PL_last_lop = PL_oldbufptr;
4444 PL_last_lop_op = OP_ENTERSUB;
4445 /* Is there a prototype? */
4448 const char *proto = SvPV_const((SV*)cv, len);
4451 if (*proto == '$' && proto[1] == '\0')
4453 while (*proto == ';')
4455 if (*proto == '&' && *s == '{') {
4456 sv_setpv(PL_subname, PL_curstash ?
4457 "__ANON__" : "__ANON__::__ANON__");
4461 PL_nextval[PL_nexttoke].opval = yylval.opval;
4467 /* Call it a bare word */
4469 if (PL_hints & HINT_STRICT_SUBS)
4470 yylval.opval->op_private |= OPpCONST_STRICT;
4473 if (lastchar != '-') {
4474 if (ckWARN(WARN_RESERVED)) {
4475 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4476 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4477 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4484 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4485 && ckWARN_d(WARN_AMBIGUOUS)) {
4486 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4487 "Operator or semicolon missing before %c%s",
4488 lastchar, PL_tokenbuf);
4489 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4490 "Ambiguous use of %c resolved as operator %c",
4491 lastchar, lastchar);
4497 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4498 newSVpv(CopFILE(PL_curcop),0));
4502 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4503 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4506 case KEY___PACKAGE__:
4507 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4509 ? newSVhek(HvNAME_HEK(PL_curstash))
4516 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4517 const char *pname = "main";
4518 if (PL_tokenbuf[2] == 'D')
4519 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4520 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4524 GvIOp(gv) = newIO();
4525 IoIFP(GvIOp(gv)) = PL_rsfp;
4526 #if defined(HAS_FCNTL) && defined(F_SETFD)
4528 const int fd = PerlIO_fileno(PL_rsfp);
4529 fcntl(fd,F_SETFD,fd >= 3);
4532 /* Mark this internal pseudo-handle as clean */
4533 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4535 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4536 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4537 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4539 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4540 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4541 /* if the script was opened in binmode, we need to revert
4542 * it to text mode for compatibility; but only iff it has CRs
4543 * XXX this is a questionable hack at best. */
4544 if (PL_bufend-PL_bufptr > 2
4545 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4548 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4549 loc = PerlIO_tell(PL_rsfp);
4550 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4553 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4555 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4556 #endif /* NETWARE */
4557 #ifdef PERLIO_IS_STDIO /* really? */
4558 # if defined(__BORLANDC__)
4559 /* XXX see note in do_binmode() */
4560 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4564 PerlIO_seek(PL_rsfp, loc, 0);
4568 #ifdef PERLIO_LAYERS
4571 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4572 else if (PL_encoding) {
4579 XPUSHs(PL_encoding);
4581 call_method("name", G_SCALAR);
4585 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4586 Perl_form(aTHX_ ":encoding(%"SVf")",
4604 if (PL_expect == XSTATE) {
4611 if (*s == ':' && s[1] == ':') {
4614 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4615 if (!(tmp = keyword(PL_tokenbuf, len)))
4616 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4619 else if (tmp == KEY_require || tmp == KEY_do)
4620 /* that's a way to remember we saw "CORE::" */
4633 LOP(OP_ACCEPT,XTERM);
4639 LOP(OP_ATAN2,XTERM);
4645 LOP(OP_BINMODE,XTERM);
4648 LOP(OP_BLESS,XTERM);
4657 /* When 'use switch' is in effect, continue has a dual
4658 life as a control operator. */
4660 if (!FEATURE_IS_ENABLED("switch", 6))
4663 /* We have to disambiguate the two senses of
4664 "continue". If the next token is a '{' then
4665 treat it as the start of a continue block;
4666 otherwise treat it as a control operator.
4677 (void)gv_fetchpv("ENV", GV_ADD, SVt_PVHV); /* may use HOME */
4694 if (!PL_cryptseen) {
4695 PL_cryptseen = TRUE;
4699 LOP(OP_CRYPT,XTERM);
4702 LOP(OP_CHMOD,XTERM);
4705 LOP(OP_CHOWN,XTERM);
4708 LOP(OP_CONNECT,XTERM);
4727 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4728 if (orig_keyword == KEY_do) {
4737 PL_hints |= HINT_BLOCK_SCOPE;
4747 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4748 LOP(OP_DBMOPEN,XTERM);
4754 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4761 yylval.ival = CopLINE(PL_curcop);
4775 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4776 UNIBRACK(OP_ENTEREVAL);
4794 case KEY_endhostent:
4800 case KEY_endservent:
4803 case KEY_endprotoent:
4814 yylval.ival = CopLINE(PL_curcop);
4816 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4818 if ((PL_bufend - p) >= 3 &&
4819 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4821 else if ((PL_bufend - p) >= 4 &&
4822 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4825 if (isIDFIRST_lazy_if(p,UTF)) {
4826 p = scan_ident(p, PL_bufend,
4827 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4831 Perl_croak(aTHX_ "Missing $ on loop variable");
4836 LOP(OP_FORMLINE,XTERM);
4842 LOP(OP_FCNTL,XTERM);
4848 LOP(OP_FLOCK,XTERM);
4857 LOP(OP_GREPSTART, XREF);
4860 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4875 case KEY_getpriority:
4876 LOP(OP_GETPRIORITY,XTERM);
4878 case KEY_getprotobyname:
4881 case KEY_getprotobynumber:
4882 LOP(OP_GPBYNUMBER,XTERM);
4884 case KEY_getprotoent:
4896 case KEY_getpeername:
4897 UNI(OP_GETPEERNAME);
4899 case KEY_gethostbyname:
4902 case KEY_gethostbyaddr:
4903 LOP(OP_GHBYADDR,XTERM);
4905 case KEY_gethostent:
4908 case KEY_getnetbyname:
4911 case KEY_getnetbyaddr:
4912 LOP(OP_GNBYADDR,XTERM);
4917 case KEY_getservbyname:
4918 LOP(OP_GSBYNAME,XTERM);
4920 case KEY_getservbyport:
4921 LOP(OP_GSBYPORT,XTERM);
4923 case KEY_getservent:
4926 case KEY_getsockname:
4927 UNI(OP_GETSOCKNAME);
4929 case KEY_getsockopt:
4930 LOP(OP_GSOCKOPT,XTERM);
4945 yylval.ival = CopLINE(PL_curcop);
4956 yylval.ival = CopLINE(PL_curcop);
4960 LOP(OP_INDEX,XTERM);
4966 LOP(OP_IOCTL,XTERM);
4978 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5010 LOP(OP_LISTEN,XTERM);
5019 s = scan_pat(s,OP_MATCH);
5020 TERM(sublex_start());
5023 LOP(OP_MAPSTART, XREF);
5026 LOP(OP_MKDIR,XTERM);
5029 LOP(OP_MSGCTL,XTERM);
5032 LOP(OP_MSGGET,XTERM);
5035 LOP(OP_MSGRCV,XTERM);
5038 LOP(OP_MSGSND,XTERM);
5044 if (isIDFIRST_lazy_if(s,UTF)) {
5045 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5046 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5048 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5049 if (!PL_in_my_stash) {
5052 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
5060 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5067 s = tokenize_use(0, s);
5071 if (*s == '(' || (s = skipspace(s), *s == '('))
5078 if (isIDFIRST_lazy_if(s,UTF)) {
5080 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
5081 for (t=d; *t && isSPACE(*t); t++) ;
5082 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5084 && !(t[0] == '=' && t[1] == '>')
5086 int len = (int)(d-s);
5087 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5088 "Precedence problem: open %.*s should be open(%.*s)",
5095 yylval.ival = OP_OR;
5105 LOP(OP_OPEN_DIR,XTERM);
5108 checkcomma(s,PL_tokenbuf,"filehandle");
5112 checkcomma(s,PL_tokenbuf,"filehandle");
5131 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5135 LOP(OP_PIPE_OP,XTERM);
5138 s = scan_str(s,FALSE,FALSE);
5140 missingterm((char*)0);
5141 yylval.ival = OP_CONST;
5142 TERM(sublex_start());
5148 s = scan_str(s,FALSE,FALSE);
5150 missingterm((char*)0);
5151 PL_expect = XOPERATOR;
5153 if (SvCUR(PL_lex_stuff)) {
5156 d = SvPV_force(PL_lex_stuff, len);
5159 for (; isSPACE(*d) && len; --len, ++d) ;
5162 if (!warned && ckWARN(WARN_QW)) {
5163 for (; !isSPACE(*d) && len; --len, ++d) {
5165 Perl_warner(aTHX_ packWARN(WARN_QW),
5166 "Possible attempt to separate words with commas");
5169 else if (*d == '#') {
5170 Perl_warner(aTHX_ packWARN(WARN_QW),
5171 "Possible attempt to put comments in qw() list");
5177 for (; !isSPACE(*d) && len; --len, ++d) ;
5179 sv = newSVpvn(b, d-b);
5180 if (DO_UTF8(PL_lex_stuff))
5182 words = append_elem(OP_LIST, words,
5183 newSVOP(OP_CONST, 0, tokeq(sv)));
5187 PL_nextval[PL_nexttoke].opval = words;
5192 SvREFCNT_dec(PL_lex_stuff);
5193 PL_lex_stuff = Nullsv;
5199 s = scan_str(s,FALSE,FALSE);
5201 missingterm((char*)0);
5202 yylval.ival = OP_STRINGIFY;
5203 if (SvIVX(PL_lex_stuff) == '\'')
5204 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5205 TERM(sublex_start());
5208 s = scan_pat(s,OP_QR);
5209 TERM(sublex_start());
5212 s = scan_str(s,FALSE,FALSE);
5214 missingterm((char*)0);
5215 yylval.ival = OP_BACKTICK;
5217 TERM(sublex_start());
5225 s = force_version(s, FALSE);
5227 else if (*s != 'v' || !isDIGIT(s[1])
5228 || (s = force_version(s, TRUE), *s == 'v'))
5230 *PL_tokenbuf = '\0';
5231 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5232 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5233 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5235 yyerror("<> should be quotes");
5237 if (orig_keyword == KEY_require) {
5245 PL_last_uni = PL_oldbufptr;
5246 PL_last_lop_op = OP_REQUIRE;
5248 return REPORT( (int)REQUIRE );
5254 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5258 LOP(OP_RENAME,XTERM);
5267 LOP(OP_RINDEX,XTERM);
5277 UNIDOR(OP_READLINE);
5290 LOP(OP_REVERSE,XTERM);
5293 UNIDOR(OP_READLINK);
5301 TERM(sublex_start());
5303 TOKEN(1); /* force error */
5306 checkcomma(s,PL_tokenbuf,"filehandle");
5316 LOP(OP_SELECT,XTERM);
5322 LOP(OP_SEMCTL,XTERM);
5325 LOP(OP_SEMGET,XTERM);
5328 LOP(OP_SEMOP,XTERM);
5334 LOP(OP_SETPGRP,XTERM);
5336 case KEY_setpriority:
5337 LOP(OP_SETPRIORITY,XTERM);
5339 case KEY_sethostent:
5345 case KEY_setservent:
5348 case KEY_setprotoent:
5358 LOP(OP_SEEKDIR,XTERM);
5360 case KEY_setsockopt:
5361 LOP(OP_SSOCKOPT,XTERM);
5367 LOP(OP_SHMCTL,XTERM);
5370 LOP(OP_SHMGET,XTERM);
5373 LOP(OP_SHMREAD,XTERM);
5376 LOP(OP_SHMWRITE,XTERM);
5379 LOP(OP_SHUTDOWN,XTERM);
5388 LOP(OP_SOCKET,XTERM);
5390 case KEY_socketpair:
5391 LOP(OP_SOCKPAIR,XTERM);
5394 checkcomma(s,PL_tokenbuf,"subroutine name");
5396 if (*s == ';' || *s == ')') /* probably a close */
5397 Perl_croak(aTHX_ "sort is now a reserved word");
5399 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5403 LOP(OP_SPLIT,XTERM);
5406 LOP(OP_SPRINTF,XTERM);
5409 LOP(OP_SPLICE,XTERM);
5424 LOP(OP_SUBSTR,XTERM);
5430 char tmpbuf[sizeof PL_tokenbuf];
5431 SSize_t tboffset = 0;
5432 expectation attrful;
5433 bool have_name, have_proto, bad_proto;
5434 const int key = tmp;
5438 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5439 (*s == ':' && s[1] == ':'))
5442 attrful = XATTRBLOCK;
5443 /* remember buffer pos'n for later force_word */
5444 tboffset = s - PL_oldbufptr;
5445 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5446 if (strchr(tmpbuf, ':'))
5447 sv_setpv(PL_subname, tmpbuf);
5449 sv_setsv(PL_subname,PL_curstname);
5450 sv_catpvn(PL_subname,"::",2);
5451 sv_catpvn(PL_subname,tmpbuf,len);
5458 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5459 PL_expect = XTERMBLOCK;
5460 attrful = XATTRTERM;
5461 sv_setpvn(PL_subname,"?",1);
5465 if (key == KEY_format) {
5467 PL_lex_formbrack = PL_lex_brackets + 1;
5469 (void) force_word(PL_oldbufptr + tboffset, WORD,
5474 /* Look for a prototype */
5478 s = scan_str(s,FALSE,FALSE);
5480 Perl_croak(aTHX_ "Prototype not terminated");
5481 /* strip spaces and check for bad characters */
5482 d = SvPVX(PL_lex_stuff);
5485 for (p = d; *p; ++p) {
5488 if (!strchr("$@%*;[]&\\", *p))
5493 if (bad_proto && ckWARN(WARN_SYNTAX))
5494 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5495 "Illegal character in prototype for %"SVf" : %s",
5497 SvCUR_set(PL_lex_stuff, tmp);
5505 if (*s == ':' && s[1] != ':')
5506 PL_expect = attrful;
5507 else if (*s != '{' && key == KEY_sub) {
5509 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5511 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5515 PL_nextval[PL_nexttoke].opval =
5516 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5517 PL_lex_stuff = Nullsv;
5521 sv_setpv(PL_subname,
5522 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
5525 (void) force_word(PL_oldbufptr + tboffset, WORD,
5534 LOP(OP_SYSTEM,XREF);
5537 LOP(OP_SYMLINK,XTERM);
5540 LOP(OP_SYSCALL,XTERM);
5543 LOP(OP_SYSOPEN,XTERM);
5546 LOP(OP_SYSSEEK,XTERM);
5549 LOP(OP_SYSREAD,XTERM);
5552 LOP(OP_SYSWRITE,XTERM);
5556 TERM(sublex_start());
5577 LOP(OP_TRUNCATE,XTERM);
5589 yylval.ival = CopLINE(PL_curcop);
5593 yylval.ival = CopLINE(PL_curcop);
5597 LOP(OP_UNLINK,XTERM);
5603 LOP(OP_UNPACK,XTERM);
5606 LOP(OP_UTIME,XTERM);
5612 LOP(OP_UNSHIFT,XTERM);
5615 s = tokenize_use(1, s);
5625 yylval.ival = CopLINE(PL_curcop);
5629 yylval.ival = CopLINE(PL_curcop);
5633 PL_hints |= HINT_BLOCK_SCOPE;
5640 LOP(OP_WAITPID,XTERM);
5649 ctl_l[0] = toCTRL('L');
5651 gv_fetchpv(ctl_l, GV_ADD, SVt_PV);
5654 gv_fetchpv("\f", GV_ADD, SVt_PV); /* Make sure $^L is defined */
5659 if (PL_expect == XOPERATOR)
5665 yylval.ival = OP_XOR;
5670 TERM(sublex_start());
5675 #pragma segment Main
5679 S_pending_ident(pTHX)
5682 register I32 tmp = 0;
5683 /* pit holds the identifier we read and pending_ident is reset */
5684 char pit = PL_pending_ident;
5685 PL_pending_ident = 0;
5687 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5688 "### Pending identifier '%s'\n", PL_tokenbuf); });
5690 /* if we're in a my(), we can't allow dynamics here.
5691 $foo'bar has already been turned into $foo::bar, so
5692 just check for colons.
5694 if it's a legal name, the OP is a PADANY.
5697 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5698 if (strchr(PL_tokenbuf,':'))
5699 yyerror(Perl_form(aTHX_ "No package name allowed for "
5700 "variable %s in \"our\"",
5702 tmp = allocmy(PL_tokenbuf);
5705 if (strchr(PL_tokenbuf,':'))
5706 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5708 yylval.opval = newOP(OP_PADANY, 0);
5709 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5715 build the ops for accesses to a my() variable.
5717 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5718 then used in a comparison. This catches most, but not
5719 all cases. For instance, it catches
5720 sort { my($a); $a <=> $b }
5722 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5723 (although why you'd do that is anyone's guess).
5726 if (!strchr(PL_tokenbuf,':')) {
5728 tmp = pad_findmy(PL_tokenbuf);
5729 if (tmp != NOT_IN_PAD) {
5730 /* might be an "our" variable" */
5731 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
5732 /* build ops for a bareword */
5733 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
5734 HEK * const stashname = HvNAME_HEK(stash);
5735 SV * const sym = newSVhek(stashname);
5736 sv_catpvn(sym, "::", 2);
5737 sv_catpv(sym, PL_tokenbuf+1);
5738 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5739 yylval.opval->op_private = OPpCONST_ENTERED;
5742 ? (GV_ADDMULTI | GV_ADDINEVAL)
5745 ((PL_tokenbuf[0] == '$') ? SVt_PV
5746 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5751 /* if it's a sort block and they're naming $a or $b */
5752 if (PL_last_lop_op == OP_SORT &&
5753 PL_tokenbuf[0] == '$' &&
5754 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5757 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5758 d < PL_bufend && *d != '\n';
5761 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5762 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5768 yylval.opval = newOP(OP_PADANY, 0);
5769 yylval.opval->op_targ = tmp;
5775 Whine if they've said @foo in a doublequoted string,
5776 and @foo isn't a variable we can find in the symbol
5779 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5780 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5781 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5782 && ckWARN(WARN_AMBIGUOUS))
5784 /* Downgraded from fatal to warning 20000522 mjd */
5785 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5786 "Possible unintended interpolation of %s in string",
5791 /* build ops for a bareword */
5792 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5793 yylval.opval->op_private = OPpCONST_ENTERED;
5797 ? (GV_ADDMULTI | GV_ADDINEVAL)
5798 /* If the identifier refers to a stash, don't autovivify it.
5799 * Change 24660 had the side effect of causing symbol table
5800 * hashes to always be defined, even if they were freshly
5801 * created and the only reference in the entire program was
5802 * the single statement with the defined %foo::bar:: test.
5803 * It appears that all code in the wild doing this actually
5804 * wants to know whether sub-packages have been loaded, so
5805 * by avoiding auto-vivifying symbol tables, we ensure that
5806 * defined %foo::bar:: continues to be false, and the existing
5807 * tests still give the expected answers, even though what
5808 * they're actually testing has now changed subtly.
5810 : !(*PL_tokenbuf == '%' && *(d = PL_tokenbuf + strlen(PL_tokenbuf) - 1) == ':' && d[-1] == ':'),
5811 ((PL_tokenbuf[0] == '$') ? SVt_PV
5812 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5818 * The following code was generated by perl_keyword.pl.
5822 Perl_keyword (pTHX_ const char *name, I32 len)
5826 case 1: /* 5 tokens of length 1 */
5858 case 2: /* 18 tokens of length 2 */
6004 case 3: /* 29 tokens of length 3 */
6008 if (name[1] == 'N' &&
6071 if (name[1] == 'i' &&
6093 return (FEATURE_IS_ENABLED("err", 3) ? -KEY_err : 0);
6111 if (name[1] == 'o' &&
6120 if (name[1] == 'e' &&
6129 if (name[1] == 'n' &&
6138 if (name[1] == 'o' &&
6147 if (name[1] == 'a' &&
6156 if (name[1] == 'o' &&
6218 if (name[1] == 'e' &&
6232 return (FEATURE_IS_ENABLED("say", 3) ? -KEY_say : 0);
6258 if (name[1] == 'i' &&
6267 if (name[1] == 's' &&
6276 if (name[1] == 'e' &&
6285 if (name[1] == 'o' &&
6297 case 4: /* 41 tokens of length 4 */
6301 if (name[1] == 'O' &&
6311 if (name[1] == 'N' &&
6321 if (name[1] == 'i' &&
6331 if (name[1] == 'h' &&
6341 if (name[1] == 'u' &&
6354 if (name[2] == 'c' &&
6363 if (name[2] == 's' &&
6372 if (name[2] == 'a' &&
6408 if (name[1] == 'o' &&
6421 if (name[2] == 't' &&
6430 if (name[2] == 'o' &&
6439 if (name[2] == 't' &&
6448 if (name[2] == 'e' &&
6461 if (name[1] == 'o' &&
6474 if (name[2] == 'y' &&
6483 if (name[2] == 'l' &&
6499 if (name[2] == 's' &&
6508 if (name[2] == 'n' &&
6517 if (name[2] == 'c' &&
6530 if (name[1] == 'e' &&
6540 if (name[1] == 'p' &&
6553 if (name[2] == 'c' &&
6562 if (name[2] == 'p' &&
6571 if (name[2] == 's' &&
6587 if (name[2] == 'n' &&
6657 if (name[2] == 'r' &&
6666 if (name[2] == 'r' &&
6675 if (name[2] == 'a' &&
6691 if (name[2] == 'l' &&
6753 if (name[2] == 'e' &&
6756 return (FEATURE_IS_ENABLED("switch", 6) ? KEY_when : 0);
6769 case 5: /* 38 tokens of length 5 */
6773 if (name[1] == 'E' &&
6784 if (name[1] == 'H' &&
6798 if (name[2] == 'a' &&
6808 if (name[2] == 'a' &&
6825 if (name[2] == 'e' &&
6835 if (name[2] == 'e' &&
6839 return (FEATURE_IS_ENABLED("switch", 6) ? -KEY_break : 0);
6855 if (name[3] == 'i' &&
6864 if (name[3] == 'o' &&
6900 if (name[2] == 'o' &&
6910 if (name[2] == 'y' &&
6924 if (name[1] == 'l' &&
6938 if (name[2] == 'n' &&
6948 if (name[2] == 'o' &&
6962 if (name[1] == 'i' &&
6967 return (FEATURE_IS_ENABLED("switch", 6) ? KEY_given : 0);
6976 if (name[2] == 'd' &&
6986 if (name[2] == 'c' &&
7003 if (name[2] == 'c' &&
7013 if (name[2] == 't' &&
7027 if (name[1] == 'k' &&
7038 if (name[1] == 'r' &&
7052 if (name[2] == 's' &&
7062 if (name[2] == 'd' &&
7079 if (name[2] == 'm' &&
7089 if (name[2] == 'i' &&
7099 if (name[2] == 'e' &&
7109 if (name[2] == 'l' &&
7119 if (name[2] == 'a' &&
7129 if (name[2] == 'u' &&
7143 if (name[1] == 'i' &&
7157 if (name[2] == 'a' &&
7170 if (name[3] == 'e' &&
7205 if (name[2] == 'i' &&
7222 if (name[2] == 'i' &&
7232 if (name[2] == 'i' &&
7249 case 6: /* 33 tokens of length 6 */
7253 if (name[1] == 'c' &&
7268 if (name[2] == 'l' &&
7279 if (name[2] == 'r' &&
7294 if (name[1] == 'e' &&
7309 if (name[2] == 's' &&
7314 if(ckWARN_d(WARN_SYNTAX))
7315 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7321 if (name[2] == 'i' &&
7339 if (name[2] == 'l' &&
7350 if (name[2] == 'r' &&
7365 if (name[1] == 'm' &&
7380 if (name[2] == 'n' &&
7391 if (name[2] == 's' &&
7406 if (name[1] == 's' &&
7412 if (name[4] == 't' &&
7421 if (name[4] == 'e' &&
7430 if (name[4] == 'c' &&
7439 if (name[4] == 'n' &&
7455 if (name[1] == 'r' &&
7473 if (name[3] == 'a' &&
7483 if (name[3] == 'u' &&
7497 if (name[2] == 'n' &&
7515 if (name[2] == 'a' &&
7529 if (name[3] == 'e' &&
7542 if (name[4] == 't' &&
7551 if (name[4] == 'e' &&
7573 if (name[4] == 't' &&
7582 if (name[4] == 'e' &&
7598 if (name[2] == 'c' &&
7609 if (name[2] == 'l' &&
7620 if (name[2] == 'b' &&
7631 if (name[2] == 's' &&
7654 if (name[4] == 's' &&
7663 if (name[4] == 'n' &&
7676 if (name[3] == 'a' &&
7693 if (name[1] == 'a' &&
7708 case 7: /* 29 tokens of length 7 */
7712 if (name[1] == 'E' &&
7725 if (name[1] == '_' &&
7738 if (name[1] == 'i' &&
7745 return -KEY_binmode;
7751 if (name[1] == 'o' &&
7758 return -KEY_connect;
7767 if (name[2] == 'm' &&
7773 return -KEY_dbmopen;
7784 if (name[4] == 'u' &&
7788 return (FEATURE_IS_ENABLED("switch", 6) ? KEY_default : 0);
7794 if (name[4] == 'n' &&
7815 if (name[1] == 'o' &&
7828 if (name[1] == 'e' &&
7835 if (name[5] == 'r' &&
7838 return -KEY_getpgrp;
7844 if (name[5] == 'i' &&
7847 return -KEY_getppid;
7860 if (name[1] == 'c' &&
7867 return -KEY_lcfirst;
7873 if (name[1] == 'p' &&
7880 return -KEY_opendir;
7886 if (name[1] == 'a' &&
7904 if (name[3] == 'd' &&
7909 return -KEY_readdir;
7915 if (name[3] == 'u' &&
7926 if (name[3] == 'e' &&
7931 return -KEY_reverse;
7950 if (name[3] == 'k' &&
7955 return -KEY_seekdir;
7961 if (name[3] == 'p' &&
7966 return -KEY_setpgrp;
7976 if (name[2] == 'm' &&
7982 return -KEY_shmread;
7988 if (name[2] == 'r' &&
7994 return -KEY_sprintf;
8003 if (name[3] == 'l' &&
8008 return -KEY_symlink;
8017 if (name[4] == 'a' &&
8021 return -KEY_syscall;
8027 if (name[4] == 'p' &&
8031 return -KEY_sysopen;
8037 if (name[4] == 'e' &&
8041 return -KEY_sysread;
8047 if (name[4] == 'e' &&
8051 return -KEY_sysseek;
8069 if (name[1] == 'e' &&
8076 return -KEY_telldir;
8085 if (name[2] == 'f' &&
8091 return -KEY_ucfirst;
8097 if (name[2] == 's' &&
8103 return -KEY_unshift;
8113 if (name[1] == 'a' &&
8120 return -KEY_waitpid;
8129 case 8: /* 26 tokens of length 8 */
8133 if (name[1] == 'U' &&
8141 return KEY_AUTOLOAD;
8152 if (name[3] == 'A' &&
8158 return KEY___DATA__;
8164 if (name[3] == 'I' &&
8170 return -KEY___FILE__;
8176 if (name[3] == 'I' &&
8182 return -KEY___LINE__;
8198 if (name[2] == 'o' &&
8205 return -KEY_closedir;
8211 if (name[2] == 'n' &&
8218 return -KEY_continue;
8228 if (name[1] == 'b' &&
8236 return -KEY_dbmclose;
8242 if (name[1] == 'n' &&
8248 if (name[4] == 'r' &&
8253 return -KEY_endgrent;
8259 if (name[4] == 'w' &&
8264 return -KEY_endpwent;
8277 if (name[1] == 'o' &&
8285 return -KEY_formline;
8291 if (name[1] == 'e' &&
8302 if (name[6] == 'n' &&
8305 return -KEY_getgrent;
8311 if (name[6] == 'i' &&
8314 return -KEY_getgrgid;
8320 if (name[6] == 'a' &&
8323 return -KEY_getgrnam;
8336 if (name[4] == 'o' &&
8341 return -KEY_getlogin;
8352 if (name[6] == 'n' &&
8355 return -KEY_getpwent;
8361 if (name[6] == 'a' &&
8364 return -KEY_getpwnam;
8370 if (name[6] == 'i' &&
8373 return -KEY_getpwuid;
8393 if (name[1] == 'e' &&
8400 if (name[5] == 'i' &&
8407 return -KEY_readline;
8412 return -KEY_readlink;
8423 if (name[5] == 'i' &&
8427 return -KEY_readpipe;
8448 if (name[4] == 'r' &&
8453 return -KEY_setgrent;
8459 if (name[4] == 'w' &&
8464 return -KEY_setpwent;
8480 if (name[3] == 'w' &&
8486 return -KEY_shmwrite;
8492 if (name[3] == 't' &&
8498 return -KEY_shutdown;
8508 if (name[2] == 's' &&
8515 return -KEY_syswrite;
8525 if (name[1] == 'r' &&
8533 return -KEY_truncate;
8542 case 9: /* 8 tokens of length 9 */
8546 if (name[1] == 'n' &&
8555 return -KEY_endnetent;
8561 if (name[1] == 'e' &&
8570 return -KEY_getnetent;
8576 if (name[1] == 'o' &&
8585 return -KEY_localtime;
8591 if (name[1] == 'r' &&
8600 return KEY_prototype;
8606 if (name[1] == 'u' &&
8615 return -KEY_quotemeta;
8621 if (name[1] == 'e' &&
8630 return -KEY_rewinddir;
8636 if (name[1] == 'e' &&
8645 return -KEY_setnetent;
8651 if (name[1] == 'a' &&
8660 return -KEY_wantarray;
8669 case 10: /* 9 tokens of length 10 */
8673 if (name[1] == 'n' &&
8679 if (name[4] == 'o' &&
8686 return -KEY_endhostent;
8692 if (name[4] == 'e' &&
8699 return -KEY_endservent;
8712 if (name[1] == 'e' &&
8718 if (name[4] == 'o' &&
8725 return -KEY_gethostent;
8734 if (name[5] == 'r' &&
8740 return -KEY_getservent;
8746 if (name[5] == 'c' &&
8752 return -KEY_getsockopt;
8777 if (name[4] == 'o' &&
8784 return -KEY_sethostent;
8793 if (name[5] == 'r' &&
8799 return -KEY_setservent;
8805 if (name[5] == 'c' &&
8811 return -KEY_setsockopt;
8828 if (name[2] == 'c' &&
8837 return -KEY_socketpair;
8850 case 11: /* 8 tokens of length 11 */
8854 if (name[1] == '_' &&
8865 return -KEY___PACKAGE__;
8871 if (name[1] == 'n' &&
8882 return -KEY_endprotoent;
8888 if (name[1] == 'e' &&
8897 if (name[5] == 'e' &&
8904 return -KEY_getpeername;
8913 if (name[6] == 'o' &&
8919 return -KEY_getpriority;
8925 if (name[6] == 't' &&
8931 return -KEY_getprotoent;
8945 if (name[4] == 'o' &&
8953 return -KEY_getsockname;
8966 if (name[1] == 'e' &&
8974 if (name[6] == 'o' &&
8980 return -KEY_setpriority;
8986 if (name[6] == 't' &&
8992 return -KEY_setprotoent;
9008 case 12: /* 2 tokens of length 12 */
9009 if (name[0] == 'g' &&
9021 if (name[9] == 'd' &&
9024 { /* getnetbyaddr */
9025 return -KEY_getnetbyaddr;
9031 if (name[9] == 'a' &&
9034 { /* getnetbyname */
9035 return -KEY_getnetbyname;
9047 case 13: /* 4 tokens of length 13 */
9048 if (name[0] == 'g' &&
9055 if (name[4] == 'o' &&
9064 if (name[10] == 'd' &&
9067 { /* gethostbyaddr */
9068 return -KEY_gethostbyaddr;
9074 if (name[10] == 'a' &&
9077 { /* gethostbyname */
9078 return -KEY_gethostbyname;
9091 if (name[4] == 'e' &&
9100 if (name[10] == 'a' &&
9103 { /* getservbyname */
9104 return -KEY_getservbyname;
9110 if (name[10] == 'o' &&
9113 { /* getservbyport */
9114 return -KEY_getservbyport;
9133 case 14: /* 1 tokens of length 14 */
9134 if (name[0] == 'g' &&
9148 { /* getprotobyname */
9149 return -KEY_getprotobyname;
9154 case 16: /* 1 tokens of length 16 */
9155 if (name[0] == 'g' &&
9171 { /* getprotobynumber */
9172 return -KEY_getprotobynumber;
9186 S_checkcomma(pTHX_ register char *s, const char *name, const char *what)
9190 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9191 if (ckWARN(WARN_SYNTAX)) {
9193 for (w = s+2; *w && level; w++) {
9200 for (; *w && isSPACE(*w); w++) ;
9201 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9202 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9203 "%s (...) interpreted as function",name);
9206 while (s < PL_bufend && isSPACE(*s))
9210 while (s < PL_bufend && isSPACE(*s))
9212 if (isIDFIRST_lazy_if(s,UTF)) {
9214 while (isALNUM_lazy_if(s,UTF))
9216 while (s < PL_bufend && isSPACE(*s))
9220 *s = '\0'; /* XXX If we didn't do this, we could const a lot of toke.c */
9221 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
9225 Perl_croak(aTHX_ "No comma allowed after %s", what);
9230 /* Either returns sv, or mortalizes sv and returns a new SV*.
9231 Best used as sv=new_constant(..., sv, ...).
9232 If s, pv are NULL, calls subroutine with one argument,
9233 and type is used with error messages only. */
9236 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9240 HV * const table = GvHV(PL_hintgv); /* ^H */
9244 const char *why1 = "", *why2 = "", *why3 = "";
9246 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9249 why2 = strEQ(key,"charnames")
9250 ? "(possibly a missing \"use charnames ...\")"
9252 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9253 (type ? type: "undef"), why2);
9255 /* This is convoluted and evil ("goto considered harmful")
9256 * but I do not understand the intricacies of all the different
9257 * failure modes of %^H in here. The goal here is to make
9258 * the most probable error message user-friendly. --jhi */
9263 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9264 (type ? type: "undef"), why1, why2, why3);
9266 yyerror(SvPVX_const(msg));
9270 cvp = hv_fetch(table, key, strlen(key), FALSE);
9271 if (!cvp || !SvOK(*cvp)) {
9274 why3 = "} is not defined";
9277 sv_2mortal(sv); /* Parent created it permanently */
9280 pv = sv_2mortal(newSVpvn(s, len));
9282 typesv = sv_2mortal(newSVpv(type, 0));
9284 typesv = &PL_sv_undef;
9286 PUSHSTACKi(PERLSI_OVERLOAD);
9298 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9302 /* Check the eval first */
9303 if (!PL_in_eval && SvTRUE(ERRSV)) {
9304 sv_catpv(ERRSV, "Propagated");
9305 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9307 res = SvREFCNT_inc(sv);
9311 (void)SvREFCNT_inc(res);
9320 why1 = "Call to &{$^H{";
9322 why3 = "}} did not return a defined value";
9330 /* Returns a NUL terminated string, with the length of the string written to
9334 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9336 register char *d = dest;
9337 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9340 Perl_croak(aTHX_ ident_too_long);
9341 if (isALNUM(*s)) /* UTF handled below */
9343 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
9348 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
9352 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9353 char *t = s + UTF8SKIP(s);
9354 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9356 if (d + (t - s) > e)
9357 Perl_croak(aTHX_ ident_too_long);
9358 Copy(s, d, t - s, char);
9371 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9375 char *bracket = Nullch;
9381 e = d + destlen - 3; /* two-character token, ending NUL */
9383 while (isDIGIT(*s)) {
9385 Perl_croak(aTHX_ ident_too_long);
9392 Perl_croak(aTHX_ ident_too_long);
9393 if (isALNUM(*s)) /* UTF handled below */
9395 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9400 else if (*s == ':' && s[1] == ':') {
9404 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9405 char *t = s + UTF8SKIP(s);
9406 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9408 if (d + (t - s) > e)
9409 Perl_croak(aTHX_ ident_too_long);
9410 Copy(s, d, t - s, char);
9421 if (PL_lex_state != LEX_NORMAL)
9422 PL_lex_state = LEX_INTERPENDMAYBE;
9425 if (*s == '$' && s[1] &&
9426 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9439 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9444 if (isSPACE(s[-1])) {
9446 const char ch = *s++;
9447 if (!SPACE_OR_TAB(ch)) {
9453 if (isIDFIRST_lazy_if(d,UTF)) {
9457 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
9459 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
9462 Copy(s, d, e - s, char);
9467 while ((isALNUM(*s) || *s == ':') && d < e)
9470 Perl_croak(aTHX_ ident_too_long);
9473 while (s < send && SPACE_OR_TAB(*s)) s++;
9474 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9475 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9476 const char *brack = *s == '[' ? "[...]" : "{...}";
9477 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9478 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9479 funny, dest, brack, funny, dest, brack);
9482 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9486 /* Handle extended ${^Foo} variables
9487 * 1999-02-27 mjd-perl-patch@plover.com */
9488 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9492 while (isALNUM(*s) && d < e) {
9496 Perl_croak(aTHX_ ident_too_long);
9501 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9502 PL_lex_state = LEX_INTERPEND;
9507 if (PL_lex_state == LEX_NORMAL) {
9508 if (ckWARN(WARN_AMBIGUOUS) &&
9509 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9511 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9512 "Ambiguous use of %c{%s} resolved to %c%s",
9513 funny, dest, funny, dest);
9518 s = bracket; /* let the parser handle it */
9522 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9523 PL_lex_state = LEX_INTERPEND;
9528 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9533 *pmfl |= PMf_GLOBAL;
9535 *pmfl |= PMf_CONTINUE;
9539 *pmfl |= PMf_MULTILINE;
9541 *pmfl |= PMf_SINGLELINE;
9543 *pmfl |= PMf_EXTENDED;
9547 S_scan_pat(pTHX_ char *start, I32 type)
9550 char *s = scan_str(start,FALSE,FALSE);
9553 char * const delimiter = skipspace(start);
9554 Perl_croak(aTHX_ *delimiter == '?'
9555 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9556 : "Search pattern not terminated" );
9559 pm = (PMOP*)newPMOP(type, 0);
9560 if (PL_multi_open == '?')
9561 pm->op_pmflags |= PMf_ONCE;
9563 while (*s && strchr("iomsx", *s))
9564 pmflag(&pm->op_pmflags,*s++);
9567 while (*s && strchr("iogcmsx", *s))
9568 pmflag(&pm->op_pmflags,*s++);
9570 /* issue a warning if /c is specified,but /g is not */
9571 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9572 && ckWARN(WARN_REGEXP))
9574 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9577 pm->op_pmpermflags = pm->op_pmflags;
9579 PL_lex_op = (OP*)pm;
9580 yylval.ival = OP_MATCH;
9585 S_scan_subst(pTHX_ char *start)
9593 yylval.ival = OP_NULL;
9595 s = scan_str(start,FALSE,FALSE);
9598 Perl_croak(aTHX_ "Substitution pattern not terminated");
9600 if (s[-1] == PL_multi_open)
9603 first_start = PL_multi_start;
9604 s = scan_str(s,FALSE,FALSE);
9607 SvREFCNT_dec(PL_lex_stuff);
9608 PL_lex_stuff = Nullsv;
9610 Perl_croak(aTHX_ "Substitution replacement not terminated");
9612 PL_multi_start = first_start; /* so whole substitution is taken together */
9614 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9620 else if (strchr("iogcmsx", *s))
9621 pmflag(&pm->op_pmflags,*s++);
9626 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9627 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9632 PL_sublex_info.super_bufptr = s;
9633 PL_sublex_info.super_bufend = PL_bufend;
9635 pm->op_pmflags |= PMf_EVAL;
9636 repl = newSVpvn("",0);
9638 sv_catpv(repl, es ? "eval " : "do ");
9639 sv_catpvn(repl, "{ ", 2);
9640 sv_catsv(repl, PL_lex_repl);
9641 sv_catpvn(repl, " };", 2);
9643 SvREFCNT_dec(PL_lex_repl);
9647 pm->op_pmpermflags = pm->op_pmflags;
9648 PL_lex_op = (OP*)pm;
9649 yylval.ival = OP_SUBST;
9654 S_scan_trans(pTHX_ char *start)
9663 yylval.ival = OP_NULL;
9665 s = scan_str(start,FALSE,FALSE);
9667 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9668 if (s[-1] == PL_multi_open)
9671 s = scan_str(s,FALSE,FALSE);
9674 SvREFCNT_dec(PL_lex_stuff);
9675 PL_lex_stuff = Nullsv;
9677 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9680 complement = del = squash = 0;
9684 complement = OPpTRANS_COMPLEMENT;
9687 del = OPpTRANS_DELETE;
9690 squash = OPpTRANS_SQUASH;
9699 Newx(tbl, complement&&!del?258:256, short);
9700 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9701 o->op_private &= ~OPpTRANS_ALL;
9702 o->op_private |= del|squash|complement|
9703 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9704 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9707 yylval.ival = OP_TRANS;
9712 S_scan_heredoc(pTHX_ register char *s)
9715 I32 op_type = OP_SCALAR;
9719 const char newline[] = "\n";
9720 const char *found_newline;
9724 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9728 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9731 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
9732 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9735 s = delimcpy(d, e, s, PL_bufend, term, &len);
9745 if (!isALNUM_lazy_if(s,UTF))
9746 deprecate_old("bare << to mean <<\"\"");
9747 for (; isALNUM_lazy_if(s,UTF); s++) {
9752 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9753 Perl_croak(aTHX_ "Delimiter for here document is too long");
9756 len = d - PL_tokenbuf;
9757 #ifndef PERL_STRICT_CR
9758 d = strchr(s, '\r');
9760 char * const olds = s;
9762 while (s < PL_bufend) {
9768 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9777 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9781 if ( outer || !(found_newline = ninstr(s,PL_bufend,newline,newline+1)) ) {
9782 herewas = newSVpvn(s,PL_bufend-s);
9786 herewas = newSVpvn(s,found_newline-s);
9788 s += SvCUR(herewas);
9790 tmpstr = NEWSV(87,79);
9791 sv_upgrade(tmpstr, SVt_PVIV);
9794 SvIV_set(tmpstr, -1);
9796 else if (term == '`') {
9797 op_type = OP_BACKTICK;
9798 SvIV_set(tmpstr, '\\');
9802 PL_multi_start = CopLINE(PL_curcop);
9803 PL_multi_open = PL_multi_close = '<';
9804 term = *PL_tokenbuf;
9805 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9806 char *bufptr = PL_sublex_info.super_bufptr;
9807 char *bufend = PL_sublex_info.super_bufend;
9808 char * const olds = s - SvCUR(herewas);
9809 s = strchr(bufptr, '\n');
9813 while (s < bufend &&
9814 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9816 CopLINE_inc(PL_curcop);
9819 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9820 missingterm(PL_tokenbuf);
9822 sv_setpvn(herewas,bufptr,d-bufptr+1);
9823 sv_setpvn(tmpstr,d+1,s-d);
9825 sv_catpvn(herewas,s,bufend-s);
9826 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9833 while (s < PL_bufend &&
9834 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9836 CopLINE_inc(PL_curcop);
9838 if (s >= PL_bufend) {
9839 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9840 missingterm(PL_tokenbuf);
9842 sv_setpvn(tmpstr,d+1,s-d);
9844 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9846 sv_catpvn(herewas,s,PL_bufend-s);
9847 sv_setsv(PL_linestr,herewas);
9848 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9849 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9850 PL_last_lop = PL_last_uni = Nullch;
9853 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9854 while (s >= PL_bufend) { /* multiple line string? */
9856 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9857 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9858 missingterm(PL_tokenbuf);
9860 CopLINE_inc(PL_curcop);
9861 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9862 PL_last_lop = PL_last_uni = Nullch;
9863 #ifndef PERL_STRICT_CR
9864 if (PL_bufend - PL_linestart >= 2) {
9865 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9866 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9868 PL_bufend[-2] = '\n';
9870 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9872 else if (PL_bufend[-1] == '\r')
9873 PL_bufend[-1] = '\n';
9875 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9876 PL_bufend[-1] = '\n';
9878 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9879 SV *sv = NEWSV(88,0);
9881 sv_upgrade(sv, SVt_PVMG);
9882 sv_setsv(sv,PL_linestr);
9885 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
9887 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9888 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9889 *(SvPVX(PL_linestr) + off ) = ' ';
9890 sv_catsv(PL_linestr,herewas);
9891 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9892 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9896 sv_catsv(tmpstr,PL_linestr);
9901 PL_multi_end = CopLINE(PL_curcop);
9902 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9903 SvPV_shrink_to_cur(tmpstr);
9905 SvREFCNT_dec(herewas);
9907 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9909 else if (PL_encoding)
9910 sv_recode_to_utf8(tmpstr, PL_encoding);
9912 PL_lex_stuff = tmpstr;
9913 yylval.ival = op_type;
9918 takes: current position in input buffer
9919 returns: new position in input buffer
9920 side-effects: yylval and lex_op are set.
9925 <FH> read from filehandle
9926 <pkg::FH> read from package qualified filehandle
9927 <pkg'FH> read from package qualified filehandle
9928 <$fh> read from filehandle in $fh
9934 S_scan_inputsymbol(pTHX_ char *start)
9936 register char *s = start; /* current position in buffer */
9942 d = PL_tokenbuf; /* start of temp holding space */
9943 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9944 end = strchr(s, '\n');
9947 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9949 /* die if we didn't have space for the contents of the <>,
9950 or if it didn't end, or if we see a newline
9953 if (len >= sizeof PL_tokenbuf)
9954 Perl_croak(aTHX_ "Excessively long <> operator");
9956 Perl_croak(aTHX_ "Unterminated <> operator");
9961 Remember, only scalar variables are interpreted as filehandles by
9962 this code. Anything more complex (e.g., <$fh{$num}>) will be
9963 treated as a glob() call.
9964 This code makes use of the fact that except for the $ at the front,
9965 a scalar variable and a filehandle look the same.
9967 if (*d == '$' && d[1]) d++;
9969 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9970 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9973 /* If we've tried to read what we allow filehandles to look like, and
9974 there's still text left, then it must be a glob() and not a getline.
9975 Use scan_str to pull out the stuff between the <> and treat it
9976 as nothing more than a string.
9979 if (d - PL_tokenbuf != len) {
9980 yylval.ival = OP_GLOB;
9982 s = scan_str(start,FALSE,FALSE);
9984 Perl_croak(aTHX_ "Glob not terminated");
9988 bool readline_overriden = FALSE;
9989 GV *gv_readline = Nullgv;
9991 /* we're in a filehandle read situation */
9994 /* turn <> into <ARGV> */
9996 Copy("ARGV",d,5,char);
9998 /* Check whether readline() is overriden */
9999 if (((gv_readline = gv_fetchpv("readline", 0, SVt_PVCV))
10000 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10002 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
10003 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
10004 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10005 readline_overriden = TRUE;
10007 /* if <$fh>, create the ops to turn the variable into a
10013 /* try to find it in the pad for this block, otherwise find
10014 add symbol table ops
10016 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
10017 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
10018 HV *stash = PAD_COMPNAME_OURSTASH(tmp);
10019 HEK *stashname = HvNAME_HEK(stash);
10020 SV *sym = sv_2mortal(newSVhek(stashname));
10021 sv_catpvn(sym, "::", 2);
10022 sv_catpv(sym, d+1);
10027 OP *o = newOP(OP_PADSV, 0);
10029 PL_lex_op = readline_overriden
10030 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10031 append_elem(OP_LIST, o,
10032 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10033 : (OP*)newUNOP(OP_READLINE, 0, o);
10042 ? (GV_ADDMULTI | GV_ADDINEVAL)
10045 PL_lex_op = readline_overriden
10046 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10047 append_elem(OP_LIST,
10048 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10049 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10050 : (OP*)newUNOP(OP_READLINE, 0,
10051 newUNOP(OP_RV2SV, 0,
10052 newGVOP(OP_GV, 0, gv)));
10054 if (!readline_overriden)
10055 PL_lex_op->op_flags |= OPf_SPECIAL;
10056 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10057 yylval.ival = OP_NULL;
10060 /* If it's none of the above, it must be a literal filehandle
10061 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10063 GV *gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10064 PL_lex_op = readline_overriden
10065 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10066 append_elem(OP_LIST,
10067 newGVOP(OP_GV, 0, gv),
10068 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10069 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10070 yylval.ival = OP_NULL;
10079 takes: start position in buffer
10080 keep_quoted preserve \ on the embedded delimiter(s)
10081 keep_delims preserve the delimiters around the string
10082 returns: position to continue reading from buffer
10083 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10084 updates the read buffer.
10086 This subroutine pulls a string out of the input. It is called for:
10087 q single quotes q(literal text)
10088 ' single quotes 'literal text'
10089 qq double quotes qq(interpolate $here please)
10090 " double quotes "interpolate $here please"
10091 qx backticks qx(/bin/ls -l)
10092 ` backticks `/bin/ls -l`
10093 qw quote words @EXPORT_OK = qw( func() $spam )
10094 m// regexp match m/this/
10095 s/// regexp substitute s/this/that/
10096 tr/// string transliterate tr/this/that/
10097 y/// string transliterate y/this/that/
10098 ($*@) sub prototypes sub foo ($)
10099 (stuff) sub attr parameters sub foo : attr(stuff)
10100 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10102 In most of these cases (all but <>, patterns and transliterate)
10103 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10104 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10105 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10108 It skips whitespace before the string starts, and treats the first
10109 character as the delimiter. If the delimiter is one of ([{< then
10110 the corresponding "close" character )]}> is used as the closing
10111 delimiter. It allows quoting of delimiters, and if the string has
10112 balanced delimiters ([{<>}]) it allows nesting.
10114 On success, the SV with the resulting string is put into lex_stuff or,
10115 if that is already non-NULL, into lex_repl. The second case occurs only
10116 when parsing the RHS of the special constructs s/// and tr/// (y///).
10117 For convenience, the terminating delimiter character is stuffed into
10122 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10124 SV *sv; /* scalar value: string */
10125 char *tmps; /* temp string, used for delimiter matching */
10126 register char *s = start; /* current position in the buffer */
10127 register char term; /* terminating character */
10128 register char *to; /* current position in the sv's data */
10129 I32 brackets = 1; /* bracket nesting level */
10130 bool has_utf8 = FALSE; /* is there any utf8 content? */
10131 I32 termcode; /* terminating char. code */
10132 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10133 STRLEN termlen; /* length of terminating string */
10134 char *last = NULL; /* last position for nesting bracket */
10136 /* skip space before the delimiter */
10140 /* mark where we are, in case we need to report errors */
10143 /* after skipping whitespace, the next character is the terminator */
10146 termcode = termstr[0] = term;
10150 termcode = utf8_to_uvchr((U8*)s, &termlen);
10151 Copy(s, termstr, termlen, U8);
10152 if (!UTF8_IS_INVARIANT(term))
10156 /* mark where we are */
10157 PL_multi_start = CopLINE(PL_curcop);
10158 PL_multi_open = term;
10160 /* find corresponding closing delimiter */
10161 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10162 termcode = termstr[0] = term = tmps[5];
10164 PL_multi_close = term;
10166 /* create a new SV to hold the contents. 87 is leak category, I'm
10167 assuming. 79 is the SV's initial length. What a random number. */
10169 sv_upgrade(sv, SVt_PVIV);
10170 SvIV_set(sv, termcode);
10171 (void)SvPOK_only(sv); /* validate pointer */
10173 /* move past delimiter and try to read a complete string */
10175 sv_catpvn(sv, s, termlen);
10178 if (PL_encoding && !UTF) {
10182 int offset = s - SvPVX_const(PL_linestr);
10183 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10184 &offset, (char*)termstr, termlen);
10185 const char *ns = SvPVX_const(PL_linestr) + offset;
10186 char *svlast = SvEND(sv) - 1;
10188 for (; s < ns; s++) {
10189 if (*s == '\n' && !PL_rsfp)
10190 CopLINE_inc(PL_curcop);
10193 goto read_more_line;
10195 /* handle quoted delimiters */
10196 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10198 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10200 if ((svlast-1 - t) % 2) {
10201 if (!keep_quoted) {
10202 *(svlast-1) = term;
10204 SvCUR_set(sv, SvCUR(sv) - 1);
10209 if (PL_multi_open == PL_multi_close) {
10217 for (t = w = last; t < svlast; w++, t++) {
10218 /* At here, all closes are "was quoted" one,
10219 so we don't check PL_multi_close. */
10221 if (!keep_quoted && *(t+1) == PL_multi_open)
10226 else if (*t == PL_multi_open)
10234 SvCUR_set(sv, w - SvPVX_const(sv));
10237 if (--brackets <= 0)
10242 if (!keep_delims) {
10243 SvCUR_set(sv, SvCUR(sv) - 1);
10249 /* extend sv if need be */
10250 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10251 /* set 'to' to the next character in the sv's string */
10252 to = SvPVX(sv)+SvCUR(sv);
10254 /* if open delimiter is the close delimiter read unbridle */
10255 if (PL_multi_open == PL_multi_close) {
10256 for (; s < PL_bufend; s++,to++) {
10257 /* embedded newlines increment the current line number */
10258 if (*s == '\n' && !PL_rsfp)
10259 CopLINE_inc(PL_curcop);
10260 /* handle quoted delimiters */
10261 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10262 if (!keep_quoted && s[1] == term)
10264 /* any other quotes are simply copied straight through */
10268 /* terminate when run out of buffer (the for() condition), or
10269 have found the terminator */
10270 else if (*s == term) {
10273 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10276 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10282 /* if the terminator isn't the same as the start character (e.g.,
10283 matched brackets), we have to allow more in the quoting, and
10284 be prepared for nested brackets.
10287 /* read until we run out of string, or we find the terminator */
10288 for (; s < PL_bufend; s++,to++) {
10289 /* embedded newlines increment the line count */
10290 if (*s == '\n' && !PL_rsfp)
10291 CopLINE_inc(PL_curcop);
10292 /* backslashes can escape the open or closing characters */
10293 if (*s == '\\' && s+1 < PL_bufend) {
10294 if (!keep_quoted &&
10295 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10300 /* allow nested opens and closes */
10301 else if (*s == PL_multi_close && --brackets <= 0)
10303 else if (*s == PL_multi_open)
10305 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10310 /* terminate the copied string and update the sv's end-of-string */
10312 SvCUR_set(sv, to - SvPVX_const(sv));
10315 * this next chunk reads more into the buffer if we're not done yet
10319 break; /* handle case where we are done yet :-) */
10321 #ifndef PERL_STRICT_CR
10322 if (to - SvPVX_const(sv) >= 2) {
10323 if ((to[-2] == '\r' && to[-1] == '\n') ||
10324 (to[-2] == '\n' && to[-1] == '\r'))
10328 SvCUR_set(sv, to - SvPVX_const(sv));
10330 else if (to[-1] == '\r')
10333 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10338 /* if we're out of file, or a read fails, bail and reset the current
10339 line marker so we can report where the unterminated string began
10342 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10344 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10347 /* we read a line, so increment our line counter */
10348 CopLINE_inc(PL_curcop);
10350 /* update debugger info */
10351 if (PERLDB_LINE && PL_curstash != PL_debstash) {
10352 SV *sv = NEWSV(88,0);
10354 sv_upgrade(sv, SVt_PVMG);
10355 sv_setsv(sv,PL_linestr);
10356 (void)SvIOK_on(sv);
10358 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
10361 /* having changed the buffer, we must update PL_bufend */
10362 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10363 PL_last_lop = PL_last_uni = Nullch;
10366 /* at this point, we have successfully read the delimited string */
10368 if (!PL_encoding || UTF) {
10370 sv_catpvn(sv, s, termlen);
10373 if (has_utf8 || PL_encoding)
10376 PL_multi_end = CopLINE(PL_curcop);
10378 /* if we allocated too much space, give some back */
10379 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10380 SvLEN_set(sv, SvCUR(sv) + 1);
10381 SvPV_renew(sv, SvLEN(sv));
10384 /* decide whether this is the first or second quoted string we've read
10397 takes: pointer to position in buffer
10398 returns: pointer to new position in buffer
10399 side-effects: builds ops for the constant in yylval.op
10401 Read a number in any of the formats that Perl accepts:
10403 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10404 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10407 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10409 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10412 If it reads a number without a decimal point or an exponent, it will
10413 try converting the number to an integer and see if it can do so
10414 without loss of precision.
10418 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10420 register const char *s = start; /* current position in buffer */
10421 register char *d; /* destination in temp buffer */
10422 register char *e; /* end of temp buffer */
10423 NV nv; /* number read, as a double */
10424 SV *sv = Nullsv; /* place to put the converted number */
10425 bool floatit; /* boolean: int or float? */
10426 const char *lastub = 0; /* position of last underbar */
10427 static char const number_too_long[] = "Number too long";
10429 /* We use the first character to decide what type of number this is */
10433 Perl_croak(aTHX_ "panic: scan_num");
10435 /* if it starts with a 0, it could be an octal number, a decimal in
10436 0.13 disguise, or a hexadecimal number, or a binary number. */
10440 u holds the "number so far"
10441 shift the power of 2 of the base
10442 (hex == 4, octal == 3, binary == 1)
10443 overflowed was the number more than we can hold?
10445 Shift is used when we add a digit. It also serves as an "are
10446 we in octal/hex/binary?" indicator to disallow hex characters
10447 when in octal mode.
10452 bool overflowed = FALSE;
10453 bool just_zero = TRUE; /* just plain 0 or binary number? */
10454 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10455 static const char* const bases[5] =
10456 { "", "binary", "", "octal", "hexadecimal" };
10457 static const char* const Bases[5] =
10458 { "", "Binary", "", "Octal", "Hexadecimal" };
10459 static const char* const maxima[5] =
10461 "0b11111111111111111111111111111111",
10465 const char *base, *Base, *max;
10467 /* check for hex */
10472 } else if (s[1] == 'b') {
10477 /* check for a decimal in disguise */
10478 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10480 /* so it must be octal */
10487 if (ckWARN(WARN_SYNTAX))
10488 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10489 "Misplaced _ in number");
10493 base = bases[shift];
10494 Base = Bases[shift];
10495 max = maxima[shift];
10497 /* read the rest of the number */
10499 /* x is used in the overflow test,
10500 b is the digit we're adding on. */
10505 /* if we don't mention it, we're done */
10509 /* _ are ignored -- but warned about if consecutive */
10511 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10512 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10513 "Misplaced _ in number");
10517 /* 8 and 9 are not octal */
10518 case '8': case '9':
10520 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10524 case '2': case '3': case '4':
10525 case '5': case '6': case '7':
10527 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10530 case '0': case '1':
10531 b = *s++ & 15; /* ASCII digit -> value of digit */
10535 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10536 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10537 /* make sure they said 0x */
10540 b = (*s++ & 7) + 9;
10542 /* Prepare to put the digit we have onto the end
10543 of the number so far. We check for overflows.
10549 x = u << shift; /* make room for the digit */
10551 if ((x >> shift) != u
10552 && !(PL_hints & HINT_NEW_BINARY)) {
10555 if (ckWARN_d(WARN_OVERFLOW))
10556 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10557 "Integer overflow in %s number",
10560 u = x | b; /* add the digit to the end */
10563 n *= nvshift[shift];
10564 /* If an NV has not enough bits in its
10565 * mantissa to represent an UV this summing of
10566 * small low-order numbers is a waste of time
10567 * (because the NV cannot preserve the
10568 * low-order bits anyway): we could just
10569 * remember when did we overflow and in the
10570 * end just multiply n by the right
10578 /* if we get here, we had success: make a scalar value from
10583 /* final misplaced underbar check */
10584 if (s[-1] == '_') {
10585 if (ckWARN(WARN_SYNTAX))
10586 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10591 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10592 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10593 "%s number > %s non-portable",
10599 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10600 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10601 "%s number > %s non-portable",
10606 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10607 sv = new_constant(start, s - start, "integer",
10609 else if (PL_hints & HINT_NEW_BINARY)
10610 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
10615 handle decimal numbers.
10616 we're also sent here when we read a 0 as the first digit
10618 case '1': case '2': case '3': case '4': case '5':
10619 case '6': case '7': case '8': case '9': case '.':
10622 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10625 /* read next group of digits and _ and copy into d */
10626 while (isDIGIT(*s) || *s == '_') {
10627 /* skip underscores, checking for misplaced ones
10631 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10632 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10633 "Misplaced _ in number");
10637 /* check for end of fixed-length buffer */
10639 Perl_croak(aTHX_ number_too_long);
10640 /* if we're ok, copy the character */
10645 /* final misplaced underbar check */
10646 if (lastub && s == lastub + 1) {
10647 if (ckWARN(WARN_SYNTAX))
10648 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10651 /* read a decimal portion if there is one. avoid
10652 3..5 being interpreted as the number 3. followed
10655 if (*s == '.' && s[1] != '.') {
10660 if (ckWARN(WARN_SYNTAX))
10661 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10662 "Misplaced _ in number");
10666 /* copy, ignoring underbars, until we run out of digits.
10668 for (; isDIGIT(*s) || *s == '_'; s++) {
10669 /* fixed length buffer check */
10671 Perl_croak(aTHX_ number_too_long);
10673 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10674 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10675 "Misplaced _ in number");
10681 /* fractional part ending in underbar? */
10682 if (s[-1] == '_') {
10683 if (ckWARN(WARN_SYNTAX))
10684 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10685 "Misplaced _ in number");
10687 if (*s == '.' && isDIGIT(s[1])) {
10688 /* oops, it's really a v-string, but without the "v" */
10694 /* read exponent part, if present */
10695 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10699 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10700 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10702 /* stray preinitial _ */
10704 if (ckWARN(WARN_SYNTAX))
10705 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10706 "Misplaced _ in number");
10710 /* allow positive or negative exponent */
10711 if (*s == '+' || *s == '-')
10714 /* stray initial _ */
10716 if (ckWARN(WARN_SYNTAX))
10717 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10718 "Misplaced _ in number");
10722 /* read digits of exponent */
10723 while (isDIGIT(*s) || *s == '_') {
10726 Perl_croak(aTHX_ number_too_long);
10730 if (((lastub && s == lastub + 1) ||
10731 (!isDIGIT(s[1]) && s[1] != '_'))
10732 && ckWARN(WARN_SYNTAX))
10733 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10734 "Misplaced _ in number");
10741 /* make an sv from the string */
10745 We try to do an integer conversion first if no characters
10746 indicating "float" have been found.
10751 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10753 if (flags == IS_NUMBER_IN_UV) {
10755 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10758 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10759 if (uv <= (UV) IV_MIN)
10760 sv_setiv(sv, -(IV)uv);
10767 /* terminate the string */
10769 nv = Atof(PL_tokenbuf);
10773 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10774 (PL_hints & HINT_NEW_INTEGER) )
10775 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
10776 (floatit ? "float" : "integer"),
10780 /* if it starts with a v, it could be a v-string */
10783 sv = NEWSV(92,5); /* preallocate storage space */
10784 s = scan_vstring(s,sv);
10788 /* make the op for the constant and return */
10791 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10793 lvalp->opval = Nullop;
10799 S_scan_formline(pTHX_ register char *s)
10801 register char *eol;
10803 SV *stuff = newSVpvn("",0);
10804 bool needargs = FALSE;
10805 bool eofmt = FALSE;
10807 while (!needargs) {
10809 #ifdef PERL_STRICT_CR
10810 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
10812 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
10814 if (*t == '\n' || t == PL_bufend) {
10819 if (PL_in_eval && !PL_rsfp) {
10820 eol = (char *) memchr(s,'\n',PL_bufend-s);
10825 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10827 for (t = s; t < eol; t++) {
10828 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10830 goto enough; /* ~~ must be first line in formline */
10832 if (*t == '@' || *t == '^')
10836 sv_catpvn(stuff, s, eol-s);
10837 #ifndef PERL_STRICT_CR
10838 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10839 char *end = SvPVX(stuff) + SvCUR(stuff);
10842 SvCUR_set(stuff, SvCUR(stuff) - 1);
10851 s = filter_gets(PL_linestr, PL_rsfp, 0);
10852 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10853 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10854 PL_last_lop = PL_last_uni = Nullch;
10863 if (SvCUR(stuff)) {
10866 PL_lex_state = LEX_NORMAL;
10867 PL_nextval[PL_nexttoke].ival = 0;
10871 PL_lex_state = LEX_FORMLINE;
10873 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10875 else if (PL_encoding)
10876 sv_recode_to_utf8(stuff, PL_encoding);
10878 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10880 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
10884 SvREFCNT_dec(stuff);
10886 PL_lex_formbrack = 0;
10897 PL_cshlen = strlen(PL_cshname);
10902 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10904 const I32 oldsavestack_ix = PL_savestack_ix;
10905 CV* outsidecv = PL_compcv;
10908 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10910 SAVEI32(PL_subline);
10911 save_item(PL_subname);
10912 SAVESPTR(PL_compcv);
10914 PL_compcv = (CV*)NEWSV(1104,0);
10915 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10916 CvFLAGS(PL_compcv) |= flags;
10918 PL_subline = CopLINE(PL_curcop);
10919 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10920 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
10921 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10923 return oldsavestack_ix;
10927 #pragma segment Perl_yylex
10930 Perl_yywarn(pTHX_ const char *s)
10932 PL_in_eval |= EVAL_WARNONLY;
10934 PL_in_eval &= ~EVAL_WARNONLY;
10939 Perl_yyerror(pTHX_ const char *s)
10941 const char *where = NULL;
10942 const char *context = NULL;
10946 if (!yychar || (yychar == ';' && !PL_rsfp))
10948 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10949 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10950 PL_oldbufptr != PL_bufptr) {
10953 The code below is removed for NetWare because it abends/crashes on NetWare
10954 when the script has error such as not having the closing quotes like:
10955 if ($var eq "value)
10956 Checking of white spaces is anyway done in NetWare code.
10959 while (isSPACE(*PL_oldoldbufptr))
10962 context = PL_oldoldbufptr;
10963 contlen = PL_bufptr - PL_oldoldbufptr;
10965 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10966 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10969 The code below is removed for NetWare because it abends/crashes on NetWare
10970 when the script has error such as not having the closing quotes like:
10971 if ($var eq "value)
10972 Checking of white spaces is anyway done in NetWare code.
10975 while (isSPACE(*PL_oldbufptr))
10978 context = PL_oldbufptr;
10979 contlen = PL_bufptr - PL_oldbufptr;
10981 else if (yychar > 255)
10982 where = "next token ???";
10983 else if (yychar == -2) { /* YYEMPTY */
10984 if (PL_lex_state == LEX_NORMAL ||
10985 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10986 where = "at end of line";
10987 else if (PL_lex_inpat)
10988 where = "within pattern";
10990 where = "within string";
10993 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
10995 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10996 else if (isPRINT_LC(yychar))
10997 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10999 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11000 where = SvPVX_const(where_sv);
11002 msg = sv_2mortal(newSVpv(s, 0));
11003 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11004 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11006 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
11008 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
11009 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11010 Perl_sv_catpvf(aTHX_ msg,
11011 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11012 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11015 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11016 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
11019 if (PL_error_count >= 10) {
11020 if (PL_in_eval && SvCUR(ERRSV))
11021 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11022 ERRSV, OutCopFILE(PL_curcop));
11024 Perl_croak(aTHX_ "%s has too many errors.\n",
11025 OutCopFILE(PL_curcop));
11028 PL_in_my_stash = NULL;
11032 #pragma segment Main
11036 S_swallow_bom(pTHX_ U8 *s)
11038 const STRLEN slen = SvCUR(PL_linestr);
11041 if (s[1] == 0xFE) {
11042 /* UTF-16 little-endian? (or UTF32-LE?) */
11043 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11044 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11045 #ifndef PERL_NO_UTF16_FILTER
11046 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11049 if (PL_bufend > (char*)s) {
11053 filter_add(utf16rev_textfilter, NULL);
11054 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11055 utf16_to_utf8_reversed(s, news,
11056 PL_bufend - (char*)s - 1,
11058 sv_setpvn(PL_linestr, (const char*)news, newlen);
11060 SvUTF8_on(PL_linestr);
11061 s = (U8*)SvPVX(PL_linestr);
11062 PL_bufend = SvPVX(PL_linestr) + newlen;
11065 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11070 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11071 #ifndef PERL_NO_UTF16_FILTER
11072 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11075 if (PL_bufend > (char *)s) {
11079 filter_add(utf16_textfilter, NULL);
11080 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11081 utf16_to_utf8(s, news,
11082 PL_bufend - (char*)s,
11084 sv_setpvn(PL_linestr, (const char*)news, newlen);
11086 SvUTF8_on(PL_linestr);
11087 s = (U8*)SvPVX(PL_linestr);
11088 PL_bufend = SvPVX(PL_linestr) + newlen;
11091 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11096 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11097 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11098 s += 3; /* UTF-8 */
11104 if (s[2] == 0xFE && s[3] == 0xFF) {
11105 /* UTF-32 big-endian */
11106 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11109 else if (s[2] == 0 && s[3] != 0) {
11112 * are a good indicator of UTF-16BE. */
11113 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11118 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11121 * are a good indicator of UTF-16LE. */
11122 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11131 * Restore a source filter.
11135 restore_rsfp(pTHX_ void *f)
11137 PerlIO * const fp = (PerlIO*)f;
11139 if (PL_rsfp == PerlIO_stdin())
11140 PerlIO_clearerr(PL_rsfp);
11141 else if (PL_rsfp && (PL_rsfp != fp))
11142 PerlIO_close(PL_rsfp);
11146 #ifndef PERL_NO_UTF16_FILTER
11148 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11150 const STRLEN old = SvCUR(sv);
11151 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11152 DEBUG_P(PerlIO_printf(Perl_debug_log,
11153 "utf16_textfilter(%p): %d %d (%d)\n",
11154 utf16_textfilter, idx, maxlen, (int) count));
11158 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11159 Copy(SvPVX_const(sv), tmps, old, char);
11160 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11161 SvCUR(sv) - old, &newlen);
11162 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11164 DEBUG_P({sv_dump(sv);});
11169 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11171 const STRLEN old = SvCUR(sv);
11172 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11173 DEBUG_P(PerlIO_printf(Perl_debug_log,
11174 "utf16rev_textfilter(%p): %d %d (%d)\n",
11175 utf16rev_textfilter, idx, maxlen, (int) count));
11179 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11180 Copy(SvPVX_const(sv), tmps, old, char);
11181 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11182 SvCUR(sv) - old, &newlen);
11183 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11185 DEBUG_P({ sv_dump(sv); });
11191 Returns a pointer to the next character after the parsed
11192 vstring, as well as updating the passed in sv.
11194 Function must be called like
11197 s = scan_vstring(s,sv);
11199 The sv should already be large enough to store the vstring
11200 passed in, for performance reasons.
11205 Perl_scan_vstring(pTHX_ const char *s, SV *sv)
11207 const char *pos = s;
11208 const char *start = s;
11209 if (*pos == 'v') pos++; /* get past 'v' */
11210 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11212 if ( *pos != '.') {
11213 /* this may not be a v-string if followed by => */
11214 const char *next = pos;
11215 while (next < PL_bufend && isSPACE(*next))
11217 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11218 /* return string not v-string */
11219 sv_setpvn(sv,(char *)s,pos-s);
11220 return (char *)pos;
11224 if (!isALPHA(*pos)) {
11225 U8 tmpbuf[UTF8_MAXBYTES+1];
11227 if (*s == 'v') s++; /* get past 'v' */
11229 sv_setpvn(sv, "", 0);
11235 /* this is atoi() that tolerates underscores */
11236 const char *end = pos;
11238 while (--end >= s) {
11243 rev += (*end - '0') * mult;
11245 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11246 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11247 "Integer overflow in decimal number");
11251 if (rev > 0x7FFFFFFF)
11252 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11254 /* Append native character for the rev point */
11255 tmpend = uvchr_to_utf8(tmpbuf, rev);
11256 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11257 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11259 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11265 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11269 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11277 * c-indentation-style: bsd
11278 * c-basic-offset: 4
11279 * indent-tabs-mode: t
11282 * ex: set ts=8 sts=4 sw=4 noet: