3 * Copyright (c) 1991-2001, Larry Wall
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
11 * "It all comes from here, the stench and the peril." --Frodo
15 * This file is the lexer for Perl. It's closely linked to the
18 * The main routine is yylex(), which returns the next token.
22 #define PERL_IN_TOKE_C
25 #define yychar PL_yychar
26 #define yylval PL_yylval
28 static char ident_too_long[] = "Identifier too long";
30 static void restore_rsfp(pTHXo_ void *f);
31 #ifndef PERL_NO_UTF16_FILTER
32 static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33 static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
36 #define XFAKEBRACK 128
39 /*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
40 #define UTF (PL_hints & HINT_UTF8)
42 /* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
46 /* On MacOS, respect nonbreaking spaces */
47 #ifdef MACOS_TRADITIONAL
48 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
50 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
53 /* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
55 * can get by with a single comparison (if the compiler is smart enough).
58 /* #define LEX_NOTPARSING 11 is done in perl.h. */
61 #define LEX_INTERPNORMAL 9
62 #define LEX_INTERPCASEMOD 8
63 #define LEX_INTERPPUSH 7
64 #define LEX_INTERPSTART 6
65 #define LEX_INTERPEND 5
66 #define LEX_INTERPENDMAYBE 4
67 #define LEX_INTERPCONCAT 3
68 #define LEX_INTERPCONST 2
69 #define LEX_FORMLINE 1
70 #define LEX_KNOWNEXT 0
78 # define YYMAXLEVEL 100
80 YYSTYPE* yylval_pointer[YYMAXLEVEL];
81 int* yychar_pointer[YYMAXLEVEL];
85 # define yylval (*yylval_pointer[yyactlevel])
86 # define yychar (*yychar_pointer[yyactlevel])
87 # define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
89 # define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
94 /* CLINE is a macro that ensures PL_copline has a sane value */
99 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
102 * Convenience functions to return different tokens and prime the
103 * lexer for the next token. They all take an argument.
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
115 * FUN1 : not used, except for not, which isn't a UNIOP
116 * BOop : bitwise or or xor
118 * SHop : shift operator
119 * PWop : power operator
120 * PMop : pattern-matching operator
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
124 * Rop : relational operator <= != gt
126 * Also see LOP and lop() below.
129 #define TOKEN(retval) return (PL_bufptr = s,(int)retval)
130 #define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
131 #define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
132 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
133 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
134 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
135 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
136 #define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
137 #define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
138 #define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
139 #define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
140 #define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
141 #define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
142 #define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
143 #define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
144 #define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
145 #define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
146 #define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
147 #define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
148 #define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
150 /* This bit of chicanery makes a unary function followed by
151 * a parenthesis into a function with one argument, highest precedence.
153 #define UNI(f) return(yylval.ival = f, \
156 PL_last_uni = PL_oldbufptr, \
157 PL_last_lop_op = f, \
158 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
160 #define UNIBRACK(f) return(yylval.ival = f, \
162 PL_last_uni = PL_oldbufptr, \
163 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
165 /* grandfather return to old style */
166 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
171 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
172 * into an OP_ANDASSIGN or OP_ORASSIGN
176 S_ao(pTHX_ int toketype)
178 if (*PL_bufptr == '=') {
180 if (toketype == ANDAND)
181 yylval.ival = OP_ANDASSIGN;
182 else if (toketype == OROR)
183 yylval.ival = OP_ORASSIGN;
191 * When Perl expects an operator and finds something else, no_op
192 * prints the warning. It always prints "<something> found where
193 * operator expected. It prints "Missing semicolon on previous line?"
194 * if the surprise occurs at the start of the line. "do you need to
195 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
196 * where the compiler doesn't know if foo is a method call or a function.
197 * It prints "Missing operator before end of line" if there's nothing
198 * after the missing operator, or "... before <...>" if there is something
199 * after the missing operator.
203 S_no_op(pTHX_ char *what, char *s)
205 char *oldbp = PL_bufptr;
206 bool is_first = (PL_oldbufptr == PL_linestart);
212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
218 if (t < PL_bufptr && isSPACE(*t))
219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
220 t - PL_oldoldbufptr, PL_oldoldbufptr);
224 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
231 * Complain about missing quote/regexp/heredoc terminator.
232 * If it's called with (char *)NULL then it cauterizes the line buffer.
233 * If we're in a delimited string and the delimiter is a control
234 * character, it's reformatted into a two-char sequence like ^C.
239 S_missingterm(pTHX_ char *s)
244 char *nl = strrchr(s,'\n');
250 iscntrl(PL_multi_close)
252 PL_multi_close < 32 || PL_multi_close == 127
256 tmpbuf[1] = toCTRL(PL_multi_close);
262 *tmpbuf = PL_multi_close;
266 q = strchr(s,'"') ? '\'' : '"';
267 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
275 Perl_deprecate(pTHX_ char *s)
277 if (ckWARN(WARN_DEPRECATED))
278 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
283 * Deprecate a comma-less variable list.
289 deprecate("comma-less variable list");
293 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
294 * utf16-to-utf8-reversed.
297 #ifdef PERL_CR_FILTER
301 register char *s = SvPVX(sv);
302 register char *e = s + SvCUR(sv);
303 /* outer loop optimized to do nothing if there are no CR-LFs */
305 if (*s++ == '\r' && *s == '\n') {
306 /* hit a CR-LF, need to copy the rest */
307 register char *d = s - 1;
310 if (*s == '\r' && s[1] == '\n')
321 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
323 I32 count = FILTER_READ(idx+1, sv, maxlen);
324 if (count > 0 && !maxlen)
332 * Initialize variables. Uses the Perl save_stack to save its state (for
333 * recursive calls to the parser).
337 Perl_lex_start(pTHX_ SV *line)
342 SAVEI32(PL_lex_dojoin);
343 SAVEI32(PL_lex_brackets);
344 SAVEI32(PL_lex_casemods);
345 SAVEI32(PL_lex_starts);
346 SAVEI32(PL_lex_state);
347 SAVEVPTR(PL_lex_inpat);
348 SAVEI32(PL_lex_inwhat);
349 if (PL_lex_state == LEX_KNOWNEXT) {
350 I32 toke = PL_nexttoke;
351 while (--toke >= 0) {
352 SAVEI32(PL_nexttype[toke]);
353 SAVEVPTR(PL_nextval[toke]);
355 SAVEI32(PL_nexttoke);
357 SAVECOPLINE(PL_curcop);
360 SAVEPPTR(PL_oldbufptr);
361 SAVEPPTR(PL_oldoldbufptr);
362 SAVEPPTR(PL_linestart);
363 SAVESPTR(PL_linestr);
364 SAVEPPTR(PL_lex_brackstack);
365 SAVEPPTR(PL_lex_casestack);
366 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
367 SAVESPTR(PL_lex_stuff);
368 SAVEI32(PL_lex_defer);
369 SAVEI32(PL_sublex_info.sub_inwhat);
370 SAVESPTR(PL_lex_repl);
372 SAVEINT(PL_lex_expect);
374 PL_lex_state = LEX_NORMAL;
378 New(899, PL_lex_brackstack, 120, char);
379 New(899, PL_lex_casestack, 12, char);
380 SAVEFREEPV(PL_lex_brackstack);
381 SAVEFREEPV(PL_lex_casestack);
383 *PL_lex_casestack = '\0';
386 PL_lex_stuff = Nullsv;
387 PL_lex_repl = Nullsv;
391 PL_sublex_info.sub_inwhat = 0;
393 if (SvREADONLY(PL_linestr))
394 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
395 s = SvPV(PL_linestr, len);
396 if (len && s[len-1] != ';') {
397 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
398 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
399 sv_catpvn(PL_linestr, "\n;", 2);
401 SvTEMP_off(PL_linestr);
402 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
403 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
405 PL_rs = newSVpvn("\n", 1);
411 * Finalizer for lexing operations. Must be called when the parser is
412 * done with the lexer.
418 PL_doextract = FALSE;
423 * This subroutine has nothing to do with tilting, whether at windmills
424 * or pinball tables. Its name is short for "increment line". It
425 * increments the current line number in CopLINE(PL_curcop) and checks
426 * to see whether the line starts with a comment of the form
427 * # line 500 "foo.pm"
428 * If so, it sets the current line number and file to the values in the comment.
432 S_incline(pTHX_ char *s)
439 CopLINE_inc(PL_curcop);
442 while (SPACE_OR_TAB(*s)) s++;
443 if (strnEQ(s, "line", 4))
447 if (*s == ' ' || *s == '\t')
451 while (SPACE_OR_TAB(*s)) s++;
457 while (SPACE_OR_TAB(*s))
459 if (*s == '"' && (t = strchr(s+1, '"'))) {
464 for (t = s; !isSPACE(*t); t++) ;
467 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
469 if (*e != '\n' && *e != '\0')
470 return; /* false alarm */
476 Safefree(CopFILE(PL_curcop));
478 SvREFCNT_dec(CopFILEGV(PL_curcop));
480 CopFILE_set(PL_curcop, s);
483 CopLINE_set(PL_curcop, atoi(n)-1);
488 * Called to gobble the appropriate amount and type of whitespace.
489 * Skips comments as well.
493 S_skipspace(pTHX_ register char *s)
495 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
496 while (s < PL_bufend && SPACE_OR_TAB(*s))
502 SSize_t oldprevlen, oldoldprevlen;
503 SSize_t oldloplen, oldunilen;
504 while (s < PL_bufend && isSPACE(*s)) {
505 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
510 if (s < PL_bufend && *s == '#') {
511 while (s < PL_bufend && *s != '\n')
515 if (PL_in_eval && !PL_rsfp) {
522 /* only continue to recharge the buffer if we're at the end
523 * of the buffer, we're not reading from a source filter, and
524 * we're in normal lexing mode
526 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
527 PL_lex_state == LEX_FORMLINE)
530 /* try to recharge the buffer */
531 if ((s = filter_gets(PL_linestr, PL_rsfp,
532 (prevlen = SvCUR(PL_linestr)))) == Nullch)
534 /* end of file. Add on the -p or -n magic */
535 if (PL_minus_n || PL_minus_p) {
536 sv_setpv(PL_linestr,PL_minus_p ?
537 ";}continue{print or die qq(-p destination: $!\\n)" :
539 sv_catpv(PL_linestr,";}");
540 PL_minus_n = PL_minus_p = 0;
543 sv_setpv(PL_linestr,";");
545 /* reset variables for next time we lex */
546 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
548 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
550 /* Close the filehandle. Could be from -P preprocessor,
551 * STDIN, or a regular file. If we were reading code from
552 * STDIN (because the commandline held no -e or filename)
553 * then we don't close it, we reset it so the code can
554 * read from STDIN too.
557 if (PL_preprocess && !PL_in_eval)
558 (void)PerlProc_pclose(PL_rsfp);
559 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
560 PerlIO_clearerr(PL_rsfp);
562 (void)PerlIO_close(PL_rsfp);
567 /* not at end of file, so we only read another line */
568 /* make corresponding updates to old pointers, for yyerror() */
569 oldprevlen = PL_oldbufptr - PL_bufend;
570 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
572 oldunilen = PL_last_uni - PL_bufend;
574 oldloplen = PL_last_lop - PL_bufend;
575 PL_linestart = PL_bufptr = s + prevlen;
576 PL_bufend = s + SvCUR(PL_linestr);
578 PL_oldbufptr = s + oldprevlen;
579 PL_oldoldbufptr = s + oldoldprevlen;
581 PL_last_uni = s + oldunilen;
583 PL_last_lop = s + oldloplen;
586 /* debugger active and we're not compiling the debugger code,
587 * so store the line into the debugger's array of lines
589 if (PERLDB_LINE && PL_curstash != PL_debstash) {
590 SV *sv = NEWSV(85,0);
592 sv_upgrade(sv, SVt_PVMG);
593 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
594 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
601 * Check the unary operators to ensure there's no ambiguity in how they're
602 * used. An ambiguous piece of code would be:
604 * This doesn't mean rand() + 5. Because rand() is a unary operator,
605 * the +5 is its argument.
614 if (PL_oldoldbufptr != PL_last_uni)
616 while (isSPACE(*PL_last_uni))
618 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
619 if ((t = strchr(s, '(')) && t < PL_bufptr)
621 if (ckWARN_d(WARN_AMBIGUOUS)){
624 Perl_warner(aTHX_ WARN_AMBIGUOUS,
625 "Warning: Use of \"%s\" without parens is ambiguous",
631 /* workaround to replace the UNI() macro with a function. Only the
632 * hints/uts.sh file mentions this. Other comments elsewhere in the
633 * source indicate Microport Unix might need it too.
639 #define UNI(f) return uni(f,s)
642 S_uni(pTHX_ I32 f, char *s)
647 PL_last_uni = PL_oldbufptr;
658 #endif /* CRIPPLED_CC */
661 * LOP : macro to build a list operator. Its behaviour has been replaced
662 * with a subroutine, S_lop() for which LOP is just another name.
665 #define LOP(f,x) return lop(f,x,s)
669 * Build a list operator (or something that might be one). The rules:
670 * - if we have a next token, then it's a list operator [why?]
671 * - if the next thing is an opening paren, then it's a function
672 * - else it's a list operator
676 S_lop(pTHX_ I32 f, int x, char *s)
682 PL_last_lop = PL_oldbufptr;
697 * When the lexer realizes it knows the next token (for instance,
698 * it is reordering tokens for the parser) then it can call S_force_next
699 * to know what token to return the next time the lexer is called. Caller
700 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
701 * handles the token correctly.
705 S_force_next(pTHX_ I32 type)
707 PL_nexttype[PL_nexttoke] = type;
709 if (PL_lex_state != LEX_KNOWNEXT) {
710 PL_lex_defer = PL_lex_state;
711 PL_lex_expect = PL_expect;
712 PL_lex_state = LEX_KNOWNEXT;
718 * When the lexer knows the next thing is a word (for instance, it has
719 * just seen -> and it knows that the next char is a word char, then
720 * it calls S_force_word to stick the next word into the PL_next lookahead.
723 * char *start : buffer position (must be within PL_linestr)
724 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
725 * int check_keyword : if true, Perl checks to make sure the word isn't
726 * a keyword (do this if the word is a label, e.g. goto FOO)
727 * int allow_pack : if true, : characters will also be allowed (require,
729 * int allow_initial_tick : used by the "sub" lexer only.
733 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
738 start = skipspace(start);
740 if (isIDFIRST_lazy_if(s,UTF) ||
741 (allow_pack && *s == ':') ||
742 (allow_initial_tick && *s == '\'') )
744 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
745 if (check_keyword && keyword(PL_tokenbuf, len))
747 if (token == METHOD) {
752 PL_expect = XOPERATOR;
755 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
756 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
764 * Called when the lexer wants $foo *foo &foo etc, but the program
765 * text only contains the "foo" portion. The first argument is a pointer
766 * to the "foo", and the second argument is the type symbol to prefix.
767 * Forces the next token to be a "WORD".
768 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
772 S_force_ident(pTHX_ register char *s, int kind)
775 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
776 PL_nextval[PL_nexttoke].opval = o;
779 o->op_private = OPpCONST_ENTERED;
780 /* XXX see note in pp_entereval() for why we forgo typo
781 warnings if the symbol must be introduced in an eval.
783 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
784 kind == '$' ? SVt_PV :
785 kind == '@' ? SVt_PVAV :
786 kind == '%' ? SVt_PVHV :
794 Perl_str_to_version(pTHX_ SV *sv)
799 char *start = SvPVx(sv,len);
800 bool utf = SvUTF8(sv) ? TRUE : FALSE;
801 char *end = start + len;
802 while (start < end) {
806 n = utf8_to_uv((U8*)start, len, &skip, 0);
811 retval += ((NV)n)/nshift;
820 * Forces the next token to be a version number.
824 S_force_version(pTHX_ char *s)
826 OP *version = Nullop;
835 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
836 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
838 s = scan_num(s, &yylval);
839 version = yylval.opval;
840 ver = cSVOPx(version)->op_sv;
841 if (SvPOK(ver) && !SvNIOK(ver)) {
842 (void)SvUPGRADE(ver, SVt_PVNV);
843 SvNVX(ver) = str_to_version(ver);
844 SvNOK_on(ver); /* hint that it is a version */
849 /* NOTE: The parser sees the package name and the VERSION swapped */
850 PL_nextval[PL_nexttoke].opval = version;
858 * Tokenize a quoted string passed in as an SV. It finds the next
859 * chunk, up to end of string or a backslash. It may make a new
860 * SV containing that chunk (if HINT_NEW_STRING is on). It also
865 S_tokeq(pTHX_ SV *sv)
876 s = SvPV_force(sv, len);
877 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
880 while (s < send && *s != '\\')
885 if ( PL_hints & HINT_NEW_STRING )
886 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
889 if (s + 1 < send && (s[1] == '\\'))
890 s++; /* all that, just for this */
895 SvCUR_set(sv, d - SvPVX(sv));
897 if ( PL_hints & HINT_NEW_STRING )
898 return new_constant(NULL, 0, "q", sv, pv, "q");
903 * Now come three functions related to double-quote context,
904 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
905 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
906 * interact with PL_lex_state, and create fake ( ... ) argument lists
907 * to handle functions and concatenation.
908 * They assume that whoever calls them will be setting up a fake
909 * join call, because each subthing puts a ',' after it. This lets
912 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
914 * (I'm not sure whether the spurious commas at the end of lcfirst's
915 * arguments and join's arguments are created or not).
920 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
922 * Pattern matching will set PL_lex_op to the pattern-matching op to
923 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
925 * OP_CONST and OP_READLINE are easy--just make the new op and return.
927 * Everything else becomes a FUNC.
929 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
930 * had an OP_CONST or OP_READLINE). This just sets us up for a
931 * call to S_sublex_push().
937 register I32 op_type = yylval.ival;
939 if (op_type == OP_NULL) {
940 yylval.opval = PL_lex_op;
944 if (op_type == OP_CONST || op_type == OP_READLINE) {
945 SV *sv = tokeq(PL_lex_stuff);
947 if (SvTYPE(sv) == SVt_PVIV) {
948 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
954 nsv = newSVpvn(p, len);
960 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
961 PL_lex_stuff = Nullsv;
965 PL_sublex_info.super_state = PL_lex_state;
966 PL_sublex_info.sub_inwhat = op_type;
967 PL_sublex_info.sub_op = PL_lex_op;
968 PL_lex_state = LEX_INTERPPUSH;
972 yylval.opval = PL_lex_op;
982 * Create a new scope to save the lexing state. The scope will be
983 * ended in S_sublex_done. Returns a '(', starting the function arguments
984 * to the uc, lc, etc. found before.
985 * Sets PL_lex_state to LEX_INTERPCONCAT.
993 PL_lex_state = PL_sublex_info.super_state;
994 SAVEI32(PL_lex_dojoin);
995 SAVEI32(PL_lex_brackets);
996 SAVEI32(PL_lex_casemods);
997 SAVEI32(PL_lex_starts);
998 SAVEI32(PL_lex_state);
999 SAVEVPTR(PL_lex_inpat);
1000 SAVEI32(PL_lex_inwhat);
1001 SAVECOPLINE(PL_curcop);
1002 SAVEPPTR(PL_bufptr);
1003 SAVEPPTR(PL_oldbufptr);
1004 SAVEPPTR(PL_oldoldbufptr);
1005 SAVEPPTR(PL_linestart);
1006 SAVESPTR(PL_linestr);
1007 SAVEPPTR(PL_lex_brackstack);
1008 SAVEPPTR(PL_lex_casestack);
1010 PL_linestr = PL_lex_stuff;
1011 PL_lex_stuff = Nullsv;
1013 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1014 = SvPVX(PL_linestr);
1015 PL_bufend += SvCUR(PL_linestr);
1016 SAVEFREESV(PL_linestr);
1018 PL_lex_dojoin = FALSE;
1019 PL_lex_brackets = 0;
1020 New(899, PL_lex_brackstack, 120, char);
1021 New(899, PL_lex_casestack, 12, char);
1022 SAVEFREEPV(PL_lex_brackstack);
1023 SAVEFREEPV(PL_lex_casestack);
1024 PL_lex_casemods = 0;
1025 *PL_lex_casestack = '\0';
1027 PL_lex_state = LEX_INTERPCONCAT;
1028 CopLINE_set(PL_curcop, PL_multi_start);
1030 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1031 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1032 PL_lex_inpat = PL_sublex_info.sub_op;
1034 PL_lex_inpat = Nullop;
1041 * Restores lexer state after a S_sublex_push.
1047 if (!PL_lex_starts++) {
1048 PL_expect = XOPERATOR;
1049 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
1053 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1054 PL_lex_state = LEX_INTERPCASEMOD;
1058 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1059 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1060 PL_linestr = PL_lex_repl;
1062 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1063 PL_bufend += SvCUR(PL_linestr);
1064 SAVEFREESV(PL_linestr);
1065 PL_lex_dojoin = FALSE;
1066 PL_lex_brackets = 0;
1067 PL_lex_casemods = 0;
1068 *PL_lex_casestack = '\0';
1070 if (SvEVALED(PL_lex_repl)) {
1071 PL_lex_state = LEX_INTERPNORMAL;
1073 /* we don't clear PL_lex_repl here, so that we can check later
1074 whether this is an evalled subst; that means we rely on the
1075 logic to ensure sublex_done() is called again only via the
1076 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1079 PL_lex_state = LEX_INTERPCONCAT;
1080 PL_lex_repl = Nullsv;
1086 PL_bufend = SvPVX(PL_linestr);
1087 PL_bufend += SvCUR(PL_linestr);
1088 PL_expect = XOPERATOR;
1089 PL_sublex_info.sub_inwhat = 0;
1097 Extracts a pattern, double-quoted string, or transliteration. This
1100 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1101 processing a pattern (PL_lex_inpat is true), a transliteration
1102 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1104 Returns a pointer to the character scanned up to. Iff this is
1105 advanced from the start pointer supplied (ie if anything was
1106 successfully parsed), will leave an OP for the substring scanned
1107 in yylval. Caller must intuit reason for not parsing further
1108 by looking at the next characters herself.
1112 double-quoted style: \r and \n
1113 regexp special ones: \D \s
1115 backrefs: \1 (deprecated in substitution replacements)
1116 case and quoting: \U \Q \E
1117 stops on @ and $, but not for $ as tail anchor
1119 In transliterations:
1120 characters are VERY literal, except for - not at the start or end
1121 of the string, which indicates a range. scan_const expands the
1122 range to the full set of intermediate characters.
1124 In double-quoted strings:
1126 double-quoted style: \r and \n
1128 backrefs: \1 (deprecated)
1129 case and quoting: \U \Q \E
1132 scan_const does *not* construct ops to handle interpolated strings.
1133 It stops processing as soon as it finds an embedded $ or @ variable
1134 and leaves it to the caller to work out what's going on.
1136 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1138 $ in pattern could be $foo or could be tail anchor. Assumption:
1139 it's a tail anchor if $ is the last thing in the string, or if it's
1140 followed by one of ")| \n\t"
1142 \1 (backreferences) are turned into $1
1144 The structure of the code is
1145 while (there's a character to process) {
1146 handle transliteration ranges
1147 skip regexp comments
1148 skip # initiated comments in //x patterns
1149 check for embedded @foo
1150 check for embedded scalars
1152 leave intact backslashes from leave (below)
1153 deprecate \1 in strings and sub replacements
1154 handle string-changing backslashes \l \U \Q \E, etc.
1155 switch (what was escaped) {
1156 handle - in a transliteration (becomes a literal -)
1157 handle \132 octal characters
1158 handle 0x15 hex characters
1159 handle \cV (control V)
1160 handle printf backslashes (\f, \r, \n, etc)
1162 } (end if backslash)
1163 } (end while character to read)
1168 S_scan_const(pTHX_ char *start)
1170 register char *send = PL_bufend; /* end of the constant */
1171 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1172 register char *s = start; /* start of the constant */
1173 register char *d = SvPVX(sv); /* destination for copies */
1174 bool dorange = FALSE; /* are we in a translit range? */
1175 bool didrange = FALSE; /* did we just finish a range? */
1176 bool has_utf8 = FALSE; /* embedded \x{} */
1179 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
1180 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1182 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
1183 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1184 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
1186 const char *leaveit = /* set of acceptably-backslashed characters */
1188 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
1191 while (s < send || dorange) {
1192 /* get transliterations out of the way (they're most literal) */
1193 if (PL_lex_inwhat == OP_TRANS) {
1194 /* expand a range A-Z to the full set of characters. AIE! */
1196 I32 i; /* current expanded character */
1197 I32 min; /* first character in range */
1198 I32 max; /* last character in range */
1200 i = d - SvPVX(sv); /* remember current offset */
1201 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1202 d = SvPVX(sv) + i; /* refresh d after realloc */
1203 d -= 2; /* eat the first char and the - */
1205 min = (U8)*d; /* first char in range */
1206 max = (U8)d[1]; /* last char in range */
1210 "Invalid [] range \"%c-%c\" in transliteration operator",
1211 (char)min, (char)max);
1215 if ((isLOWER(min) && isLOWER(max)) ||
1216 (isUPPER(min) && isUPPER(max))) {
1218 for (i = min; i <= max; i++)
1222 for (i = min; i <= max; i++)
1229 for (i = min; i <= max; i++)
1232 /* mark the range as done, and continue */
1238 /* range begins (ignore - as first or last char) */
1239 else if (*s == '-' && s+1 < send && s != start) {
1241 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1244 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
1256 /* if we get here, we're not doing a transliteration */
1258 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1259 except for the last char, which will be done separately. */
1260 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1262 while (s < send && *s != ')')
1265 else if (s[2] == '{' /* This should match regcomp.c */
1266 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1269 char *regparse = s + (s[2] == '{' ? 3 : 4);
1272 while (count && (c = *regparse)) {
1273 if (c == '\\' && regparse[1])
1281 if (*regparse != ')') {
1282 regparse--; /* Leave one char for continuation. */
1283 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
1285 while (s < regparse)
1290 /* likewise skip #-initiated comments in //x patterns */
1291 else if (*s == '#' && PL_lex_inpat &&
1292 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1293 while (s+1 < send && *s != '\n')
1297 /* check for embedded arrays
1298 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1300 else if (*s == '@' && s[1]
1301 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1304 /* check for embedded scalars. only stop if we're sure it's a
1307 else if (*s == '$') {
1308 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1310 if (s + 1 < send && !strchr("()| \n\t", s[1]))
1311 break; /* in regexp, $ might be tail anchor */
1315 if (*s == '\\' && s+1 < send) {
1316 bool to_be_utf8 = FALSE;
1320 /* some backslashes we leave behind */
1321 if (*leaveit && *s && strchr(leaveit, *s)) {
1327 /* deprecate \1 in strings and substitution replacements */
1328 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1329 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1331 if (ckWARN(WARN_SYNTAX))
1332 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
1337 /* string-change backslash escapes */
1338 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1343 /* if we get here, it's either a quoted -, or a digit */
1346 /* quoted - in transliterations */
1348 if (PL_lex_inwhat == OP_TRANS) {
1355 if (ckWARN(WARN_MISC) && isALNUM(*s))
1356 Perl_warner(aTHX_ WARN_MISC,
1357 "Unrecognized escape \\%c passed through",
1359 /* default action is to copy the quoted character */
1364 /* \132 indicates an octal constant */
1365 case '0': case '1': case '2': case '3':
1366 case '4': case '5': case '6': case '7':
1368 STRLEN len = 0; /* disallow underscores */
1369 uv = (UV)scan_oct(s, 3, &len);
1372 goto NUM_ESCAPE_INSERT;
1374 /* \x24 indicates a hex constant */
1378 char* e = strchr(s, '}');
1380 yyerror("Missing right brace on \\x{}");
1384 STRLEN len = 1; /* allow underscores */
1385 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1392 STRLEN len = 0; /* disallow underscores */
1393 uv = (UV)scan_hex(s, 2, &len);
1399 /* Insert oct or hex escaped character.
1400 * There will always enough room in sv since such
1401 * escapes will be longer than any UT-F8 sequence
1402 * they can end up as. */
1404 /* This spot is wrong for EBCDIC. Characters like
1405 * the lowercase letters and digits are >127 in EBCDIC,
1406 * so here they would need to be mapped to the Unicode
1407 * repertoire. --jhi */
1410 if (!has_utf8 && (to_be_utf8 || uv > 255)) {
1411 /* Might need to recode whatever we have
1412 * accumulated so far if it contains any
1415 * (Can't we keep track of that and avoid
1416 * this rescan? --jhi)
1421 for (c = SvPVX(sv); c < d; c++) {
1422 if (UTF8_IS_CONTINUED(*c))
1426 char *old_pvx = SvPVX(sv);
1428 U8 tmpbuf[UTF8_MAXLEN+1];
1432 SvCUR(sv) + hicount + 1) +
1440 if (UTF8_IS_CONTINUED(*src)) {
1441 tmpend = uv_to_utf8(tmpbuf, (U8)*src--);
1442 dst -= tmpend - tmpbuf;
1443 Copy((char *)tmpbuf, dst+1,
1444 tmpend - tmpbuf, char);
1453 if (to_be_utf8 || (has_utf8 && uv > 127) || uv > 255) {
1454 d = (char*)uv_to_utf8((U8*)d, uv);
1466 /* \N{latin small letter a} is a named character */
1470 char* e = strchr(s, '}');
1476 yyerror("Missing right brace on \\N{}");
1480 res = newSVpvn(s + 1, e - s - 1);
1481 res = new_constant( Nullch, 0, "charnames",
1482 res, Nullsv, "\\N{...}" );
1483 str = SvPV(res,len);
1484 if (!has_utf8 && SvUTF8(res)) {
1485 char *ostart = SvPVX(sv);
1486 SvCUR_set(sv, d - ostart);
1489 sv_utf8_upgrade(sv);
1490 /* this just broke our allocation above... */
1491 SvGROW(sv, send - start);
1492 d = SvPVX(sv) + SvCUR(sv);
1495 if (len > e - s + 4) {
1496 char *odest = SvPVX(sv);
1498 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1499 d = SvPVX(sv) + (d - odest);
1501 Copy(str, d, len, char);
1508 yyerror("Missing braces on \\N{}");
1511 /* \c is a control character */
1528 /* printf-style backslashes, formfeeds, newlines, etc */
1546 *d++ = '\047'; /* CP 1047 */
1549 *d++ = '\057'; /* CP 1047 */
1563 } /* end if (backslash) */
1565 /* (now in tr/// code again) */
1567 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
1568 STRLEN len = (STRLEN) -1;
1571 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
1573 if (len == (STRLEN)-1) {
1574 /* Illegal UTF8 (a high-bit byte), make it valid. */
1575 char *old_pvx = SvPVX(sv);
1576 /* need space for one extra char (NOTE: SvCUR() not set here) */
1577 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1578 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1589 } /* while loop to process each character */
1591 /* terminate the string and set up the sv */
1593 SvCUR_set(sv, d - SvPVX(sv));
1598 /* shrink the sv if we allocated more than we used */
1599 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1600 SvLEN_set(sv, SvCUR(sv) + 1);
1601 Renew(SvPVX(sv), SvLEN(sv), char);
1604 /* return the substring (via yylval) only if we parsed anything */
1605 if (s > PL_bufptr) {
1606 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1607 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
1609 ( PL_lex_inwhat == OP_TRANS
1611 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
1614 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1621 * Returns TRUE if there's more to the expression (e.g., a subscript),
1624 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1626 * ->[ and ->{ return TRUE
1627 * { and [ outside a pattern are always subscripts, so return TRUE
1628 * if we're outside a pattern and it's not { or [, then return FALSE
1629 * if we're in a pattern and the first char is a {
1630 * {4,5} (any digits around the comma) returns FALSE
1631 * if we're in a pattern and the first char is a [
1633 * [SOMETHING] has a funky algorithm to decide whether it's a
1634 * character class or not. It has to deal with things like
1635 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1636 * anything else returns TRUE
1639 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
1642 S_intuit_more(pTHX_ register char *s)
1644 if (PL_lex_brackets)
1646 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1648 if (*s != '{' && *s != '[')
1653 /* In a pattern, so maybe we have {n,m}. */
1670 /* On the other hand, maybe we have a character class */
1673 if (*s == ']' || *s == '^')
1676 /* this is terrifying, and it works */
1677 int weight = 2; /* let's weigh the evidence */
1679 unsigned char un_char = 255, last_un_char;
1680 char *send = strchr(s,']');
1681 char tmpbuf[sizeof PL_tokenbuf * 4];
1683 if (!send) /* has to be an expression */
1686 Zero(seen,256,char);
1689 else if (isDIGIT(*s)) {
1691 if (isDIGIT(s[1]) && s[2] == ']')
1697 for (; s < send; s++) {
1698 last_un_char = un_char;
1699 un_char = (unsigned char)*s;
1704 weight -= seen[un_char] * 10;
1705 if (isALNUM_lazy_if(s+1,UTF)) {
1706 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
1707 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
1712 else if (*s == '$' && s[1] &&
1713 strchr("[#!%*<>()-=",s[1])) {
1714 if (/*{*/ strchr("])} =",s[2]))
1723 if (strchr("wds]",s[1]))
1725 else if (seen['\''] || seen['"'])
1727 else if (strchr("rnftbxcav",s[1]))
1729 else if (isDIGIT(s[1])) {
1731 while (s[1] && isDIGIT(s[1]))
1741 if (strchr("aA01! ",last_un_char))
1743 if (strchr("zZ79~",s[1]))
1745 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1746 weight -= 5; /* cope with negative subscript */
1749 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
1750 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1755 if (keyword(tmpbuf, d - tmpbuf))
1758 if (un_char == last_un_char + 1)
1760 weight -= seen[un_char];
1765 if (weight >= 0) /* probably a character class */
1775 * Does all the checking to disambiguate
1777 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1778 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1780 * First argument is the stuff after the first token, e.g. "bar".
1782 * Not a method if bar is a filehandle.
1783 * Not a method if foo is a subroutine prototyped to take a filehandle.
1784 * Not a method if it's really "Foo $bar"
1785 * Method if it's "foo $bar"
1786 * Not a method if it's really "print foo $bar"
1787 * Method if it's really "foo package::" (interpreted as package->foo)
1788 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1789 * Not a method if bar is a filehandle or package, but is quoted with
1794 S_intuit_method(pTHX_ char *start, GV *gv)
1796 char *s = start + (*start == '$');
1797 char tmpbuf[sizeof PL_tokenbuf];
1805 if ((cv = GvCVu(gv))) {
1806 char *proto = SvPVX(cv);
1816 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
1817 /* start is the beginning of the possible filehandle/object,
1818 * and s is the end of it
1819 * tmpbuf is a copy of it
1822 if (*start == '$') {
1823 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
1828 return *s == '(' ? FUNCMETH : METHOD;
1830 if (!keyword(tmpbuf, len)) {
1831 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1836 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
1837 if (indirgv && GvCVu(indirgv))
1839 /* filehandle or package name makes it a method */
1840 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
1842 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
1843 return 0; /* no assumptions -- "=>" quotes bearword */
1845 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
1846 newSVpvn(tmpbuf,len));
1847 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1851 return *s == '(' ? FUNCMETH : METHOD;
1859 * Return a string of Perl code to load the debugger. If PERL5DB
1860 * is set, it will return the contents of that, otherwise a
1861 * compile-time require of perl5db.pl.
1868 char *pdb = PerlEnv_getenv("PERL5DB");
1872 SETERRNO(0,SS$_NORMAL);
1873 return "BEGIN { require 'perl5db.pl' }";
1879 /* Encoded script support. filter_add() effectively inserts a
1880 * 'pre-processing' function into the current source input stream.
1881 * Note that the filter function only applies to the current source file
1882 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1884 * The datasv parameter (which may be NULL) can be used to pass
1885 * private data to this instance of the filter. The filter function
1886 * can recover the SV using the FILTER_DATA macro and use it to
1887 * store private buffers and state information.
1889 * The supplied datasv parameter is upgraded to a PVIO type
1890 * and the IoDIRP/IoANY field is used to store the function pointer,
1891 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
1892 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1893 * private use must be set using malloc'd pointers.
1897 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
1902 if (!PL_rsfp_filters)
1903 PL_rsfp_filters = newAV();
1905 datasv = NEWSV(255,0);
1906 if (!SvUPGRADE(datasv, SVt_PVIO))
1907 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
1908 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
1909 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
1910 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1911 funcp, SvPV_nolen(datasv)));
1912 av_unshift(PL_rsfp_filters, 1);
1913 av_store(PL_rsfp_filters, 0, datasv) ;
1918 /* Delete most recently added instance of this filter function. */
1920 Perl_filter_del(pTHX_ filter_t funcp)
1923 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
1924 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
1926 /* if filter is on top of stack (usual case) just pop it off */
1927 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1928 if (IoANY(datasv) == (void *)funcp) {
1929 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1930 IoANY(datasv) = (void *)NULL;
1931 sv_free(av_pop(PL_rsfp_filters));
1935 /* we need to search for the correct entry and clear it */
1936 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
1940 /* Invoke the n'th filter function for the current rsfp. */
1942 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
1945 /* 0 = read one text line */
1950 if (!PL_rsfp_filters)
1952 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
1953 /* Provide a default input filter to make life easy. */
1954 /* Note that we append to the line. This is handy. */
1955 DEBUG_P(PerlIO_printf(Perl_debug_log,
1956 "filter_read %d: from rsfp\n", idx));
1960 int old_len = SvCUR(buf_sv) ;
1962 /* ensure buf_sv is large enough */
1963 SvGROW(buf_sv, old_len + maxlen) ;
1964 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1965 if (PerlIO_error(PL_rsfp))
1966 return -1; /* error */
1968 return 0 ; /* end of file */
1970 SvCUR_set(buf_sv, old_len + len) ;
1973 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1974 if (PerlIO_error(PL_rsfp))
1975 return -1; /* error */
1977 return 0 ; /* end of file */
1980 return SvCUR(buf_sv);
1982 /* Skip this filter slot if filter has been deleted */
1983 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
1984 DEBUG_P(PerlIO_printf(Perl_debug_log,
1985 "filter_read %d: skipped (filter deleted)\n",
1987 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1989 /* Get function pointer hidden within datasv */
1990 funcp = (filter_t)IoANY(datasv);
1991 DEBUG_P(PerlIO_printf(Perl_debug_log,
1992 "filter_read %d: via function %p (%s)\n",
1993 idx, funcp, SvPV_nolen(datasv)));
1994 /* Call function. The function is expected to */
1995 /* call "FILTER_READ(idx+1, buf_sv)" first. */
1996 /* Return: <0:error, =0:eof, >0:not eof */
1997 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
2001 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2003 #ifdef PERL_CR_FILTER
2004 if (!PL_rsfp_filters) {
2005 filter_add(S_cr_textfilter,NULL);
2008 if (PL_rsfp_filters) {
2011 SvCUR_set(sv, 0); /* start with empty line */
2012 if (FILTER_READ(0, sv, 0) > 0)
2013 return ( SvPVX(sv) ) ;
2018 return (sv_gets(sv, fp, append));
2022 S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
2026 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2030 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2031 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2033 return GvHV(gv); /* Foo:: */
2036 /* use constant CLASS => 'MyClass' */
2037 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2039 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2040 pkgname = SvPV_nolen(sv);
2044 return gv_stashpv(pkgname, FALSE);
2048 static char* exp_name[] =
2049 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2050 "ATTRTERM", "TERMBLOCK"
2057 Works out what to call the token just pulled out of the input
2058 stream. The yacc parser takes care of taking the ops we return and
2059 stitching them into a tree.
2065 if read an identifier
2066 if we're in a my declaration
2067 croak if they tried to say my($foo::bar)
2068 build the ops for a my() declaration
2069 if it's an access to a my() variable
2070 are we in a sort block?
2071 croak if my($a); $a <=> $b
2072 build ops for access to a my() variable
2073 if in a dq string, and they've said @foo and we can't find @foo
2075 build ops for a bareword
2076 if we already built the token before, use it.
2079 #ifdef USE_PURE_BISON
2081 #pragma segment Perl_yylex_r
2084 Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
2089 yylval_pointer[yyactlevel] = lvalp;
2090 yychar_pointer[yyactlevel] = lcharp;
2091 if (yyactlevel >= YYMAXLEVEL)
2092 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
2094 r = Perl_yylex(aTHX);
2103 #pragma segment Perl_yylex
2115 /* check if there's an identifier for us to look at */
2116 if (PL_pending_ident) {
2117 /* pit holds the identifier we read and pending_ident is reset */
2118 char pit = PL_pending_ident;
2119 PL_pending_ident = 0;
2121 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2122 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2124 /* if we're in a my(), we can't allow dynamics here.
2125 $foo'bar has already been turned into $foo::bar, so
2126 just check for colons.
2128 if it's a legal name, the OP is a PADANY.
2131 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
2132 if (strchr(PL_tokenbuf,':'))
2133 yyerror(Perl_form(aTHX_ "No package name allowed for "
2134 "variable %s in \"our\"",
2136 tmp = pad_allocmy(PL_tokenbuf);
2139 if (strchr(PL_tokenbuf,':'))
2140 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
2142 yylval.opval = newOP(OP_PADANY, 0);
2143 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2149 build the ops for accesses to a my() variable.
2151 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2152 then used in a comparison. This catches most, but not
2153 all cases. For instance, it catches
2154 sort { my($a); $a <=> $b }
2156 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2157 (although why you'd do that is anyone's guess).
2160 if (!strchr(PL_tokenbuf,':')) {
2162 /* Check for single character per-thread SVs */
2163 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2164 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2165 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
2167 yylval.opval = newOP(OP_THREADSV, 0);
2168 yylval.opval->op_targ = tmp;
2171 #endif /* USE_THREADS */
2172 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
2173 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
2174 /* might be an "our" variable" */
2175 if (SvFLAGS(namesv) & SVpad_OUR) {
2176 /* build ops for a bareword */
2177 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2178 sv_catpvn(sym, "::", 2);
2179 sv_catpv(sym, PL_tokenbuf+1);
2180 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
2181 yylval.opval->op_private = OPpCONST_ENTERED;
2182 gv_fetchpv(SvPVX(sym),
2184 ? (GV_ADDMULTI | GV_ADDINEVAL)
2187 ((PL_tokenbuf[0] == '$') ? SVt_PV
2188 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2193 /* if it's a sort block and they're naming $a or $b */
2194 if (PL_last_lop_op == OP_SORT &&
2195 PL_tokenbuf[0] == '$' &&
2196 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2199 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2200 d < PL_bufend && *d != '\n';
2203 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
2204 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
2210 yylval.opval = newOP(OP_PADANY, 0);
2211 yylval.opval->op_targ = tmp;
2217 Whine if they've said @foo in a doublequoted string,
2218 and @foo isn't a variable we can find in the symbol
2221 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2222 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2223 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2224 && ckWARN(WARN_AMBIGUOUS))
2226 /* Downgraded from fatal to warning 20000522 mjd */
2227 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2228 "Possible unintended interpolation of %s in string",
2233 /* build ops for a bareword */
2234 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
2235 yylval.opval->op_private = OPpCONST_ENTERED;
2236 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2237 ((PL_tokenbuf[0] == '$') ? SVt_PV
2238 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2243 /* no identifier pending identification */
2245 switch (PL_lex_state) {
2247 case LEX_NORMAL: /* Some compilers will produce faster */
2248 case LEX_INTERPNORMAL: /* code if we comment these out. */
2252 /* when we've already built the next token, just pull it out of the queue */
2255 yylval = PL_nextval[PL_nexttoke];
2257 PL_lex_state = PL_lex_defer;
2258 PL_expect = PL_lex_expect;
2259 PL_lex_defer = LEX_NORMAL;
2261 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2262 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2263 (IV)PL_nexttype[PL_nexttoke]); })
2265 return(PL_nexttype[PL_nexttoke]);
2267 /* interpolated case modifiers like \L \U, including \Q and \E.
2268 when we get here, PL_bufptr is at the \
2270 case LEX_INTERPCASEMOD:
2272 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2273 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2275 /* handle \E or end of string */
2276 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2280 if (PL_lex_casemods) {
2281 oldmod = PL_lex_casestack[--PL_lex_casemods];
2282 PL_lex_casestack[PL_lex_casemods] = '\0';
2284 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2286 PL_lex_state = LEX_INTERPCONCAT;
2290 if (PL_bufptr != PL_bufend)
2292 PL_lex_state = LEX_INTERPCONCAT;
2296 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2297 "### Saw case modifier at '%s'\n", PL_bufptr); })
2299 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2300 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
2301 if (strchr("LU", *s) &&
2302 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
2304 PL_lex_casestack[--PL_lex_casemods] = '\0';
2307 if (PL_lex_casemods > 10) {
2308 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2309 if (newlb != PL_lex_casestack) {
2311 PL_lex_casestack = newlb;
2314 PL_lex_casestack[PL_lex_casemods++] = *s;
2315 PL_lex_casestack[PL_lex_casemods] = '\0';
2316 PL_lex_state = LEX_INTERPCONCAT;
2317 PL_nextval[PL_nexttoke].ival = 0;
2320 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2322 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2324 PL_nextval[PL_nexttoke].ival = OP_LC;
2326 PL_nextval[PL_nexttoke].ival = OP_UC;
2328 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2330 Perl_croak(aTHX_ "panic: yylex");
2333 if (PL_lex_starts) {
2342 case LEX_INTERPPUSH:
2343 return sublex_push();
2345 case LEX_INTERPSTART:
2346 if (PL_bufptr == PL_bufend)
2347 return sublex_done();
2348 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2349 "### Interpolated variable at '%s'\n", PL_bufptr); })
2351 PL_lex_dojoin = (*PL_bufptr == '@');
2352 PL_lex_state = LEX_INTERPNORMAL;
2353 if (PL_lex_dojoin) {
2354 PL_nextval[PL_nexttoke].ival = 0;
2357 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2358 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
2359 force_next(PRIVATEREF);
2361 force_ident("\"", '$');
2362 #endif /* USE_THREADS */
2363 PL_nextval[PL_nexttoke].ival = 0;
2365 PL_nextval[PL_nexttoke].ival = 0;
2367 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
2370 if (PL_lex_starts++) {
2376 case LEX_INTERPENDMAYBE:
2377 if (intuit_more(PL_bufptr)) {
2378 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2384 if (PL_lex_dojoin) {
2385 PL_lex_dojoin = FALSE;
2386 PL_lex_state = LEX_INTERPCONCAT;
2389 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2390 && SvEVALED(PL_lex_repl))
2392 if (PL_bufptr != PL_bufend)
2393 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2394 PL_lex_repl = Nullsv;
2397 case LEX_INTERPCONCAT:
2399 if (PL_lex_brackets)
2400 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2402 if (PL_bufptr == PL_bufend)
2403 return sublex_done();
2405 if (SvIVX(PL_linestr) == '\'') {
2406 SV *sv = newSVsv(PL_linestr);
2409 else if ( PL_hints & HINT_NEW_RE )
2410 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2411 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2415 s = scan_const(PL_bufptr);
2417 PL_lex_state = LEX_INTERPCASEMOD;
2419 PL_lex_state = LEX_INTERPSTART;
2422 if (s != PL_bufptr) {
2423 PL_nextval[PL_nexttoke] = yylval;
2426 if (PL_lex_starts++)
2436 PL_lex_state = LEX_NORMAL;
2437 s = scan_formline(PL_bufptr);
2438 if (!PL_lex_formbrack)
2444 PL_oldoldbufptr = PL_oldbufptr;
2447 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2448 exp_name[PL_expect], s);
2454 if (isIDFIRST_lazy_if(s,UTF))
2456 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2459 goto fake_eof; /* emulate EOF on ^D or ^Z */
2464 if (PL_lex_brackets)
2465 yyerror("Missing right curly or square bracket");
2466 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2467 "### Tokener got EOF\n");
2471 if (s++ < PL_bufend)
2472 goto retry; /* ignore stray nulls */
2475 if (!PL_in_eval && !PL_preambled) {
2476 PL_preambled = TRUE;
2477 sv_setpv(PL_linestr,incl_perldb());
2478 if (SvCUR(PL_linestr))
2479 sv_catpv(PL_linestr,";");
2481 while(AvFILLp(PL_preambleav) >= 0) {
2482 SV *tmpsv = av_shift(PL_preambleav);
2483 sv_catsv(PL_linestr, tmpsv);
2484 sv_catpv(PL_linestr, ";");
2487 sv_free((SV*)PL_preambleav);
2488 PL_preambleav = NULL;
2490 if (PL_minus_n || PL_minus_p) {
2491 sv_catpv(PL_linestr, "LINE: while (<>) {");
2493 sv_catpv(PL_linestr,"chomp;");
2495 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2497 GvIMPORTED_AV_on(gv);
2499 if (strchr("/'\"", *PL_splitstr)
2500 && strchr(PL_splitstr + 1, *PL_splitstr))
2501 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
2504 s = "'~#\200\1'"; /* surely one char is unused...*/
2505 while (s[1] && strchr(PL_splitstr, *s)) s++;
2507 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
2508 "q" + (delim == '\''), delim);
2509 for (s = PL_splitstr; *s; s++) {
2511 sv_catpvn(PL_linestr, "\\", 1);
2512 sv_catpvn(PL_linestr, s, 1);
2514 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
2518 sv_catpv(PL_linestr,"@F=split(' ');");
2521 sv_catpv(PL_linestr, "\n");
2522 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2523 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2524 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2525 SV *sv = NEWSV(85,0);
2527 sv_upgrade(sv, SVt_PVMG);
2528 sv_setsv(sv,PL_linestr);
2529 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2534 bool bof = PL_rsfp ? TRUE : FALSE;
2536 #ifdef PERLIO_IS_STDIO
2537 # ifdef __GNU_LIBRARY__
2538 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2539 # define FTELL_FOR_PIPE_IS_BROKEN
2543 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2544 # define FTELL_FOR_PIPE_IS_BROKEN
2549 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2550 /* This loses the possibility to detect the bof
2551 * situation on perl -P when the libc5 is being used.
2552 * Workaround? Maybe attach some extra state to PL_rsfp?
2555 bof = PerlIO_tell(PL_rsfp) == 0;
2557 bof = PerlIO_tell(PL_rsfp) == 0;
2560 s = filter_gets(PL_linestr, PL_rsfp, 0);
2564 if (PL_preprocess && !PL_in_eval)
2565 (void)PerlProc_pclose(PL_rsfp);
2566 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2567 PerlIO_clearerr(PL_rsfp);
2569 (void)PerlIO_close(PL_rsfp);
2571 PL_doextract = FALSE;
2573 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2574 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2575 sv_catpv(PL_linestr,";}");
2576 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2577 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2578 PL_minus_n = PL_minus_p = 0;
2581 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2582 sv_setpv(PL_linestr,"");
2583 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2585 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2586 s = swallow_bom((U8*)s);
2589 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
2590 PL_doextract = FALSE;
2592 /* Incest with pod. */
2593 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2594 sv_setpv(PL_linestr, "");
2595 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2596 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2597 PL_doextract = FALSE;
2601 } while (PL_doextract);
2602 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2603 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2604 SV *sv = NEWSV(85,0);
2606 sv_upgrade(sv, SVt_PVMG);
2607 sv_setsv(sv,PL_linestr);
2608 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2610 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2611 if (CopLINE(PL_curcop) == 1) {
2612 while (s < PL_bufend && isSPACE(*s))
2614 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2618 if (*s == '#' && *(s+1) == '!')
2620 #ifdef ALTERNATE_SHEBANG
2622 static char as[] = ALTERNATE_SHEBANG;
2623 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2624 d = s + (sizeof(as) - 1);
2626 #endif /* ALTERNATE_SHEBANG */
2635 while (*d && !isSPACE(*d))
2639 #ifdef ARG_ZERO_IS_SCRIPT
2640 if (ipathend > ipath) {
2642 * HP-UX (at least) sets argv[0] to the script name,
2643 * which makes $^X incorrect. And Digital UNIX and Linux,
2644 * at least, set argv[0] to the basename of the Perl
2645 * interpreter. So, having found "#!", we'll set it right.
2647 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2648 assert(SvPOK(x) || SvGMAGICAL(x));
2649 if (sv_eq(x, CopFILESV(PL_curcop))) {
2650 sv_setpvn(x, ipath, ipathend - ipath);
2653 TAINT_NOT; /* $^X is always tainted, but that's OK */
2655 #endif /* ARG_ZERO_IS_SCRIPT */
2660 d = instr(s,"perl -");
2662 d = instr(s,"perl");
2664 /* avoid getting into infinite loops when shebang
2665 * line contains "Perl" rather than "perl" */
2667 for (d = ipathend-4; d >= ipath; --d) {
2668 if ((*d == 'p' || *d == 'P')
2669 && !ibcmp(d, "perl", 4))
2679 #ifdef ALTERNATE_SHEBANG
2681 * If the ALTERNATE_SHEBANG on this system starts with a
2682 * character that can be part of a Perl expression, then if
2683 * we see it but not "perl", we're probably looking at the
2684 * start of Perl code, not a request to hand off to some
2685 * other interpreter. Similarly, if "perl" is there, but
2686 * not in the first 'word' of the line, we assume the line
2687 * contains the start of the Perl program.
2689 if (d && *s != '#') {
2691 while (*c && !strchr("; \t\r\n\f\v#", *c))
2694 d = Nullch; /* "perl" not in first word; ignore */
2696 *s = '#'; /* Don't try to parse shebang line */
2698 #endif /* ALTERNATE_SHEBANG */
2699 #ifndef MACOS_TRADITIONAL
2704 !instr(s,"indir") &&
2705 instr(PL_origargv[0],"perl"))
2711 while (s < PL_bufend && isSPACE(*s))
2713 if (s < PL_bufend) {
2714 Newz(899,newargv,PL_origargc+3,char*);
2716 while (s < PL_bufend && !isSPACE(*s))
2719 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
2722 newargv = PL_origargv;
2724 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
2725 Perl_croak(aTHX_ "Can't exec %s", ipath);
2729 U32 oldpdb = PL_perldb;
2730 bool oldn = PL_minus_n;
2731 bool oldp = PL_minus_p;
2733 while (*d && !isSPACE(*d)) d++;
2734 while (SPACE_OR_TAB(*d)) d++;
2738 if (*d == 'M' || *d == 'm') {
2740 while (*d && !isSPACE(*d)) d++;
2741 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
2744 d = moreswitches(d);
2746 if ((PERLDB_LINE && !oldpdb) ||
2747 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
2748 /* if we have already added "LINE: while (<>) {",
2749 we must not do it again */
2751 sv_setpv(PL_linestr, "");
2752 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2753 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2754 PL_preambled = FALSE;
2756 (void)gv_fetchfile(PL_origfilename);
2763 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2765 PL_lex_state = LEX_FORMLINE;
2770 #ifdef PERL_STRICT_CR
2771 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2773 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
2775 case ' ': case '\t': case '\f': case 013:
2776 #ifdef MACOS_TRADITIONAL
2783 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2784 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2785 /* handle eval qq[#line 1 "foo"\n ...] */
2786 CopLINE_dec(PL_curcop);
2790 while (s < d && *s != '\n')
2795 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2797 PL_lex_state = LEX_FORMLINE;
2807 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
2814 while (s < PL_bufend && SPACE_OR_TAB(*s))
2817 if (strnEQ(s,"=>",2)) {
2818 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
2819 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2820 "### Saw unary minus before =>, forcing word '%s'\n", s);
2822 OPERATOR('-'); /* unary minus */
2824 PL_last_uni = PL_oldbufptr;
2826 case 'r': ftst = OP_FTEREAD; break;
2827 case 'w': ftst = OP_FTEWRITE; break;
2828 case 'x': ftst = OP_FTEEXEC; break;
2829 case 'o': ftst = OP_FTEOWNED; break;
2830 case 'R': ftst = OP_FTRREAD; break;
2831 case 'W': ftst = OP_FTRWRITE; break;
2832 case 'X': ftst = OP_FTREXEC; break;
2833 case 'O': ftst = OP_FTROWNED; break;
2834 case 'e': ftst = OP_FTIS; break;
2835 case 'z': ftst = OP_FTZERO; break;
2836 case 's': ftst = OP_FTSIZE; break;
2837 case 'f': ftst = OP_FTFILE; break;
2838 case 'd': ftst = OP_FTDIR; break;
2839 case 'l': ftst = OP_FTLINK; break;
2840 case 'p': ftst = OP_FTPIPE; break;
2841 case 'S': ftst = OP_FTSOCK; break;
2842 case 'u': ftst = OP_FTSUID; break;
2843 case 'g': ftst = OP_FTSGID; break;
2844 case 'k': ftst = OP_FTSVTX; break;
2845 case 'b': ftst = OP_FTBLK; break;
2846 case 'c': ftst = OP_FTCHR; break;
2847 case 't': ftst = OP_FTTTY; break;
2848 case 'T': ftst = OP_FTTEXT; break;
2849 case 'B': ftst = OP_FTBINARY; break;
2850 case 'M': case 'A': case 'C':
2851 gv_fetchpv("\024",TRUE, SVt_PV);
2853 case 'M': ftst = OP_FTMTIME; break;
2854 case 'A': ftst = OP_FTATIME; break;
2855 case 'C': ftst = OP_FTCTIME; break;
2863 PL_last_lop_op = ftst;
2864 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2865 "### Saw file test %c\n", ftst);
2870 /* Assume it was a minus followed by a one-letter named
2871 * subroutine call (or a -bareword), then. */
2872 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2873 "### %c looked like a file test but was not\n", ftst);
2881 if (PL_expect == XOPERATOR)
2886 else if (*s == '>') {
2889 if (isIDFIRST_lazy_if(s,UTF)) {
2890 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
2898 if (PL_expect == XOPERATOR)
2901 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2903 OPERATOR('-'); /* unary minus */
2910 if (PL_expect == XOPERATOR)
2915 if (PL_expect == XOPERATOR)
2918 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2924 if (PL_expect != XOPERATOR) {
2925 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2926 PL_expect = XOPERATOR;
2927 force_ident(PL_tokenbuf, '*');
2940 if (PL_expect == XOPERATOR) {
2944 PL_tokenbuf[0] = '%';
2945 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2946 if (!PL_tokenbuf[1]) {
2948 yyerror("Final % should be \\% or %name");
2951 PL_pending_ident = '%';
2970 switch (PL_expect) {
2973 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2975 PL_bufptr = s; /* update in case we back off */
2981 PL_expect = XTERMBLOCK;
2985 while (isIDFIRST_lazy_if(s,UTF)) {
2986 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
2987 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2988 if (tmp < 0) tmp = -tmp;
3003 d = scan_str(d,TRUE,TRUE);
3006 SvREFCNT_dec(PL_lex_stuff);
3007 PL_lex_stuff = Nullsv;
3009 /* MUST advance bufptr here to avoid bogus
3010 "at end of line" context messages from yyerror().
3012 PL_bufptr = s + len;
3013 yyerror("Unterminated attribute parameter in attribute list");
3016 return 0; /* EOF indicator */
3020 SV *sv = newSVpvn(s, len);
3021 sv_catsv(sv, PL_lex_stuff);
3022 attrs = append_elem(OP_LIST, attrs,
3023 newSVOP(OP_CONST, 0, sv));
3024 SvREFCNT_dec(PL_lex_stuff);
3025 PL_lex_stuff = Nullsv;
3028 attrs = append_elem(OP_LIST, attrs,
3029 newSVOP(OP_CONST, 0,
3033 if (*s == ':' && s[1] != ':')
3036 break; /* require real whitespace or :'s */
3038 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3039 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
3040 char q = ((*s == '\'') ? '"' : '\'');
3041 /* If here for an expression, and parsed no attrs, back off. */
3042 if (tmp == '=' && !attrs) {
3046 /* MUST advance bufptr here to avoid bogus "at end of line"
3047 context messages from yyerror().
3051 yyerror("Unterminated attribute list");
3053 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3061 PL_nextval[PL_nexttoke].opval = attrs;
3069 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3070 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3086 if (PL_lex_brackets <= 0)
3087 yyerror("Unmatched right square bracket");
3090 if (PL_lex_state == LEX_INTERPNORMAL) {
3091 if (PL_lex_brackets == 0) {
3092 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3093 PL_lex_state = LEX_INTERPEND;
3100 if (PL_lex_brackets > 100) {
3101 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3102 if (newlb != PL_lex_brackstack) {
3104 PL_lex_brackstack = newlb;
3107 switch (PL_expect) {
3109 if (PL_lex_formbrack) {
3113 if (PL_oldoldbufptr == PL_last_lop)
3114 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3116 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3117 OPERATOR(HASHBRACK);
3119 while (s < PL_bufend && SPACE_OR_TAB(*s))
3122 PL_tokenbuf[0] = '\0';
3123 if (d < PL_bufend && *d == '-') {
3124 PL_tokenbuf[0] = '-';
3126 while (d < PL_bufend && SPACE_OR_TAB(*d))
3129 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3130 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3132 while (d < PL_bufend && SPACE_OR_TAB(*d))
3135 char minus = (PL_tokenbuf[0] == '-');
3136 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3137 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
3138 PL_nextval[PL_nexttoke-1].opval)
3139 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
3147 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3152 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3157 if (PL_oldoldbufptr == PL_last_lop)
3158 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3160 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3163 OPERATOR(HASHBRACK);
3164 /* This hack serves to disambiguate a pair of curlies
3165 * as being a block or an anon hash. Normally, expectation
3166 * determines that, but in cases where we're not in a
3167 * position to expect anything in particular (like inside
3168 * eval"") we have to resolve the ambiguity. This code
3169 * covers the case where the first term in the curlies is a
3170 * quoted string. Most other cases need to be explicitly
3171 * disambiguated by prepending a `+' before the opening
3172 * curly in order to force resolution as an anon hash.
3174 * XXX should probably propagate the outer expectation
3175 * into eval"" to rely less on this hack, but that could
3176 * potentially break current behavior of eval"".
3180 if (*s == '\'' || *s == '"' || *s == '`') {
3181 /* common case: get past first string, handling escapes */
3182 for (t++; t < PL_bufend && *t != *s;)
3183 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3187 else if (*s == 'q') {
3190 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3194 char open, close, term;
3197 while (t < PL_bufend && isSPACE(*t))
3201 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3205 for (t++; t < PL_bufend; t++) {
3206 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3208 else if (*t == open)
3212 for (t++; t < PL_bufend; t++) {
3213 if (*t == '\\' && t+1 < PL_bufend)
3215 else if (*t == close && --brackets <= 0)
3217 else if (*t == open)
3223 else if (isALNUM_lazy_if(t,UTF)) {
3225 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3228 while (t < PL_bufend && isSPACE(*t))
3230 /* if comma follows first term, call it an anon hash */
3231 /* XXX it could be a comma expression with loop modifiers */
3232 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3233 || (*t == '=' && t[1] == '>')))
3234 OPERATOR(HASHBRACK);
3235 if (PL_expect == XREF)
3238 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3244 yylval.ival = CopLINE(PL_curcop);
3245 if (isSPACE(*s) || *s == '#')
3246 PL_copline = NOLINE; /* invalidate current command line number */
3251 if (PL_lex_brackets <= 0)
3252 yyerror("Unmatched right curly bracket");
3254 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3255 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3256 PL_lex_formbrack = 0;
3257 if (PL_lex_state == LEX_INTERPNORMAL) {
3258 if (PL_lex_brackets == 0) {
3259 if (PL_expect & XFAKEBRACK) {
3260 PL_expect &= XENUMMASK;
3261 PL_lex_state = LEX_INTERPEND;
3263 return yylex(); /* ignore fake brackets */
3265 if (*s == '-' && s[1] == '>')
3266 PL_lex_state = LEX_INTERPENDMAYBE;
3267 else if (*s != '[' && *s != '{')
3268 PL_lex_state = LEX_INTERPEND;
3271 if (PL_expect & XFAKEBRACK) {
3272 PL_expect &= XENUMMASK;
3274 return yylex(); /* ignore fake brackets */
3284 if (PL_expect == XOPERATOR) {
3285 if (ckWARN(WARN_SEMICOLON)
3286 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3288 CopLINE_dec(PL_curcop);
3289 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3290 CopLINE_inc(PL_curcop);
3295 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3297 PL_expect = XOPERATOR;
3298 force_ident(PL_tokenbuf, '&');
3302 yylval.ival = (OPpENTERSUB_AMPER<<8);
3321 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
3322 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
3324 if (PL_expect == XSTATE && isALPHA(tmp) &&
3325 (s == PL_linestart+1 || s[-2] == '\n') )
3327 if (PL_in_eval && !PL_rsfp) {
3332 if (strnEQ(s,"=cut",4)) {
3346 PL_doextract = TRUE;
3349 if (PL_lex_brackets < PL_lex_formbrack) {
3351 #ifdef PERL_STRICT_CR
3352 for (t = s; SPACE_OR_TAB(*t); t++) ;
3354 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3356 if (*t == '\n' || *t == '#') {
3374 if (PL_expect != XOPERATOR) {
3375 if (s[1] != '<' && !strchr(s,'>'))
3378 s = scan_heredoc(s);
3380 s = scan_inputsymbol(s);
3381 TERM(sublex_start());
3386 SHop(OP_LEFT_SHIFT);
3400 SHop(OP_RIGHT_SHIFT);
3409 if (PL_expect == XOPERATOR) {
3410 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3413 return ','; /* grandfather non-comma-format format */
3417 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3418 PL_tokenbuf[0] = '@';
3419 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3420 sizeof PL_tokenbuf - 1, FALSE);
3421 if (PL_expect == XOPERATOR)
3422 no_op("Array length", s);
3423 if (!PL_tokenbuf[1])
3425 PL_expect = XOPERATOR;
3426 PL_pending_ident = '#';
3430 PL_tokenbuf[0] = '$';
3431 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3432 sizeof PL_tokenbuf - 1, FALSE);
3433 if (PL_expect == XOPERATOR)
3435 if (!PL_tokenbuf[1]) {
3437 yyerror("Final $ should be \\$ or $name");
3441 /* This kludge not intended to be bulletproof. */
3442 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3443 yylval.opval = newSVOP(OP_CONST, 0,
3444 newSViv(PL_compiling.cop_arybase));
3445 yylval.opval->op_private = OPpCONST_ARYBASE;
3451 if (PL_lex_state == LEX_NORMAL)
3454 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3457 PL_tokenbuf[0] = '@';
3458 if (ckWARN(WARN_SYNTAX)) {
3460 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3463 PL_bufptr = skipspace(PL_bufptr);
3464 while (t < PL_bufend && *t != ']')
3466 Perl_warner(aTHX_ WARN_SYNTAX,
3467 "Multidimensional syntax %.*s not supported",
3468 (t - PL_bufptr) + 1, PL_bufptr);
3472 else if (*s == '{') {
3473 PL_tokenbuf[0] = '%';
3474 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
3475 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3477 char tmpbuf[sizeof PL_tokenbuf];
3479 for (t++; isSPACE(*t); t++) ;
3480 if (isIDFIRST_lazy_if(t,UTF)) {
3481 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
3482 for (; isSPACE(*t); t++) ;
3483 if (*t == ';' && get_cv(tmpbuf, FALSE))
3484 Perl_warner(aTHX_ WARN_SYNTAX,
3485 "You need to quote \"%s\"", tmpbuf);
3491 PL_expect = XOPERATOR;
3492 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3493 bool islop = (PL_last_lop == PL_oldoldbufptr);
3494 if (!islop || PL_last_lop_op == OP_GREPSTART)
3495 PL_expect = XOPERATOR;
3496 else if (strchr("$@\"'`q", *s))
3497 PL_expect = XTERM; /* e.g. print $fh "foo" */
3498 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3499 PL_expect = XTERM; /* e.g. print $fh &sub */
3500 else if (isIDFIRST_lazy_if(s,UTF)) {
3501 char tmpbuf[sizeof PL_tokenbuf];
3502 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3503 if ((tmp = keyword(tmpbuf, len))) {
3504 /* binary operators exclude handle interpretations */
3516 PL_expect = XTERM; /* e.g. print $fh length() */
3521 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3522 if (gv && GvCVu(gv))
3523 PL_expect = XTERM; /* e.g. print $fh subr() */
3526 else if (isDIGIT(*s))
3527 PL_expect = XTERM; /* e.g. print $fh 3 */
3528 else if (*s == '.' && isDIGIT(s[1]))
3529 PL_expect = XTERM; /* e.g. print $fh .3 */
3530 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3531 PL_expect = XTERM; /* e.g. print $fh -1 */
3532 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3533 PL_expect = XTERM; /* print $fh <<"EOF" */
3535 PL_pending_ident = '$';
3539 if (PL_expect == XOPERATOR)
3541 PL_tokenbuf[0] = '@';
3542 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3543 if (!PL_tokenbuf[1]) {
3545 yyerror("Final @ should be \\@ or @name");
3548 if (PL_lex_state == LEX_NORMAL)
3550 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
3552 PL_tokenbuf[0] = '%';
3554 /* Warn about @ where they meant $. */
3555 if (ckWARN(WARN_SYNTAX)) {
3556 if (*s == '[' || *s == '{') {
3558 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
3560 if (*t == '}' || *t == ']') {
3562 PL_bufptr = skipspace(PL_bufptr);
3563 Perl_warner(aTHX_ WARN_SYNTAX,
3564 "Scalar value %.*s better written as $%.*s",
3565 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
3570 PL_pending_ident = '@';
3573 case '/': /* may either be division or pattern */
3574 case '?': /* may either be conditional or pattern */
3575 if (PL_expect != XOPERATOR) {
3576 /* Disable warning on "study /blah/" */
3577 if (PL_oldoldbufptr == PL_last_uni
3578 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3579 || memNE(PL_last_uni, "study", 5)
3580 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
3582 s = scan_pat(s,OP_MATCH);
3583 TERM(sublex_start());
3591 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3592 #ifdef PERL_STRICT_CR
3595 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3597 && (s == PL_linestart || s[-1] == '\n') )
3599 PL_lex_formbrack = 0;
3603 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
3609 yylval.ival = OPf_SPECIAL;
3615 if (PL_expect != XOPERATOR)
3620 case '0': case '1': case '2': case '3': case '4':
3621 case '5': case '6': case '7': case '8': case '9':
3622 s = scan_num(s, &yylval);
3623 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3624 "### Saw number in '%s'\n", s);
3626 if (PL_expect == XOPERATOR)
3631 s = scan_str(s,FALSE,FALSE);
3632 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3633 "### Saw string in '%s'\n", s);
3635 if (PL_expect == XOPERATOR) {
3636 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3639 return ','; /* grandfather non-comma-format format */
3645 missingterm((char*)0);
3646 yylval.ival = OP_CONST;
3647 TERM(sublex_start());
3650 s = scan_str(s,FALSE,FALSE);
3651 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3652 "### Saw string in '%s'\n", s);
3654 if (PL_expect == XOPERATOR) {
3655 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3658 return ','; /* grandfather non-comma-format format */
3664 missingterm((char*)0);
3665 yylval.ival = OP_CONST;
3666 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
3667 if (*d == '$' || *d == '@' || *d == '\\' || UTF8_IS_CONTINUED(*d)) {
3668 yylval.ival = OP_STRINGIFY;
3672 TERM(sublex_start());
3675 s = scan_str(s,FALSE,FALSE);
3676 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3677 "### Saw backtick string in '%s'\n", s);
3679 if (PL_expect == XOPERATOR)
3680 no_op("Backticks",s);
3682 missingterm((char*)0);
3683 yylval.ival = OP_BACKTICK;
3685 TERM(sublex_start());
3689 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
3690 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
3692 if (PL_expect == XOPERATOR)
3693 no_op("Backslash",s);
3697 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
3701 while (isDIGIT(*start) || *start == '_')
3703 if (*start == '.' && isDIGIT(start[1])) {
3704 s = scan_num(s, &yylval);
3707 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3708 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) {
3712 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3715 s = scan_num(s, &yylval);
3722 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
3761 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3763 /* Some keywords can be followed by any delimiter, including ':' */
3764 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3765 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3766 (PL_tokenbuf[0] == 'q' &&
3767 strchr("qwxr", PL_tokenbuf[1])))));
3769 /* x::* is just a word, unless x is "CORE" */
3770 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
3774 while (d < PL_bufend && isSPACE(*d))
3775 d++; /* no comments skipped here, or s### is misparsed */
3777 /* Is this a label? */
3778 if (!tmp && PL_expect == XSTATE
3779 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
3781 yylval.pval = savepv(PL_tokenbuf);
3786 /* Check for keywords */
3787 tmp = keyword(PL_tokenbuf, len);
3789 /* Is this a word before a => operator? */
3790 if (*d == '=' && d[1] == '>') {
3792 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
3793 yylval.opval->op_private = OPpCONST_BARE;
3794 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
3795 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
3799 if (tmp < 0) { /* second-class keyword? */
3800 GV *ogv = Nullgv; /* override (winner) */
3801 GV *hgv = Nullgv; /* hidden (loser) */
3802 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
3804 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
3807 if (GvIMPORTED_CV(gv))
3809 else if (! CvMETHOD(cv))
3813 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3814 (gv = *gvp) != (GV*)&PL_sv_undef &&
3815 GvCVu(gv) && GvIMPORTED_CV(gv))
3821 tmp = 0; /* overridden by import or by GLOBAL */
3824 && -tmp==KEY_lock /* XXX generalizable kludge */
3826 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
3828 tmp = 0; /* any sub overrides "weak" keyword */
3830 else { /* no override */
3834 if (ckWARN(WARN_AMBIGUOUS) && hgv
3835 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
3836 Perl_warner(aTHX_ WARN_AMBIGUOUS,
3837 "Ambiguous call resolved as CORE::%s(), %s",
3838 GvENAME(hgv), "qualify as such or use &");
3845 default: /* not a keyword */
3848 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
3850 /* Get the rest if it looks like a package qualifier */
3852 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
3854 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
3857 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
3858 *s == '\'' ? "'" : "::");
3862 if (PL_expect == XOPERATOR) {
3863 if (PL_bufptr == PL_linestart) {
3864 CopLINE_dec(PL_curcop);
3865 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3866 CopLINE_inc(PL_curcop);
3869 no_op("Bareword",s);
3872 /* Look for a subroutine with this name in current package,
3873 unless name is "Foo::", in which case Foo is a bearword
3874 (and a package name). */
3877 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
3879 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
3880 Perl_warner(aTHX_ WARN_BAREWORD,
3881 "Bareword \"%s\" refers to nonexistent package",
3884 PL_tokenbuf[len] = '\0';
3891 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
3894 /* if we saw a global override before, get the right name */
3897 sv = newSVpvn("CORE::GLOBAL::",14);
3898 sv_catpv(sv,PL_tokenbuf);
3901 sv = newSVpv(PL_tokenbuf,0);
3903 /* Presume this is going to be a bareword of some sort. */
3906 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3907 yylval.opval->op_private = OPpCONST_BARE;
3909 /* And if "Foo::", then that's what it certainly is. */
3914 /* See if it's the indirect object for a list operator. */
3916 if (PL_oldoldbufptr &&
3917 PL_oldoldbufptr < PL_bufptr &&
3918 (PL_oldoldbufptr == PL_last_lop
3919 || PL_oldoldbufptr == PL_last_uni) &&
3920 /* NO SKIPSPACE BEFORE HERE! */
3921 (PL_expect == XREF ||
3922 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
3924 bool immediate_paren = *s == '(';
3926 /* (Now we can afford to cross potential line boundary.) */
3929 /* Two barewords in a row may indicate method call. */
3931 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
3934 /* If not a declared subroutine, it's an indirect object. */
3935 /* (But it's an indir obj regardless for sort.) */
3937 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
3938 ((!gv || !GvCVu(gv)) &&
3939 (PL_last_lop_op != OP_MAPSTART &&
3940 PL_last_lop_op != OP_GREPSTART))))
3942 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
3948 PL_expect = XOPERATOR;
3951 /* Is this a word before a => operator? */
3952 if (*s == '=' && s[1] == '>') {
3954 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
3955 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
3956 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
3960 /* If followed by a paren, it's certainly a subroutine. */
3963 if (gv && GvCVu(gv)) {
3964 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
3965 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
3970 PL_nextval[PL_nexttoke].opval = yylval.opval;
3971 PL_expect = XOPERATOR;
3977 /* If followed by var or block, call it a method (unless sub) */
3979 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3980 PL_last_lop = PL_oldbufptr;
3981 PL_last_lop_op = OP_METHOD;
3985 /* If followed by a bareword, see if it looks like indir obj. */
3987 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp = intuit_method(s,gv)))
3990 /* Not a method, so call it a subroutine (if defined) */
3992 if (gv && GvCVu(gv)) {
3994 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
3995 Perl_warner(aTHX_ WARN_AMBIGUOUS,
3996 "Ambiguous use of -%s resolved as -&%s()",
3997 PL_tokenbuf, PL_tokenbuf);
3998 /* Check for a constant sub */
4000 if ((sv = cv_const_sv(cv))) {
4002 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4003 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4004 yylval.opval->op_private = 0;
4008 /* Resolve to GV now. */
4009 op_free(yylval.opval);
4010 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4011 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4012 PL_last_lop = PL_oldbufptr;
4013 PL_last_lop_op = OP_ENTERSUB;
4014 /* Is there a prototype? */
4017 char *proto = SvPV((SV*)cv, len);
4020 if (strEQ(proto, "$"))
4022 if (*proto == '&' && *s == '{') {
4023 sv_setpv(PL_subname,"__ANON__");
4027 PL_nextval[PL_nexttoke].opval = yylval.opval;
4033 /* Call it a bare word */
4035 if (PL_hints & HINT_STRICT_SUBS)
4036 yylval.opval->op_private |= OPpCONST_STRICT;
4039 if (ckWARN(WARN_RESERVED)) {
4040 if (lastchar != '-') {
4041 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4043 Perl_warner(aTHX_ WARN_RESERVED, PL_warn_reserved,
4050 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
4051 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4052 "Operator or semicolon missing before %c%s",
4053 lastchar, PL_tokenbuf);
4054 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4055 "Ambiguous use of %c resolved as operator %c",
4056 lastchar, lastchar);
4062 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4063 newSVpv(CopFILE(PL_curcop),0));
4067 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4068 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4071 case KEY___PACKAGE__:
4072 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4074 ? newSVsv(PL_curstname)
4083 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4084 char *pname = "main";
4085 if (PL_tokenbuf[2] == 'D')
4086 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
4087 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
4090 GvIOp(gv) = newIO();
4091 IoIFP(GvIOp(gv)) = PL_rsfp;
4092 #if defined(HAS_FCNTL) && defined(F_SETFD)
4094 int fd = PerlIO_fileno(PL_rsfp);
4095 fcntl(fd,F_SETFD,fd >= 3);
4098 /* Mark this internal pseudo-handle as clean */
4099 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4101 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4102 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4103 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4105 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4106 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4107 /* if the script was opened in binmode, we need to revert
4108 * it to text mode for compatibility; but only iff it has CRs
4109 * XXX this is a questionable hack at best. */
4110 if (PL_bufend-PL_bufptr > 2
4111 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4114 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4115 loc = PerlIO_tell(PL_rsfp);
4116 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4118 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4119 #if defined(__BORLANDC__)
4120 /* XXX see note in do_binmode() */
4121 ((FILE*)PL_rsfp)->flags |= _F_BIN;
4124 PerlIO_seek(PL_rsfp, loc, 0);
4128 #ifdef PERLIO_LAYERS
4129 if (UTF && !IN_BYTE)
4130 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4143 if (PL_expect == XSTATE) {
4150 if (*s == ':' && s[1] == ':') {
4153 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4154 if (!(tmp = keyword(PL_tokenbuf, len)))
4155 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4169 LOP(OP_ACCEPT,XTERM);
4175 LOP(OP_ATAN2,XTERM);
4181 LOP(OP_BINMODE,XTERM);
4184 LOP(OP_BLESS,XTERM);
4193 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
4210 if (!PL_cryptseen) {
4211 PL_cryptseen = TRUE;
4215 LOP(OP_CRYPT,XTERM);
4218 if (ckWARN(WARN_CHMOD)) {
4219 for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
4220 if (*d != '0' && isDIGIT(*d))
4221 Perl_warner(aTHX_ WARN_CHMOD,
4222 "chmod() mode argument is missing initial 0");
4224 LOP(OP_CHMOD,XTERM);
4227 LOP(OP_CHOWN,XTERM);
4230 LOP(OP_CONNECT,XTERM);
4246 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4250 PL_hints |= HINT_BLOCK_SCOPE;
4260 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4261 LOP(OP_DBMOPEN,XTERM);
4267 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4274 yylval.ival = CopLINE(PL_curcop);
4288 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4289 UNIBRACK(OP_ENTEREVAL);
4304 case KEY_endhostent:
4310 case KEY_endservent:
4313 case KEY_endprotoent:
4324 yylval.ival = CopLINE(PL_curcop);
4326 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4328 if ((PL_bufend - p) >= 3 &&
4329 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4331 else if ((PL_bufend - p) >= 4 &&
4332 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4335 if (isIDFIRST_lazy_if(p,UTF)) {
4336 p = scan_ident(p, PL_bufend,
4337 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4341 Perl_croak(aTHX_ "Missing $ on loop variable");
4346 LOP(OP_FORMLINE,XTERM);
4352 LOP(OP_FCNTL,XTERM);
4358 LOP(OP_FLOCK,XTERM);
4367 LOP(OP_GREPSTART, XREF);
4370 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4385 case KEY_getpriority:
4386 LOP(OP_GETPRIORITY,XTERM);
4388 case KEY_getprotobyname:
4391 case KEY_getprotobynumber:
4392 LOP(OP_GPBYNUMBER,XTERM);
4394 case KEY_getprotoent:
4406 case KEY_getpeername:
4407 UNI(OP_GETPEERNAME);
4409 case KEY_gethostbyname:
4412 case KEY_gethostbyaddr:
4413 LOP(OP_GHBYADDR,XTERM);
4415 case KEY_gethostent:
4418 case KEY_getnetbyname:
4421 case KEY_getnetbyaddr:
4422 LOP(OP_GNBYADDR,XTERM);
4427 case KEY_getservbyname:
4428 LOP(OP_GSBYNAME,XTERM);
4430 case KEY_getservbyport:
4431 LOP(OP_GSBYPORT,XTERM);
4433 case KEY_getservent:
4436 case KEY_getsockname:
4437 UNI(OP_GETSOCKNAME);
4439 case KEY_getsockopt:
4440 LOP(OP_GSOCKOPT,XTERM);
4462 yylval.ival = CopLINE(PL_curcop);
4466 LOP(OP_INDEX,XTERM);
4472 LOP(OP_IOCTL,XTERM);
4484 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4516 LOP(OP_LISTEN,XTERM);
4525 s = scan_pat(s,OP_MATCH);
4526 TERM(sublex_start());
4529 LOP(OP_MAPSTART, XREF);
4532 LOP(OP_MKDIR,XTERM);
4535 LOP(OP_MSGCTL,XTERM);
4538 LOP(OP_MSGGET,XTERM);
4541 LOP(OP_MSGRCV,XTERM);
4544 LOP(OP_MSGSND,XTERM);
4550 if (isIDFIRST_lazy_if(s,UTF)) {
4551 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
4552 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4554 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
4555 if (!PL_in_my_stash) {
4558 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
4566 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4573 if (PL_expect != XSTATE)
4574 yyerror("\"no\" not allowed in expression");
4575 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4576 s = force_version(s);
4581 if (*s == '(' || (s = skipspace(s), *s == '('))
4588 if (isIDFIRST_lazy_if(s,UTF)) {
4590 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
4592 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE))
4593 Perl_warner(aTHX_ WARN_PRECEDENCE,
4594 "Precedence problem: open %.*s should be open(%.*s)",
4600 yylval.ival = OP_OR;
4610 LOP(OP_OPEN_DIR,XTERM);
4613 checkcomma(s,PL_tokenbuf,"filehandle");
4617 checkcomma(s,PL_tokenbuf,"filehandle");
4636 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4640 LOP(OP_PIPE_OP,XTERM);
4643 s = scan_str(s,FALSE,FALSE);
4645 missingterm((char*)0);
4646 yylval.ival = OP_CONST;
4647 TERM(sublex_start());
4653 s = scan_str(s,FALSE,FALSE);
4655 missingterm((char*)0);
4657 if (SvCUR(PL_lex_stuff)) {
4660 d = SvPV_force(PL_lex_stuff, len);
4663 for (; isSPACE(*d) && len; --len, ++d) ;
4666 if (!warned && ckWARN(WARN_QW)) {
4667 for (; !isSPACE(*d) && len; --len, ++d) {
4669 Perl_warner(aTHX_ WARN_QW,
4670 "Possible attempt to separate words with commas");
4673 else if (*d == '#') {
4674 Perl_warner(aTHX_ WARN_QW,
4675 "Possible attempt to put comments in qw() list");
4681 for (; !isSPACE(*d) && len; --len, ++d) ;
4683 sv = newSVpvn(b, d-b);
4684 if (DO_UTF8(PL_lex_stuff))
4686 words = append_elem(OP_LIST, words,
4687 newSVOP(OP_CONST, 0, tokeq(sv)));
4691 PL_nextval[PL_nexttoke].opval = words;
4696 SvREFCNT_dec(PL_lex_stuff);
4697 PL_lex_stuff = Nullsv;
4702 s = scan_str(s,FALSE,FALSE);
4704 missingterm((char*)0);
4705 yylval.ival = OP_STRINGIFY;
4706 if (SvIVX(PL_lex_stuff) == '\'')
4707 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
4708 TERM(sublex_start());
4711 s = scan_pat(s,OP_QR);
4712 TERM(sublex_start());
4715 s = scan_str(s,FALSE,FALSE);
4717 missingterm((char*)0);
4718 yylval.ival = OP_BACKTICK;
4720 TERM(sublex_start());
4727 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4728 s = force_version(s);
4731 *PL_tokenbuf = '\0';
4732 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4733 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
4734 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4736 yyerror("<> should be quotes");
4744 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4748 LOP(OP_RENAME,XTERM);
4757 LOP(OP_RINDEX,XTERM);
4780 LOP(OP_REVERSE,XTERM);
4791 TERM(sublex_start());
4793 TOKEN(1); /* force error */
4802 LOP(OP_SELECT,XTERM);
4808 LOP(OP_SEMCTL,XTERM);
4811 LOP(OP_SEMGET,XTERM);
4814 LOP(OP_SEMOP,XTERM);
4820 LOP(OP_SETPGRP,XTERM);
4822 case KEY_setpriority:
4823 LOP(OP_SETPRIORITY,XTERM);
4825 case KEY_sethostent:
4831 case KEY_setservent:
4834 case KEY_setprotoent:
4844 LOP(OP_SEEKDIR,XTERM);
4846 case KEY_setsockopt:
4847 LOP(OP_SSOCKOPT,XTERM);
4853 LOP(OP_SHMCTL,XTERM);
4856 LOP(OP_SHMGET,XTERM);
4859 LOP(OP_SHMREAD,XTERM);
4862 LOP(OP_SHMWRITE,XTERM);
4865 LOP(OP_SHUTDOWN,XTERM);
4874 LOP(OP_SOCKET,XTERM);
4876 case KEY_socketpair:
4877 LOP(OP_SOCKPAIR,XTERM);
4880 checkcomma(s,PL_tokenbuf,"subroutine name");
4882 if (*s == ';' || *s == ')') /* probably a close */
4883 Perl_croak(aTHX_ "sort is now a reserved word");
4885 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4889 LOP(OP_SPLIT,XTERM);
4892 LOP(OP_SPRINTF,XTERM);
4895 LOP(OP_SPLICE,XTERM);
4910 LOP(OP_SUBSTR,XTERM);
4916 char tmpbuf[sizeof PL_tokenbuf];
4918 expectation attrful;
4919 bool have_name, have_proto;
4924 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
4925 (*s == ':' && s[1] == ':'))
4928 attrful = XATTRBLOCK;
4929 /* remember buffer pos'n for later force_word */
4930 tboffset = s - PL_oldbufptr;
4931 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4932 if (strchr(tmpbuf, ':'))
4933 sv_setpv(PL_subname, tmpbuf);
4935 sv_setsv(PL_subname,PL_curstname);
4936 sv_catpvn(PL_subname,"::",2);
4937 sv_catpvn(PL_subname,tmpbuf,len);
4944 Perl_croak(aTHX_ "Missing name in \"my sub\"");
4945 PL_expect = XTERMBLOCK;
4946 attrful = XATTRTERM;
4947 sv_setpv(PL_subname,"?");
4951 if (key == KEY_format) {
4953 PL_lex_formbrack = PL_lex_brackets + 1;
4955 (void) force_word(PL_oldbufptr + tboffset, WORD,
4960 /* Look for a prototype */
4964 s = scan_str(s,FALSE,FALSE);
4967 SvREFCNT_dec(PL_lex_stuff);
4968 PL_lex_stuff = Nullsv;
4969 Perl_croak(aTHX_ "Prototype not terminated");
4972 d = SvPVX(PL_lex_stuff);
4974 for (p = d; *p; ++p) {
4979 SvCUR(PL_lex_stuff) = tmp;
4987 if (*s == ':' && s[1] != ':')
4988 PL_expect = attrful;
4991 PL_nextval[PL_nexttoke].opval =
4992 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
4993 PL_lex_stuff = Nullsv;
4997 sv_setpv(PL_subname,"__ANON__");
5000 (void) force_word(PL_oldbufptr + tboffset, WORD,
5009 LOP(OP_SYSTEM,XREF);
5012 LOP(OP_SYMLINK,XTERM);
5015 LOP(OP_SYSCALL,XTERM);
5018 LOP(OP_SYSOPEN,XTERM);
5021 LOP(OP_SYSSEEK,XTERM);
5024 LOP(OP_SYSREAD,XTERM);
5027 LOP(OP_SYSWRITE,XTERM);
5031 TERM(sublex_start());
5052 LOP(OP_TRUNCATE,XTERM);
5064 yylval.ival = CopLINE(PL_curcop);
5068 yylval.ival = CopLINE(PL_curcop);
5072 LOP(OP_UNLINK,XTERM);
5078 LOP(OP_UNPACK,XTERM);
5081 LOP(OP_UTIME,XTERM);
5084 if (ckWARN(WARN_UMASK)) {
5085 for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
5086 if (*d != '0' && isDIGIT(*d))
5087 Perl_warner(aTHX_ WARN_UMASK,
5088 "umask: argument is missing initial 0");
5093 LOP(OP_UNSHIFT,XTERM);
5096 if (PL_expect != XSTATE)
5097 yyerror("\"use\" not allowed in expression");
5099 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5100 s = force_version(s);
5101 if (*s == ';' || (s = skipspace(s), *s == ';')) {
5102 PL_nextval[PL_nexttoke].opval = Nullop;
5107 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5108 s = force_version(s);
5120 yylval.ival = CopLINE(PL_curcop);
5124 PL_hints |= HINT_BLOCK_SCOPE;
5131 LOP(OP_WAITPID,XTERM);
5139 static char ctl_l[2];
5141 if (ctl_l[0] == '\0')
5142 ctl_l[0] = toCTRL('L');
5143 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5146 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5151 if (PL_expect == XOPERATOR)
5157 yylval.ival = OP_XOR;
5162 TERM(sublex_start());
5167 #pragma segment Main
5171 Perl_keyword(pTHX_ register char *d, I32 len)
5176 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
5177 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5178 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
5179 if (strEQ(d,"__DATA__")) return KEY___DATA__;
5180 if (strEQ(d,"__END__")) return KEY___END__;
5184 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5189 if (strEQ(d,"and")) return -KEY_and;
5190 if (strEQ(d,"abs")) return -KEY_abs;
5193 if (strEQ(d,"alarm")) return -KEY_alarm;
5194 if (strEQ(d,"atan2")) return -KEY_atan2;
5197 if (strEQ(d,"accept")) return -KEY_accept;
5202 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
5205 if (strEQ(d,"bless")) return -KEY_bless;
5206 if (strEQ(d,"bind")) return -KEY_bind;
5207 if (strEQ(d,"binmode")) return -KEY_binmode;
5210 if (strEQ(d,"CORE")) return -KEY_CORE;
5211 if (strEQ(d,"CHECK")) return KEY_CHECK;
5216 if (strEQ(d,"cmp")) return -KEY_cmp;
5217 if (strEQ(d,"chr")) return -KEY_chr;
5218 if (strEQ(d,"cos")) return -KEY_cos;
5221 if (strEQ(d,"chop")) return -KEY_chop;
5224 if (strEQ(d,"close")) return -KEY_close;
5225 if (strEQ(d,"chdir")) return -KEY_chdir;
5226 if (strEQ(d,"chomp")) return -KEY_chomp;
5227 if (strEQ(d,"chmod")) return -KEY_chmod;
5228 if (strEQ(d,"chown")) return -KEY_chown;
5229 if (strEQ(d,"crypt")) return -KEY_crypt;
5232 if (strEQ(d,"chroot")) return -KEY_chroot;
5233 if (strEQ(d,"caller")) return -KEY_caller;
5236 if (strEQ(d,"connect")) return -KEY_connect;
5239 if (strEQ(d,"closedir")) return -KEY_closedir;
5240 if (strEQ(d,"continue")) return -KEY_continue;
5245 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5250 if (strEQ(d,"do")) return KEY_do;
5253 if (strEQ(d,"die")) return -KEY_die;
5256 if (strEQ(d,"dump")) return -KEY_dump;
5259 if (strEQ(d,"delete")) return KEY_delete;
5262 if (strEQ(d,"defined")) return KEY_defined;
5263 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
5266 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
5271 if (strEQ(d,"END")) return KEY_END;
5276 if (strEQ(d,"eq")) return -KEY_eq;
5279 if (strEQ(d,"eof")) return -KEY_eof;
5280 if (strEQ(d,"exp")) return -KEY_exp;
5283 if (strEQ(d,"else")) return KEY_else;
5284 if (strEQ(d,"exit")) return -KEY_exit;
5285 if (strEQ(d,"eval")) return KEY_eval;
5286 if (strEQ(d,"exec")) return -KEY_exec;
5287 if (strEQ(d,"each")) return -KEY_each;
5290 if (strEQ(d,"elsif")) return KEY_elsif;
5293 if (strEQ(d,"exists")) return KEY_exists;
5294 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
5297 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5298 if (strEQ(d,"endpwent")) return -KEY_endpwent;
5301 if (strEQ(d,"endnetent")) return -KEY_endnetent;
5304 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5305 if (strEQ(d,"endservent")) return -KEY_endservent;
5308 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
5315 if (strEQ(d,"for")) return KEY_for;
5318 if (strEQ(d,"fork")) return -KEY_fork;
5321 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5322 if (strEQ(d,"flock")) return -KEY_flock;
5325 if (strEQ(d,"format")) return KEY_format;
5326 if (strEQ(d,"fileno")) return -KEY_fileno;
5329 if (strEQ(d,"foreach")) return KEY_foreach;
5332 if (strEQ(d,"formline")) return -KEY_formline;
5337 if (strnEQ(d,"get",3)) {
5342 if (strEQ(d,"ppid")) return -KEY_getppid;
5343 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
5346 if (strEQ(d,"pwent")) return -KEY_getpwent;
5347 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5348 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
5351 if (strEQ(d,"peername")) return -KEY_getpeername;
5352 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5353 if (strEQ(d,"priority")) return -KEY_getpriority;
5356 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
5359 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
5363 else if (*d == 'h') {
5364 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5365 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5366 if (strEQ(d,"hostent")) return -KEY_gethostent;
5368 else if (*d == 'n') {
5369 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5370 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5371 if (strEQ(d,"netent")) return -KEY_getnetent;
5373 else if (*d == 's') {
5374 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5375 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5376 if (strEQ(d,"servent")) return -KEY_getservent;
5377 if (strEQ(d,"sockname")) return -KEY_getsockname;
5378 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
5380 else if (*d == 'g') {
5381 if (strEQ(d,"grent")) return -KEY_getgrent;
5382 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5383 if (strEQ(d,"grgid")) return -KEY_getgrgid;
5385 else if (*d == 'l') {
5386 if (strEQ(d,"login")) return -KEY_getlogin;
5388 else if (strEQ(d,"c")) return -KEY_getc;
5393 if (strEQ(d,"gt")) return -KEY_gt;
5394 if (strEQ(d,"ge")) return -KEY_ge;
5397 if (strEQ(d,"grep")) return KEY_grep;
5398 if (strEQ(d,"goto")) return KEY_goto;
5399 if (strEQ(d,"glob")) return KEY_glob;
5402 if (strEQ(d,"gmtime")) return -KEY_gmtime;
5407 if (strEQ(d,"hex")) return -KEY_hex;
5410 if (strEQ(d,"INIT")) return KEY_INIT;
5415 if (strEQ(d,"if")) return KEY_if;
5418 if (strEQ(d,"int")) return -KEY_int;
5421 if (strEQ(d,"index")) return -KEY_index;
5422 if (strEQ(d,"ioctl")) return -KEY_ioctl;
5427 if (strEQ(d,"join")) return -KEY_join;
5431 if (strEQ(d,"keys")) return -KEY_keys;
5432 if (strEQ(d,"kill")) return -KEY_kill;
5438 if (strEQ(d,"lt")) return -KEY_lt;
5439 if (strEQ(d,"le")) return -KEY_le;
5440 if (strEQ(d,"lc")) return -KEY_lc;
5443 if (strEQ(d,"log")) return -KEY_log;
5446 if (strEQ(d,"last")) return KEY_last;
5447 if (strEQ(d,"link")) return -KEY_link;
5448 if (strEQ(d,"lock")) return -KEY_lock;
5451 if (strEQ(d,"local")) return KEY_local;
5452 if (strEQ(d,"lstat")) return -KEY_lstat;
5455 if (strEQ(d,"length")) return -KEY_length;
5456 if (strEQ(d,"listen")) return -KEY_listen;
5459 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
5462 if (strEQ(d,"localtime")) return -KEY_localtime;
5468 case 1: return KEY_m;
5470 if (strEQ(d,"my")) return KEY_my;
5473 if (strEQ(d,"map")) return KEY_map;
5476 if (strEQ(d,"mkdir")) return -KEY_mkdir;
5479 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5480 if (strEQ(d,"msgget")) return -KEY_msgget;
5481 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5482 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
5487 if (strEQ(d,"next")) return KEY_next;
5488 if (strEQ(d,"ne")) return -KEY_ne;
5489 if (strEQ(d,"not")) return -KEY_not;
5490 if (strEQ(d,"no")) return KEY_no;
5495 if (strEQ(d,"or")) return -KEY_or;
5498 if (strEQ(d,"ord")) return -KEY_ord;
5499 if (strEQ(d,"oct")) return -KEY_oct;
5500 if (strEQ(d,"our")) return KEY_our;
5503 if (strEQ(d,"open")) return -KEY_open;
5506 if (strEQ(d,"opendir")) return -KEY_opendir;
5513 if (strEQ(d,"pop")) return -KEY_pop;
5514 if (strEQ(d,"pos")) return KEY_pos;
5517 if (strEQ(d,"push")) return -KEY_push;
5518 if (strEQ(d,"pack")) return -KEY_pack;
5519 if (strEQ(d,"pipe")) return -KEY_pipe;
5522 if (strEQ(d,"print")) return KEY_print;
5525 if (strEQ(d,"printf")) return KEY_printf;
5528 if (strEQ(d,"package")) return KEY_package;
5531 if (strEQ(d,"prototype")) return KEY_prototype;
5536 if (strEQ(d,"q")) return KEY_q;
5537 if (strEQ(d,"qr")) return KEY_qr;
5538 if (strEQ(d,"qq")) return KEY_qq;
5539 if (strEQ(d,"qw")) return KEY_qw;
5540 if (strEQ(d,"qx")) return KEY_qx;
5542 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
5547 if (strEQ(d,"ref")) return -KEY_ref;
5550 if (strEQ(d,"read")) return -KEY_read;
5551 if (strEQ(d,"rand")) return -KEY_rand;
5552 if (strEQ(d,"recv")) return -KEY_recv;
5553 if (strEQ(d,"redo")) return KEY_redo;
5556 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5557 if (strEQ(d,"reset")) return -KEY_reset;
5560 if (strEQ(d,"return")) return KEY_return;
5561 if (strEQ(d,"rename")) return -KEY_rename;
5562 if (strEQ(d,"rindex")) return -KEY_rindex;
5565 if (strEQ(d,"require")) return -KEY_require;
5566 if (strEQ(d,"reverse")) return -KEY_reverse;
5567 if (strEQ(d,"readdir")) return -KEY_readdir;
5570 if (strEQ(d,"readlink")) return -KEY_readlink;
5571 if (strEQ(d,"readline")) return -KEY_readline;
5572 if (strEQ(d,"readpipe")) return -KEY_readpipe;
5575 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
5581 case 0: return KEY_s;
5583 if (strEQ(d,"scalar")) return KEY_scalar;
5588 if (strEQ(d,"seek")) return -KEY_seek;
5589 if (strEQ(d,"send")) return -KEY_send;
5592 if (strEQ(d,"semop")) return -KEY_semop;
5595 if (strEQ(d,"select")) return -KEY_select;
5596 if (strEQ(d,"semctl")) return -KEY_semctl;
5597 if (strEQ(d,"semget")) return -KEY_semget;
5600 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5601 if (strEQ(d,"seekdir")) return -KEY_seekdir;
5604 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5605 if (strEQ(d,"setgrent")) return -KEY_setgrent;
5608 if (strEQ(d,"setnetent")) return -KEY_setnetent;
5611 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5612 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5613 if (strEQ(d,"setservent")) return -KEY_setservent;
5616 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5617 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
5624 if (strEQ(d,"shift")) return -KEY_shift;
5627 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5628 if (strEQ(d,"shmget")) return -KEY_shmget;
5631 if (strEQ(d,"shmread")) return -KEY_shmread;
5634 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5635 if (strEQ(d,"shutdown")) return -KEY_shutdown;
5640 if (strEQ(d,"sin")) return -KEY_sin;
5643 if (strEQ(d,"sleep")) return -KEY_sleep;
5646 if (strEQ(d,"sort")) return KEY_sort;
5647 if (strEQ(d,"socket")) return -KEY_socket;
5648 if (strEQ(d,"socketpair")) return -KEY_socketpair;
5651 if (strEQ(d,"split")) return KEY_split;
5652 if (strEQ(d,"sprintf")) return -KEY_sprintf;
5653 if (strEQ(d,"splice")) return -KEY_splice;
5656 if (strEQ(d,"sqrt")) return -KEY_sqrt;
5659 if (strEQ(d,"srand")) return -KEY_srand;
5662 if (strEQ(d,"stat")) return -KEY_stat;
5663 if (strEQ(d,"study")) return KEY_study;
5666 if (strEQ(d,"substr")) return -KEY_substr;
5667 if (strEQ(d,"sub")) return KEY_sub;
5672 if (strEQ(d,"system")) return -KEY_system;
5675 if (strEQ(d,"symlink")) return -KEY_symlink;
5676 if (strEQ(d,"syscall")) return -KEY_syscall;
5677 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5678 if (strEQ(d,"sysread")) return -KEY_sysread;
5679 if (strEQ(d,"sysseek")) return -KEY_sysseek;
5682 if (strEQ(d,"syswrite")) return -KEY_syswrite;
5691 if (strEQ(d,"tr")) return KEY_tr;
5694 if (strEQ(d,"tie")) return KEY_tie;
5697 if (strEQ(d,"tell")) return -KEY_tell;
5698 if (strEQ(d,"tied")) return KEY_tied;
5699 if (strEQ(d,"time")) return -KEY_time;
5702 if (strEQ(d,"times")) return -KEY_times;
5705 if (strEQ(d,"telldir")) return -KEY_telldir;
5708 if (strEQ(d,"truncate")) return -KEY_truncate;
5715 if (strEQ(d,"uc")) return -KEY_uc;
5718 if (strEQ(d,"use")) return KEY_use;
5721 if (strEQ(d,"undef")) return KEY_undef;
5722 if (strEQ(d,"until")) return KEY_until;
5723 if (strEQ(d,"untie")) return KEY_untie;
5724 if (strEQ(d,"utime")) return -KEY_utime;
5725 if (strEQ(d,"umask")) return -KEY_umask;
5728 if (strEQ(d,"unless")) return KEY_unless;
5729 if (strEQ(d,"unpack")) return -KEY_unpack;
5730 if (strEQ(d,"unlink")) return -KEY_unlink;
5733 if (strEQ(d,"unshift")) return -KEY_unshift;
5734 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
5739 if (strEQ(d,"values")) return -KEY_values;
5740 if (strEQ(d,"vec")) return -KEY_vec;
5745 if (strEQ(d,"warn")) return -KEY_warn;
5746 if (strEQ(d,"wait")) return -KEY_wait;
5749 if (strEQ(d,"while")) return KEY_while;
5750 if (strEQ(d,"write")) return -KEY_write;
5753 if (strEQ(d,"waitpid")) return -KEY_waitpid;
5756 if (strEQ(d,"wantarray")) return -KEY_wantarray;
5761 if (len == 1) return -KEY_x;
5762 if (strEQ(d,"xor")) return -KEY_xor;
5765 if (len == 1) return KEY_y;
5774 S_checkcomma(pTHX_ register char *s, char *name, char *what)
5778 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
5779 if (ckWARN(WARN_SYNTAX)) {
5781 for (w = s+2; *w && level; w++) {
5788 for (; *w && isSPACE(*w); w++) ;
5789 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
5790 Perl_warner(aTHX_ WARN_SYNTAX,
5791 "%s (...) interpreted as function",name);
5794 while (s < PL_bufend && isSPACE(*s))
5798 while (s < PL_bufend && isSPACE(*s))
5800 if (isIDFIRST_lazy_if(s,UTF)) {
5802 while (isALNUM_lazy_if(s,UTF))
5804 while (s < PL_bufend && isSPACE(*s))
5809 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
5813 Perl_croak(aTHX_ "No comma allowed after %s", what);
5818 /* Either returns sv, or mortalizes sv and returns a new SV*.
5819 Best used as sv=new_constant(..., sv, ...).
5820 If s, pv are NULL, calls subroutine with one argument,
5821 and type is used with error messages only. */
5824 S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
5828 HV *table = GvHV(PL_hintgv); /* ^H */
5832 const char *why1, *why2, *why3;
5834 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
5837 why2 = strEQ(key,"charnames")
5838 ? "(possibly a missing \"use charnames ...\")"
5840 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
5841 (type ? type: "undef"), why2);
5843 /* This is convoluted and evil ("goto considered harmful")
5844 * but I do not understand the intricacies of all the different
5845 * failure modes of %^H in here. The goal here is to make
5846 * the most probable error message user-friendly. --jhi */
5851 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
5852 (type ? type: "undef"), why1, why2, why3);
5854 yyerror(SvPVX(msg));
5858 cvp = hv_fetch(table, key, strlen(key), FALSE);
5859 if (!cvp || !SvOK(*cvp)) {
5862 why3 = "} is not defined";
5865 sv_2mortal(sv); /* Parent created it permanently */
5868 pv = sv_2mortal(newSVpvn(s, len));
5870 typesv = sv_2mortal(newSVpv(type, 0));
5872 typesv = &PL_sv_undef;
5874 PUSHSTACKi(PERLSI_OVERLOAD);
5886 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
5890 /* Check the eval first */
5891 if (!PL_in_eval && SvTRUE(ERRSV)) {
5893 sv_catpv(ERRSV, "Propagated");
5894 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
5896 res = SvREFCNT_inc(sv);
5900 (void)SvREFCNT_inc(res);
5909 why1 = "Call to &{$^H{";
5911 why3 = "}} did not return a defined value";
5920 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
5922 register char *d = dest;
5923 register char *e = d + destlen - 3; /* two-character token, ending NUL */
5926 Perl_croak(aTHX_ ident_too_long);
5927 if (isALNUM(*s)) /* UTF handled below */
5929 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
5934 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
5938 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
5939 char *t = s + UTF8SKIP(s);
5940 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
5942 if (d + (t - s) > e)
5943 Perl_croak(aTHX_ ident_too_long);
5944 Copy(s, d, t - s, char);
5957 S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
5967 e = d + destlen - 3; /* two-character token, ending NUL */
5969 while (isDIGIT(*s)) {
5971 Perl_croak(aTHX_ ident_too_long);
5978 Perl_croak(aTHX_ ident_too_long);
5979 if (isALNUM(*s)) /* UTF handled below */
5981 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
5986 else if (*s == ':' && s[1] == ':') {
5990 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
5991 char *t = s + UTF8SKIP(s);
5992 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
5994 if (d + (t - s) > e)
5995 Perl_croak(aTHX_ ident_too_long);
5996 Copy(s, d, t - s, char);
6007 if (PL_lex_state != LEX_NORMAL)
6008 PL_lex_state = LEX_INTERPENDMAYBE;
6011 if (*s == '$' && s[1] &&
6012 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
6025 if (*d == '^' && *s && isCONTROLVAR(*s)) {
6030 if (isSPACE(s[-1])) {
6033 if (!SPACE_OR_TAB(ch)) {
6039 if (isIDFIRST_lazy_if(d,UTF)) {
6043 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
6045 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
6048 Copy(s, d, e - s, char);
6053 while ((isALNUM(*s) || *s == ':') && d < e)
6056 Perl_croak(aTHX_ ident_too_long);
6059 while (s < send && SPACE_OR_TAB(*s)) s++;
6060 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
6061 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
6062 const char *brack = *s == '[' ? "[...]" : "{...}";
6063 Perl_warner(aTHX_ WARN_AMBIGUOUS,
6064 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
6065 funny, dest, brack, funny, dest, brack);
6068 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
6072 /* Handle extended ${^Foo} variables
6073 * 1999-02-27 mjd-perl-patch@plover.com */
6074 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6078 while (isALNUM(*s) && d < e) {
6082 Perl_croak(aTHX_ ident_too_long);
6087 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
6088 PL_lex_state = LEX_INTERPEND;
6091 if (PL_lex_state == LEX_NORMAL) {
6092 if (ckWARN(WARN_AMBIGUOUS) &&
6093 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
6095 Perl_warner(aTHX_ WARN_AMBIGUOUS,
6096 "Ambiguous use of %c{%s} resolved to %c%s",
6097 funny, dest, funny, dest);
6102 s = bracket; /* let the parser handle it */
6106 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6107 PL_lex_state = LEX_INTERPEND;
6112 Perl_pmflag(pTHX_ U16 *pmfl, int ch)
6117 *pmfl |= PMf_GLOBAL;
6119 *pmfl |= PMf_CONTINUE;
6123 *pmfl |= PMf_MULTILINE;
6125 *pmfl |= PMf_SINGLELINE;
6127 *pmfl |= PMf_EXTENDED;
6131 S_scan_pat(pTHX_ char *start, I32 type)
6136 s = scan_str(start,FALSE,FALSE);
6139 SvREFCNT_dec(PL_lex_stuff);
6140 PL_lex_stuff = Nullsv;
6141 Perl_croak(aTHX_ "Search pattern not terminated");
6144 pm = (PMOP*)newPMOP(type, 0);
6145 if (PL_multi_open == '?')
6146 pm->op_pmflags |= PMf_ONCE;
6148 while (*s && strchr("iomsx", *s))
6149 pmflag(&pm->op_pmflags,*s++);
6152 while (*s && strchr("iogcmsx", *s))
6153 pmflag(&pm->op_pmflags,*s++);
6155 pm->op_pmpermflags = pm->op_pmflags;
6157 PL_lex_op = (OP*)pm;
6158 yylval.ival = OP_MATCH;
6163 S_scan_subst(pTHX_ char *start)
6170 yylval.ival = OP_NULL;
6172 s = scan_str(start,FALSE,FALSE);
6176 SvREFCNT_dec(PL_lex_stuff);
6177 PL_lex_stuff = Nullsv;
6178 Perl_croak(aTHX_ "Substitution pattern not terminated");
6181 if (s[-1] == PL_multi_open)
6184 first_start = PL_multi_start;
6185 s = scan_str(s,FALSE,FALSE);
6188 SvREFCNT_dec(PL_lex_stuff);
6189 PL_lex_stuff = Nullsv;
6191 SvREFCNT_dec(PL_lex_repl);
6192 PL_lex_repl = Nullsv;
6193 Perl_croak(aTHX_ "Substitution replacement not terminated");
6195 PL_multi_start = first_start; /* so whole substitution is taken together */
6197 pm = (PMOP*)newPMOP(OP_SUBST, 0);
6203 else if (strchr("iogcmsx", *s))
6204 pmflag(&pm->op_pmflags,*s++);
6211 PL_sublex_info.super_bufptr = s;
6212 PL_sublex_info.super_bufend = PL_bufend;
6214 pm->op_pmflags |= PMf_EVAL;
6215 repl = newSVpvn("",0);
6217 sv_catpv(repl, es ? "eval " : "do ");
6218 sv_catpvn(repl, "{ ", 2);
6219 sv_catsv(repl, PL_lex_repl);
6220 sv_catpvn(repl, " };", 2);
6222 SvREFCNT_dec(PL_lex_repl);
6226 pm->op_pmpermflags = pm->op_pmflags;
6227 PL_lex_op = (OP*)pm;
6228 yylval.ival = OP_SUBST;
6233 S_scan_trans(pTHX_ char *start)
6244 yylval.ival = OP_NULL;
6246 s = scan_str(start,FALSE,FALSE);
6249 SvREFCNT_dec(PL_lex_stuff);
6250 PL_lex_stuff = Nullsv;
6251 Perl_croak(aTHX_ "Transliteration pattern not terminated");
6253 if (s[-1] == PL_multi_open)
6256 s = scan_str(s,FALSE,FALSE);
6259 SvREFCNT_dec(PL_lex_stuff);
6260 PL_lex_stuff = Nullsv;
6262 SvREFCNT_dec(PL_lex_repl);
6263 PL_lex_repl = Nullsv;
6264 Perl_croak(aTHX_ "Transliteration replacement not terminated");
6267 New(803,tbl,256,short);
6268 o = newPVOP(OP_TRANS, 0, (char*)tbl);
6270 complement = del = squash = 0;
6271 while (strchr("cds", *s)) {
6273 complement = OPpTRANS_COMPLEMENT;
6275 del = OPpTRANS_DELETE;
6277 squash = OPpTRANS_SQUASH;
6280 o->op_private = del|squash|complement|
6281 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6282 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
6285 yylval.ival = OP_TRANS;
6290 S_scan_heredoc(pTHX_ register char *s)
6293 I32 op_type = OP_SCALAR;
6300 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
6304 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
6307 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
6308 if (*peek && strchr("`'\"",*peek)) {
6311 s = delimcpy(d, e, s, PL_bufend, term, &len);
6321 if (!isALNUM_lazy_if(s,UTF))
6322 deprecate("bare << to mean <<\"\"");
6323 for (; isALNUM_lazy_if(s,UTF); s++) {
6328 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
6329 Perl_croak(aTHX_ "Delimiter for here document is too long");
6332 len = d - PL_tokenbuf;
6333 #ifndef PERL_STRICT_CR
6334 d = strchr(s, '\r');
6338 while (s < PL_bufend) {
6344 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6353 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
6358 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
6359 herewas = newSVpvn(s,PL_bufend-s);
6361 s--, herewas = newSVpvn(s,d-s);
6362 s += SvCUR(herewas);
6364 tmpstr = NEWSV(87,79);
6365 sv_upgrade(tmpstr, SVt_PVIV);
6370 else if (term == '`') {
6371 op_type = OP_BACKTICK;
6372 SvIVX(tmpstr) = '\\';
6376 PL_multi_start = CopLINE(PL_curcop);
6377 PL_multi_open = PL_multi_close = '<';
6378 term = *PL_tokenbuf;
6379 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6380 char *bufptr = PL_sublex_info.super_bufptr;
6381 char *bufend = PL_sublex_info.super_bufend;
6382 char *olds = s - SvCUR(herewas);
6383 s = strchr(bufptr, '\n');
6387 while (s < bufend &&
6388 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6390 CopLINE_inc(PL_curcop);
6393 CopLINE_set(PL_curcop, PL_multi_start);
6394 missingterm(PL_tokenbuf);
6396 sv_setpvn(herewas,bufptr,d-bufptr+1);
6397 sv_setpvn(tmpstr,d+1,s-d);
6399 sv_catpvn(herewas,s,bufend-s);
6400 (void)strcpy(bufptr,SvPVX(herewas));
6407 while (s < PL_bufend &&
6408 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6410 CopLINE_inc(PL_curcop);
6412 if (s >= PL_bufend) {
6413 CopLINE_set(PL_curcop, PL_multi_start);
6414 missingterm(PL_tokenbuf);
6416 sv_setpvn(tmpstr,d+1,s-d);
6418 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
6420 sv_catpvn(herewas,s,PL_bufend-s);
6421 sv_setsv(PL_linestr,herewas);
6422 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6423 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6426 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
6427 while (s >= PL_bufend) { /* multiple line string? */
6429 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
6430 CopLINE_set(PL_curcop, PL_multi_start);
6431 missingterm(PL_tokenbuf);
6433 CopLINE_inc(PL_curcop);
6434 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6435 #ifndef PERL_STRICT_CR
6436 if (PL_bufend - PL_linestart >= 2) {
6437 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6438 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
6440 PL_bufend[-2] = '\n';
6442 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
6444 else if (PL_bufend[-1] == '\r')
6445 PL_bufend[-1] = '\n';
6447 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6448 PL_bufend[-1] = '\n';
6450 if (PERLDB_LINE && PL_curstash != PL_debstash) {
6451 SV *sv = NEWSV(88,0);
6453 sv_upgrade(sv, SVt_PVMG);
6454 sv_setsv(sv,PL_linestr);
6455 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
6457 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6460 sv_catsv(PL_linestr,herewas);
6461 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6465 sv_catsv(tmpstr,PL_linestr);
6470 PL_multi_end = CopLINE(PL_curcop);
6471 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6472 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
6473 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
6475 SvREFCNT_dec(herewas);
6476 if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
6478 PL_lex_stuff = tmpstr;
6479 yylval.ival = op_type;
6484 takes: current position in input buffer
6485 returns: new position in input buffer
6486 side-effects: yylval and lex_op are set.
6491 <FH> read from filehandle
6492 <pkg::FH> read from package qualified filehandle
6493 <pkg'FH> read from package qualified filehandle
6494 <$fh> read from filehandle in $fh
6500 S_scan_inputsymbol(pTHX_ char *start)
6502 register char *s = start; /* current position in buffer */
6508 d = PL_tokenbuf; /* start of temp holding space */
6509 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
6510 end = strchr(s, '\n');
6513 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
6515 /* die if we didn't have space for the contents of the <>,
6516 or if it didn't end, or if we see a newline
6519 if (len >= sizeof PL_tokenbuf)
6520 Perl_croak(aTHX_ "Excessively long <> operator");
6522 Perl_croak(aTHX_ "Unterminated <> operator");
6527 Remember, only scalar variables are interpreted as filehandles by
6528 this code. Anything more complex (e.g., <$fh{$num}>) will be
6529 treated as a glob() call.
6530 This code makes use of the fact that except for the $ at the front,
6531 a scalar variable and a filehandle look the same.
6533 if (*d == '$' && d[1]) d++;
6535 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
6536 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
6539 /* If we've tried to read what we allow filehandles to look like, and
6540 there's still text left, then it must be a glob() and not a getline.
6541 Use scan_str to pull out the stuff between the <> and treat it
6542 as nothing more than a string.
6545 if (d - PL_tokenbuf != len) {
6546 yylval.ival = OP_GLOB;
6548 s = scan_str(start,FALSE,FALSE);
6550 Perl_croak(aTHX_ "Glob not terminated");
6554 /* we're in a filehandle read situation */
6557 /* turn <> into <ARGV> */
6559 (void)strcpy(d,"ARGV");
6561 /* if <$fh>, create the ops to turn the variable into a
6567 /* try to find it in the pad for this block, otherwise find
6568 add symbol table ops
6570 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
6571 OP *o = newOP(OP_PADSV, 0);
6573 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, o);
6576 GV *gv = gv_fetchpv(d+1,TRUE, SVt_PV);
6577 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0,
6578 newUNOP(OP_RV2SV, 0,
6579 newGVOP(OP_GV, 0, gv)));
6581 PL_lex_op->op_flags |= OPf_SPECIAL;
6582 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
6583 yylval.ival = OP_NULL;
6586 /* If it's none of the above, it must be a literal filehandle
6587 (<Foo::BAR> or <FOO>) so build a simple readline OP */
6589 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
6590 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6591 yylval.ival = OP_NULL;
6600 takes: start position in buffer
6601 keep_quoted preserve \ on the embedded delimiter(s)
6602 keep_delims preserve the delimiters around the string
6603 returns: position to continue reading from buffer
6604 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6605 updates the read buffer.
6607 This subroutine pulls a string out of the input. It is called for:
6608 q single quotes q(literal text)
6609 ' single quotes 'literal text'
6610 qq double quotes qq(interpolate $here please)
6611 " double quotes "interpolate $here please"
6612 qx backticks qx(/bin/ls -l)
6613 ` backticks `/bin/ls -l`
6614 qw quote words @EXPORT_OK = qw( func() $spam )
6615 m// regexp match m/this/
6616 s/// regexp substitute s/this/that/
6617 tr/// string transliterate tr/this/that/
6618 y/// string transliterate y/this/that/
6619 ($*@) sub prototypes sub foo ($)
6620 (stuff) sub attr parameters sub foo : attr(stuff)
6621 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6623 In most of these cases (all but <>, patterns and transliterate)
6624 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6625 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6626 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6629 It skips whitespace before the string starts, and treats the first
6630 character as the delimiter. If the delimiter is one of ([{< then
6631 the corresponding "close" character )]}> is used as the closing
6632 delimiter. It allows quoting of delimiters, and if the string has
6633 balanced delimiters ([{<>}]) it allows nesting.
6635 The lexer always reads these strings into lex_stuff, except in the
6636 case of the operators which take *two* arguments (s/// and tr///)
6637 when it checks to see if lex_stuff is full (presumably with the 1st
6638 arg to s or tr) and if so puts the string into lex_repl.
6643 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
6645 SV *sv; /* scalar value: string */
6646 char *tmps; /* temp string, used for delimiter matching */
6647 register char *s = start; /* current position in the buffer */
6648 register char term; /* terminating character */
6649 register char *to; /* current position in the sv's data */
6650 I32 brackets = 1; /* bracket nesting level */
6651 bool has_utf8 = FALSE; /* is there any utf8 content? */
6653 /* skip space before the delimiter */
6657 /* mark where we are, in case we need to report errors */
6660 /* after skipping whitespace, the next character is the terminator */
6662 if (UTF8_IS_CONTINUED(term) && UTF)
6665 /* mark where we are */
6666 PL_multi_start = CopLINE(PL_curcop);
6667 PL_multi_open = term;
6669 /* find corresponding closing delimiter */
6670 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6672 PL_multi_close = term;
6674 /* create a new SV to hold the contents. 87 is leak category, I'm
6675 assuming. 79 is the SV's initial length. What a random number. */
6677 sv_upgrade(sv, SVt_PVIV);
6679 (void)SvPOK_only(sv); /* validate pointer */
6681 /* move past delimiter and try to read a complete string */
6683 sv_catpvn(sv, s, 1);
6686 /* extend sv if need be */
6687 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
6688 /* set 'to' to the next character in the sv's string */
6689 to = SvPVX(sv)+SvCUR(sv);
6691 /* if open delimiter is the close delimiter read unbridle */
6692 if (PL_multi_open == PL_multi_close) {
6693 for (; s < PL_bufend; s++,to++) {
6694 /* embedded newlines increment the current line number */
6695 if (*s == '\n' && !PL_rsfp)
6696 CopLINE_inc(PL_curcop);
6697 /* handle quoted delimiters */
6698 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
6699 if (!keep_quoted && s[1] == term)
6701 /* any other quotes are simply copied straight through */
6705 /* terminate when run out of buffer (the for() condition), or
6706 have found the terminator */
6707 else if (*s == term)
6709 else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
6715 /* if the terminator isn't the same as the start character (e.g.,
6716 matched brackets), we have to allow more in the quoting, and
6717 be prepared for nested brackets.
6720 /* read until we run out of string, or we find the terminator */
6721 for (; s < PL_bufend; s++,to++) {
6722 /* embedded newlines increment the line count */
6723 if (*s == '\n' && !PL_rsfp)
6724 CopLINE_inc(PL_curcop);
6725 /* backslashes can escape the open or closing characters */
6726 if (*s == '\\' && s+1 < PL_bufend) {
6728 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
6733 /* allow nested opens and closes */
6734 else if (*s == PL_multi_close && --brackets <= 0)
6736 else if (*s == PL_multi_open)
6738 else if (!has_utf8 && UTF8_IS_CONTINUED(*s) && UTF)
6743 /* terminate the copied string and update the sv's end-of-string */
6745 SvCUR_set(sv, to - SvPVX(sv));
6748 * this next chunk reads more into the buffer if we're not done yet
6752 break; /* handle case where we are done yet :-) */
6754 #ifndef PERL_STRICT_CR
6755 if (to - SvPVX(sv) >= 2) {
6756 if ((to[-2] == '\r' && to[-1] == '\n') ||
6757 (to[-2] == '\n' && to[-1] == '\r'))
6761 SvCUR_set(sv, to - SvPVX(sv));
6763 else if (to[-1] == '\r')
6766 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
6770 /* if we're out of file, or a read fails, bail and reset the current
6771 line marker so we can report where the unterminated string began
6774 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
6776 CopLINE_set(PL_curcop, PL_multi_start);
6779 /* we read a line, so increment our line counter */
6780 CopLINE_inc(PL_curcop);
6782 /* update debugger info */
6783 if (PERLDB_LINE && PL_curstash != PL_debstash) {
6784 SV *sv = NEWSV(88,0);
6786 sv_upgrade(sv, SVt_PVMG);
6787 sv_setsv(sv,PL_linestr);
6788 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
6791 /* having changed the buffer, we must update PL_bufend */
6792 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6795 /* at this point, we have successfully read the delimited string */
6798 sv_catpvn(sv, s, 1);
6801 PL_multi_end = CopLINE(PL_curcop);
6804 /* if we allocated too much space, give some back */
6805 if (SvCUR(sv) + 5 < SvLEN(sv)) {
6806 SvLEN_set(sv, SvCUR(sv) + 1);
6807 Renew(SvPVX(sv), SvLEN(sv), char);
6810 /* decide whether this is the first or second quoted string we've read
6823 takes: pointer to position in buffer
6824 returns: pointer to new position in buffer
6825 side-effects: builds ops for the constant in yylval.op
6827 Read a number in any of the formats that Perl accepts:
6829 0(x[0-7A-F]+)|([0-7]+)|(b[01])
6830 [\d_]+(\.[\d_]*)?[Ee](\d+)
6832 Underbars (_) are allowed in decimal numbers. If -w is on,
6833 underbars before a decimal point must be at three digit intervals.
6835 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
6838 If it reads a number without a decimal point or an exponent, it will
6839 try converting the number to an integer and see if it can do so
6840 without loss of precision.
6844 Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
6846 register char *s = start; /* current position in buffer */
6847 register char *d; /* destination in temp buffer */
6848 register char *e; /* end of temp buffer */
6849 NV nv; /* number read, as a double */
6850 SV *sv = Nullsv; /* place to put the converted number */
6851 bool floatit; /* boolean: int or float? */
6852 char *lastub = 0; /* position of last underbar */
6853 static char number_too_long[] = "Number too long";
6855 /* We use the first character to decide what type of number this is */
6859 Perl_croak(aTHX_ "panic: scan_num");
6861 /* if it starts with a 0, it could be an octal number, a decimal in
6862 0.13 disguise, or a hexadecimal number, or a binary number. */
6866 u holds the "number so far"
6867 shift the power of 2 of the base
6868 (hex == 4, octal == 3, binary == 1)
6869 overflowed was the number more than we can hold?
6871 Shift is used when we add a digit. It also serves as an "are
6872 we in octal/hex/binary?" indicator to disallow hex characters
6878 bool overflowed = FALSE;
6879 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
6880 static char* bases[5] = { "", "binary", "", "octal",
6882 static char* Bases[5] = { "", "Binary", "", "Octal",
6884 static char *maxima[5] = { "",
6885 "0b11111111111111111111111111111111",
6889 char *base, *Base, *max;
6895 } else if (s[1] == 'b') {
6899 /* check for a decimal in disguise */
6900 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
6902 /* so it must be octal */
6906 base = bases[shift];
6907 Base = Bases[shift];
6908 max = maxima[shift];
6910 /* read the rest of the number */
6912 /* x is used in the overflow test,
6913 b is the digit we're adding on. */
6918 /* if we don't mention it, we're done */
6927 /* 8 and 9 are not octal */
6930 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
6934 case '2': case '3': case '4':
6935 case '5': case '6': case '7':
6937 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
6941 b = *s++ & 15; /* ASCII digit -> value of digit */
6945 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6946 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6947 /* make sure they said 0x */
6952 /* Prepare to put the digit we have onto the end
6953 of the number so far. We check for overflows.
6958 x = u << shift; /* make room for the digit */
6960 if ((x >> shift) != u
6961 && !(PL_hints & HINT_NEW_BINARY)) {
6964 if (ckWARN_d(WARN_OVERFLOW))
6965 Perl_warner(aTHX_ WARN_OVERFLOW,
6966 "Integer overflow in %s number",
6969 u = x | b; /* add the digit to the end */
6972 n *= nvshift[shift];
6973 /* If an NV has not enough bits in its
6974 * mantissa to represent an UV this summing of
6975 * small low-order numbers is a waste of time
6976 * (because the NV cannot preserve the
6977 * low-order bits anyway): we could just
6978 * remember when did we overflow and in the
6979 * end just multiply n by the right
6987 /* if we get here, we had success: make a scalar value from
6993 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
6994 Perl_warner(aTHX_ WARN_PORTABLE,
6995 "%s number > %s non-portable",
7001 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
7002 Perl_warner(aTHX_ WARN_PORTABLE,
7003 "%s number > %s non-portable",
7008 if (PL_hints & HINT_NEW_BINARY)
7009 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
7014 handle decimal numbers.
7015 we're also sent here when we read a 0 as the first digit
7017 case '1': case '2': case '3': case '4': case '5':
7018 case '6': case '7': case '8': case '9': case '.':
7021 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
7024 /* read next group of digits and _ and copy into d */
7025 while (isDIGIT(*s) || *s == '_') {
7026 /* skip underscores, checking for misplaced ones
7030 if (ckWARN(WARN_SYNTAX) && lastub && s - lastub != 3)
7031 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
7035 /* check for end of fixed-length buffer */
7037 Perl_croak(aTHX_ number_too_long);
7038 /* if we're ok, copy the character */
7043 /* final misplaced underbar check */
7044 if (lastub && s - lastub != 3) {
7045 if (ckWARN(WARN_SYNTAX))
7046 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
7049 /* read a decimal portion if there is one. avoid
7050 3..5 being interpreted as the number 3. followed
7053 if (*s == '.' && s[1] != '.') {
7057 /* copy, ignoring underbars, until we run out of
7058 digits. Note: no misplaced underbar checks!
7060 for (; isDIGIT(*s) || *s == '_'; s++) {
7061 /* fixed length buffer check */
7063 Perl_croak(aTHX_ number_too_long);
7067 if (*s == '.' && isDIGIT(s[1])) {
7068 /* oops, it's really a v-string, but without the "v" */
7074 /* read exponent part, if present */
7075 if (*s && strchr("eE",*s) && strchr("+-0123456789",s[1])) {
7079 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
7080 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
7082 /* allow positive or negative exponent */
7083 if (*s == '+' || *s == '-')
7086 /* read digits of exponent (no underbars :-) */
7087 while (isDIGIT(*s)) {
7089 Perl_croak(aTHX_ number_too_long);
7094 /* terminate the string */
7097 /* make an sv from the string */
7100 #if defined(Strtol) && defined(Strtoul)
7103 strtol/strtoll sets errno to ERANGE if the number is too big
7104 for an integer. We try to do an integer conversion first
7105 if no characters indicating "float" have been found.
7112 if (*PL_tokenbuf == '-')
7113 iv = Strtol(PL_tokenbuf, (char**)NULL, 10);
7115 uv = Strtoul(PL_tokenbuf, (char**)NULL, 10);
7117 floatit = TRUE; /* Probably just too large. */
7118 else if (*PL_tokenbuf == '-')
7120 else if (uv <= IV_MAX)
7121 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
7126 nv = Atof(PL_tokenbuf);
7131 No working strtou?ll?.
7133 Unfortunately atol() doesn't do range checks (returning
7134 LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows)
7135 everywhere [1], so we cannot use use atol() (or atoll()).
7136 If we could, they would be used, as Atol(), very much like
7137 Strtol() and Strtoul() are used above.
7139 [1] XXX Configure test needed to check for atol()
7140 (and atoll()) overflow behaviour XXX
7144 We need to do this the hard way. */
7146 nv = Atof(PL_tokenbuf);
7148 /* See if we can make do with an integer value without loss of
7149 precision. We use U_V to cast to a UV, because some
7150 compilers have issues. Then we try casting it back and see
7151 if it was the same [1]. We only do this if we know we
7152 specifically read an integer. If floatit is true, then we
7153 don't need to do the conversion at all.
7155 [1] Note that this is lossy if our NVs cannot preserve our
7156 UVs. There are metaconfig defines NV_PRESERVES_UV (a boolean)
7157 and NV_PRESERVES_UV_BITS (a number), but in general we really
7158 do hope all such potentially lossy platforms have strtou?ll?
7159 to do a lossless IV/UV conversion.
7161 Maybe could do some tricks with DBL_DIG, LDBL_DIG and
7162 DBL_MANT_DIG and LDBL_MANT_DIG (these are already available
7163 as NV_DIG and NV_MANT_DIG)?
7169 if (!floatit && (NV)uv == nv) {
7171 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
7179 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7180 (PL_hints & HINT_NEW_INTEGER) )
7181 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
7182 (floatit ? "float" : "integer"),
7186 /* if it starts with a v, it could be a v-string */
7192 while (isDIGIT(*pos) || *pos == '_')
7194 if (!isALPHA(*pos)) {
7196 U8 tmpbuf[UTF8_MAXLEN+1];
7199 s++; /* get past 'v' */
7202 sv_setpvn(sv, "", 0);
7205 if (*s == '0' && isDIGIT(s[1]))
7206 yyerror("Octal number in vector unsupported");
7209 /* this is atoi() that tolerates underscores */
7212 while (--end >= s) {
7217 rev += (*end - '0') * mult;
7219 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
7220 Perl_warner(aTHX_ WARN_OVERFLOW,
7221 "Integer overflow in decimal number");
7224 tmpend = uv_to_utf8(tmpbuf, rev);
7225 utf8 = utf8 || rev > 127;
7226 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
7227 if (*pos == '.' && isDIGIT(pos[1]))
7233 while (isDIGIT(*pos) || *pos == '_')
7242 sv_utf8_downgrade(sv, TRUE);
7249 /* make the op for the constant and return */
7252 lvalp->opval = newSVOP(OP_CONST, 0, sv);
7254 lvalp->opval = Nullop;
7260 S_scan_formline(pTHX_ register char *s)
7264 SV *stuff = newSVpvn("",0);
7265 bool needargs = FALSE;
7268 if (*s == '.' || *s == /*{*/'}') {
7270 #ifdef PERL_STRICT_CR
7271 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
7273 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
7275 if (*t == '\n' || t == PL_bufend)
7278 if (PL_in_eval && !PL_rsfp) {
7279 eol = strchr(s,'\n');
7284 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7286 for (t = s; t < eol; t++) {
7287 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7289 goto enough; /* ~~ must be first line in formline */
7291 if (*t == '@' || *t == '^')
7294 sv_catpvn(stuff, s, eol-s);
7295 #ifndef PERL_STRICT_CR
7296 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7297 char *end = SvPVX(stuff) + SvCUR(stuff);
7306 s = filter_gets(PL_linestr, PL_rsfp, 0);
7307 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7308 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
7311 yyerror("Format not terminated");
7321 PL_lex_state = LEX_NORMAL;
7322 PL_nextval[PL_nexttoke].ival = 0;
7326 PL_lex_state = LEX_FORMLINE;
7327 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
7329 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
7333 SvREFCNT_dec(stuff);
7334 PL_lex_formbrack = 0;
7345 PL_cshlen = strlen(PL_cshname);
7350 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
7352 I32 oldsavestack_ix = PL_savestack_ix;
7353 CV* outsidecv = PL_compcv;
7357 assert(SvTYPE(PL_compcv) == SVt_PVCV);
7359 SAVEI32(PL_subline);
7360 save_item(PL_subname);
7363 SAVESPTR(PL_comppad_name);
7364 SAVESPTR(PL_compcv);
7365 SAVEI32(PL_comppad_name_fill);
7366 SAVEI32(PL_min_intro_pending);
7367 SAVEI32(PL_max_intro_pending);
7368 SAVEI32(PL_pad_reset_pending);
7370 PL_compcv = (CV*)NEWSV(1104,0);
7371 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7372 CvFLAGS(PL_compcv) |= flags;
7374 PL_comppad = newAV();
7375 av_push(PL_comppad, Nullsv);
7376 PL_curpad = AvARRAY(PL_comppad);
7377 PL_comppad_name = newAV();
7378 PL_comppad_name_fill = 0;
7379 PL_min_intro_pending = 0;
7381 PL_subline = CopLINE(PL_curcop);
7383 av_store(PL_comppad_name, 0, newSVpvn("@_", 2));
7384 PL_curpad[0] = (SV*)newAV();
7385 SvPADMY_on(PL_curpad[0]); /* XXX Needed? */
7386 #endif /* USE_THREADS */
7388 comppadlist = newAV();
7389 AvREAL_off(comppadlist);
7390 av_store(comppadlist, 0, (SV*)PL_comppad_name);
7391 av_store(comppadlist, 1, (SV*)PL_comppad);
7393 CvPADLIST(PL_compcv) = comppadlist;
7394 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
7396 CvOWNER(PL_compcv) = 0;
7397 New(666, CvMUTEXP(PL_compcv), 1, perl_mutex);
7398 MUTEX_INIT(CvMUTEXP(PL_compcv));
7399 #endif /* USE_THREADS */
7401 return oldsavestack_ix;
7405 Perl_yywarn(pTHX_ char *s)
7407 PL_in_eval |= EVAL_WARNONLY;
7409 PL_in_eval &= ~EVAL_WARNONLY;
7414 Perl_yyerror(pTHX_ char *s)
7417 char *context = NULL;
7421 if (!yychar || (yychar == ';' && !PL_rsfp))
7423 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7424 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
7425 while (isSPACE(*PL_oldoldbufptr))
7427 context = PL_oldoldbufptr;
7428 contlen = PL_bufptr - PL_oldoldbufptr;
7430 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7431 PL_oldbufptr != PL_bufptr) {
7432 while (isSPACE(*PL_oldbufptr))
7434 context = PL_oldbufptr;
7435 contlen = PL_bufptr - PL_oldbufptr;
7437 else if (yychar > 255)
7438 where = "next token ???";
7439 #ifdef USE_PURE_BISON
7440 /* GNU Bison sets the value -2 */
7441 else if (yychar == -2) {
7443 else if ((yychar & 127) == 127) {
7445 if (PL_lex_state == LEX_NORMAL ||
7446 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
7447 where = "at end of line";
7448 else if (PL_lex_inpat)
7449 where = "within pattern";
7451 where = "within string";
7454 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
7456 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
7457 else if (isPRINT_LC(yychar))
7458 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
7460 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
7461 where = SvPVX(where_sv);
7463 msg = sv_2mortal(newSVpv(s, 0));
7464 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
7465 CopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
7467 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
7469 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
7470 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
7471 Perl_sv_catpvf(aTHX_ msg,
7472 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
7473 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
7476 if (PL_in_eval & EVAL_WARNONLY)
7477 Perl_warn(aTHX_ "%"SVf, msg);
7480 if (PL_error_count >= 10) {
7481 if (PL_in_eval && SvCUR(ERRSV))
7482 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
7483 ERRSV, CopFILE(PL_curcop));
7485 Perl_croak(aTHX_ "%s has too many errors.\n",
7486 CopFILE(PL_curcop));
7489 PL_in_my_stash = Nullhv;
7494 S_swallow_bom(pTHX_ U8 *s)
7497 slen = SvCUR(PL_linestr);
7501 /* UTF-16 little-endian */
7502 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
7503 Perl_croak(aTHX_ "Unsupported script encoding");
7504 #ifndef PERL_NO_UTF16_FILTER
7505 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
7507 if (PL_bufend > (char*)s) {
7511 filter_add(utf16rev_textfilter, NULL);
7512 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7513 PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
7514 PL_bufend - (char*)s - 1,
7516 Copy(news, s, newlen, U8);
7517 SvCUR_set(PL_linestr, newlen);
7518 PL_bufend = SvPVX(PL_linestr) + newlen;
7519 news[newlen++] = '\0';
7523 Perl_croak(aTHX_ "Unsupported script encoding");
7528 if (s[1] == 0xFF) { /* UTF-16 big-endian */
7529 #ifndef PERL_NO_UTF16_FILTER
7530 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
7532 if (PL_bufend > (char *)s) {
7536 filter_add(utf16_textfilter, NULL);
7537 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7538 PL_bufend = (char*)utf16_to_utf8(s, news,
7539 PL_bufend - (char*)s,
7541 Copy(news, s, newlen, U8);
7542 SvCUR_set(PL_linestr, newlen);
7543 PL_bufend = SvPVX(PL_linestr) + newlen;
7544 news[newlen++] = '\0';
7548 Perl_croak(aTHX_ "Unsupported script encoding");
7553 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7554 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
7559 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
7560 s[2] == 0xFE && s[3] == 0xFF)
7562 Perl_croak(aTHX_ "Unsupported script encoding");
7574 * Restore a source filter.
7578 restore_rsfp(pTHXo_ void *f)
7580 PerlIO *fp = (PerlIO*)f;
7582 if (PL_rsfp == PerlIO_stdin())
7583 PerlIO_clearerr(PL_rsfp);
7584 else if (PL_rsfp && (PL_rsfp != fp))
7585 PerlIO_close(PL_rsfp);
7589 #ifndef PERL_NO_UTF16_FILTER
7591 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
7593 I32 count = FILTER_READ(idx+1, sv, maxlen);
7598 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
7599 if (!*SvPV_nolen(sv))
7600 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7603 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
7604 sv_usepvn(sv, (char*)tmps, tend - tmps);
7610 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
7612 I32 count = FILTER_READ(idx+1, sv, maxlen);
7617 if (!*SvPV_nolen(sv))
7618 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7621 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
7622 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
7623 sv_usepvn(sv, (char*)tmps, tend - tmps);