toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("switch")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 940 {
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             PL_parser->bufend += len+highhalf;
 959             for (p = pv; p != e; p++) {
 960                 U8 c = (U8)*p;
 961                 if (c & 0x80) {
 962                     *bufptr++ = (char)(0xc0 | (c >> 6));
 963                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 964                 } else {
 965                     *bufptr++ = (char)c;
 966                 }
 967             }
 968         }
 969     } else {
 970         if (flags & LEX_STUFF_UTF8) {
 971             STRLEN highhalf = 0;
 972             char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 U8 c = (U8)*p;
 975                 if (c >= 0xc4) {
 976                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 977                                 "non-Latin-1 character into Latin-1 input");
 978                 } else if (c >= 0xc2 && p+1 != e &&
 979                             (((U8)p[1]) & 0xc0) == 0x80) {
 980                     p++;
 981                     highhalf++;
 982                 } else if (c >= 0x80) {
 983                     /* malformed UTF-8 */
 984                     ENTER;
 985                     SAVESPTR(PL_warnhook);
 986                     PL_warnhook = PERL_WARNHOOK_FATAL;
 987                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 988                     LEAVE;
 989                 }
 990             }
 991             if (!highhalf)
 992                 goto plain_copy;
 993             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 994             bufptr = PL_parser->bufptr;
 995             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 996             PL_parser->bufend += len-highhalf;
 997             for (p = pv; p != e; p++) {
 998                 U8 c = (U8)*p;
 999                 if (c & 0x80) {
1000                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001                     p++;
1002                 } else {
1003                     *bufptr++ = (char)c;
1004                 }
1005             }
1006         } else {
1007             plain_copy:
1008             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009             bufptr = PL_parser->bufptr;
1010             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some;
1201     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1202         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1203 #ifdef PERL_MAD
1204     flags |= LEX_KEEP_PREVIOUS;
1205 #endif /* PERL_MAD */
1206     linestr = PL_parser->linestr;
1207     buf = SvPVX(linestr);
1208     if (!(flags & LEX_KEEP_PREVIOUS) &&
1209             PL_parser->bufptr == PL_parser->bufend) {
1210         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1211         linestart_pos = 0;
1212         if (PL_parser->last_uni != PL_parser->bufend)
1213             PL_parser->last_uni = NULL;
1214         if (PL_parser->last_lop != PL_parser->bufend)
1215             PL_parser->last_lop = NULL;
1216         last_uni_pos = last_lop_pos = 0;
1217         *buf = 0;
1218         SvCUR(linestr) = 0;
1219     } else {
1220         old_bufend_pos = PL_parser->bufend - buf;
1221         bufptr_pos = PL_parser->bufptr - buf;
1222         oldbufptr_pos = PL_parser->oldbufptr - buf;
1223         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1224         linestart_pos = PL_parser->linestart - buf;
1225         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1226         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1227     }
1228     if (flags & LEX_FAKE_EOF) {
1229         goto eof;
1230     } else if (!PL_parser->rsfp) {
1231         got_some = 0;
1232     } else if (filter_gets(linestr, old_bufend_pos)) {
1233         got_some = 1;
1234     } else {
1235         eof:
1236         /* End of real input.  Close filehandle (unless it was STDIN),
1237          * then add implicit termination.
1238          */
1239         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1240             PerlIO_clearerr(PL_parser->rsfp);
1241         else if (PL_parser->rsfp)
1242             (void)PerlIO_close(PL_parser->rsfp);
1243         PL_parser->rsfp = NULL;
1244         PL_doextract = FALSE;
1245 #ifdef PERL_MAD
1246         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1247             PL_faketokens = 1;
1248 #endif
1249         if (!PL_in_eval && PL_minus_p) {
1250             sv_catpvs(linestr,
1251                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1252             PL_minus_n = PL_minus_p = 0;
1253         } else if (!PL_in_eval && PL_minus_n) {
1254             sv_catpvs(linestr, /*{*/";}");
1255             PL_minus_n = 0;
1256         } else
1257             sv_catpvs(linestr, ";");
1258         got_some = 1;
1259     }
1260     buf = SvPVX(linestr);
1261     new_bufend_pos = SvCUR(linestr);
1262     PL_parser->bufend = buf + new_bufend_pos;
1263     PL_parser->bufptr = buf + bufptr_pos;
1264     PL_parser->oldbufptr = buf + oldbufptr_pos;
1265     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1266     PL_parser->linestart = buf + linestart_pos;
1267     if (PL_parser->last_uni)
1268         PL_parser->last_uni = buf + last_uni_pos;
1269     if (PL_parser->last_lop)
1270         PL_parser->last_lop = buf + last_lop_pos;
1271     if (got_some && (PERLDB_LINE || PERLDB_SAVESRC) &&
1272             PL_curstash != PL_debstash) {
1273         /* debugger active and we're not compiling the debugger code,
1274          * so store the line into the debugger's array of lines
1275          */
1276         update_debugger_info(NULL, buf+old_bufend_pos,
1277             new_bufend_pos-old_bufend_pos);
1278     }
1279     return got_some;
1280 }
1281
1282 /*
1283 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1284
1285 Looks ahead one (Unicode) character in the text currently being lexed.
1286 Returns the codepoint (unsigned integer value) of the next character,
1287 or -1 if lexing has reached the end of the input text.  To consume the
1288 peeked character, use L</lex_read_unichar>.
1289
1290 If the next character is in (or extends into) the next chunk of input
1291 text, the next chunk will be read in.  Normally the current chunk will be
1292 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1293 then the current chunk will not be discarded.
1294
1295 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1296 is encountered, an exception is generated.
1297
1298 =cut
1299 */
1300
1301 I32
1302 Perl_lex_peek_unichar(pTHX_ U32 flags)
1303 {
1304     char *s, *bufend;
1305     if (flags & ~(LEX_KEEP_PREVIOUS))
1306         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1307     s = PL_parser->bufptr;
1308     bufend = PL_parser->bufend;
1309     if (UTF) {
1310         U8 head;
1311         I32 unichar;
1312         STRLEN len, retlen;
1313         if (s == bufend) {
1314             if (!lex_next_chunk(flags))
1315                 return -1;
1316             s = PL_parser->bufptr;
1317             bufend = PL_parser->bufend;
1318         }
1319         head = (U8)*s;
1320         if (!(head & 0x80))
1321             return head;
1322         if (head & 0x40) {
1323             len = PL_utf8skip[head];
1324             while ((STRLEN)(bufend-s) < len) {
1325                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1326                     break;
1327                 s = PL_parser->bufptr;
1328                 bufend = PL_parser->bufend;
1329             }
1330         }
1331         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1332         if (retlen == (STRLEN)-1) {
1333             /* malformed UTF-8 */
1334             ENTER;
1335             SAVESPTR(PL_warnhook);
1336             PL_warnhook = PERL_WARNHOOK_FATAL;
1337             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1338             LEAVE;
1339         }
1340         return unichar;
1341     } else {
1342         if (s == bufend) {
1343             if (!lex_next_chunk(flags))
1344                 return -1;
1345             s = PL_parser->bufptr;
1346         }
1347         return (U8)*s;
1348     }
1349 }
1350
1351 /*
1352 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1353
1354 Reads the next (Unicode) character in the text currently being lexed.
1355 Returns the codepoint (unsigned integer value) of the character read,
1356 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1357 if lexing has reached the end of the input text.  To non-destructively
1358 examine the next character, use L</lex_peek_unichar> instead.
1359
1360 If the next character is in (or extends into) the next chunk of input
1361 text, the next chunk will be read in.  Normally the current chunk will be
1362 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1363 then the current chunk will not be discarded.
1364
1365 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1366 is encountered, an exception is generated.
1367
1368 =cut
1369 */
1370
1371 I32
1372 Perl_lex_read_unichar(pTHX_ U32 flags)
1373 {
1374     I32 c;
1375     if (flags & ~(LEX_KEEP_PREVIOUS))
1376         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1377     c = lex_peek_unichar(flags);
1378     if (c != -1) {
1379         if (c == '\n')
1380             CopLINE_inc(PL_curcop);
1381         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1382     }
1383     return c;
1384 }
1385
1386 /*
1387 =for apidoc Amx|void|lex_read_space|U32 flags
1388
1389 Reads optional spaces, in Perl style, in the text currently being
1390 lexed.  The spaces may include ordinary whitespace characters and
1391 Perl-style comments.  C<#line> directives are processed if encountered.
1392 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1393 at a non-space character (or the end of the input text).
1394
1395 If spaces extend into the next chunk of input text, the next chunk will
1396 be read in.  Normally the current chunk will be discarded at the same
1397 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1398 chunk will not be discarded.
1399
1400 =cut
1401 */
1402
1403 void
1404 Perl_lex_read_space(pTHX_ U32 flags)
1405 {
1406     char *s, *bufend;
1407     bool need_incline = 0;
1408     if (flags & ~(LEX_KEEP_PREVIOUS))
1409         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1410 #ifdef PERL_MAD
1411     if (PL_skipwhite) {
1412         sv_free(PL_skipwhite);
1413         PL_skipwhite = NULL;
1414     }
1415     if (PL_madskills)
1416         PL_skipwhite = newSVpvs("");
1417 #endif /* PERL_MAD */
1418     s = PL_parser->bufptr;
1419     bufend = PL_parser->bufend;
1420     while (1) {
1421         char c = *s;
1422         if (c == '#') {
1423             do {
1424                 c = *++s;
1425             } while (!(c == '\n' || (c == 0 && s == bufend)));
1426         } else if (c == '\n') {
1427             s++;
1428             PL_parser->linestart = s;
1429             if (s == bufend)
1430                 need_incline = 1;
1431             else
1432                 incline(s);
1433         } else if (isSPACE(c)) {
1434             s++;
1435         } else if (c == 0 && s == bufend) {
1436             bool got_more;
1437 #ifdef PERL_MAD
1438             if (PL_madskills)
1439                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1440 #endif /* PERL_MAD */
1441             PL_parser->bufptr = s;
1442             CopLINE_inc(PL_curcop);
1443             got_more = lex_next_chunk(flags);
1444             CopLINE_dec(PL_curcop);
1445             s = PL_parser->bufptr;
1446             bufend = PL_parser->bufend;
1447             if (!got_more)
1448                 break;
1449             if (need_incline && PL_parser->rsfp) {
1450                 incline(s);
1451                 need_incline = 0;
1452             }
1453         } else {
1454             break;
1455         }
1456     }
1457 #ifdef PERL_MAD
1458     if (PL_madskills)
1459         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1460 #endif /* PERL_MAD */
1461     PL_parser->bufptr = s;
1462 }
1463
1464 /*
1465  * S_incline
1466  * This subroutine has nothing to do with tilting, whether at windmills
1467  * or pinball tables.  Its name is short for "increment line".  It
1468  * increments the current line number in CopLINE(PL_curcop) and checks
1469  * to see whether the line starts with a comment of the form
1470  *    # line 500 "foo.pm"
1471  * If so, it sets the current line number and file to the values in the comment.
1472  */
1473
1474 STATIC void
1475 S_incline(pTHX_ const char *s)
1476 {
1477     dVAR;
1478     const char *t;
1479     const char *n;
1480     const char *e;
1481
1482     PERL_ARGS_ASSERT_INCLINE;
1483
1484     CopLINE_inc(PL_curcop);
1485     if (*s++ != '#')
1486         return;
1487     while (SPACE_OR_TAB(*s))
1488         s++;
1489     if (strnEQ(s, "line", 4))
1490         s += 4;
1491     else
1492         return;
1493     if (SPACE_OR_TAB(*s))
1494         s++;
1495     else
1496         return;
1497     while (SPACE_OR_TAB(*s))
1498         s++;
1499     if (!isDIGIT(*s))
1500         return;
1501
1502     n = s;
1503     while (isDIGIT(*s))
1504         s++;
1505     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1506         return;
1507     while (SPACE_OR_TAB(*s))
1508         s++;
1509     if (*s == '"' && (t = strchr(s+1, '"'))) {
1510         s++;
1511         e = t + 1;
1512     }
1513     else {
1514         t = s;
1515         while (!isSPACE(*t))
1516             t++;
1517         e = t;
1518     }
1519     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1520         e++;
1521     if (*e != '\n' && *e != '\0')
1522         return;         /* false alarm */
1523
1524     if (t - s > 0) {
1525         const STRLEN len = t - s;
1526 #ifndef USE_ITHREADS
1527         SV *const temp_sv = CopFILESV(PL_curcop);
1528         const char *cf;
1529         STRLEN tmplen;
1530
1531         if (temp_sv) {
1532             cf = SvPVX(temp_sv);
1533             tmplen = SvCUR(temp_sv);
1534         } else {
1535             cf = NULL;
1536             tmplen = 0;
1537         }
1538
1539         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1540             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1541              * to *{"::_<newfilename"} */
1542             /* However, the long form of evals is only turned on by the
1543                debugger - usually they're "(eval %lu)" */
1544             char smallbuf[128];
1545             char *tmpbuf;
1546             GV **gvp;
1547             STRLEN tmplen2 = len;
1548             if (tmplen + 2 <= sizeof smallbuf)
1549                 tmpbuf = smallbuf;
1550             else
1551                 Newx(tmpbuf, tmplen + 2, char);
1552             tmpbuf[0] = '_';
1553             tmpbuf[1] = '<';
1554             memcpy(tmpbuf + 2, cf, tmplen);
1555             tmplen += 2;
1556             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1557             if (gvp) {
1558                 char *tmpbuf2;
1559                 GV *gv2;
1560
1561                 if (tmplen2 + 2 <= sizeof smallbuf)
1562                     tmpbuf2 = smallbuf;
1563                 else
1564                     Newx(tmpbuf2, tmplen2 + 2, char);
1565
1566                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1567                     /* Either they malloc'd it, or we malloc'd it,
1568                        so no prefix is present in ours.  */
1569                     tmpbuf2[0] = '_';
1570                     tmpbuf2[1] = '<';
1571                 }
1572
1573                 memcpy(tmpbuf2 + 2, s, tmplen2);
1574                 tmplen2 += 2;
1575
1576                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1577                 if (!isGV(gv2)) {
1578                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1579                     /* adjust ${"::_<newfilename"} to store the new file name */
1580                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1581                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1582                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1583                 }
1584
1585                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1586             }
1587             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1588         }
1589 #endif
1590         CopFILE_free(PL_curcop);
1591         CopFILE_setn(PL_curcop, s, len);
1592     }
1593     CopLINE_set(PL_curcop, atoi(n)-1);
1594 }
1595
1596 #ifdef PERL_MAD
1597 /* skip space before PL_thistoken */
1598
1599 STATIC char *
1600 S_skipspace0(pTHX_ register char *s)
1601 {
1602     PERL_ARGS_ASSERT_SKIPSPACE0;
1603
1604     s = skipspace(s);
1605     if (!PL_madskills)
1606         return s;
1607     if (PL_skipwhite) {
1608         if (!PL_thiswhite)
1609             PL_thiswhite = newSVpvs("");
1610         sv_catsv(PL_thiswhite, PL_skipwhite);
1611         sv_free(PL_skipwhite);
1612         PL_skipwhite = 0;
1613     }
1614     PL_realtokenstart = s - SvPVX(PL_linestr);
1615     return s;
1616 }
1617
1618 /* skip space after PL_thistoken */
1619
1620 STATIC char *
1621 S_skipspace1(pTHX_ register char *s)
1622 {
1623     const char *start = s;
1624     I32 startoff = start - SvPVX(PL_linestr);
1625
1626     PERL_ARGS_ASSERT_SKIPSPACE1;
1627
1628     s = skipspace(s);
1629     if (!PL_madskills)
1630         return s;
1631     start = SvPVX(PL_linestr) + startoff;
1632     if (!PL_thistoken && PL_realtokenstart >= 0) {
1633         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1634         PL_thistoken = newSVpvn(tstart, start - tstart);
1635     }
1636     PL_realtokenstart = -1;
1637     if (PL_skipwhite) {
1638         if (!PL_nextwhite)
1639             PL_nextwhite = newSVpvs("");
1640         sv_catsv(PL_nextwhite, PL_skipwhite);
1641         sv_free(PL_skipwhite);
1642         PL_skipwhite = 0;
1643     }
1644     return s;
1645 }
1646
1647 STATIC char *
1648 S_skipspace2(pTHX_ register char *s, SV **svp)
1649 {
1650     char *start;
1651     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1652     const I32 startoff = s - SvPVX(PL_linestr);
1653
1654     PERL_ARGS_ASSERT_SKIPSPACE2;
1655
1656     s = skipspace(s);
1657     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1658     if (!PL_madskills || !svp)
1659         return s;
1660     start = SvPVX(PL_linestr) + startoff;
1661     if (!PL_thistoken && PL_realtokenstart >= 0) {
1662         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1663         PL_thistoken = newSVpvn(tstart, start - tstart);
1664         PL_realtokenstart = -1;
1665     }
1666     if (PL_skipwhite) {
1667         if (!*svp)
1668             *svp = newSVpvs("");
1669         sv_setsv(*svp, PL_skipwhite);
1670         sv_free(PL_skipwhite);
1671         PL_skipwhite = 0;
1672     }
1673
1674     return s;
1675 }
1676 #endif
1677
1678 STATIC void
1679 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1680 {
1681     AV *av = CopFILEAVx(PL_curcop);
1682     if (av) {
1683         SV * const sv = newSV_type(SVt_PVMG);
1684         if (orig_sv)
1685             sv_setsv(sv, orig_sv);
1686         else
1687             sv_setpvn(sv, buf, len);
1688         (void)SvIOK_on(sv);
1689         SvIV_set(sv, 0);
1690         av_store(av, (I32)CopLINE(PL_curcop), sv);
1691     }
1692 }
1693
1694 /*
1695  * S_skipspace
1696  * Called to gobble the appropriate amount and type of whitespace.
1697  * Skips comments as well.
1698  */
1699
1700 STATIC char *
1701 S_skipspace(pTHX_ register char *s)
1702 {
1703 #ifdef PERL_MAD
1704     char *start = s;
1705 #endif /* PERL_MAD */
1706     PERL_ARGS_ASSERT_SKIPSPACE;
1707 #ifdef PERL_MAD
1708     if (PL_skipwhite) {
1709         sv_free(PL_skipwhite);
1710         PL_skipwhite = NULL;
1711     }
1712 #endif /* PERL_MAD */
1713     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1714         while (s < PL_bufend && SPACE_OR_TAB(*s))
1715             s++;
1716     } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1717         while (isSPACE(*s) && *s != '\n')
1718             s++;
1719         if (*s == '#') {
1720             do {
1721                 s++;
1722             } while (s != PL_bufend && *s != '\n');
1723         }
1724         if (*s == '\n')
1725             s++;
1726     } else {
1727         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1728         PL_bufptr = s;
1729         lex_read_space(LEX_KEEP_PREVIOUS);
1730         s = PL_bufptr;
1731         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1732         if (PL_linestart > PL_bufptr)
1733             PL_bufptr = PL_linestart;
1734         return s;
1735     }
1736 #ifdef PERL_MAD
1737     if (PL_madskills)
1738         PL_skipwhite = newSVpvn(start, s-start);
1739 #endif /* PERL_MAD */
1740     return s;
1741 }
1742
1743 /*
1744  * S_check_uni
1745  * Check the unary operators to ensure there's no ambiguity in how they're
1746  * used.  An ambiguous piece of code would be:
1747  *     rand + 5
1748  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1749  * the +5 is its argument.
1750  */
1751
1752 STATIC void
1753 S_check_uni(pTHX)
1754 {
1755     dVAR;
1756     const char *s;
1757     const char *t;
1758
1759     if (PL_oldoldbufptr != PL_last_uni)
1760         return;
1761     while (isSPACE(*PL_last_uni))
1762         PL_last_uni++;
1763     s = PL_last_uni;
1764     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1765         s++;
1766     if ((t = strchr(s, '(')) && t < PL_bufptr)
1767         return;
1768
1769     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1770                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1771                      (int)(s - PL_last_uni), PL_last_uni);
1772 }
1773
1774 /*
1775  * LOP : macro to build a list operator.  Its behaviour has been replaced
1776  * with a subroutine, S_lop() for which LOP is just another name.
1777  */
1778
1779 #define LOP(f,x) return lop(f,x,s)
1780
1781 /*
1782  * S_lop
1783  * Build a list operator (or something that might be one).  The rules:
1784  *  - if we have a next token, then it's a list operator [why?]
1785  *  - if the next thing is an opening paren, then it's a function
1786  *  - else it's a list operator
1787  */
1788
1789 STATIC I32
1790 S_lop(pTHX_ I32 f, int x, char *s)
1791 {
1792     dVAR;
1793
1794     PERL_ARGS_ASSERT_LOP;
1795
1796     pl_yylval.ival = f;
1797     CLINE;
1798     PL_expect = x;
1799     PL_bufptr = s;
1800     PL_last_lop = PL_oldbufptr;
1801     PL_last_lop_op = (OPCODE)f;
1802 #ifdef PERL_MAD
1803     if (PL_lasttoke)
1804         return REPORT(LSTOP);
1805 #else
1806     if (PL_nexttoke)
1807         return REPORT(LSTOP);
1808 #endif
1809     if (*s == '(')
1810         return REPORT(FUNC);
1811     s = PEEKSPACE(s);
1812     if (*s == '(')
1813         return REPORT(FUNC);
1814     else
1815         return REPORT(LSTOP);
1816 }
1817
1818 #ifdef PERL_MAD
1819  /*
1820  * S_start_force
1821  * Sets up for an eventual force_next().  start_force(0) basically does
1822  * an unshift, while start_force(-1) does a push.  yylex removes items
1823  * on the "pop" end.
1824  */
1825
1826 STATIC void
1827 S_start_force(pTHX_ int where)
1828 {
1829     int i;
1830
1831     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1832         where = PL_lasttoke;
1833     assert(PL_curforce < 0 || PL_curforce == where);
1834     if (PL_curforce != where) {
1835         for (i = PL_lasttoke; i > where; --i) {
1836             PL_nexttoke[i] = PL_nexttoke[i-1];
1837         }
1838         PL_lasttoke++;
1839     }
1840     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1841         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1842     PL_curforce = where;
1843     if (PL_nextwhite) {
1844         if (PL_madskills)
1845             curmad('^', newSVpvs(""));
1846         CURMAD('_', PL_nextwhite);
1847     }
1848 }
1849
1850 STATIC void
1851 S_curmad(pTHX_ char slot, SV *sv)
1852 {
1853     MADPROP **where;
1854
1855     if (!sv)
1856         return;
1857     if (PL_curforce < 0)
1858         where = &PL_thismad;
1859     else
1860         where = &PL_nexttoke[PL_curforce].next_mad;
1861
1862     if (PL_faketokens)
1863         sv_setpvs(sv, "");
1864     else {
1865         if (!IN_BYTES) {
1866             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1867                 SvUTF8_on(sv);
1868             else if (PL_encoding) {
1869                 sv_recode_to_utf8(sv, PL_encoding);
1870             }
1871         }
1872     }
1873
1874     /* keep a slot open for the head of the list? */
1875     if (slot != '_' && *where && (*where)->mad_key == '^') {
1876         (*where)->mad_key = slot;
1877         sv_free(MUTABLE_SV(((*where)->mad_val)));
1878         (*where)->mad_val = (void*)sv;
1879     }
1880     else
1881         addmad(newMADsv(slot, sv), where, 0);
1882 }
1883 #else
1884 #  define start_force(where)    NOOP
1885 #  define curmad(slot, sv)      NOOP
1886 #endif
1887
1888 /*
1889  * S_force_next
1890  * When the lexer realizes it knows the next token (for instance,
1891  * it is reordering tokens for the parser) then it can call S_force_next
1892  * to know what token to return the next time the lexer is called.  Caller
1893  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1894  * and possibly PL_expect to ensure the lexer handles the token correctly.
1895  */
1896
1897 STATIC void
1898 S_force_next(pTHX_ I32 type)
1899 {
1900     dVAR;
1901 #ifdef DEBUGGING
1902     if (DEBUG_T_TEST) {
1903         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1904         tokereport(type, &NEXTVAL_NEXTTOKE);
1905     }
1906 #endif
1907 #ifdef PERL_MAD
1908     if (PL_curforce < 0)
1909         start_force(PL_lasttoke);
1910     PL_nexttoke[PL_curforce].next_type = type;
1911     if (PL_lex_state != LEX_KNOWNEXT)
1912         PL_lex_defer = PL_lex_state;
1913     PL_lex_state = LEX_KNOWNEXT;
1914     PL_lex_expect = PL_expect;
1915     PL_curforce = -1;
1916 #else
1917     PL_nexttype[PL_nexttoke] = type;
1918     PL_nexttoke++;
1919     if (PL_lex_state != LEX_KNOWNEXT) {
1920         PL_lex_defer = PL_lex_state;
1921         PL_lex_expect = PL_expect;
1922         PL_lex_state = LEX_KNOWNEXT;
1923     }
1924 #endif
1925 }
1926
1927 STATIC SV *
1928 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1929 {
1930     dVAR;
1931     SV * const sv = newSVpvn_utf8(start, len,
1932                                   !IN_BYTES
1933                                   && UTF
1934                                   && !is_ascii_string((const U8*)start, len)
1935                                   && is_utf8_string((const U8*)start, len));
1936     return sv;
1937 }
1938
1939 /*
1940  * S_force_word
1941  * When the lexer knows the next thing is a word (for instance, it has
1942  * just seen -> and it knows that the next char is a word char, then
1943  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1944  * lookahead.
1945  *
1946  * Arguments:
1947  *   char *start : buffer position (must be within PL_linestr)
1948  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1949  *   int check_keyword : if true, Perl checks to make sure the word isn't
1950  *       a keyword (do this if the word is a label, e.g. goto FOO)
1951  *   int allow_pack : if true, : characters will also be allowed (require,
1952  *       use, etc. do this)
1953  *   int allow_initial_tick : used by the "sub" lexer only.
1954  */
1955
1956 STATIC char *
1957 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1958 {
1959     dVAR;
1960     register char *s;
1961     STRLEN len;
1962
1963     PERL_ARGS_ASSERT_FORCE_WORD;
1964
1965     start = SKIPSPACE1(start);
1966     s = start;
1967     if (isIDFIRST_lazy_if(s,UTF) ||
1968         (allow_pack && *s == ':') ||
1969         (allow_initial_tick && *s == '\'') )
1970     {
1971         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1972         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1973             return start;
1974         start_force(PL_curforce);
1975         if (PL_madskills)
1976             curmad('X', newSVpvn(start,s-start));
1977         if (token == METHOD) {
1978             s = SKIPSPACE1(s);
1979             if (*s == '(')
1980                 PL_expect = XTERM;
1981             else {
1982                 PL_expect = XOPERATOR;
1983             }
1984         }
1985         if (PL_madskills)
1986             curmad('g', newSVpvs( "forced" ));
1987         NEXTVAL_NEXTTOKE.opval
1988             = (OP*)newSVOP(OP_CONST,0,
1989                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1990         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1991         force_next(token);
1992     }
1993     return s;
1994 }
1995
1996 /*
1997  * S_force_ident
1998  * Called when the lexer wants $foo *foo &foo etc, but the program
1999  * text only contains the "foo" portion.  The first argument is a pointer
2000  * to the "foo", and the second argument is the type symbol to prefix.
2001  * Forces the next token to be a "WORD".
2002  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2003  */
2004
2005 STATIC void
2006 S_force_ident(pTHX_ register const char *s, int kind)
2007 {
2008     dVAR;
2009
2010     PERL_ARGS_ASSERT_FORCE_IDENT;
2011
2012     if (*s) {
2013         const STRLEN len = strlen(s);
2014         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2015         start_force(PL_curforce);
2016         NEXTVAL_NEXTTOKE.opval = o;
2017         force_next(WORD);
2018         if (kind) {
2019             o->op_private = OPpCONST_ENTERED;
2020             /* XXX see note in pp_entereval() for why we forgo typo
2021                warnings if the symbol must be introduced in an eval.
2022                GSAR 96-10-12 */
2023             gv_fetchpvn_flags(s, len,
2024                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2025                               : GV_ADD,
2026                               kind == '$' ? SVt_PV :
2027                               kind == '@' ? SVt_PVAV :
2028                               kind == '%' ? SVt_PVHV :
2029                               SVt_PVGV
2030                               );
2031         }
2032     }
2033 }
2034
2035 NV
2036 Perl_str_to_version(pTHX_ SV *sv)
2037 {
2038     NV retval = 0.0;
2039     NV nshift = 1.0;
2040     STRLEN len;
2041     const char *start = SvPV_const(sv,len);
2042     const char * const end = start + len;
2043     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2044
2045     PERL_ARGS_ASSERT_STR_TO_VERSION;
2046
2047     while (start < end) {
2048         STRLEN skip;
2049         UV n;
2050         if (utf)
2051             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2052         else {
2053             n = *(U8*)start;
2054             skip = 1;
2055         }
2056         retval += ((NV)n)/nshift;
2057         start += skip;
2058         nshift *= 1000;
2059     }
2060     return retval;
2061 }
2062
2063 /*
2064  * S_force_version
2065  * Forces the next token to be a version number.
2066  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2067  * and if "guessing" is TRUE, then no new token is created (and the caller
2068  * must use an alternative parsing method).
2069  */
2070
2071 STATIC char *
2072 S_force_version(pTHX_ char *s, int guessing)
2073 {
2074     dVAR;
2075     OP *version = NULL;
2076     char *d;
2077 #ifdef PERL_MAD
2078     I32 startoff = s - SvPVX(PL_linestr);
2079 #endif
2080
2081     PERL_ARGS_ASSERT_FORCE_VERSION;
2082
2083     s = SKIPSPACE1(s);
2084
2085     d = s;
2086     if (*d == 'v')
2087         d++;
2088     if (isDIGIT(*d)) {
2089         while (isDIGIT(*d) || *d == '_' || *d == '.')
2090             d++;
2091 #ifdef PERL_MAD
2092         if (PL_madskills) {
2093             start_force(PL_curforce);
2094             curmad('X', newSVpvn(s,d-s));
2095         }
2096 #endif
2097         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2098             SV *ver;
2099             s = scan_num(s, &pl_yylval);
2100             version = pl_yylval.opval;
2101             ver = cSVOPx(version)->op_sv;
2102             if (SvPOK(ver) && !SvNIOK(ver)) {
2103                 SvUPGRADE(ver, SVt_PVNV);
2104                 SvNV_set(ver, str_to_version(ver));
2105                 SvNOK_on(ver);          /* hint that it is a version */
2106             }
2107         }
2108         else if (guessing) {
2109 #ifdef PERL_MAD
2110             if (PL_madskills) {
2111                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2112                 PL_nextwhite = 0;
2113                 s = SvPVX(PL_linestr) + startoff;
2114             }
2115 #endif
2116             return s;
2117         }
2118     }
2119
2120 #ifdef PERL_MAD
2121     if (PL_madskills && !version) {
2122         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2123         PL_nextwhite = 0;
2124         s = SvPVX(PL_linestr) + startoff;
2125     }
2126 #endif
2127     /* NOTE: The parser sees the package name and the VERSION swapped */
2128     start_force(PL_curforce);
2129     NEXTVAL_NEXTTOKE.opval = version;
2130     force_next(WORD);
2131
2132     return s;
2133 }
2134
2135 /*
2136  * S_tokeq
2137  * Tokenize a quoted string passed in as an SV.  It finds the next
2138  * chunk, up to end of string or a backslash.  It may make a new
2139  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2140  * turns \\ into \.
2141  */
2142
2143 STATIC SV *
2144 S_tokeq(pTHX_ SV *sv)
2145 {
2146     dVAR;
2147     register char *s;
2148     register char *send;
2149     register char *d;
2150     STRLEN len = 0;
2151     SV *pv = sv;
2152
2153     PERL_ARGS_ASSERT_TOKEQ;
2154
2155     if (!SvLEN(sv))
2156         goto finish;
2157
2158     s = SvPV_force(sv, len);
2159     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2160         goto finish;
2161     send = s + len;
2162     while (s < send && *s != '\\')
2163         s++;
2164     if (s == send)
2165         goto finish;
2166     d = s;
2167     if ( PL_hints & HINT_NEW_STRING ) {
2168         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2169     }
2170     while (s < send) {
2171         if (*s == '\\') {
2172             if (s + 1 < send && (s[1] == '\\'))
2173                 s++;            /* all that, just for this */
2174         }
2175         *d++ = *s++;
2176     }
2177     *d = '\0';
2178     SvCUR_set(sv, d - SvPVX_const(sv));
2179   finish:
2180     if ( PL_hints & HINT_NEW_STRING )
2181        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2182     return sv;
2183 }
2184
2185 /*
2186  * Now come three functions related to double-quote context,
2187  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2188  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2189  * interact with PL_lex_state, and create fake ( ... ) argument lists
2190  * to handle functions and concatenation.
2191  * They assume that whoever calls them will be setting up a fake
2192  * join call, because each subthing puts a ',' after it.  This lets
2193  *   "lower \luPpEr"
2194  * become
2195  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2196  *
2197  * (I'm not sure whether the spurious commas at the end of lcfirst's
2198  * arguments and join's arguments are created or not).
2199  */
2200
2201 /*
2202  * S_sublex_start
2203  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2204  *
2205  * Pattern matching will set PL_lex_op to the pattern-matching op to
2206  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2207  *
2208  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2209  *
2210  * Everything else becomes a FUNC.
2211  *
2212  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2213  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2214  * call to S_sublex_push().
2215  */
2216
2217 STATIC I32
2218 S_sublex_start(pTHX)
2219 {
2220     dVAR;
2221     register const I32 op_type = pl_yylval.ival;
2222
2223     if (op_type == OP_NULL) {
2224         pl_yylval.opval = PL_lex_op;
2225         PL_lex_op = NULL;
2226         return THING;
2227     }
2228     if (op_type == OP_CONST || op_type == OP_READLINE) {
2229         SV *sv = tokeq(PL_lex_stuff);
2230
2231         if (SvTYPE(sv) == SVt_PVIV) {
2232             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2233             STRLEN len;
2234             const char * const p = SvPV_const(sv, len);
2235             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2236             SvREFCNT_dec(sv);
2237             sv = nsv;
2238         }
2239         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2240         PL_lex_stuff = NULL;
2241         /* Allow <FH> // "foo" */
2242         if (op_type == OP_READLINE)
2243             PL_expect = XTERMORDORDOR;
2244         return THING;
2245     }
2246     else if (op_type == OP_BACKTICK && PL_lex_op) {
2247         /* readpipe() vas overriden */
2248         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2249         pl_yylval.opval = PL_lex_op;
2250         PL_lex_op = NULL;
2251         PL_lex_stuff = NULL;
2252         return THING;
2253     }
2254
2255     PL_sublex_info.super_state = PL_lex_state;
2256     PL_sublex_info.sub_inwhat = (U16)op_type;
2257     PL_sublex_info.sub_op = PL_lex_op;
2258     PL_lex_state = LEX_INTERPPUSH;
2259
2260     PL_expect = XTERM;
2261     if (PL_lex_op) {
2262         pl_yylval.opval = PL_lex_op;
2263         PL_lex_op = NULL;
2264         return PMFUNC;
2265     }
2266     else
2267         return FUNC;
2268 }
2269
2270 /*
2271  * S_sublex_push
2272  * Create a new scope to save the lexing state.  The scope will be
2273  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2274  * to the uc, lc, etc. found before.
2275  * Sets PL_lex_state to LEX_INTERPCONCAT.
2276  */
2277
2278 STATIC I32
2279 S_sublex_push(pTHX)
2280 {
2281     dVAR;
2282     ENTER;
2283
2284     PL_lex_state = PL_sublex_info.super_state;
2285     SAVEBOOL(PL_lex_dojoin);
2286     SAVEI32(PL_lex_brackets);
2287     SAVEI32(PL_lex_casemods);
2288     SAVEI32(PL_lex_starts);
2289     SAVEI8(PL_lex_state);
2290     SAVEVPTR(PL_lex_inpat);
2291     SAVEI16(PL_lex_inwhat);
2292     SAVECOPLINE(PL_curcop);
2293     SAVEPPTR(PL_bufptr);
2294     SAVEPPTR(PL_bufend);
2295     SAVEPPTR(PL_oldbufptr);
2296     SAVEPPTR(PL_oldoldbufptr);
2297     SAVEPPTR(PL_last_lop);
2298     SAVEPPTR(PL_last_uni);
2299     SAVEPPTR(PL_linestart);
2300     SAVESPTR(PL_linestr);
2301     SAVEGENERICPV(PL_lex_brackstack);
2302     SAVEGENERICPV(PL_lex_casestack);
2303
2304     PL_linestr = PL_lex_stuff;
2305     PL_lex_stuff = NULL;
2306
2307     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2308         = SvPVX(PL_linestr);
2309     PL_bufend += SvCUR(PL_linestr);
2310     PL_last_lop = PL_last_uni = NULL;
2311     SAVEFREESV(PL_linestr);
2312
2313     PL_lex_dojoin = FALSE;
2314     PL_lex_brackets = 0;
2315     Newx(PL_lex_brackstack, 120, char);
2316     Newx(PL_lex_casestack, 12, char);
2317     PL_lex_casemods = 0;
2318     *PL_lex_casestack = '\0';
2319     PL_lex_starts = 0;
2320     PL_lex_state = LEX_INTERPCONCAT;
2321     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2322
2323     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2324     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2325         PL_lex_inpat = PL_sublex_info.sub_op;
2326     else
2327         PL_lex_inpat = NULL;
2328
2329     return '(';
2330 }
2331
2332 /*
2333  * S_sublex_done
2334  * Restores lexer state after a S_sublex_push.
2335  */
2336
2337 STATIC I32
2338 S_sublex_done(pTHX)
2339 {
2340     dVAR;
2341     if (!PL_lex_starts++) {
2342         SV * const sv = newSVpvs("");
2343         if (SvUTF8(PL_linestr))
2344             SvUTF8_on(sv);
2345         PL_expect = XOPERATOR;
2346         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2347         return THING;
2348     }
2349
2350     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2351         PL_lex_state = LEX_INTERPCASEMOD;
2352         return yylex();
2353     }
2354
2355     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2356     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2357         PL_linestr = PL_lex_repl;
2358         PL_lex_inpat = 0;
2359         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2360         PL_bufend += SvCUR(PL_linestr);
2361         PL_last_lop = PL_last_uni = NULL;
2362         SAVEFREESV(PL_linestr);
2363         PL_lex_dojoin = FALSE;
2364         PL_lex_brackets = 0;
2365         PL_lex_casemods = 0;
2366         *PL_lex_casestack = '\0';
2367         PL_lex_starts = 0;
2368         if (SvEVALED(PL_lex_repl)) {
2369             PL_lex_state = LEX_INTERPNORMAL;
2370             PL_lex_starts++;
2371             /*  we don't clear PL_lex_repl here, so that we can check later
2372                 whether this is an evalled subst; that means we rely on the
2373                 logic to ensure sublex_done() is called again only via the
2374                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2375         }
2376         else {
2377             PL_lex_state = LEX_INTERPCONCAT;
2378             PL_lex_repl = NULL;
2379         }
2380         return ',';
2381     }
2382     else {
2383 #ifdef PERL_MAD
2384         if (PL_madskills) {
2385             if (PL_thiswhite) {
2386                 if (!PL_endwhite)
2387                     PL_endwhite = newSVpvs("");
2388                 sv_catsv(PL_endwhite, PL_thiswhite);
2389                 PL_thiswhite = 0;
2390             }
2391             if (PL_thistoken)
2392                 sv_setpvs(PL_thistoken,"");
2393             else
2394                 PL_realtokenstart = -1;
2395         }
2396 #endif
2397         LEAVE;
2398         PL_bufend = SvPVX(PL_linestr);
2399         PL_bufend += SvCUR(PL_linestr);
2400         PL_expect = XOPERATOR;
2401         PL_sublex_info.sub_inwhat = 0;
2402         return ')';
2403     }
2404 }
2405
2406 /*
2407   scan_const
2408
2409   Extracts a pattern, double-quoted string, or transliteration.  This
2410   is terrifying code.
2411
2412   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2413   processing a pattern (PL_lex_inpat is true), a transliteration
2414   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2415
2416   Returns a pointer to the character scanned up to. If this is
2417   advanced from the start pointer supplied (i.e. if anything was
2418   successfully parsed), will leave an OP for the substring scanned
2419   in pl_yylval. Caller must intuit reason for not parsing further
2420   by looking at the next characters herself.
2421
2422   In patterns:
2423     backslashes:
2424       double-quoted style: \r and \n
2425       regexp special ones: \D \s
2426       constants: \x31
2427       backrefs: \1
2428       case and quoting: \U \Q \E
2429     stops on @ and $, but not for $ as tail anchor
2430
2431   In transliterations:
2432     characters are VERY literal, except for - not at the start or end
2433     of the string, which indicates a range. If the range is in bytes,
2434     scan_const expands the range to the full set of intermediate
2435     characters. If the range is in utf8, the hyphen is replaced with
2436     a certain range mark which will be handled by pmtrans() in op.c.
2437
2438   In double-quoted strings:
2439     backslashes:
2440       double-quoted style: \r and \n
2441       constants: \x31
2442       deprecated backrefs: \1 (in substitution replacements)
2443       case and quoting: \U \Q \E
2444     stops on @ and $
2445
2446   scan_const does *not* construct ops to handle interpolated strings.
2447   It stops processing as soon as it finds an embedded $ or @ variable
2448   and leaves it to the caller to work out what's going on.
2449
2450   embedded arrays (whether in pattern or not) could be:
2451       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2452
2453   $ in double-quoted strings must be the symbol of an embedded scalar.
2454
2455   $ in pattern could be $foo or could be tail anchor.  Assumption:
2456   it's a tail anchor if $ is the last thing in the string, or if it's
2457   followed by one of "()| \r\n\t"
2458
2459   \1 (backreferences) are turned into $1
2460
2461   The structure of the code is
2462       while (there's a character to process) {
2463           handle transliteration ranges
2464           skip regexp comments /(?#comment)/ and codes /(?{code})/
2465           skip #-initiated comments in //x patterns
2466           check for embedded arrays
2467           check for embedded scalars
2468           if (backslash) {
2469               leave intact backslashes from leaveit (below)
2470               deprecate \1 in substitution replacements
2471               handle string-changing backslashes \l \U \Q \E, etc.
2472               switch (what was escaped) {
2473                   handle \- in a transliteration (becomes a literal -)
2474                   handle \132 (octal characters)
2475                   handle \x15 and \x{1234} (hex characters)
2476                   handle \N{name} (named characters)
2477                   handle \cV (control characters)
2478                   handle printf-style backslashes (\f, \r, \n, etc)
2479               } (end switch)
2480               continue
2481           } (end if backslash)
2482           handle regular character
2483     } (end while character to read)
2484
2485 */
2486
2487 STATIC char *
2488 S_scan_const(pTHX_ char *start)
2489 {
2490     dVAR;
2491     register char *send = PL_bufend;            /* end of the constant */
2492     SV *sv = newSV(send - start);               /* sv for the constant.  See
2493                                                    note below on sizing. */
2494     register char *s = start;                   /* start of the constant */
2495     register char *d = SvPVX(sv);               /* destination for copies */
2496     bool dorange = FALSE;                       /* are we in a translit range? */
2497     bool didrange = FALSE;                      /* did we just finish a range? */
2498     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2499     I32  this_utf8 = UTF;                       /* Is the source string assumed
2500                                                    to be UTF8?  But, this can
2501                                                    show as true when the source
2502                                                    isn't utf8, as for example
2503                                                    when it is entirely composed
2504                                                    of hex constants */
2505
2506     /* Note on sizing:  The scanned constant is placed into sv, which is
2507      * initialized by newSV() assuming one byte of output for every byte of
2508      * input.  This routine expects newSV() to allocate an extra byte for a
2509      * trailing NUL, which this routine will append if it gets to the end of
2510      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2511      * CAPITAL LETTER A}), or more output than input if the constant ends up
2512      * recoded to utf8, but each time a construct is found that might increase
2513      * the needed size, SvGROW() is called.  Its size parameter each time is
2514      * based on the best guess estimate at the time, namely the length used so
2515      * far, plus the length the current construct will occupy, plus room for
2516      * the trailing NUL, plus one byte for every input byte still unscanned */
2517
2518     UV uv;
2519 #ifdef EBCDIC
2520     UV literal_endpoint = 0;
2521     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2522 #endif
2523
2524     PERL_ARGS_ASSERT_SCAN_CONST;
2525
2526     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2527         /* If we are doing a trans and we know we want UTF8 set expectation */
2528         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2529         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2530     }
2531
2532
2533     while (s < send || dorange) {
2534         /* get transliterations out of the way (they're most literal) */
2535         if (PL_lex_inwhat == OP_TRANS) {
2536             /* expand a range A-Z to the full set of characters.  AIE! */
2537             if (dorange) {
2538                 I32 i;                          /* current expanded character */
2539                 I32 min;                        /* first character in range */
2540                 I32 max;                        /* last character in range */
2541
2542 #ifdef EBCDIC
2543                 UV uvmax = 0;
2544 #endif
2545
2546                 if (has_utf8
2547 #ifdef EBCDIC
2548                     && !native_range
2549 #endif
2550                     ) {
2551                     char * const c = (char*)utf8_hop((U8*)d, -1);
2552                     char *e = d++;
2553                     while (e-- > c)
2554                         *(e + 1) = *e;
2555                     *c = (char)UTF_TO_NATIVE(0xff);
2556                     /* mark the range as done, and continue */
2557                     dorange = FALSE;
2558                     didrange = TRUE;
2559                     continue;
2560                 }
2561
2562                 i = d - SvPVX_const(sv);                /* remember current offset */
2563 #ifdef EBCDIC
2564                 SvGROW(sv,
2565                        SvLEN(sv) + (has_utf8 ?
2566                                     (512 - UTF_CONTINUATION_MARK +
2567                                      UNISKIP(0x100))
2568                                     : 256));
2569                 /* How many two-byte within 0..255: 128 in UTF-8,
2570                  * 96 in UTF-8-mod. */
2571 #else
2572                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2573 #endif
2574                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2575 #ifdef EBCDIC
2576                 if (has_utf8) {
2577                     int j;
2578                     for (j = 0; j <= 1; j++) {
2579                         char * const c = (char*)utf8_hop((U8*)d, -1);
2580                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2581                         if (j)
2582                             min = (U8)uv;
2583                         else if (uv < 256)
2584                             max = (U8)uv;
2585                         else {
2586                             max = (U8)0xff; /* only to \xff */
2587                             uvmax = uv; /* \x{100} to uvmax */
2588                         }
2589                         d = c; /* eat endpoint chars */
2590                      }
2591                 }
2592                else {
2593 #endif
2594                    d -= 2;              /* eat the first char and the - */
2595                    min = (U8)*d;        /* first char in range */
2596                    max = (U8)d[1];      /* last char in range  */
2597 #ifdef EBCDIC
2598                }
2599 #endif
2600
2601                 if (min > max) {
2602                     Perl_croak(aTHX_
2603                                "Invalid range \"%c-%c\" in transliteration operator",
2604                                (char)min, (char)max);
2605                 }
2606
2607 #ifdef EBCDIC
2608                 if (literal_endpoint == 2 &&
2609                     ((isLOWER(min) && isLOWER(max)) ||
2610                      (isUPPER(min) && isUPPER(max)))) {
2611                     if (isLOWER(min)) {
2612                         for (i = min; i <= max; i++)
2613                             if (isLOWER(i))
2614                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2615                     } else {
2616                         for (i = min; i <= max; i++)
2617                             if (isUPPER(i))
2618                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2619                     }
2620                 }
2621                 else
2622 #endif
2623                     for (i = min; i <= max; i++)
2624 #ifdef EBCDIC
2625                         if (has_utf8) {
2626                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2627                             if (UNI_IS_INVARIANT(ch))
2628                                 *d++ = (U8)i;
2629                             else {
2630                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2631                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2632                             }
2633                         }
2634                         else
2635 #endif
2636                             *d++ = (char)i;
2637
2638 #ifdef EBCDIC
2639                 if (uvmax) {
2640                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2641                     if (uvmax > 0x101)
2642                         *d++ = (char)UTF_TO_NATIVE(0xff);
2643                     if (uvmax > 0x100)
2644                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2645                 }
2646 #endif
2647
2648                 /* mark the range as done, and continue */
2649                 dorange = FALSE;
2650                 didrange = TRUE;
2651 #ifdef EBCDIC
2652                 literal_endpoint = 0;
2653 #endif
2654                 continue;
2655             }
2656
2657             /* range begins (ignore - as first or last char) */
2658             else if (*s == '-' && s+1 < send  && s != start) {
2659                 if (didrange) {
2660                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2661                 }
2662                 if (has_utf8
2663 #ifdef EBCDIC
2664                     && !native_range
2665 #endif
2666                     ) {
2667                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2668                     s++;
2669                     continue;
2670                 }
2671                 dorange = TRUE;
2672                 s++;
2673             }
2674             else {
2675                 didrange = FALSE;
2676 #ifdef EBCDIC
2677                 literal_endpoint = 0;
2678                 native_range = TRUE;
2679 #endif
2680             }
2681         }
2682
2683         /* if we get here, we're not doing a transliteration */
2684
2685         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2686            except for the last char, which will be done separately. */
2687         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2688             if (s[2] == '#') {
2689                 while (s+1 < send && *s != ')')
2690                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2691             }
2692             else if (s[2] == '{' /* This should match regcomp.c */
2693                     || (s[2] == '?' && s[3] == '{'))
2694             {
2695                 I32 count = 1;
2696                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2697                 char c;
2698
2699                 while (count && (c = *regparse)) {
2700                     if (c == '\\' && regparse[1])
2701                         regparse++;
2702                     else if (c == '{')
2703                         count++;
2704                     else if (c == '}')
2705                         count--;
2706                     regparse++;
2707                 }
2708                 if (*regparse != ')')
2709                     regparse--;         /* Leave one char for continuation. */
2710                 while (s < regparse)
2711                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2712             }
2713         }
2714
2715         /* likewise skip #-initiated comments in //x patterns */
2716         else if (*s == '#' && PL_lex_inpat &&
2717           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2718             while (s+1 < send && *s != '\n')
2719                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2720         }
2721
2722         /* check for embedded arrays
2723            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2724            */
2725         else if (*s == '@' && s[1]) {
2726             if (isALNUM_lazy_if(s+1,UTF))
2727                 break;
2728             if (strchr(":'{$", s[1]))
2729                 break;
2730             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2731                 break; /* in regexp, neither @+ nor @- are interpolated */
2732         }
2733
2734         /* check for embedded scalars.  only stop if we're sure it's a
2735            variable.
2736         */
2737         else if (*s == '$') {
2738             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2739                 break;
2740             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2741                 if (s[1] == '\\') {
2742                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2743                                    "Possible unintended interpolation of $\\ in regex");
2744                 }
2745                 break;          /* in regexp, $ might be tail anchor */
2746             }
2747         }
2748
2749         /* End of else if chain - OP_TRANS rejoin rest */
2750
2751         /* backslashes */
2752         if (*s == '\\' && s+1 < send) {
2753             s++;
2754
2755             /* deprecate \1 in strings and substitution replacements */
2756             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2757                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2758             {
2759                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2760                 *--s = '$';
2761                 break;
2762             }
2763
2764             /* string-change backslash escapes */
2765             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2766                 --s;
2767                 break;
2768             }
2769             /* skip any other backslash escapes in a pattern */
2770             else if (PL_lex_inpat) {
2771                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2772                 goto default_action;
2773             }
2774
2775             /* if we get here, it's either a quoted -, or a digit */
2776             switch (*s) {
2777
2778             /* quoted - in transliterations */
2779             case '-':
2780                 if (PL_lex_inwhat == OP_TRANS) {
2781                     *d++ = *s++;
2782                     continue;
2783                 }
2784                 /* FALL THROUGH */
2785             default:
2786                 {
2787                     if ((isALPHA(*s) || isDIGIT(*s)))
2788                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2789                                        "Unrecognized escape \\%c passed through",
2790                                        *s);
2791                     /* default action is to copy the quoted character */
2792                     goto default_action;
2793                 }
2794
2795             /* eg. \132 indicates the octal constant 0x132 */
2796             case '0': case '1': case '2': case '3':
2797             case '4': case '5': case '6': case '7':
2798                 {
2799                     I32 flags = 0;
2800                     STRLEN len = 3;
2801                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2802                     s += len;
2803                 }
2804                 goto NUM_ESCAPE_INSERT;
2805
2806             /* eg. \x24 indicates the hex constant 0x24 */
2807             case 'x':
2808                 ++s;
2809                 if (*s == '{') {
2810                     char* const e = strchr(s, '}');
2811                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2812                       PERL_SCAN_DISALLOW_PREFIX;
2813                     STRLEN len;
2814
2815                     ++s;
2816                     if (!e) {
2817                         yyerror("Missing right brace on \\x{}");
2818                         continue;
2819                     }
2820                     len = e - s;
2821                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2822                     s = e + 1;
2823                 }
2824                 else {
2825                     {
2826                         STRLEN len = 2;
2827                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2828                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2829                         s += len;
2830                     }
2831                 }
2832
2833               NUM_ESCAPE_INSERT:
2834                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2835                  * always be enough room in sv since such escapes will be
2836                  * longer than any UTF-8 sequence they can end up as, except if
2837                  * they force us to recode the rest of the string into utf8 */
2838
2839                 /* Here uv is the ordinal of the next character being added in
2840                  * unicode (converted from native).  (It has to be done before
2841                  * here because \N is interpreted as unicode, and oct and hex
2842                  * as native.) */
2843                 if (!UNI_IS_INVARIANT(uv)) {
2844                     if (!has_utf8 && uv > 255) {
2845                         /* Might need to recode whatever we have accumulated so
2846                          * far if it contains any chars variant in utf8 or
2847                          * utf-ebcdic. */
2848
2849                         SvCUR_set(sv, d - SvPVX_const(sv));
2850                         SvPOK_on(sv);
2851                         *d = '\0';
2852                         /* See Note on sizing above.  */
2853                         sv_utf8_upgrade_flags_grow(sv,
2854                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2855                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2856                         d = SvPVX(sv) + SvCUR(sv);
2857                         has_utf8 = TRUE;
2858                     }
2859
2860                     if (has_utf8) {
2861                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2862                         if (PL_lex_inwhat == OP_TRANS &&
2863                             PL_sublex_info.sub_op) {
2864                             PL_sublex_info.sub_op->op_private |=
2865                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2866                                              : OPpTRANS_TO_UTF);
2867                         }
2868 #ifdef EBCDIC
2869                         if (uv > 255 && !dorange)
2870                             native_range = FALSE;
2871 #endif
2872                     }
2873                     else {
2874                         *d++ = (char)uv;
2875                     }
2876                 }
2877                 else {
2878                     *d++ = (char) uv;
2879                 }
2880                 continue;
2881
2882             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2883              * \N{U+0041} */
2884             case 'N':
2885                 ++s;
2886                 if (*s == '{') {
2887                     char* e = strchr(s, '}');
2888                     SV *res;
2889                     STRLEN len;
2890                     const char *str;
2891
2892                     if (!e) {
2893                         yyerror("Missing right brace on \\N{}");
2894                         e = s - 1;
2895                         goto cont_scan;
2896                     }
2897                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2898                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2899                          * machines */
2900                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2901                           PERL_SCAN_DISALLOW_PREFIX;
2902                         s += 3;
2903                         len = e - s;
2904                         uv = grok_hex(s, &len, &flags, NULL);
2905                         if ( e > s && len != (STRLEN)(e - s) ) {
2906                             uv = 0xFFFD;
2907                         }
2908                         s = e + 1;
2909                         goto NUM_ESCAPE_INSERT;
2910                     }
2911                     res = newSVpvn(s + 1, e - s - 1);
2912                     res = new_constant( NULL, 0, "charnames",
2913                                         res, NULL, s - 2, e - s + 3 );
2914                     if (has_utf8)
2915                         sv_utf8_upgrade(res);
2916                     str = SvPV_const(res,len);
2917 #ifdef EBCDIC_NEVER_MIND
2918                     /* charnames uses pack U and that has been
2919                      * recently changed to do the below uni->native
2920                      * mapping, so this would be redundant (and wrong,
2921                      * the code point would be doubly converted).
2922                      * But leave this in just in case the pack U change
2923                      * gets revoked, but the semantics is still
2924                      * desireable for charnames. --jhi */
2925                     {
2926                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2927
2928                          if (uv < 0x100) {
2929                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2930
2931                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2932                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2933                               str = SvPV_const(res, len);
2934                          }
2935                     }
2936 #endif
2937                     /* If destination is not in utf8 but this new character is,
2938                      * recode the dest to utf8 */
2939                     if (!has_utf8 && SvUTF8(res)) {
2940                         SvCUR_set(sv, d - SvPVX_const(sv));
2941                         SvPOK_on(sv);
2942                         *d = '\0';
2943                         /* See Note on sizing above.  */
2944                         sv_utf8_upgrade_flags_grow(sv,
2945                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2946                                             len + (STRLEN)(send - s) + 1);
2947                         d = SvPVX(sv) + SvCUR(sv);
2948                         has_utf8 = TRUE;
2949                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2950
2951                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2952                          * correctly here). */
2953                         const STRLEN off = d - SvPVX_const(sv);
2954                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2955                     }
2956 #ifdef EBCDIC
2957                     if (!dorange)
2958                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2959 #endif
2960                     Copy(str, d, len, char);
2961                     d += len;
2962                     SvREFCNT_dec(res);
2963                   cont_scan:
2964                     s = e + 1;
2965                 }
2966                 else
2967                     yyerror("Missing braces on \\N{}");
2968                 continue;
2969
2970             /* \c is a control character */
2971             case 'c':
2972                 s++;
2973                 if (s < send) {
2974                     U8 c = *s++;
2975 #ifdef EBCDIC
2976                     if (isLOWER(c))
2977                         c = toUPPER(c);
2978 #endif
2979                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2980                 }
2981                 else {
2982                     yyerror("Missing control char name in \\c");
2983                 }
2984                 continue;
2985
2986             /* printf-style backslashes, formfeeds, newlines, etc */
2987             case 'b':
2988                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2989                 break;
2990             case 'n':
2991                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2992                 break;
2993             case 'r':
2994                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2995                 break;
2996             case 'f':
2997                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2998                 break;
2999             case 't':
3000                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3001                 break;
3002             case 'e':
3003                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3004                 break;
3005             case 'a':
3006                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3007                 break;
3008             } /* end switch */
3009
3010             s++;
3011             continue;
3012         } /* end if (backslash) */
3013 #ifdef EBCDIC
3014         else
3015             literal_endpoint++;
3016 #endif
3017
3018     default_action:
3019         /* If we started with encoded form, or already know we want it,
3020            then encode the next character */
3021         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3022             STRLEN len  = 1;
3023
3024
3025             /* One might think that it is wasted effort in the case of the
3026              * source being utf8 (this_utf8 == TRUE) to take the next character
3027              * in the source, convert it to an unsigned value, and then convert
3028              * it back again.  But the source has not been validated here.  The
3029              * routine that does the conversion checks for errors like
3030              * malformed utf8 */
3031
3032             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3033             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3034             if (!has_utf8) {
3035                 SvCUR_set(sv, d - SvPVX_const(sv));
3036                 SvPOK_on(sv);
3037                 *d = '\0';
3038                 /* See Note on sizing above.  */
3039                 sv_utf8_upgrade_flags_grow(sv,
3040                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3041                                         need + (STRLEN)(send - s) + 1);
3042                 d = SvPVX(sv) + SvCUR(sv);
3043                 has_utf8 = TRUE;
3044             } else if (need > len) {
3045                 /* encoded value larger than old, may need extra space (NOTE:
3046                  * SvCUR() is not set correctly here).   See Note on sizing
3047                  * above.  */
3048                 const STRLEN off = d - SvPVX_const(sv);
3049                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3050             }
3051             s += len;
3052
3053             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3054 #ifdef EBCDIC
3055             if (uv > 255 && !dorange)
3056                 native_range = FALSE;
3057 #endif
3058         }
3059         else {
3060             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3061         }
3062     } /* while loop to process each character */
3063
3064     /* terminate the string and set up the sv */
3065     *d = '\0';
3066     SvCUR_set(sv, d - SvPVX_const(sv));
3067     if (SvCUR(sv) >= SvLEN(sv))
3068         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3069
3070     SvPOK_on(sv);
3071     if (PL_encoding && !has_utf8) {
3072         sv_recode_to_utf8(sv, PL_encoding);
3073         if (SvUTF8(sv))
3074             has_utf8 = TRUE;
3075     }
3076     if (has_utf8) {
3077         SvUTF8_on(sv);
3078         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3079             PL_sublex_info.sub_op->op_private |=
3080                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3081         }
3082     }
3083
3084     /* shrink the sv if we allocated more than we used */
3085     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3086         SvPV_shrink_to_cur(sv);
3087     }
3088
3089     /* return the substring (via pl_yylval) only if we parsed anything */
3090     if (s > PL_bufptr) {
3091         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3092             const char *const key = PL_lex_inpat ? "qr" : "q";
3093             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3094             const char *type;
3095             STRLEN typelen;
3096
3097             if (PL_lex_inwhat == OP_TRANS) {
3098                 type = "tr";
3099                 typelen = 2;
3100             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3101                 type = "s";
3102                 typelen = 1;
3103             } else  {
3104                 type = "qq";
3105                 typelen = 2;
3106             }
3107
3108             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3109                                 type, typelen);
3110         }
3111         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3112     } else
3113         SvREFCNT_dec(sv);
3114     return s;
3115 }
3116
3117 /* S_intuit_more
3118  * Returns TRUE if there's more to the expression (e.g., a subscript),
3119  * FALSE otherwise.
3120  *
3121  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3122  *
3123  * ->[ and ->{ return TRUE
3124  * { and [ outside a pattern are always subscripts, so return TRUE
3125  * if we're outside a pattern and it's not { or [, then return FALSE
3126  * if we're in a pattern and the first char is a {
3127  *   {4,5} (any digits around the comma) returns FALSE
3128  * if we're in a pattern and the first char is a [
3129  *   [] returns FALSE
3130  *   [SOMETHING] has a funky algorithm to decide whether it's a
3131  *      character class or not.  It has to deal with things like
3132  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3133  * anything else returns TRUE
3134  */
3135
3136 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3137
3138 STATIC int
3139 S_intuit_more(pTHX_ register char *s)
3140 {
3141     dVAR;
3142
3143     PERL_ARGS_ASSERT_INTUIT_MORE;
3144
3145     if (PL_lex_brackets)
3146         return TRUE;
3147     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3148         return TRUE;
3149     if (*s != '{' && *s != '[')
3150         return FALSE;
3151     if (!PL_lex_inpat)
3152         return TRUE;
3153
3154     /* In a pattern, so maybe we have {n,m}. */
3155     if (*s == '{') {
3156         s++;
3157         if (!isDIGIT(*s))
3158             return TRUE;
3159         while (isDIGIT(*s))
3160             s++;
3161         if (*s == ',')
3162             s++;
3163         while (isDIGIT(*s))
3164             s++;
3165         if (*s == '}')
3166             return FALSE;
3167         return TRUE;
3168
3169     }
3170
3171     /* On the other hand, maybe we have a character class */
3172
3173     s++;
3174     if (*s == ']' || *s == '^')
3175         return FALSE;
3176     else {
3177         /* this is terrifying, and it works */
3178         int weight = 2;         /* let's weigh the evidence */
3179         char seen[256];
3180         unsigned char un_char = 255, last_un_char;
3181         const char * const send = strchr(s,']');
3182         char tmpbuf[sizeof PL_tokenbuf * 4];
3183
3184         if (!send)              /* has to be an expression */
3185             return TRUE;
3186
3187         Zero(seen,256,char);
3188         if (*s == '$')
3189             weight -= 3;
3190         else if (isDIGIT(*s)) {
3191             if (s[1] != ']') {
3192                 if (isDIGIT(s[1]) && s[2] == ']')
3193                     weight -= 10;
3194             }
3195             else
3196                 weight -= 100;
3197         }
3198         for (; s < send; s++) {
3199             last_un_char = un_char;
3200             un_char = (unsigned char)*s;
3201             switch (*s) {
3202             case '@':
3203             case '&':
3204             case '$':
3205                 weight -= seen[un_char] * 10;
3206                 if (isALNUM_lazy_if(s+1,UTF)) {
3207                     int len;
3208                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3209                     len = (int)strlen(tmpbuf);
3210                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3211                         weight -= 100;
3212                     else
3213                         weight -= 10;
3214                 }
3215                 else if (*s == '$' && s[1] &&
3216                   strchr("[#!%*<>()-=",s[1])) {
3217                     if (/*{*/ strchr("])} =",s[2]))
3218                         weight -= 10;
3219                     else
3220                         weight -= 1;
3221                 }
3222                 break;
3223             case '\\':
3224                 un_char = 254;
3225                 if (s[1]) {
3226                     if (strchr("wds]",s[1]))
3227                         weight += 100;
3228                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3229                         weight += 1;
3230                     else if (strchr("rnftbxcav",s[1]))
3231                         weight += 40;
3232                     else if (isDIGIT(s[1])) {
3233                         weight += 40;
3234                         while (s[1] && isDIGIT(s[1]))
3235                             s++;
3236                     }
3237                 }
3238                 else
3239                     weight += 100;
3240                 break;
3241             case '-':
3242                 if (s[1] == '\\')
3243                     weight += 50;
3244                 if (strchr("aA01! ",last_un_char))
3245                     weight += 30;
3246                 if (strchr("zZ79~",s[1]))
3247                     weight += 30;
3248                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3249                     weight -= 5;        /* cope with negative subscript */
3250                 break;
3251             default:
3252                 if (!isALNUM(last_un_char)
3253                     && !(last_un_char == '$' || last_un_char == '@'
3254                          || last_un_char == '&')
3255                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3256                     char *d = tmpbuf;
3257                     while (isALPHA(*s))
3258                         *d++ = *s++;
3259                     *d = '\0';
3260                     if (keyword(tmpbuf, d - tmpbuf, 0))
3261                         weight -= 150;
3262                 }
3263                 if (un_char == last_un_char + 1)
3264                     weight += 5;
3265                 weight -= seen[un_char];
3266                 break;
3267             }
3268             seen[un_char]++;
3269         }
3270         if (weight >= 0)        /* probably a character class */
3271             return FALSE;
3272     }
3273
3274     return TRUE;
3275 }
3276
3277 /*
3278  * S_intuit_method
3279  *
3280  * Does all the checking to disambiguate
3281  *   foo bar
3282  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3283  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3284  *
3285  * First argument is the stuff after the first token, e.g. "bar".
3286  *
3287  * Not a method if bar is a filehandle.
3288  * Not a method if foo is a subroutine prototyped to take a filehandle.
3289  * Not a method if it's really "Foo $bar"
3290  * Method if it's "foo $bar"
3291  * Not a method if it's really "print foo $bar"
3292  * Method if it's really "foo package::" (interpreted as package->foo)
3293  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3294  * Not a method if bar is a filehandle or package, but is quoted with
3295  *   =>
3296  */
3297
3298 STATIC int
3299 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3300 {
3301     dVAR;
3302     char *s = start + (*start == '$');
3303     char tmpbuf[sizeof PL_tokenbuf];
3304     STRLEN len;
3305     GV* indirgv;
3306 #ifdef PERL_MAD
3307     int soff;
3308 #endif
3309
3310     PERL_ARGS_ASSERT_INTUIT_METHOD;
3311
3312     if (gv) {
3313         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3314             return 0;
3315         if (cv) {
3316             if (SvPOK(cv)) {
3317                 const char *proto = SvPVX_const(cv);
3318                 if (proto) {
3319                     if (*proto == ';')
3320                         proto++;
3321                     if (*proto == '*')
3322                         return 0;
3323                 }
3324             }
3325         } else
3326             gv = NULL;
3327     }
3328     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3329     /* start is the beginning of the possible filehandle/object,
3330      * and s is the end of it
3331      * tmpbuf is a copy of it
3332      */
3333
3334     if (*start == '$') {
3335         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3336                 isUPPER(*PL_tokenbuf))
3337             return 0;
3338 #ifdef PERL_MAD
3339         len = start - SvPVX(PL_linestr);
3340 #endif
3341         s = PEEKSPACE(s);
3342 #ifdef PERL_MAD
3343         start = SvPVX(PL_linestr) + len;
3344 #endif
3345         PL_bufptr = start;
3346         PL_expect = XREF;
3347         return *s == '(' ? FUNCMETH : METHOD;
3348     }
3349     if (!keyword(tmpbuf, len, 0)) {
3350         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3351             len -= 2;
3352             tmpbuf[len] = '\0';
3353 #ifdef PERL_MAD
3354             soff = s - SvPVX(PL_linestr);
3355 #endif
3356             goto bare_package;
3357         }
3358         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3359         if (indirgv && GvCVu(indirgv))
3360             return 0;
3361         /* filehandle or package name makes it a method */
3362         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3363 #ifdef PERL_MAD
3364             soff = s - SvPVX(PL_linestr);
3365 #endif
3366             s = PEEKSPACE(s);
3367             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3368                 return 0;       /* no assumptions -- "=>" quotes bearword */
3369       bare_package:
3370             start_force(PL_curforce);
3371             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3372                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3373             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3374             if (PL_madskills)
3375                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3376             PL_expect = XTERM;
3377             force_next(WORD);
3378             PL_bufptr = s;
3379 #ifdef PERL_MAD
3380             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3381 #endif
3382             return *s == '(' ? FUNCMETH : METHOD;
3383         }
3384     }
3385     return 0;
3386 }
3387
3388 /* Encoded script support. filter_add() effectively inserts a
3389  * 'pre-processing' function into the current source input stream.
3390  * Note that the filter function only applies to the current source file
3391  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3392  *
3393  * The datasv parameter (which may be NULL) can be used to pass
3394  * private data to this instance of the filter. The filter function
3395  * can recover the SV using the FILTER_DATA macro and use it to
3396  * store private buffers and state information.
3397  *
3398  * The supplied datasv parameter is upgraded to a PVIO type
3399  * and the IoDIRP/IoANY field is used to store the function pointer,
3400  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3401  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3402  * private use must be set using malloc'd pointers.
3403  */
3404
3405 SV *
3406 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3407 {
3408     dVAR;
3409     if (!funcp)
3410         return NULL;
3411
3412     if (!PL_parser)
3413         return NULL;
3414
3415     if (!PL_rsfp_filters)
3416         PL_rsfp_filters = newAV();
3417     if (!datasv)
3418         datasv = newSV(0);
3419     SvUPGRADE(datasv, SVt_PVIO);
3420     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3421     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3422     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3423                           FPTR2DPTR(void *, IoANY(datasv)),
3424                           SvPV_nolen(datasv)));
3425     av_unshift(PL_rsfp_filters, 1);
3426     av_store(PL_rsfp_filters, 0, datasv) ;
3427     return(datasv);
3428 }
3429
3430
3431 /* Delete most recently added instance of this filter function. */
3432 void
3433 Perl_filter_del(pTHX_ filter_t funcp)
3434 {
3435     dVAR;
3436     SV *datasv;
3437
3438     PERL_ARGS_ASSERT_FILTER_DEL;
3439
3440 #ifdef DEBUGGING
3441     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3442                           FPTR2DPTR(void*, funcp)));
3443 #endif
3444     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3445         return;
3446     /* if filter is on top of stack (usual case) just pop it off */
3447     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3448     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3449         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3450         IoANY(datasv) = (void *)NULL;
3451         sv_free(av_pop(PL_rsfp_filters));
3452
3453         return;
3454     }
3455     /* we need to search for the correct entry and clear it     */
3456     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3457 }
3458
3459
3460 /* Invoke the idxth filter function for the current rsfp.        */
3461 /* maxlen 0 = read one text line */
3462 I32
3463 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3464 {
3465     dVAR;
3466     filter_t funcp;
3467     SV *datasv = NULL;
3468     /* This API is bad. It should have been using unsigned int for maxlen.
3469        Not sure if we want to change the API, but if not we should sanity
3470        check the value here.  */
3471     const unsigned int correct_length
3472         = maxlen < 0 ?
3473 #ifdef PERL_MICRO
3474         0x7FFFFFFF
3475 #else
3476         INT_MAX
3477 #endif
3478         : maxlen;
3479
3480     PERL_ARGS_ASSERT_FILTER_READ;
3481
3482     if (!PL_parser || !PL_rsfp_filters)
3483         return -1;
3484     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3485         /* Provide a default input filter to make life easy.    */
3486         /* Note that we append to the line. This is handy.      */
3487         DEBUG_P(PerlIO_printf(Perl_debug_log,
3488                               "filter_read %d: from rsfp\n", idx));
3489         if (correct_length) {
3490             /* Want a block */
3491             int len ;
3492             const int old_len = SvCUR(buf_sv);
3493
3494             /* ensure buf_sv is large enough */
3495             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3496             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3497                                    correct_length)) <= 0) {
3498                 if (PerlIO_error(PL_rsfp))
3499                     return -1;          /* error */
3500                 else
3501                     return 0 ;          /* end of file */
3502             }
3503             SvCUR_set(buf_sv, old_len + len) ;
3504             SvPVX(buf_sv)[old_len + len] = '\0';
3505         } else {
3506             /* Want a line */
3507             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3508                 if (PerlIO_error(PL_rsfp))
3509                     return -1;          /* error */
3510                 else
3511                     return 0 ;          /* end of file */
3512             }
3513         }
3514         return SvCUR(buf_sv);
3515     }
3516     /* Skip this filter slot if filter has been deleted */
3517     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3518         DEBUG_P(PerlIO_printf(Perl_debug_log,
3519                               "filter_read %d: skipped (filter deleted)\n",
3520                               idx));
3521         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3522     }
3523     /* Get function pointer hidden within datasv        */
3524     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3525     DEBUG_P(PerlIO_printf(Perl_debug_log,
3526                           "filter_read %d: via function %p (%s)\n",
3527                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3528     /* Call function. The function is expected to       */
3529     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3530     /* Return: <0:error, =0:eof, >0:not eof             */
3531     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3532 }
3533
3534 STATIC char *
3535 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3536 {
3537     dVAR;
3538
3539     PERL_ARGS_ASSERT_FILTER_GETS;
3540
3541 #ifdef PERL_CR_FILTER
3542     if (!PL_rsfp_filters) {
3543         filter_add(S_cr_textfilter,NULL);
3544     }
3545 #endif
3546     if (PL_rsfp_filters) {
3547         if (!append)
3548             SvCUR_set(sv, 0);   /* start with empty line        */
3549         if (FILTER_READ(0, sv, 0) > 0)
3550             return ( SvPVX(sv) ) ;
3551         else
3552             return NULL ;
3553     }
3554     else
3555         return (sv_gets(sv, PL_rsfp, append));
3556 }
3557
3558 STATIC HV *
3559 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3560 {
3561     dVAR;
3562     GV *gv;
3563
3564     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3565
3566     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3567         return PL_curstash;
3568
3569     if (len > 2 &&
3570         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3571         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3572     {
3573         return GvHV(gv);                        /* Foo:: */
3574     }
3575
3576     /* use constant CLASS => 'MyClass' */
3577     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3578     if (gv && GvCV(gv)) {
3579         SV * const sv = cv_const_sv(GvCV(gv));
3580         if (sv)
3581             pkgname = SvPV_const(sv, len);
3582     }
3583
3584     return gv_stashpvn(pkgname, len, 0);
3585 }
3586
3587 /*
3588  * S_readpipe_override
3589  * Check whether readpipe() is overriden, and generates the appropriate
3590  * optree, provided sublex_start() is called afterwards.
3591  */
3592 STATIC void
3593 S_readpipe_override(pTHX)
3594 {
3595     GV **gvp;
3596     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3597     pl_yylval.ival = OP_BACKTICK;
3598     if ((gv_readpipe
3599                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3600             ||
3601             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3602              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3603              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3604     {
3605         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3606             append_elem(OP_LIST,
3607                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3608                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3609     }
3610 }
3611
3612 #ifdef PERL_MAD
3613  /*
3614  * Perl_madlex
3615  * The intent of this yylex wrapper is to minimize the changes to the
3616  * tokener when we aren't interested in collecting madprops.  It remains
3617  * to be seen how successful this strategy will be...
3618  */
3619
3620 int
3621 Perl_madlex(pTHX)
3622 {
3623     int optype;
3624     char *s = PL_bufptr;
3625
3626     /* make sure PL_thiswhite is initialized */
3627     PL_thiswhite = 0;
3628     PL_thismad = 0;
3629
3630     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3631     if (PL_pending_ident)
3632         return S_pending_ident(aTHX);
3633
3634     /* previous token ate up our whitespace? */
3635     if (!PL_lasttoke && PL_nextwhite) {
3636         PL_thiswhite = PL_nextwhite;
3637         PL_nextwhite = 0;
3638     }
3639
3640     /* isolate the token, and figure out where it is without whitespace */
3641     PL_realtokenstart = -1;
3642     PL_thistoken = 0;
3643     optype = yylex();
3644     s = PL_bufptr;
3645     assert(PL_curforce < 0);
3646
3647     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3648         if (!PL_thistoken) {
3649             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3650                 PL_thistoken = newSVpvs("");
3651             else {
3652                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3653                 PL_thistoken = newSVpvn(tstart, s - tstart);
3654             }
3655         }
3656         if (PL_thismad) /* install head */
3657             CURMAD('X', PL_thistoken);
3658     }
3659
3660     /* last whitespace of a sublex? */
3661     if (optype == ')' && PL_endwhite) {
3662         CURMAD('X', PL_endwhite);
3663     }
3664
3665     if (!PL_thismad) {
3666
3667         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3668         if (!PL_thiswhite && !PL_endwhite && !optype) {
3669             sv_free(PL_thistoken);
3670             PL_thistoken = 0;
3671             return 0;
3672         }
3673
3674         /* put off final whitespace till peg */
3675         if (optype == ';' && !PL_rsfp) {
3676             PL_nextwhite = PL_thiswhite;
3677             PL_thiswhite = 0;
3678         }
3679         else if (PL_thisopen) {
3680             CURMAD('q', PL_thisopen);
3681             if (PL_thistoken)
3682                 sv_free(PL_thistoken);
3683             PL_thistoken = 0;
3684         }
3685         else {
3686             /* Store actual token text as madprop X */
3687             CURMAD('X', PL_thistoken);
3688         }
3689
3690         if (PL_thiswhite) {
3691             /* add preceding whitespace as madprop _ */
3692             CURMAD('_', PL_thiswhite);
3693         }
3694
3695         if (PL_thisstuff) {
3696             /* add quoted material as madprop = */
3697             CURMAD('=', PL_thisstuff);
3698         }
3699
3700         if (PL_thisclose) {
3701             /* add terminating quote as madprop Q */
3702             CURMAD('Q', PL_thisclose);
3703         }
3704     }
3705
3706     /* special processing based on optype */
3707
3708     switch (optype) {
3709
3710     /* opval doesn't need a TOKEN since it can already store mp */
3711     case WORD:
3712     case METHOD:
3713     case FUNCMETH:
3714     case THING:
3715     case PMFUNC:
3716     case PRIVATEREF:
3717     case FUNC0SUB:
3718     case UNIOPSUB:
3719     case LSTOPSUB:
3720         if (pl_yylval.opval)
3721             append_madprops(PL_thismad, pl_yylval.opval, 0);
3722         PL_thismad = 0;
3723         return optype;
3724
3725     /* fake EOF */
3726     case 0:
3727         optype = PEG;
3728         if (PL_endwhite) {
3729             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3730             PL_endwhite = 0;
3731         }
3732         break;
3733
3734     case ']':
3735     case '}':
3736         if (PL_faketokens)
3737             break;
3738         /* remember any fake bracket that lexer is about to discard */
3739         if (PL_lex_brackets == 1 &&
3740             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3741         {
3742             s = PL_bufptr;
3743             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3744                 s++;
3745             if (*s == '}') {
3746                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3747                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3748                 PL_thiswhite = 0;
3749                 PL_bufptr = s - 1;
3750                 break;  /* don't bother looking for trailing comment */
3751             }
3752             else
3753                 s = PL_bufptr;
3754         }
3755         if (optype == ']')
3756             break;
3757         /* FALLTHROUGH */
3758
3759     /* attach a trailing comment to its statement instead of next token */
3760     case ';':
3761         if (PL_faketokens)
3762             break;
3763         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3764             s = PL_bufptr;
3765             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3766                 s++;
3767             if (*s == '\n' || *s == '#') {
3768                 while (s < PL_bufend && *s != '\n')
3769                     s++;
3770                 if (s < PL_bufend)
3771                     s++;
3772                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3773                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3774                 PL_thiswhite = 0;
3775                 PL_bufptr = s;
3776             }
3777         }
3778         break;
3779
3780     /* pval */
3781     case LABEL:
3782         break;
3783
3784     /* ival */
3785     default:
3786         break;
3787
3788     }
3789
3790     /* Create new token struct.  Note: opvals return early above. */
3791     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3792     PL_thismad = 0;
3793     return optype;
3794 }
3795 #endif
3796
3797 STATIC char *
3798 S_tokenize_use(pTHX_ int is_use, char *s) {
3799     dVAR;
3800
3801     PERL_ARGS_ASSERT_TOKENIZE_USE;
3802
3803     if (PL_expect != XSTATE)
3804         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3805                     is_use ? "use" : "no"));
3806     s = SKIPSPACE1(s);
3807     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3808         s = force_version(s, TRUE);
3809         if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3810             start_force(PL_curforce);
3811             NEXTVAL_NEXTTOKE.opval = NULL;
3812             force_next(WORD);
3813         }
3814         else if (*s == 'v') {
3815             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3816             s = force_version(s, FALSE);
3817         }
3818     }
3819     else {
3820         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3821         s = force_version(s, FALSE);
3822     }
3823     pl_yylval.ival = is_use;
3824     return s;
3825 }
3826 #ifdef DEBUGGING
3827     static const char* const exp_name[] =
3828         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3829           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3830         };
3831 #endif
3832
3833 /*
3834   yylex
3835
3836   Works out what to call the token just pulled out of the input
3837   stream.  The yacc parser takes care of taking the ops we return and
3838   stitching them into a tree.
3839
3840   Returns:
3841     PRIVATEREF
3842
3843   Structure:
3844       if read an identifier
3845           if we're in a my declaration
3846               croak if they tried to say my($foo::bar)
3847               build the ops for a my() declaration
3848           if it's an access to a my() variable
3849               are we in a sort block?
3850                   croak if my($a); $a <=> $b
3851               build ops for access to a my() variable
3852           if in a dq string, and they've said @foo and we can't find @foo
3853               croak
3854           build ops for a bareword
3855       if we already built the token before, use it.
3856 */
3857
3858
3859 #ifdef __SC__
3860 #pragma segment Perl_yylex
3861 #endif
3862 int
3863 Perl_yylex(pTHX)
3864 {
3865     dVAR;
3866     register char *s = PL_bufptr;
3867     register char *d;
3868     STRLEN len;
3869     bool bof = FALSE;
3870
3871     /* orig_keyword, gvp, and gv are initialized here because
3872      * jump to the label just_a_word_zero can bypass their
3873      * initialization later. */
3874     I32 orig_keyword = 0;
3875     GV *gv = NULL;
3876     GV **gvp = NULL;
3877
3878     DEBUG_T( {
3879         SV* tmp = newSVpvs("");
3880         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3881             (IV)CopLINE(PL_curcop),
3882             lex_state_names[PL_lex_state],
3883             exp_name[PL_expect],
3884             pv_display(tmp, s, strlen(s), 0, 60));
3885         SvREFCNT_dec(tmp);
3886     } );
3887     /* check if there's an identifier for us to look at */
3888     if (PL_pending_ident)
3889         return REPORT(S_pending_ident(aTHX));
3890
3891     /* no identifier pending identification */
3892
3893     switch (PL_lex_state) {
3894 #ifdef COMMENTARY
3895     case LEX_NORMAL:            /* Some compilers will produce faster */
3896     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3897         break;
3898 #endif
3899
3900     /* when we've already built the next token, just pull it out of the queue */
3901     case LEX_KNOWNEXT:
3902 #ifdef PERL_MAD
3903         PL_lasttoke--;
3904         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3905         if (PL_madskills) {
3906             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3907             PL_nexttoke[PL_lasttoke].next_mad = 0;
3908             if (PL_thismad && PL_thismad->mad_key == '_') {
3909                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3910                 PL_thismad->mad_val = 0;
3911                 mad_free(PL_thismad);
3912                 PL_thismad = 0;
3913             }
3914         }
3915         if (!PL_lasttoke) {
3916             PL_lex_state = PL_lex_defer;
3917             PL_expect = PL_lex_expect;
3918             PL_lex_defer = LEX_NORMAL;
3919             if (!PL_nexttoke[PL_lasttoke].next_type)
3920                 return yylex();
3921         }
3922 #else
3923         PL_nexttoke--;
3924         pl_yylval = PL_nextval[PL_nexttoke];
3925         if (!PL_nexttoke) {
3926             PL_lex_state = PL_lex_defer;
3927             PL_expect = PL_lex_expect;
3928             PL_lex_defer = LEX_NORMAL;
3929         }
3930 #endif
3931 #ifdef PERL_MAD
3932         /* FIXME - can these be merged?  */
3933         return(PL_nexttoke[PL_lasttoke].next_type);
3934 #else
3935         return REPORT(PL_nexttype[PL_nexttoke]);
3936 #endif
3937
3938     /* interpolated case modifiers like \L \U, including \Q and \E.
3939        when we get here, PL_bufptr is at the \
3940     */
3941     case LEX_INTERPCASEMOD:
3942 #ifdef DEBUGGING
3943         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3944             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3945 #endif
3946         /* handle \E or end of string */
3947         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3948             /* if at a \E */
3949             if (PL_lex_casemods) {
3950                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3951                 PL_lex_casestack[PL_lex_casemods] = '\0';
3952
3953                 if (PL_bufptr != PL_bufend
3954                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3955                     PL_bufptr += 2;
3956                     PL_lex_state = LEX_INTERPCONCAT;
3957 #ifdef PERL_MAD
3958                     if (PL_madskills)
3959                         PL_thistoken = newSVpvs("\\E");
3960 #endif
3961                 }
3962                 return REPORT(')');
3963             }
3964 #ifdef PERL_MAD
3965             while (PL_bufptr != PL_bufend &&
3966               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3967                 if (!PL_thiswhite)
3968                     PL_thiswhite = newSVpvs("");
3969                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3970                 PL_bufptr += 2;
3971             }
3972 #else
3973             if (PL_bufptr != PL_bufend)
3974                 PL_bufptr += 2;
3975 #endif
3976             PL_lex_state = LEX_INTERPCONCAT;
3977             return yylex();
3978         }
3979         else {
3980             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3981               "### Saw case modifier\n"); });
3982             s = PL_bufptr + 1;
3983             if (s[1] == '\\' && s[2] == 'E') {
3984 #ifdef PERL_MAD
3985                 if (!PL_thiswhite)
3986                     PL_thiswhite = newSVpvs("");
3987                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3988 #endif
3989                 PL_bufptr = s + 3;
3990                 PL_lex_state = LEX_INTERPCONCAT;
3991                 return yylex();
3992             }
3993             else {
3994                 I32 tmp;
3995                 if (!PL_madskills) /* when just compiling don't need correct */
3996                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3997                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
3998                 if ((*s == 'L' || *s == 'U') &&
3999                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4000                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4001                     return REPORT(')');
4002                 }
4003                 if (PL_lex_casemods > 10)
4004                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4005                 PL_lex_casestack[PL_lex_casemods++] = *s;
4006                 PL_lex_casestack[PL_lex_casemods] = '\0';
4007                 PL_lex_state = LEX_INTERPCONCAT;
4008                 start_force(PL_curforce);
4009                 NEXTVAL_NEXTTOKE.ival = 0;
4010                 force_next('(');
4011                 start_force(PL_curforce);
4012                 if (*s == 'l')
4013                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4014                 else if (*s == 'u')
4015                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4016                 else if (*s == 'L')
4017                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4018                 else if (*s == 'U')
4019                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4020                 else if (*s == 'Q')
4021                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4022                 else
4023                     Perl_croak(aTHX_ "panic: yylex");
4024                 if (PL_madskills) {
4025                     SV* const tmpsv = newSVpvs("\\ ");
4026                     /* replace the space with the character we want to escape
4027                      */
4028                     SvPVX(tmpsv)[1] = *s;
4029                     curmad('_', tmpsv);
4030                 }
4031                 PL_bufptr = s + 1;
4032             }
4033             force_next(FUNC);
4034             if (PL_lex_starts) {
4035                 s = PL_bufptr;
4036                 PL_lex_starts = 0;
4037 #ifdef PERL_MAD
4038                 if (PL_madskills) {
4039                     if (PL_thistoken)
4040                         sv_free(PL_thistoken);
4041                     PL_thistoken = newSVpvs("");
4042                 }
4043 #endif
4044                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4045                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4046                     OPERATOR(',');
4047                 else
4048                     Aop(OP_CONCAT);
4049             }
4050             else
4051                 return yylex();
4052         }
4053
4054     case LEX_INTERPPUSH:
4055         return REPORT(sublex_push());
4056
4057     case LEX_INTERPSTART:
4058         if (PL_bufptr == PL_bufend)
4059             return REPORT(sublex_done());
4060         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4061               "### Interpolated variable\n"); });
4062         PL_expect = XTERM;
4063         PL_lex_dojoin = (*PL_bufptr == '@');
4064         PL_lex_state = LEX_INTERPNORMAL;
4065         if (PL_lex_dojoin) {
4066             start_force(PL_curforce);
4067             NEXTVAL_NEXTTOKE.ival = 0;
4068             force_next(',');
4069             start_force(PL_curforce);
4070             force_ident("\"", '$');
4071             start_force(PL_curforce);
4072             NEXTVAL_NEXTTOKE.ival = 0;
4073             force_next('$');
4074             start_force(PL_curforce);
4075             NEXTVAL_NEXTTOKE.ival = 0;
4076             force_next('(');
4077             start_force(PL_curforce);
4078             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4079             force_next(FUNC);
4080         }
4081         if (PL_lex_starts++) {
4082             s = PL_bufptr;
4083 #ifdef PERL_MAD
4084             if (PL_madskills) {
4085                 if (PL_thistoken)
4086                     sv_free(PL_thistoken);
4087                 PL_thistoken = newSVpvs("");
4088             }
4089 #endif
4090             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4091             if (!PL_lex_casemods && PL_lex_inpat)
4092                 OPERATOR(',');
4093             else
4094                 Aop(OP_CONCAT);
4095         }
4096         return yylex();
4097
4098     case LEX_INTERPENDMAYBE:
4099         if (intuit_more(PL_bufptr)) {
4100             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4101             break;
4102         }
4103         /* FALL THROUGH */
4104
4105     case LEX_INTERPEND:
4106         if (PL_lex_dojoin) {
4107             PL_lex_dojoin = FALSE;
4108             PL_lex_state = LEX_INTERPCONCAT;
4109 #ifdef PERL_MAD
4110             if (PL_madskills) {
4111                 if (PL_thistoken)
4112                     sv_free(PL_thistoken);
4113                 PL_thistoken = newSVpvs("");
4114             }
4115 #endif
4116             return REPORT(')');
4117         }
4118         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4119             && SvEVALED(PL_lex_repl))
4120         {
4121             if (PL_bufptr != PL_bufend)
4122                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4123             PL_lex_repl = NULL;
4124         }
4125         /* FALLTHROUGH */
4126     case LEX_INTERPCONCAT:
4127 #ifdef DEBUGGING
4128         if (PL_lex_brackets)
4129             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4130 #endif
4131         if (PL_bufptr == PL_bufend)
4132             return REPORT(sublex_done());
4133
4134         if (SvIVX(PL_linestr) == '\'') {
4135             SV *sv = newSVsv(PL_linestr);
4136             if (!PL_lex_inpat)
4137                 sv = tokeq(sv);
4138             else if ( PL_hints & HINT_NEW_RE )
4139                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4140             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4141             s = PL_bufend;
4142         }
4143         else {
4144             s = scan_const(PL_bufptr);
4145             if (*s == '\\')
4146                 PL_lex_state = LEX_INTERPCASEMOD;
4147             else
4148                 PL_lex_state = LEX_INTERPSTART;
4149         }
4150
4151         if (s != PL_bufptr) {
4152             start_force(PL_curforce);
4153             if (PL_madskills) {
4154                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4155             }
4156             NEXTVAL_NEXTTOKE = pl_yylval;
4157             PL_expect = XTERM;
4158             force_next(THING);
4159             if (PL_lex_starts++) {
4160 #ifdef PERL_MAD
4161                 if (PL_madskills) {
4162                     if (PL_thistoken)
4163                         sv_free(PL_thistoken);
4164                     PL_thistoken = newSVpvs("");
4165                 }
4166 #endif
4167                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4168                 if (!PL_lex_casemods && PL_lex_inpat)
4169                     OPERATOR(',');
4170                 else
4171                     Aop(OP_CONCAT);
4172             }
4173             else {
4174                 PL_bufptr = s;
4175                 return yylex();
4176             }
4177         }
4178
4179         return yylex();
4180     case LEX_FORMLINE:
4181         PL_lex_state = LEX_NORMAL;
4182         s = scan_formline(PL_bufptr);
4183         if (!PL_lex_formbrack)
4184             goto rightbracket;
4185         OPERATOR(';');
4186     }
4187
4188     s = PL_bufptr;
4189     PL_oldoldbufptr = PL_oldbufptr;
4190     PL_oldbufptr = s;
4191
4192   retry:
4193 #ifdef PERL_MAD
4194     if (PL_thistoken) {
4195         sv_free(PL_thistoken);
4196         PL_thistoken = 0;
4197     }
4198     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4199 #endif
4200     switch (*s) {
4201     default:
4202         if (isIDFIRST_lazy_if(s,UTF))
4203             goto keylookup;
4204         {
4205         unsigned char c = *s;
4206         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4207         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4208             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4209         } else {
4210             d = PL_linestart;
4211         }
4212         *s = '\0';
4213         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4214     }
4215     case 4:
4216     case 26:
4217         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4218     case 0:
4219 #ifdef PERL_MAD
4220         if (PL_madskills)
4221             PL_faketokens = 0;
4222 #endif
4223         if (!PL_rsfp) {
4224             PL_last_uni = 0;
4225             PL_last_lop = 0;
4226             if (PL_lex_brackets) {
4227                 yyerror((const char *)
4228                         (PL_lex_formbrack
4229                          ? "Format not terminated"
4230                          : "Missing right curly or square bracket"));
4231             }
4232             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4233                         "### Tokener got EOF\n");
4234             } );
4235             TOKEN(0);
4236         }
4237         if (s++ < PL_bufend)
4238             goto retry;                 /* ignore stray nulls */
4239         PL_last_uni = 0;
4240         PL_last_lop = 0;
4241         if (!PL_in_eval && !PL_preambled) {
4242             PL_preambled = TRUE;
4243 #ifdef PERL_MAD
4244             if (PL_madskills)
4245                 PL_faketokens = 1;
4246 #endif
4247             if (PL_perldb) {
4248                 /* Generate a string of Perl code to load the debugger.
4249                  * If PERL5DB is set, it will return the contents of that,
4250                  * otherwise a compile-time require of perl5db.pl.  */
4251
4252                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4253
4254                 if (pdb) {
4255                     sv_setpv(PL_linestr, pdb);
4256                     sv_catpvs(PL_linestr,";");
4257                 } else {
4258                     SETERRNO(0,SS_NORMAL);
4259                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4260                 }
4261             } else
4262                 sv_setpvs(PL_linestr,"");
4263             if (PL_preambleav) {
4264                 SV **svp = AvARRAY(PL_preambleav);
4265                 SV **const end = svp + AvFILLp(PL_preambleav);
4266                 while(svp <= end) {
4267                     sv_catsv(PL_linestr, *svp);
4268                     ++svp;
4269                     sv_catpvs(PL_linestr, ";");
4270                 }
4271                 sv_free(MUTABLE_SV(PL_preambleav));
4272                 PL_preambleav = NULL;
4273             }
4274             if (PL_minus_E)
4275                 sv_catpvs(PL_linestr,
4276                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4277             if (PL_minus_n || PL_minus_p) {
4278                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4279                 if (PL_minus_l)
4280                     sv_catpvs(PL_linestr,"chomp;");
4281                 if (PL_minus_a) {
4282                     if (PL_minus_F) {
4283                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4284                              || *PL_splitstr == '"')
4285                               && strchr(PL_splitstr + 1, *PL_splitstr))
4286                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4287                         else {
4288                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4289                                bytes can be used as quoting characters.  :-) */
4290                             const char *splits = PL_splitstr;
4291                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4292                             do {
4293                                 /* Need to \ \s  */
4294                                 if (*splits == '\\')
4295                                     sv_catpvn(PL_linestr, splits, 1);
4296                                 sv_catpvn(PL_linestr, splits, 1);
4297                             } while (*splits++);
4298                             /* This loop will embed the trailing NUL of
4299                                PL_linestr as the last thing it does before
4300                                terminating.  */
4301                             sv_catpvs(PL_linestr, ");");
4302                         }
4303                     }
4304                     else
4305                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4306                 }
4307             }
4308             sv_catpvs(PL_linestr, "\n");
4309             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4310             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4311             PL_last_lop = PL_last_uni = NULL;
4312             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4313                 update_debugger_info(PL_linestr, NULL, 0);
4314             goto retry;
4315         }
4316         do {
4317             U32 fake_eof = 0;
4318             if (0) {
4319               fake_eof:
4320                 fake_eof = LEX_FAKE_EOF;
4321             }
4322             PL_bufptr = PL_bufend;
4323             if (!lex_next_chunk(fake_eof)) {
4324                 s = PL_bufptr;
4325                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4326             }
4327 #ifdef PERL_MAD
4328             if (!PL_rsfp)
4329                 PL_realtokenstart = -1;
4330 #endif
4331             s = PL_bufptr;
4332             /* If it looks like the start of a BOM or raw UTF-16,
4333              * check if it in fact is. */
4334             bof = PL_rsfp ? TRUE : FALSE;
4335             if (bof &&
4336                      (*s == 0 ||
4337                       *(U8*)s == 0xEF ||
4338                       *(U8*)s >= 0xFE ||
4339                       s[1] == 0)) {
4340                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4341                 if (bof) {
4342                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4343                     s = swallow_bom((U8*)s);
4344                 }
4345             }
4346             if (PL_doextract) {
4347                 /* Incest with pod. */
4348 #ifdef PERL_MAD
4349                 if (PL_madskills)
4350                     sv_catsv(PL_thiswhite, PL_linestr);
4351 #endif
4352                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4353                     sv_setpvs(PL_linestr, "");
4354                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4355                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4356                     PL_last_lop = PL_last_uni = NULL;
4357                     PL_doextract = FALSE;
4358                 }
4359             }
4360             incline(s);
4361         } while (PL_doextract);
4362         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4363         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4364             update_debugger_info(PL_linestr, NULL, 0);
4365         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4366         PL_last_lop = PL_last_uni = NULL;
4367         if (CopLINE(PL_curcop) == 1) {
4368             while (s < PL_bufend && isSPACE(*s))
4369                 s++;
4370             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4371                 s++;
4372 #ifdef PERL_MAD
4373             if (PL_madskills)
4374                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4375 #endif
4376             d = NULL;
4377             if (!PL_in_eval) {
4378                 if (*s == '#' && *(s+1) == '!')
4379                     d = s + 2;
4380 #ifdef ALTERNATE_SHEBANG
4381                 else {
4382                     static char const as[] = ALTERNATE_SHEBANG;
4383                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4384                         d = s + (sizeof(as) - 1);
4385                 }
4386 #endif /* ALTERNATE_SHEBANG */
4387             }
4388             if (d) {
4389                 char *ipath;
4390                 char *ipathend;
4391
4392                 while (isSPACE(*d))
4393                     d++;
4394                 ipath = d;
4395                 while (*d && !isSPACE(*d))
4396                     d++;
4397                 ipathend = d;
4398
4399 #ifdef ARG_ZERO_IS_SCRIPT
4400                 if (ipathend > ipath) {
4401                     /*
4402                      * HP-UX (at least) sets argv[0] to the script name,
4403                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4404                      * at least, set argv[0] to the basename of the Perl
4405                      * interpreter. So, having found "#!", we'll set it right.
4406                      */
4407                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4408                                                     SVt_PV)); /* $^X */
4409                     assert(SvPOK(x) || SvGMAGICAL(x));
4410                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4411                         sv_setpvn(x, ipath, ipathend - ipath);
4412                         SvSETMAGIC(x);
4413                     }
4414                     else {
4415                         STRLEN blen;
4416                         STRLEN llen;
4417                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4418                         const char * const lstart = SvPV_const(x,llen);
4419                         if (llen < blen) {
4420                             bstart += blen - llen;
4421                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4422                                 sv_setpvn(x, ipath, ipathend - ipath);
4423                                 SvSETMAGIC(x);
4424                             }
4425                         }
4426                     }
4427                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4428                 }
4429 #endif /* ARG_ZERO_IS_SCRIPT */
4430
4431                 /*
4432                  * Look for options.
4433                  */
4434                 d = instr(s,"perl -");
4435                 if (!d) {
4436                     d = instr(s,"perl");
4437 #if defined(DOSISH)
4438                     /* avoid getting into infinite loops when shebang
4439                      * line contains "Perl" rather than "perl" */
4440                     if (!d) {
4441                         for (d = ipathend-4; d >= ipath; --d) {
4442                             if ((*d == 'p' || *d == 'P')
4443                                 && !ibcmp(d, "perl", 4))
4444                             {
4445                                 break;
4446                             }
4447                         }
4448                         if (d < ipath)
4449                             d = NULL;
4450                     }
4451 #endif
4452                 }
4453 #ifdef ALTERNATE_SHEBANG
4454                 /*
4455                  * If the ALTERNATE_SHEBANG on this system starts with a
4456                  * character that can be part of a Perl expression, then if
4457                  * we see it but not "perl", we're probably looking at the
4458                  * start of Perl code, not a request to hand off to some
4459                  * other interpreter.  Similarly, if "perl" is there, but
4460                  * not in the first 'word' of the line, we assume the line
4461                  * contains the start of the Perl program.
4462                  */
4463                 if (d && *s != '#') {
4464                     const char *c = ipath;
4465                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4466                         c++;
4467                     if (c < d)
4468                         d = NULL;       /* "perl" not in first word; ignore */
4469                     else
4470                         *s = '#';       /* Don't try to parse shebang line */
4471                 }
4472 #endif /* ALTERNATE_SHEBANG */
4473                 if (!d &&
4474                     *s == '#' &&
4475                     ipathend > ipath &&
4476                     !PL_minus_c &&
4477                     !instr(s,"indir") &&
4478                     instr(PL_origargv[0],"perl"))
4479                 {
4480                     dVAR;
4481                     char **newargv;
4482
4483                     *ipathend = '\0';
4484                     s = ipathend + 1;
4485                     while (s < PL_bufend && isSPACE(*s))
4486                         s++;
4487                     if (s < PL_bufend) {
4488                         Newx(newargv,PL_origargc+3,char*);
4489                         newargv[1] = s;
4490                         while (s < PL_bufend && !isSPACE(*s))
4491                             s++;
4492                         *s = '\0';
4493                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4494                     }
4495                     else
4496                         newargv = PL_origargv;
4497                     newargv[0] = ipath;
4498                     PERL_FPU_PRE_EXEC
4499                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4500                     PERL_FPU_POST_EXEC
4501                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4502                 }
4503                 if (d) {
4504                     while (*d && !isSPACE(*d))
4505                         d++;
4506                     while (SPACE_OR_TAB(*d))
4507                         d++;
4508
4509                     if (*d++ == '-') {
4510                         const bool switches_done = PL_doswitches;
4511                         const U32 oldpdb = PL_perldb;
4512                         const bool oldn = PL_minus_n;
4513                         const bool oldp = PL_minus_p;
4514                         const char *d1 = d;
4515
4516                         do {
4517                             bool baduni = FALSE;
4518                             if (*d1 == 'C') {
4519                                 const char *d2 = d1 + 1;
4520                                 if (parse_unicode_opts((const char **)&d2)
4521                                     != PL_unicode)
4522                                     baduni = TRUE;
4523                             }
4524                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4525                                 const char * const m = d1;
4526                                 while (*d1 && !isSPACE(*d1))
4527                                     d1++;
4528                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4529                                       (int)(d1 - m), m);
4530                             }
4531                             d1 = moreswitches(d1);
4532                         } while (d1);
4533                         if (PL_doswitches && !switches_done) {
4534                             int argc = PL_origargc;
4535                             char **argv = PL_origargv;
4536                             do {
4537                                 argc--,argv++;
4538                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4539                             init_argv_symbols(argc,argv);
4540                         }
4541                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4542                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4543                               /* if we have already added "LINE: while (<>) {",
4544                                  we must not do it again */
4545                         {
4546                             sv_setpvs(PL_linestr, "");
4547                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4548                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4549                             PL_last_lop = PL_last_uni = NULL;
4550                             PL_preambled = FALSE;
4551                             if (PERLDB_LINE || PERLDB_SAVESRC)
4552                                 (void)gv_fetchfile(PL_origfilename);
4553                             goto retry;
4554                         }
4555                     }
4556                 }
4557             }
4558         }
4559         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4560             PL_bufptr = s;
4561             PL_lex_state = LEX_FORMLINE;
4562             return yylex();
4563         }
4564         goto retry;
4565     case '\r':
4566 #ifdef PERL_STRICT_CR
4567         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4568         Perl_croak(aTHX_
4569       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4570 #endif
4571     case ' ': case '\t': case '\f': case 013:
4572 #ifdef PERL_MAD
4573         PL_realtokenstart = -1;
4574         if (!PL_thiswhite)
4575             PL_thiswhite = newSVpvs("");
4576         sv_catpvn(PL_thiswhite, s, 1);
4577 #endif
4578         s++;
4579         goto retry;
4580     case '#':
4581     case '\n':
4582 #ifdef PERL_MAD
4583         PL_realtokenstart = -1;
4584         if (PL_madskills)
4585             PL_faketokens = 0;
4586 #endif
4587         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4588             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4589                 /* handle eval qq[#line 1 "foo"\n ...] */
4590                 CopLINE_dec(PL_curcop);
4591                 incline(s);
4592             }
4593             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4594                 s = SKIPSPACE0(s);
4595                 if (!PL_in_eval || PL_rsfp)
4596                     incline(s);
4597             }
4598             else {
4599                 d = s;
4600                 while (d < PL_bufend && *d != '\n')
4601                     d++;
4602                 if (d < PL_bufend)
4603                     d++;
4604                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4605                   Perl_croak(aTHX_ "panic: input overflow");
4606 #ifdef PERL_MAD
4607                 if (PL_madskills)
4608                     PL_thiswhite = newSVpvn(s, d - s);
4609 #endif
4610                 s = d;
4611                 incline(s);
4612             }
4613             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4614                 PL_bufptr = s;
4615                 PL_lex_state = LEX_FORMLINE;
4616                 return yylex();
4617             }
4618         }
4619         else {
4620 #ifdef PERL_MAD
4621             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4622                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4623                     PL_faketokens = 0;
4624                     s = SKIPSPACE0(s);
4625                     TOKEN(PEG); /* make sure any #! line is accessible */
4626                 }
4627                 s = SKIPSPACE0(s);
4628             }
4629             else {
4630 /*              if (PL_madskills && PL_lex_formbrack) { */
4631                     d = s;
4632                     while (d < PL_bufend && *d != '\n')
4633                         d++;
4634                     if (d < PL_bufend)
4635                         d++;
4636                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4637                       Perl_croak(aTHX_ "panic: input overflow");
4638                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4639                         if (!PL_thiswhite)
4640                             PL_thiswhite = newSVpvs("");
4641                         if (CopLINE(PL_curcop) == 1) {
4642                             sv_setpvs(PL_thiswhite, "");
4643                             PL_faketokens = 0;
4644                         }
4645                         sv_catpvn(PL_thiswhite, s, d - s);
4646                     }
4647                     s = d;
4648 /*              }
4649                 *s = '\0';
4650                 PL_bufend = s; */
4651             }
4652 #else
4653             *s = '\0';
4654             PL_bufend = s;
4655 #endif
4656         }
4657         goto retry;
4658     case '-':
4659         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4660             I32 ftst = 0;
4661             char tmp;
4662
4663             s++;
4664             PL_bufptr = s;
4665             tmp = *s++;
4666
4667             while (s < PL_bufend && SPACE_OR_TAB(*s))
4668                 s++;
4669
4670             if (strnEQ(s,"=>",2)) {
4671                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4672                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4673                 OPERATOR('-');          /* unary minus */
4674             }
4675             PL_last_uni = PL_oldbufptr;
4676             switch (tmp) {
4677             case 'r': ftst = OP_FTEREAD;        break;
4678             case 'w': ftst = OP_FTEWRITE;       break;
4679             case 'x': ftst = OP_FTEEXEC;        break;
4680             case 'o': ftst = OP_FTEOWNED;       break;
4681             case 'R': ftst = OP_FTRREAD;        break;
4682             case 'W': ftst = OP_FTRWRITE;       break;
4683             case 'X': ftst = OP_FTREXEC;        break;
4684             case 'O': ftst = OP_FTROWNED;       break;
4685             case 'e': ftst = OP_FTIS;           break;
4686             case 'z': ftst = OP_FTZERO;         break;
4687             case 's': ftst = OP_FTSIZE;         break;
4688             case 'f': ftst = OP_FTFILE;         break;
4689             case 'd': ftst = OP_FTDIR;          break;
4690             case 'l': ftst = OP_FTLINK;         break;
4691             case 'p': ftst = OP_FTPIPE;         break;
4692             case 'S': ftst = OP_FTSOCK;         break;
4693             case 'u': ftst = OP_FTSUID;         break;
4694             case 'g': ftst = OP_FTSGID;         break;
4695             case 'k': ftst = OP_FTSVTX;         break;
4696             case 'b': ftst = OP_FTBLK;          break;
4697             case 'c': ftst = OP_FTCHR;          break;
4698             case 't': ftst = OP_FTTTY;          break;
4699             case 'T': ftst = OP_FTTEXT;         break;
4700             case 'B': ftst = OP_FTBINARY;       break;
4701             case 'M': case 'A': case 'C':
4702                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4703                 switch (tmp) {
4704                 case 'M': ftst = OP_FTMTIME;    break;
4705                 case 'A': ftst = OP_FTATIME;    break;
4706                 case 'C': ftst = OP_FTCTIME;    break;
4707                 default:                        break;
4708                 }
4709                 break;
4710             default:
4711                 break;
4712             }
4713             if (ftst) {
4714                 PL_last_lop_op = (OPCODE)ftst;
4715                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4716                         "### Saw file test %c\n", (int)tmp);
4717                 } );
4718                 FTST(ftst);
4719             }
4720             else {
4721                 /* Assume it was a minus followed by a one-letter named
4722                  * subroutine call (or a -bareword), then. */
4723                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4724                         "### '-%c' looked like a file test but was not\n",
4725                         (int) tmp);
4726                 } );
4727                 s = --PL_bufptr;
4728             }
4729         }
4730         {
4731             const char tmp = *s++;
4732             if (*s == tmp) {
4733                 s++;
4734                 if (PL_expect == XOPERATOR)
4735                     TERM(POSTDEC);
4736                 else
4737                     OPERATOR(PREDEC);
4738             }
4739             else if (*s == '>') {
4740                 s++;
4741                 s = SKIPSPACE1(s);
4742                 if (isIDFIRST_lazy_if(s,UTF)) {
4743                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4744                     TOKEN(ARROW);
4745                 }
4746                 else if (*s == '$')
4747                     OPERATOR(ARROW);
4748                 else
4749                     TERM(ARROW);
4750             }
4751             if (PL_expect == XOPERATOR)
4752                 Aop(OP_SUBTRACT);
4753             else {
4754                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4755                     check_uni();
4756                 OPERATOR('-');          /* unary minus */
4757             }
4758         }
4759
4760     case '+':
4761         {
4762             const char tmp = *s++;
4763             if (*s == tmp) {
4764                 s++;
4765                 if (PL_expect == XOPERATOR)
4766                     TERM(POSTINC);
4767                 else
4768                     OPERATOR(PREINC);
4769             }
4770             if (PL_expect == XOPERATOR)
4771                 Aop(OP_ADD);
4772             else {
4773                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4774                     check_uni();
4775                 OPERATOR('+');
4776             }
4777         }
4778
4779     case '*':
4780         if (PL_expect != XOPERATOR) {
4781             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4782             PL_expect = XOPERATOR;
4783             force_ident(PL_tokenbuf, '*');
4784             if (!*PL_tokenbuf)
4785                 PREREF('*');
4786             TERM('*');
4787         }
4788         s++;
4789         if (*s == '*') {
4790             s++;
4791             PWop(OP_POW);
4792         }
4793         Mop(OP_MULTIPLY);
4794
4795     case '%':
4796         if (PL_expect == XOPERATOR) {
4797             ++s;
4798             Mop(OP_MODULO);
4799         }
4800         PL_tokenbuf[0] = '%';
4801         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4802                 sizeof PL_tokenbuf - 1, FALSE);
4803         if (!PL_tokenbuf[1]) {
4804             PREREF('%');
4805         }
4806         PL_pending_ident = '%';
4807         TERM('%');
4808
4809     case '^':
4810         s++;
4811         BOop(OP_BIT_XOR);
4812     case '[':
4813         PL_lex_brackets++;
4814         {
4815             const char tmp = *s++;
4816             OPERATOR(tmp);
4817         }
4818     case '~':
4819         if (s[1] == '~'
4820             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4821         {
4822             s += 2;
4823             Eop(OP_SMARTMATCH);
4824         }
4825     case ',':
4826         {
4827             const char tmp = *s++;
4828             OPERATOR(tmp);
4829         }
4830     case ':':
4831         if (s[1] == ':') {
4832             len = 0;
4833             goto just_a_word_zero_gv;
4834         }
4835         s++;
4836         switch (PL_expect) {
4837             OP *attrs;
4838 #ifdef PERL_MAD
4839             I32 stuffstart;
4840 #endif
4841         case XOPERATOR:
4842             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4843                 break;
4844             PL_bufptr = s;      /* update in case we back off */
4845             if (*s == '=') {
4846                 deprecate(":= for an empty attribute list");
4847             }
4848             goto grabattrs;
4849         case XATTRBLOCK:
4850             PL_expect = XBLOCK;
4851             goto grabattrs;
4852         case XATTRTERM:
4853             PL_expect = XTERMBLOCK;
4854          grabattrs:
4855 #ifdef PERL_MAD
4856             stuffstart = s - SvPVX(PL_linestr) - 1;
4857 #endif
4858             s = PEEKSPACE(s);
4859             attrs = NULL;
4860             while (isIDFIRST_lazy_if(s,UTF)) {
4861                 I32 tmp;
4862                 SV *sv;
4863                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4864                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4865                     if (tmp < 0) tmp = -tmp;
4866                     switch (tmp) {
4867                     case KEY_or:
4868                     case KEY_and:
4869                     case KEY_for:
4870                     case KEY_foreach:
4871                     case KEY_unless:
4872                     case KEY_if:
4873                     case KEY_while:
4874                     case KEY_until:
4875                         goto got_attrs;
4876                     default:
4877                         break;
4878                     }
4879                 }
4880                 sv = newSVpvn(s, len);
4881                 if (*d == '(') {
4882                     d = scan_str(d,TRUE,TRUE);
4883                     if (!d) {
4884                         /* MUST advance bufptr here to avoid bogus
4885                            "at end of line" context messages from yyerror().
4886                          */
4887                         PL_bufptr = s + len;
4888                         yyerror("Unterminated attribute parameter in attribute list");
4889                         if (attrs)
4890                             op_free(attrs);
4891                         sv_free(sv);
4892                         return REPORT(0);       /* EOF indicator */
4893                     }
4894                 }
4895                 if (PL_lex_stuff) {
4896                     sv_catsv(sv, PL_lex_stuff);
4897                     attrs = append_elem(OP_LIST, attrs,
4898                                         newSVOP(OP_CONST, 0, sv));
4899                     SvREFCNT_dec(PL_lex_stuff);
4900                     PL_lex_stuff = NULL;
4901                 }
4902                 else {
4903                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4904                         sv_free(sv);
4905                         if (PL_in_my == KEY_our) {
4906                             deprecate(":unique");
4907                         }
4908                         else
4909                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4910                     }
4911
4912                     /* NOTE: any CV attrs applied here need to be part of
4913                        the CVf_BUILTIN_ATTRS define in cv.h! */
4914                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4915                         sv_free(sv);
4916                         CvLVALUE_on(PL_compcv);
4917                     }
4918                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4919                         sv_free(sv);
4920                         deprecate(":locked");
4921                     }
4922                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4923                         sv_free(sv);
4924                         CvMETHOD_on(PL_compcv);
4925                     }
4926                     /* After we've set the flags, it could be argued that
4927                        we don't need to do the attributes.pm-based setting
4928                        process, and shouldn't bother appending recognized
4929                        flags.  To experiment with that, uncomment the
4930                        following "else".  (Note that's already been
4931                        uncommented.  That keeps the above-applied built-in
4932                        attributes from being intercepted (and possibly
4933                        rejected) by a package's attribute routines, but is
4934                        justified by the performance win for the common case
4935                        of applying only built-in attributes.) */
4936                     else
4937                         attrs = append_elem(OP_LIST, attrs,
4938                                             newSVOP(OP_CONST, 0,
4939                                                     sv));
4940                 }
4941                 s = PEEKSPACE(d);
4942                 if (*s == ':' && s[1] != ':')
4943                     s = PEEKSPACE(s+1);
4944                 else if (s == d)
4945                     break;      /* require real whitespace or :'s */
4946                 /* XXX losing whitespace on sequential attributes here */
4947             }
4948             {
4949                 const char tmp
4950                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4951                 if (*s != ';' && *s != '}' && *s != tmp
4952                     && (tmp != '=' || *s != ')')) {
4953                     const char q = ((*s == '\'') ? '"' : '\'');
4954                     /* If here for an expression, and parsed no attrs, back
4955                        off. */
4956                     if (tmp == '=' && !attrs) {
4957                         s = PL_bufptr;
4958                         break;
4959                     }
4960                     /* MUST advance bufptr here to avoid bogus "at end of line"
4961                        context messages from yyerror().
4962                     */
4963                     PL_bufptr = s;
4964                     yyerror( (const char *)
4965                              (*s
4966                               ? Perl_form(aTHX_ "Invalid separator character "
4967                                           "%c%c%c in attribute list", q, *s, q)
4968                               : "Unterminated attribute list" ) );
4969                     if (attrs)
4970                         op_free(attrs);
4971                     OPERATOR(':');
4972                 }
4973             }
4974         got_attrs:
4975             if (attrs) {
4976                 start_force(PL_curforce);
4977                 NEXTVAL_NEXTTOKE.opval = attrs;
4978                 CURMAD('_', PL_nextwhite);
4979                 force_next(THING);
4980             }
4981 #ifdef PERL_MAD
4982             if (PL_madskills) {
4983                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4984                                      (s - SvPVX(PL_linestr)) - stuffstart);
4985             }
4986 #endif
4987             TOKEN(COLONATTR);
4988         }
4989         OPERATOR(':');
4990     case '(':
4991         s++;
4992         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4993             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
4994         else
4995             PL_expect = XTERM;
4996         s = SKIPSPACE1(s);
4997         TOKEN('(');
4998     case ';':
4999         CLINE;
5000         {
5001             const char tmp = *s++;
5002             OPERATOR(tmp);
5003         }
5004     case ')':
5005         {
5006             const char tmp = *s++;
5007             s = SKIPSPACE1(s);
5008             if (*s == '{')
5009                 PREBLOCK(tmp);
5010             TERM(tmp);
5011         }
5012     case ']':
5013         s++;
5014         if (PL_lex_brackets <= 0)
5015             yyerror("Unmatched right square bracket");
5016         else
5017             --PL_lex_brackets;
5018         if (PL_lex_state == LEX_INTERPNORMAL) {
5019             if (PL_lex_brackets == 0) {
5020                 if (*s == '-' && s[1] == '>')
5021                     PL_lex_state = LEX_INTERPENDMAYBE;
5022                 else if (*s != '[' && *s != '{')
5023                     PL_lex_state = LEX_INTERPEND;
5024             }
5025         }
5026         TERM(']');
5027     case '{':
5028       leftbracket:
5029         s++;
5030         if (PL_lex_brackets > 100) {
5031             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5032         }
5033         switch (PL_expect) {
5034         case XTERM:
5035             if (PL_lex_formbrack) {
5036                 s--;
5037                 PRETERMBLOCK(DO);
5038             }
5039             if (PL_oldoldbufptr == PL_last_lop)
5040                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5041             else
5042                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5043             OPERATOR(HASHBRACK);
5044         case XOPERATOR:
5045             while (s < PL_bufend && SPACE_OR_TAB(*s))
5046                 s++;
5047             d = s;
5048             PL_tokenbuf[0] = '\0';
5049             if (d < PL_bufend && *d == '-') {
5050                 PL_tokenbuf[0] = '-';
5051                 d++;
5052                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5053                     d++;
5054             }
5055             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5056                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5057                               FALSE, &len);
5058                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5059                     d++;
5060                 if (*d == '}') {
5061                     const char minus = (PL_tokenbuf[0] == '-');
5062                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5063                     if (minus)
5064                         force_next('-');
5065                 }
5066             }
5067             /* FALL THROUGH */
5068         case XATTRBLOCK:
5069         case XBLOCK:
5070             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5071             PL_expect = XSTATE;
5072             break;
5073         case XATTRTERM:
5074         case XTERMBLOCK:
5075             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5076             PL_expect = XSTATE;
5077             break;
5078         default: {
5079                 const char *t;
5080                 if (PL_oldoldbufptr == PL_last_lop)
5081                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5082                 else
5083                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5084                 s = SKIPSPACE1(s);
5085                 if (*s == '}') {
5086                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5087                         PL_expect = XTERM;
5088                         /* This hack is to get the ${} in the message. */
5089                         PL_bufptr = s+1;
5090                         yyerror("syntax error");
5091                         break;
5092                     }
5093                     OPERATOR(HASHBRACK);
5094                 }
5095                 /* This hack serves to disambiguate a pair of curlies
5096                  * as being a block or an anon hash.  Normally, expectation
5097                  * determines that, but in cases where we're not in a
5098                  * position to expect anything in particular (like inside
5099                  * eval"") we have to resolve the ambiguity.  This code
5100                  * covers the case where the first term in the curlies is a
5101                  * quoted string.  Most other cases need to be explicitly
5102                  * disambiguated by prepending a "+" before the opening
5103                  * curly in order to force resolution as an anon hash.
5104                  *
5105                  * XXX should probably propagate the outer expectation
5106                  * into eval"" to rely less on this hack, but that could
5107                  * potentially break current behavior of eval"".
5108                  * GSAR 97-07-21
5109                  */
5110                 t = s;
5111                 if (*s == '\'' || *s == '"' || *s == '`') {
5112                     /* common case: get past first string, handling escapes */
5113                     for (t++; t < PL_bufend && *t != *s;)
5114                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5115                             t++;
5116                     t++;
5117                 }
5118                 else if (*s == 'q') {
5119                     if (++t < PL_bufend
5120                         && (!isALNUM(*t)
5121                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5122                                 && !isALNUM(*t))))
5123                     {
5124                         /* skip q//-like construct */
5125                         const char *tmps;
5126                         char open, close, term;
5127                         I32 brackets = 1;
5128
5129                         while (t < PL_bufend && isSPACE(*t))
5130                             t++;
5131                         /* check for q => */
5132                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5133                             OPERATOR(HASHBRACK);
5134                         }
5135                         term = *t;
5136                         open = term;
5137                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5138                             term = tmps[5];
5139                         close = term;
5140                         if (open == close)
5141                             for (t++; t < PL_bufend; t++) {
5142                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5143                                     t++;
5144                                 else if (*t == open)
5145                                     break;
5146                             }
5147                         else {
5148                             for (t++; t < PL_bufend; t++) {
5149                                 if (*t == '\\' && t+1 < PL_bufend)
5150                                     t++;
5151                                 else if (*t == close && --brackets <= 0)
5152                                     break;
5153                                 else if (*t == open)
5154                                     brackets++;
5155                             }
5156                         }
5157                         t++;
5158                     }
5159                     else
5160                         /* skip plain q word */
5161                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5162                              t += UTF8SKIP(t);
5163                 }
5164                 else if (isALNUM_lazy_if(t,UTF)) {
5165                     t += UTF8SKIP(t);
5166                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5167                          t += UTF8SKIP(t);
5168                 }
5169                 while (t < PL_bufend && isSPACE(*t))
5170                     t++;
5171                 /* if comma follows first term, call it an anon hash */
5172                 /* XXX it could be a comma expression with loop modifiers */
5173                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5174                                    || (*t == '=' && t[1] == '>')))
5175                     OPERATOR(HASHBRACK);
5176                 if (PL_expect == XREF)
5177                     PL_expect = XTERM;
5178                 else {
5179                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5180                     PL_expect = XSTATE;
5181                 }
5182             }
5183             break;
5184         }
5185         pl_yylval.ival = CopLINE(PL_curcop);
5186         if (isSPACE(*s) || *s == '#')
5187             PL_copline = NOLINE;   /* invalidate current command line number */
5188         TOKEN('{');
5189     case '}':
5190       rightbracket:
5191         s++;
5192         if (PL_lex_brackets <= 0)
5193             yyerror("Unmatched right curly bracket");
5194         else
5195             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5196         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5197             PL_lex_formbrack = 0;
5198         if (PL_lex_state == LEX_INTERPNORMAL) {
5199             if (PL_lex_brackets == 0) {
5200                 if (PL_expect & XFAKEBRACK) {
5201                     PL_expect &= XENUMMASK;
5202                     PL_lex_state = LEX_INTERPEND;
5203                     PL_bufptr = s;
5204 #if 0
5205                     if (PL_madskills) {
5206                         if (!PL_thiswhite)
5207                             PL_thiswhite = newSVpvs("");
5208                         sv_catpvs(PL_thiswhite,"}");
5209                     }
5210 #endif
5211                     return yylex();     /* ignore fake brackets */
5212                 }
5213                 if (*s == '-' && s[1] == '>')
5214                     PL_lex_state = LEX_INTERPENDMAYBE;
5215                 else if (*s != '[' && *s != '{')
5216                     PL_lex_state = LEX_INTERPEND;
5217             }
5218         }
5219         if (PL_expect & XFAKEBRACK) {
5220             PL_expect &= XENUMMASK;
5221             PL_bufptr = s;
5222             return yylex();             /* ignore fake brackets */
5223         }
5224         start_force(PL_curforce);
5225         if (PL_madskills) {
5226             curmad('X', newSVpvn(s-1,1));
5227             CURMAD('_', PL_thiswhite);
5228         }
5229         force_next('}');
5230 #ifdef PERL_MAD
5231         if (!PL_thistoken)
5232             PL_thistoken = newSVpvs("");
5233 #endif
5234         TOKEN(';');
5235     case '&':
5236         s++;
5237         if (*s++ == '&')
5238             AOPERATOR(ANDAND);
5239         s--;
5240         if (PL_expect == XOPERATOR) {
5241             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5242                 && isIDFIRST_lazy_if(s,UTF))
5243             {
5244                 CopLINE_dec(PL_curcop);
5245                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5246                 CopLINE_inc(PL_curcop);
5247             }
5248             BAop(OP_BIT_AND);
5249         }
5250
5251         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5252         if (*PL_tokenbuf) {
5253             PL_expect = XOPERATOR;
5254             force_ident(PL_tokenbuf, '&');
5255         }
5256         else
5257             PREREF('&');
5258         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5259         TERM('&');
5260
5261     case '|':
5262         s++;
5263         if (*s++ == '|')
5264             AOPERATOR(OROR);
5265         s--;
5266         BOop(OP_BIT_OR);
5267     case '=':
5268         s++;
5269         {
5270             const char tmp = *s++;
5271             if (tmp == '=')
5272                 Eop(OP_EQ);
5273             if (tmp == '>')
5274                 OPERATOR(',');
5275             if (tmp == '~')
5276                 PMop(OP_MATCH);
5277             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5278                 && strchr("+-*/%.^&|<",tmp))
5279                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5280                             "Reversed %c= operator",(int)tmp);
5281             s--;
5282             if (PL_expect == XSTATE && isALPHA(tmp) &&
5283                 (s == PL_linestart+1 || s[-2] == '\n') )
5284                 {
5285                     if (PL_in_eval && !PL_rsfp) {
5286                         d = PL_bufend;
5287                         while (s < d) {
5288                             if (*s++ == '\n') {
5289                                 incline(s);
5290                                 if (strnEQ(s,"=cut",4)) {
5291                                     s = strchr(s,'\n');
5292                                     if (s)
5293                                         s++;
5294                                     else
5295                                         s = d;
5296                                     incline(s);
5297                                     goto retry;
5298                                 }
5299                             }
5300                         }
5301                         goto retry;
5302                     }
5303 #ifdef PERL_MAD
5304                     if (PL_madskills) {
5305                         if (!PL_thiswhite)
5306                             PL_thiswhite = newSVpvs("");
5307                         sv_catpvn(PL_thiswhite, PL_linestart,
5308                                   PL_bufend - PL_linestart);
5309                     }
5310 #endif
5311                     s = PL_bufend;
5312                     PL_doextract = TRUE;
5313                     goto retry;
5314                 }
5315         }
5316         if (PL_lex_brackets < PL_lex_formbrack) {
5317             const char *t = s;
5318 #ifdef PERL_STRICT_CR
5319             while (SPACE_OR_TAB(*t))
5320 #else
5321             while (SPACE_OR_TAB(*t) || *t == '\r')
5322 #endif
5323                 t++;
5324             if (*t == '\n' || *t == '#') {
5325                 s--;
5326                 PL_expect = XBLOCK;
5327                 goto leftbracket;
5328             }
5329         }
5330         pl_yylval.ival = 0;
5331         OPERATOR(ASSIGNOP);
5332     case '!':
5333         s++;
5334         {
5335             const char tmp = *s++;
5336             if (tmp == '=') {
5337                 /* was this !=~ where !~ was meant?
5338                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5339
5340                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5341                     const char *t = s+1;
5342
5343                     while (t < PL_bufend && isSPACE(*t))
5344                         ++t;
5345
5346                     if (*t == '/' || *t == '?' ||
5347                         ((*t == 'm' || *t == 's' || *t == 'y')
5348                          && !isALNUM(t[1])) ||
5349                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5350                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5351                                     "!=~ should be !~");
5352                 }
5353                 Eop(OP_NE);
5354             }
5355             if (tmp == '~')
5356                 PMop(OP_NOT);
5357         }
5358         s--;
5359         OPERATOR('!');
5360     case '<':
5361         if (PL_expect != XOPERATOR) {
5362             if (s[1] != '<' && !strchr(s,'>'))
5363                 check_uni();
5364             if (s[1] == '<')
5365                 s = scan_heredoc(s);
5366             else
5367                 s = scan_inputsymbol(s);
5368             TERM(sublex_start());
5369         }
5370         s++;
5371         {
5372             char tmp = *s++;
5373             if (tmp == '<')
5374                 SHop(OP_LEFT_SHIFT);
5375             if (tmp == '=') {
5376                 tmp = *s++;
5377                 if (tmp == '>')
5378                     Eop(OP_NCMP);
5379                 s--;
5380                 Rop(OP_LE);
5381             }
5382         }
5383         s--;
5384         Rop(OP_LT);
5385     case '>':
5386         s++;
5387         {
5388             const char tmp = *s++;
5389             if (tmp == '>')
5390                 SHop(OP_RIGHT_SHIFT);
5391             else if (tmp == '=')
5392                 Rop(OP_GE);
5393         }
5394         s--;
5395         Rop(OP_GT);
5396
5397     case '$':
5398         CLINE;
5399
5400         if (PL_expect == XOPERATOR) {
5401             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5402                 return deprecate_commaless_var_list();
5403             }
5404         }
5405
5406         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5407             PL_tokenbuf[0] = '@';
5408             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5409                            sizeof PL_tokenbuf - 1, FALSE);
5410             if (PL_expect == XOPERATOR)
5411                 no_op("Array length", s);
5412             if (!PL_tokenbuf[1])
5413                 PREREF(DOLSHARP);
5414             PL_expect = XOPERATOR;
5415             PL_pending_ident = '#';
5416             TOKEN(DOLSHARP);
5417         }
5418
5419         PL_tokenbuf[0] = '$';
5420         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5421                        sizeof PL_tokenbuf - 1, FALSE);
5422         if (PL_expect == XOPERATOR)
5423             no_op("Scalar", s);
5424         if (!PL_tokenbuf[1]) {
5425             if (s == PL_bufend)
5426                 yyerror("Final $ should be \\$ or $name");
5427             PREREF('$');
5428         }
5429
5430         /* This kludge not intended to be bulletproof. */
5431         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5432             pl_yylval.opval = newSVOP(OP_CONST, 0,
5433                                    newSViv(CopARYBASE_get(&PL_compiling)));
5434             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5435             TERM(THING);
5436         }
5437
5438         d = s;
5439         {
5440             const char tmp = *s;
5441             if (PL_lex_state == LEX_NORMAL)
5442                 s = SKIPSPACE1(s);
5443
5444             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5445                 && intuit_more(s)) {
5446                 if (*s == '[') {
5447                     PL_tokenbuf[0] = '@';
5448                     if (ckWARN(WARN_SYNTAX)) {
5449                         char *t = s+1;
5450
5451                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5452                             t++;
5453                         if (*t++ == ',') {
5454                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5455                             while (t < PL_bufend && *t != ']')
5456                                 t++;
5457                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5458                                         "Multidimensional syntax %.*s not supported",
5459                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5460                         }
5461                     }
5462                 }
5463                 else if (*s == '{') {
5464                     char *t;
5465                     PL_tokenbuf[0] = '%';
5466                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5467                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5468                         {
5469                             char tmpbuf[sizeof PL_tokenbuf];
5470                             do {
5471                                 t++;
5472                             } while (isSPACE(*t));
5473                             if (isIDFIRST_lazy_if(t,UTF)) {
5474                                 STRLEN len;
5475                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5476                                               &len);
5477                                 while (isSPACE(*t))
5478                                     t++;
5479                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5480                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5481                                                 "You need to quote \"%s\"",
5482                                                 tmpbuf);
5483                             }
5484                         }
5485                 }
5486             }
5487
5488             PL_expect = XOPERATOR;
5489             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5490                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5491                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5492                     PL_expect = XOPERATOR;
5493                 else if (strchr("$@\"'`q", *s))
5494                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5495                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5496                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5497                 else if (isIDFIRST_lazy_if(s,UTF)) {
5498                     char tmpbuf[sizeof PL_tokenbuf];
5499                     int t2;
5500                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5501                     if ((t2 = keyword(tmpbuf, len, 0))) {
5502                         /* binary operators exclude handle interpretations */
5503                         switch (t2) {
5504                         case -KEY_x:
5505                         case -KEY_eq:
5506                         case -KEY_ne:
5507                         case -KEY_gt:
5508                         case -KEY_lt:
5509                         case -KEY_ge:
5510                         case -KEY_le:
5511                         case -KEY_cmp:
5512                             break;
5513                         default:
5514                             PL_expect = XTERM;  /* e.g. print $fh length() */
5515                             break;
5516                         }
5517                     }
5518                     else {
5519                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5520                     }
5521                 }
5522                 else if (isDIGIT(*s))
5523                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5524                 else if (*s == '.' && isDIGIT(s[1]))
5525                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5526                 else if ((*s == '?' || *s == '-' || *s == '+')
5527                          && !isSPACE(s[1]) && s[1] != '=')
5528                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5529                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5530                          && s[1] != '/')
5531                     PL_expect = XTERM;          /* e.g. print $fh /.../
5532                                                    XXX except DORDOR operator
5533                                                 */
5534                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5535                          && s[2] != '=')
5536                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5537             }
5538         }
5539         PL_pending_ident = '$';
5540         TOKEN('$');
5541
5542     case '@':
5543         if (PL_expect == XOPERATOR)
5544             no_op("Array", s);
5545         PL_tokenbuf[0] = '@';
5546         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5547         if (!PL_tokenbuf[1]) {
5548             PREREF('@');
5549         }
5550         if (PL_lex_state == LEX_NORMAL)
5551             s = SKIPSPACE1(s);
5552         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5553             if (*s == '{')
5554                 PL_tokenbuf[0] = '%';
5555
5556             /* Warn about @ where they meant $. */
5557             if (*s == '[' || *s == '{') {
5558                 if (ckWARN(WARN_SYNTAX)) {
5559                     const char *t = s + 1;
5560                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5561                         t++;
5562                     if (*t == '}' || *t == ']') {
5563                         t++;
5564                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5565                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5566                             "Scalar value %.*s better written as $%.*s",
5567                             (int)(t-PL_bufptr), PL_bufptr,
5568                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5569                     }
5570                 }
5571             }
5572         }
5573         PL_pending_ident = '@';
5574         TERM('@');
5575
5576      case '/':                  /* may be division, defined-or, or pattern */
5577         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5578             s += 2;
5579             AOPERATOR(DORDOR);
5580         }
5581      case '?':                  /* may either be conditional or pattern */
5582         if (PL_expect == XOPERATOR) {
5583              char tmp = *s++;
5584              if(tmp == '?') {
5585                 OPERATOR('?');
5586              }
5587              else {
5588                  tmp = *s++;
5589                  if(tmp == '/') {
5590                      /* A // operator. */
5591                     AOPERATOR(DORDOR);
5592                  }
5593                  else {
5594                      s--;
5595                      Mop(OP_DIVIDE);
5596                  }
5597              }
5598          }
5599          else {
5600              /* Disable warning on "study /blah/" */
5601              if (PL_oldoldbufptr == PL_last_uni
5602               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5603                   || memNE(PL_last_uni, "study", 5)
5604                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5605               ))
5606                  check_uni();
5607              s = scan_pat(s,OP_MATCH);
5608              TERM(sublex_start());
5609          }
5610
5611     case '.':
5612         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5613 #ifdef PERL_STRICT_CR
5614             && s[1] == '\n'
5615 #else
5616             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5617 #endif
5618             && (s == PL_linestart || s[-1] == '\n') )
5619         {
5620             PL_lex_formbrack = 0;
5621             PL_expect = XSTATE;
5622             goto rightbracket;
5623         }
5624         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5625             s += 3;
5626             OPERATOR(YADAYADA);
5627         }
5628         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5629             char tmp = *s++;
5630             if (*s == tmp) {
5631                 s++;
5632                 if (*s == tmp) {
5633                     s++;
5634                     pl_yylval.ival = OPf_SPECIAL;
5635                 }
5636                 else
5637                     pl_yylval.ival = 0;
5638                 OPERATOR(DOTDOT);
5639             }
5640             if (PL_expect != XOPERATOR)
5641                 check_uni();
5642             Aop(OP_CONCAT);
5643         }
5644         /* FALL THROUGH */
5645     case '0': case '1': case '2': case '3': case '4':
5646     case '5': case '6': case '7': case '8': case '9':
5647         s = scan_num(s, &pl_yylval);
5648         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5649         if (PL_expect == XOPERATOR)
5650             no_op("Number",s);
5651         TERM(THING);
5652
5653     case '\'':
5654         s = scan_str(s,!!PL_madskills,FALSE);
5655         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5656         if (PL_expect == XOPERATOR) {
5657             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5658                 return deprecate_commaless_var_list();
5659             }
5660             else
5661                 no_op("String",s);
5662         }
5663         if (!s)
5664             missingterm(NULL);
5665         pl_yylval.ival = OP_CONST;
5666         TERM(sublex_start());
5667
5668     case '"':
5669         s = scan_str(s,!!PL_madskills,FALSE);
5670         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5671         if (PL_expect == XOPERATOR) {
5672             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5673                 return deprecate_commaless_var_list();
5674             }
5675             else
5676                 no_op("String",s);
5677         }
5678         if (!s)
5679             missingterm(NULL);
5680         pl_yylval.ival = OP_CONST;
5681         /* FIXME. I think that this can be const if char *d is replaced by
5682            more localised variables.  */
5683         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5684             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5685                 pl_yylval.ival = OP_STRINGIFY;
5686                 break;
5687             }
5688         }
5689         TERM(sublex_start());
5690
5691     case '`':
5692         s = scan_str(s,!!PL_madskills,FALSE);
5693         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5694         if (PL_expect == XOPERATOR)
5695             no_op("Backticks",s);
5696         if (!s)
5697             missingterm(NULL);
5698         readpipe_override();
5699         TERM(sublex_start());
5700
5701     case '\\':
5702         s++;
5703         if (PL_lex_inwhat && isDIGIT(*s))
5704             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5705                            *s, *s);
5706         if (PL_expect == XOPERATOR)
5707             no_op("Backslash",s);
5708         OPERATOR(REFGEN);
5709
5710     case 'v':
5711         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5712             char *start = s + 2;
5713             while (isDIGIT(*start) || *start == '_')
5714                 start++;
5715             if (*start == '.' && isDIGIT(start[1])) {
5716                 s = scan_num(s, &pl_yylval);
5717                 TERM(THING);
5718             }
5719             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5720             else if (!isALPHA(*start) && (PL_expect == XTERM
5721                         || PL_expect == XREF || PL_expect == XSTATE
5722                         || PL_expect == XTERMORDORDOR)) {
5723                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5724                 if (!gv) {
5725                     s = scan_num(s, &pl_yylval);
5726                     TERM(THING);
5727                 }
5728             }
5729         }
5730         goto keylookup;
5731     case 'x':
5732         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5733             s++;
5734             Mop(OP_REPEAT);
5735         }
5736         goto keylookup;
5737
5738     case '_':
5739     case 'a': case 'A':
5740     case 'b': case 'B':
5741     case 'c': case 'C':
5742     case 'd': case 'D':
5743     case 'e': case 'E':
5744     case 'f': case 'F':
5745     case 'g': case 'G':
5746     case 'h': case 'H':
5747     case 'i': case 'I':
5748     case 'j': case 'J':
5749     case 'k': case 'K':
5750     case 'l': case 'L':
5751     case 'm': case 'M':
5752     case 'n': case 'N':
5753     case 'o': case 'O':
5754     case 'p': case 'P':
5755     case 'q': case 'Q':
5756     case 'r': case 'R':
5757     case 's': case 'S':
5758     case 't': case 'T':
5759     case 'u': case 'U':
5760               case 'V':
5761     case 'w': case 'W':
5762               case 'X':
5763     case 'y': case 'Y':
5764     case 'z': case 'Z':
5765
5766       keylookup: {
5767         bool anydelim;
5768         I32 tmp;
5769
5770         orig_keyword = 0;
5771         gv = NULL;
5772         gvp = NULL;
5773
5774         PL_bufptr = s;
5775         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5776
5777         /* Some keywords can be followed by any delimiter, including ':' */
5778         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5779                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5780                              (PL_tokenbuf[0] == 'q' &&
5781                               strchr("qwxr", PL_tokenbuf[1])))));
5782
5783         /* x::* is just a word, unless x is "CORE" */
5784         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5785             goto just_a_word;
5786
5787         d = s;
5788         while (d < PL_bufend && isSPACE(*d))
5789                 d++;    /* no comments skipped here, or s### is misparsed */
5790
5791         /* Is this a word before a => operator? */
5792         if (*d == '=' && d[1] == '>') {
5793             CLINE;
5794             pl_yylval.opval
5795                 = (OP*)newSVOP(OP_CONST, 0,
5796                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5797             pl_yylval.opval->op_private = OPpCONST_BARE;
5798             TERM(WORD);
5799         }
5800
5801         /* Check for plugged-in keyword */
5802         {
5803             OP *o;
5804             int result;
5805             char *saved_bufptr = PL_bufptr;
5806             PL_bufptr = s;
5807             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5808             s = PL_bufptr;
5809             if (result == KEYWORD_PLUGIN_DECLINE) {
5810                 /* not a plugged-in keyword */
5811                 PL_bufptr = saved_bufptr;
5812             } else if (result == KEYWORD_PLUGIN_STMT) {
5813                 pl_yylval.opval = o;
5814                 CLINE;
5815                 PL_expect = XSTATE;
5816                 return REPORT(PLUGSTMT);
5817             } else if (result == KEYWORD_PLUGIN_EXPR) {
5818                 pl_yylval.opval = o;
5819                 CLINE;
5820                 PL_expect = XOPERATOR;
5821                 return REPORT(PLUGEXPR);
5822             } else {
5823                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5824                                         PL_tokenbuf);
5825             }
5826         }
5827
5828         /* Check for built-in keyword */
5829         tmp = keyword(PL_tokenbuf, len, 0);
5830
5831         /* Is this a label? */
5832         if (!anydelim && PL_expect == XSTATE
5833               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5834             if (tmp)
5835                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5836             s = d + 1;
5837             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5838             CLINE;
5839             TOKEN(LABEL);
5840         }
5841
5842         if (tmp < 0) {                  /* second-class keyword? */
5843             GV *ogv = NULL;     /* override (winner) */
5844             GV *hgv = NULL;     /* hidden (loser) */
5845             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5846                 CV *cv;
5847                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5848                     (cv = GvCVu(gv)))
5849                 {
5850                     if (GvIMPORTED_CV(gv))
5851                         ogv = gv;
5852                     else if (! CvMETHOD(cv))
5853                         hgv = gv;
5854                 }
5855                 if (!ogv &&
5856                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5857                     (gv = *gvp) && isGV_with_GP(gv) &&
5858                     GvCVu(gv) && GvIMPORTED_CV(gv))
5859                 {
5860                     ogv = gv;
5861                 }
5862             }
5863             if (ogv) {
5864                 orig_keyword = tmp;
5865                 tmp = 0;                /* overridden by import or by GLOBAL */
5866             }
5867             else if (gv && !gvp
5868                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5869                      && GvCVu(gv))
5870             {
5871                 tmp = 0;                /* any sub overrides "weak" keyword */
5872             }
5873             else {                      /* no override */
5874                 tmp = -tmp;
5875                 if (tmp == KEY_dump) {
5876                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5877                                    "dump() better written as CORE::dump()");
5878                 }
5879                 gv = NULL;
5880                 gvp = 0;
5881                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5882                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5883                                    "Ambiguous call resolved as CORE::%s(), %s",
5884                                    GvENAME(hgv), "qualify as such or use &");
5885             }
5886         }
5887
5888       reserved_word:
5889         switch (tmp) {
5890
5891         default:                        /* not a keyword */
5892             /* Trade off - by using this evil construction we can pull the
5893                variable gv into the block labelled keylookup. If not, then
5894                we have to give it function scope so that the goto from the
5895                earlier ':' case doesn't bypass the initialisation.  */
5896             if (0) {
5897             just_a_word_zero_gv:
5898                 gv = NULL;
5899                 gvp = NULL;
5900                 orig_keyword = 0;
5901             }
5902           just_a_word: {
5903                 SV *sv;
5904                 int pkgname = 0;
5905                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5906                 OP *rv2cv_op;
5907                 CV *cv;
5908 #ifdef PERL_MAD
5909                 SV *nextPL_nextwhite = 0;
5910 #endif
5911
5912
5913                 /* Get the rest if it looks like a package qualifier */
5914
5915                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5916                     STRLEN morelen;
5917                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5918                                   TRUE, &morelen);
5919                     if (!morelen)
5920                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5921                                 *s == '\'' ? "'" : "::");
5922                     len += morelen;
5923                     pkgname = 1;
5924                 }
5925
5926                 if (PL_expect == XOPERATOR) {
5927                     if (PL_bufptr == PL_linestart) {
5928                         CopLINE_dec(PL_curcop);
5929                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5930                         CopLINE_inc(PL_curcop);
5931                     }
5932                     else
5933                         no_op("Bareword",s);
5934                 }
5935
5936                 /* Look for a subroutine with this name in current package,
5937                    unless name is "Foo::", in which case Foo is a bearword
5938                    (and a package name). */
5939
5940                 if (len > 2 && !PL_madskills &&
5941                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5942                 {
5943                     if (ckWARN(WARN_BAREWORD)
5944                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5945                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5946                             "Bareword \"%s\" refers to nonexistent package",
5947                              PL_tokenbuf);
5948                     len -= 2;
5949                     PL_tokenbuf[len] = '\0';
5950                     gv = NULL;
5951                     gvp = 0;
5952                 }
5953                 else {
5954                     if (!gv) {
5955                         /* Mustn't actually add anything to a symbol table.
5956                            But also don't want to "initialise" any placeholder
5957                            constants that might already be there into full
5958                            blown PVGVs with attached PVCV.  */
5959                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5960                                                GV_NOADD_NOINIT, SVt_PVCV);
5961                     }
5962                     len = 0;
5963                 }
5964
5965                 /* if we saw a global override before, get the right name */
5966
5967                 if (gvp) {
5968                     sv = newSVpvs("CORE::GLOBAL::");
5969                     sv_catpv(sv,PL_tokenbuf);
5970                 }
5971                 else {
5972                     /* If len is 0, newSVpv does strlen(), which is correct.
5973                        If len is non-zero, then it will be the true length,
5974                        and so the scalar will be created correctly.  */
5975                     sv = newSVpv(PL_tokenbuf,len);
5976                 }
5977 #ifdef PERL_MAD
5978                 if (PL_madskills && !PL_thistoken) {
5979                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5980                     PL_thistoken = newSVpvn(start,s - start);
5981                     PL_realtokenstart = s - SvPVX(PL_linestr);
5982                 }
5983 #endif
5984
5985                 /* Presume this is going to be a bareword of some sort. */
5986
5987                 CLINE;
5988                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5989                 pl_yylval.opval->op_private = OPpCONST_BARE;
5990                 /* UTF-8 package name? */
5991                 if (UTF && !IN_BYTES &&
5992                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5993                     SvUTF8_on(sv);
5994
5995                 /* And if "Foo::", then that's what it certainly is. */
5996
5997                 if (len)
5998                     goto safe_bareword;
5999
6000                 cv = NULL;
6001                 {
6002                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6003                     const_op->op_private = OPpCONST_BARE;
6004                     rv2cv_op = newCVREF(0, const_op);
6005                 }
6006                 if (rv2cv_op->op_type == OP_RV2CV &&
6007                         (rv2cv_op->op_flags & OPf_KIDS)) {
6008                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6009                     switch (rv_op->op_type) {
6010                         case OP_CONST: {
6011                             SV *sv = cSVOPx_sv(rv_op);
6012                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6013                                 cv = (CV*)SvRV(sv);
6014                         } break;
6015                         case OP_GV: {
6016                             GV *gv = cGVOPx_gv(rv_op);
6017                             CV *maybe_cv = GvCVu(gv);
6018                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6019                                 cv = maybe_cv;
6020                         } break;
6021                     }
6022                 }
6023
6024                 /* See if it's the indirect object for a list operator. */
6025
6026                 if (PL_oldoldbufptr &&
6027                     PL_oldoldbufptr < PL_bufptr &&
6028                     (PL_oldoldbufptr == PL_last_lop
6029                      || PL_oldoldbufptr == PL_last_uni) &&
6030                     /* NO SKIPSPACE BEFORE HERE! */
6031                     (PL_expect == XREF ||
6032                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6033                 {
6034                     bool immediate_paren = *s == '(';
6035
6036                     /* (Now we can afford to cross potential line boundary.) */
6037                     s = SKIPSPACE2(s,nextPL_nextwhite);
6038 #ifdef PERL_MAD
6039                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6040 #endif
6041
6042                     /* Two barewords in a row may indicate method call. */
6043
6044                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6045                         (tmp = intuit_method(s, gv, cv))) {
6046                         op_free(rv2cv_op);
6047                         return REPORT(tmp);
6048                     }
6049
6050                     /* If not a declared subroutine, it's an indirect object. */
6051                     /* (But it's an indir obj regardless for sort.) */
6052                     /* Also, if "_" follows a filetest operator, it's a bareword */
6053
6054                     if (
6055                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6056                          (!cv &&
6057                         (PL_last_lop_op != OP_MAPSTART &&
6058                          PL_last_lop_op != OP_GREPSTART))))
6059                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6060                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6061                        )
6062                     {
6063                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6064                         goto bareword;
6065                     }
6066                 }
6067
6068                 PL_expect = XOPERATOR;
6069 #ifdef PERL_MAD
6070                 if (isSPACE(*s))
6071                     s = SKIPSPACE2(s,nextPL_nextwhite);
6072                 PL_nextwhite = nextPL_nextwhite;
6073 #else
6074                 s = skipspace(s);
6075 #endif
6076
6077                 /* Is this a word before a => operator? */
6078                 if (*s == '=' && s[1] == '>' && !pkgname) {
6079                     op_free(rv2cv_op);
6080                     CLINE;
6081                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6082                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6083                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6084                     TERM(WORD);
6085                 }
6086
6087                 /* If followed by a paren, it's certainly a subroutine. */
6088                 if (*s == '(') {
6089                     CLINE;
6090                     if (cv) {
6091                         d = s + 1;
6092                         while (SPACE_OR_TAB(*d))
6093                             d++;
6094                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6095                             s = d + 1;
6096                             goto its_constant;
6097                         }
6098                     }
6099 #ifdef PERL_MAD
6100                     if (PL_madskills) {
6101                         PL_nextwhite = PL_thiswhite;
6102                         PL_thiswhite = 0;
6103                     }
6104                     start_force(PL_curforce);
6105 #endif
6106                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6107                     PL_expect = XOPERATOR;
6108 #ifdef PERL_MAD
6109                     if (PL_madskills) {
6110                         PL_nextwhite = nextPL_nextwhite;
6111                         curmad('X', PL_thistoken);
6112                         PL_thistoken = newSVpvs("");
6113                     }
6114 #endif
6115                     op_free(rv2cv_op);
6116                     force_next(WORD);
6117                     pl_yylval.ival = 0;
6118                     TOKEN('&');
6119                 }
6120
6121                 /* If followed by var or block, call it a method (unless sub) */
6122
6123                 if ((*s == '$' || *s == '{') && !cv) {
6124                     op_free(rv2cv_op);
6125                     PL_last_lop = PL_oldbufptr;
6126                     PL_last_lop_op = OP_METHOD;
6127                     PREBLOCK(METHOD);
6128                 }
6129
6130                 /* If followed by a bareword, see if it looks like indir obj. */
6131
6132                 if (!orig_keyword
6133                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6134                         && (tmp = intuit_method(s, gv, cv))) {
6135                     op_free(rv2cv_op);
6136                     return REPORT(tmp);
6137                 }
6138
6139                 /* Not a method, so call it a subroutine (if defined) */
6140
6141                 if (cv) {
6142                     if (lastchar == '-')
6143                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6144                                          "Ambiguous use of -%s resolved as -&%s()",
6145                                          PL_tokenbuf, PL_tokenbuf);
6146                     /* Check for a constant sub */
6147                     if ((sv = cv_const_sv(cv))) {
6148                   its_constant:
6149                         op_free(rv2cv_op);
6150                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6151                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6152                         pl_yylval.opval->op_private = 0;
6153                         TOKEN(WORD);
6154                     }
6155
6156                     op_free(pl_yylval.opval);
6157                     pl_yylval.opval = rv2cv_op;
6158                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6159                     PL_last_lop = PL_oldbufptr;
6160                     PL_last_lop_op = OP_ENTERSUB;
6161                     /* Is there a prototype? */
6162                     if (
6163 #ifdef PERL_MAD
6164                         cv &&
6165 #endif
6166                         SvPOK(cv))
6167                     {
6168                         STRLEN protolen;
6169                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6170                         if (!protolen)
6171                             TERM(FUNC0SUB);
6172                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6173                             OPERATOR(UNIOPSUB);
6174                         while (*proto == ';')
6175                             proto++;
6176                         if (*proto == '&' && *s == '{') {
6177                             if (PL_curstash)
6178                                 sv_setpvs(PL_subname, "__ANON__");
6179                             else
6180                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6181                             PREBLOCK(LSTOPSUB);
6182                         }
6183                     }
6184 #ifdef PERL_MAD
6185                     {
6186                         if (PL_madskills) {
6187                             PL_nextwhite = PL_thiswhite;
6188                             PL_thiswhite = 0;
6189                         }
6190                         start_force(PL_curforce);
6191                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6192                         PL_expect = XTERM;
6193                         if (PL_madskills) {
6194                             PL_nextwhite = nextPL_nextwhite;
6195                             curmad('X', PL_thistoken);
6196                             PL_thistoken = newSVpvs("");
6197                         }
6198                         force_next(WORD);
6199                         TOKEN(NOAMP);
6200                     }
6201                 }
6202
6203                 /* Guess harder when madskills require "best effort". */
6204                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6205                     int probable_sub = 0;
6206                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6207                         probable_sub = 1;
6208                     else if (isALPHA(*s)) {
6209                         char tmpbuf[1024];
6210                         STRLEN tmplen;
6211                         d = s;
6212                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6213                         if (!keyword(tmpbuf, tmplen, 0))
6214                             probable_sub = 1;
6215                         else {
6216                             while (d < PL_bufend && isSPACE(*d))
6217                                 d++;
6218                             if (*d == '=' && d[1] == '>')
6219                                 probable_sub = 1;
6220                         }
6221                     }
6222                     if (probable_sub) {
6223                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6224                         op_free(pl_yylval.opval);
6225                         pl_yylval.opval = rv2cv_op;
6226                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6227                         PL_last_lop = PL_oldbufptr;
6228                         PL_last_lop_op = OP_ENTERSUB;
6229                         PL_nextwhite = PL_thiswhite;
6230                         PL_thiswhite = 0;
6231                         start_force(PL_curforce);
6232                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6233                         PL_expect = XTERM;
6234                         PL_nextwhite = nextPL_nextwhite;
6235                         curmad('X', PL_thistoken);
6236                         PL_thistoken = newSVpvs("");
6237                         force_next(WORD);
6238                         TOKEN(NOAMP);
6239                     }
6240 #else
6241                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6242                     PL_expect = XTERM;
6243                     force_next(WORD);
6244                     TOKEN(NOAMP);
6245 #endif
6246                 }
6247
6248                 /* Call it a bare word */
6249
6250                 if (PL_hints & HINT_STRICT_SUBS)
6251                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6252                 else {
6253                 bareword:
6254                     /* after "print" and similar functions (corresponding to
6255                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6256                      * a filehandle should be subject to "strict subs".
6257                      * Likewise for the optional indirect-object argument to system
6258                      * or exec, which can't be a bareword */
6259                     if ((PL_last_lop_op == OP_PRINT
6260                             || PL_last_lop_op == OP_PRTF
6261                             || PL_last_lop_op == OP_SAY
6262                             || PL_last_lop_op == OP_SYSTEM
6263                             || PL_last_lop_op == OP_EXEC)
6264                             && (PL_hints & HINT_STRICT_SUBS))
6265                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6266                     if (lastchar != '-') {
6267                         if (ckWARN(WARN_RESERVED)) {
6268                             d = PL_tokenbuf;
6269                             while (isLOWER(*d))
6270                                 d++;
6271                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6272                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6273                                        PL_tokenbuf);
6274                         }
6275                     }
6276                 }
6277                 op_free(rv2cv_op);
6278
6279             safe_bareword:
6280                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6281                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6282                                      "Operator or semicolon missing before %c%s",
6283                                      lastchar, PL_tokenbuf);
6284                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6285                                      "Ambiguous use of %c resolved as operator %c",
6286                                      lastchar, lastchar);
6287                 }
6288                 TOKEN(WORD);
6289             }
6290
6291         case KEY___FILE__:
6292             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6293                                         newSVpv(CopFILE(PL_curcop),0));
6294             TERM(THING);
6295
6296         case KEY___LINE__:
6297             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6298                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6299             TERM(THING);
6300
6301         case KEY___PACKAGE__:
6302             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6303                                         (PL_curstash
6304                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6305                                          : &PL_sv_undef));
6306             TERM(THING);
6307
6308         case KEY___DATA__:
6309         case KEY___END__: {
6310             GV *gv;
6311             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6312                 const char *pname = "main";
6313                 if (PL_tokenbuf[2] == 'D')
6314                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6315                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6316                                 SVt_PVIO);
6317                 GvMULTI_on(gv);
6318                 if (!GvIO(gv))
6319                     GvIOp(gv) = newIO();
6320                 IoIFP(GvIOp(gv)) = PL_rsfp;
6321 #if defined(HAS_FCNTL) && defined(F_SETFD)
6322                 {
6323                     const int fd = PerlIO_fileno(PL_rsfp);
6324                     fcntl(fd,F_SETFD,fd >= 3);
6325                 }
6326 #endif
6327                 /* Mark this internal pseudo-handle as clean */
6328                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6329                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6330                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6331                 else
6332                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6333 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6334                 /* if the script was opened in binmode, we need to revert
6335                  * it to text mode for compatibility; but only iff it has CRs
6336                  * XXX this is a questionable hack at best. */
6337                 if (PL_bufend-PL_bufptr > 2
6338                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6339                 {
6340                     Off_t loc = 0;
6341                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6342                         loc = PerlIO_tell(PL_rsfp);
6343                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6344                     }
6345 #ifdef NETWARE
6346                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6347 #else
6348                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6349 #endif  /* NETWARE */
6350 #ifdef PERLIO_IS_STDIO /* really? */
6351 #  if defined(__BORLANDC__)
6352                         /* XXX see note in do_binmode() */
6353                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6354 #  endif
6355 #endif
6356                         if (loc > 0)
6357                             PerlIO_seek(PL_rsfp, loc, 0);
6358                     }
6359                 }
6360 #endif
6361 #ifdef PERLIO_LAYERS
6362                 if (!IN_BYTES) {
6363                     if (UTF)
6364                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6365                     else if (PL_encoding) {
6366                         SV *name;
6367                         dSP;
6368                         ENTER;
6369                         SAVETMPS;
6370                         PUSHMARK(sp);
6371                         EXTEND(SP, 1);
6372                         XPUSHs(PL_encoding);
6373                         PUTBACK;
6374                         call_method("name", G_SCALAR);
6375                         SPAGAIN;
6376                         name = POPs;
6377                         PUTBACK;
6378                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6379                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6380                                                       SVfARG(name)));
6381                         FREETMPS;
6382                         LEAVE;
6383                     }
6384                 }
6385 #endif
6386 #ifdef PERL_MAD
6387                 if (PL_madskills) {
6388                     if (PL_realtokenstart >= 0) {
6389                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6390                         if (!PL_endwhite)
6391                             PL_endwhite = newSVpvs("");
6392                         sv_catsv(PL_endwhite, PL_thiswhite);
6393                         PL_thiswhite = 0;
6394                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6395                         PL_realtokenstart = -1;
6396                     }
6397                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6398                            != NULL) ;
6399                 }
6400 #endif
6401                 PL_rsfp = NULL;
6402             }
6403             goto fake_eof;
6404         }
6405
6406         case KEY_AUTOLOAD:
6407         case KEY_DESTROY:
6408         case KEY_BEGIN:
6409         case KEY_UNITCHECK:
6410         case KEY_CHECK:
6411         case KEY_INIT:
6412         case KEY_END:
6413             if (PL_expect == XSTATE) {
6414                 s = PL_bufptr;
6415                 goto really_sub;
6416             }
6417             goto just_a_word;
6418
6419         case KEY_CORE:
6420             if (*s == ':' && s[1] == ':') {
6421                 s += 2;
6422                 d = s;
6423                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6424                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6425                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6426                 if (tmp < 0)
6427                     tmp = -tmp;
6428                 else if (tmp == KEY_require || tmp == KEY_do)
6429                     /* that's a way to remember we saw "CORE::" */
6430                     orig_keyword = tmp;
6431                 goto reserved_word;
6432             }
6433             goto just_a_word;
6434
6435         case KEY_abs:
6436             UNI(OP_ABS);
6437
6438         case KEY_alarm:
6439             UNI(OP_ALARM);
6440
6441         case KEY_accept:
6442             LOP(OP_ACCEPT,XTERM);
6443
6444         case KEY_and:
6445             OPERATOR(ANDOP);
6446
6447         case KEY_atan2:
6448             LOP(OP_ATAN2,XTERM);
6449
6450         case KEY_bind:
6451             LOP(OP_BIND,XTERM);
6452
6453         case KEY_binmode:
6454             LOP(OP_BINMODE,XTERM);
6455
6456         case KEY_bless:
6457             LOP(OP_BLESS,XTERM);
6458
6459         case KEY_break:
6460             FUN0(OP_BREAK);
6461
6462         case KEY_chop:
6463             UNI(OP_CHOP);
6464
6465         case KEY_continue:
6466             /* When 'use switch' is in effect, continue has a dual
6467                life as a control operator. */
6468             {
6469                 if (!FEATURE_IS_ENABLED("switch"))
6470                     PREBLOCK(CONTINUE);
6471                 else {
6472                     /* We have to disambiguate the two senses of
6473                       "continue". If the next token is a '{' then
6474                       treat it as the start of a continue block;
6475                       otherwise treat it as a control operator.
6476                      */
6477                     s = skipspace(s);
6478                     if (*s == '{')
6479             PREBLOCK(CONTINUE);
6480                     else
6481                         FUN0(OP_CONTINUE);
6482                 }
6483             }
6484
6485         case KEY_chdir:
6486             /* may use HOME */
6487             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6488             UNI(OP_CHDIR);
6489
6490         case KEY_close:
6491             UNI(OP_CLOSE);
6492
6493         case KEY_closedir:
6494             UNI(OP_CLOSEDIR);
6495
6496         case KEY_cmp:
6497             Eop(OP_SCMP);
6498
6499         case KEY_caller:
6500             UNI(OP_CALLER);
6501
6502         case KEY_crypt:
6503 #ifdef FCRYPT
6504             if (!PL_cryptseen) {
6505                 PL_cryptseen = TRUE;
6506                 init_des();
6507             }
6508 #endif
6509             LOP(OP_CRYPT,XTERM);
6510
6511         case KEY_chmod:
6512             LOP(OP_CHMOD,XTERM);
6513
6514         case KEY_chown:
6515             LOP(OP_CHOWN,XTERM);
6516
6517         case KEY_connect:
6518             LOP(OP_CONNECT,XTERM);
6519
6520         case KEY_chr:
6521             UNI(OP_CHR);
6522
6523         case KEY_cos:
6524             UNI(OP_COS);
6525
6526         case KEY_chroot:
6527             UNI(OP_CHROOT);
6528
6529         case KEY_default:
6530             PREBLOCK(DEFAULT);
6531
6532         case KEY_do:
6533             s = SKIPSPACE1(s);
6534             if (*s == '{')
6535                 PRETERMBLOCK(DO);
6536             if (*s != '\'')
6537                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6538             if (orig_keyword == KEY_do) {
6539                 orig_keyword = 0;
6540                 pl_yylval.ival = 1;
6541             }
6542             else
6543                 pl_yylval.ival = 0;
6544             OPERATOR(DO);
6545
6546         case KEY_die:
6547             PL_hints |= HINT_BLOCK_SCOPE;
6548             LOP(OP_DIE,XTERM);
6549
6550         case KEY_defined:
6551             UNI(OP_DEFINED);
6552
6553         case KEY_delete:
6554             UNI(OP_DELETE);
6555
6556         case KEY_dbmopen:
6557             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6558             LOP(OP_DBMOPEN,XTERM);
6559
6560         case KEY_dbmclose:
6561             UNI(OP_DBMCLOSE);
6562
6563         case KEY_dump:
6564             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6565             LOOPX(OP_DUMP);
6566
6567         case KEY_else:
6568             PREBLOCK(ELSE);
6569
6570         case KEY_elsif:
6571             pl_yylval.ival = CopLINE(PL_curcop);
6572             OPERATOR(ELSIF);
6573
6574         case KEY_eq:
6575             Eop(OP_SEQ);
6576
6577         case KEY_exists:
6578             UNI(OP_EXISTS);
6579
6580         case KEY_exit:
6581             if (PL_madskills)
6582                 UNI(OP_INT);
6583             UNI(OP_EXIT);
6584
6585         case KEY_eval:
6586             s = SKIPSPACE1(s);
6587             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6588             UNIBRACK(OP_ENTEREVAL);
6589
6590         case KEY_eof:
6591             UNI(OP_EOF);
6592
6593         case KEY_exp:
6594             UNI(OP_EXP);
6595
6596         case KEY_each:
6597             UNI(OP_EACH);
6598
6599         case KEY_exec:
6600             LOP(OP_EXEC,XREF);
6601
6602         case KEY_endhostent:
6603             FUN0(OP_EHOSTENT);
6604
6605         case KEY_endnetent:
6606             FUN0(OP_ENETENT);
6607
6608         case KEY_endservent:
6609             FUN0(OP_ESERVENT);
6610
6611         case KEY_endprotoent:
6612             FUN0(OP_EPROTOENT);
6613
6614         case KEY_endpwent:
6615             FUN0(OP_EPWENT);
6616
6617         case KEY_endgrent:
6618             FUN0(OP_EGRENT);
6619
6620         case KEY_for:
6621         case KEY_foreach:
6622             pl_yylval.ival = CopLINE(PL_curcop);
6623             s = SKIPSPACE1(s);
6624             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6625                 char *p = s;
6626 #ifdef PERL_MAD
6627                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6628 #endif
6629
6630                 if ((PL_bufend - p) >= 3 &&
6631                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6632                     p += 2;
6633                 else if ((PL_bufend - p) >= 4 &&
6634                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6635                     p += 3;
6636                 p = PEEKSPACE(p);
6637                 if (isIDFIRST_lazy_if(p,UTF)) {
6638                     p = scan_ident(p, PL_bufend,
6639                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6640                     p = PEEKSPACE(p);
6641                 }
6642                 if (*p != '$')
6643                     Perl_croak(aTHX_ "Missing $ on loop variable");
6644 #ifdef PERL_MAD
6645                 s = SvPVX(PL_linestr) + soff;
6646 #endif
6647             }
6648             OPERATOR(FOR);
6649
6650         case KEY_formline:
6651             LOP(OP_FORMLINE,XTERM);
6652
6653         case KEY_fork:
6654             FUN0(OP_FORK);
6655
6656         case KEY_fcntl:
6657             LOP(OP_FCNTL,XTERM);
6658
6659         case KEY_fileno:
6660             UNI(OP_FILENO);
6661
6662         case KEY_flock:
6663             LOP(OP_FLOCK,XTERM);
6664
6665         case KEY_gt:
6666             Rop(OP_SGT);
6667
6668         case KEY_ge:
6669             Rop(OP_SGE);
6670
6671         case KEY_grep:
6672             LOP(OP_GREPSTART, XREF);
6673
6674         case KEY_goto:
6675             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6676             LOOPX(OP_GOTO);
6677
6678         case KEY_gmtime:
6679             UNI(OP_GMTIME);
6680
6681         case KEY_getc:
6682             UNIDOR(OP_GETC);
6683
6684         case KEY_getppid:
6685             FUN0(OP_GETPPID);
6686
6687         case KEY_getpgrp:
6688             UNI(OP_GETPGRP);
6689
6690         case KEY_getpriority:
6691             LOP(OP_GETPRIORITY,XTERM);
6692
6693         case KEY_getprotobyname:
6694             UNI(OP_GPBYNAME);
6695
6696         case KEY_getprotobynumber:
6697             LOP(OP_GPBYNUMBER,XTERM);
6698
6699         case KEY_getprotoent:
6700             FUN0(OP_GPROTOENT);
6701
6702         case KEY_getpwent:
6703             FUN0(OP_GPWENT);
6704
6705         case KEY_getpwnam:
6706             UNI(OP_GPWNAM);
6707
6708         case KEY_getpwuid:
6709             UNI(OP_GPWUID);
6710
6711         case KEY_getpeername:
6712             UNI(OP_GETPEERNAME);
6713
6714         case KEY_gethostbyname:
6715             UNI(OP_GHBYNAME);
6716
6717         case KEY_gethostbyaddr:
6718             LOP(OP_GHBYADDR,XTERM);
6719
6720         case KEY_gethostent:
6721             FUN0(OP_GHOSTENT);
6722
6723         case KEY_getnetbyname:
6724             UNI(OP_GNBYNAME);
6725
6726         case KEY_getnetbyaddr:
6727             LOP(OP_GNBYADDR,XTERM);
6728
6729         case KEY_getnetent:
6730             FUN0(OP_GNETENT);
6731
6732         case KEY_getservbyname:
6733             LOP(OP_GSBYNAME,XTERM);
6734
6735         case KEY_getservbyport:
6736             LOP(OP_GSBYPORT,XTERM);
6737
6738         case KEY_getservent:
6739             FUN0(OP_GSERVENT);
6740
6741         case KEY_getsockname:
6742             UNI(OP_GETSOCKNAME);
6743
6744         case KEY_getsockopt:
6745             LOP(OP_GSOCKOPT,XTERM);
6746
6747         case KEY_getgrent:
6748             FUN0(OP_GGRENT);
6749
6750         case KEY_getgrnam:
6751             UNI(OP_GGRNAM);
6752
6753         case KEY_getgrgid:
6754             UNI(OP_GGRGID);
6755
6756         case KEY_getlogin:
6757             FUN0(OP_GETLOGIN);
6758
6759         case KEY_given:
6760             pl_yylval.ival = CopLINE(PL_curcop);
6761             OPERATOR(GIVEN);
6762
6763         case KEY_glob:
6764             LOP(OP_GLOB,XTERM);
6765
6766         case KEY_hex:
6767             UNI(OP_HEX);
6768
6769         case KEY_if:
6770             pl_yylval.ival = CopLINE(PL_curcop);
6771             OPERATOR(IF);
6772
6773         case KEY_index:
6774             LOP(OP_INDEX,XTERM);
6775
6776         case KEY_int:
6777             UNI(OP_INT);
6778
6779         case KEY_ioctl:
6780             LOP(OP_IOCTL,XTERM);
6781
6782         case KEY_join:
6783             LOP(OP_JOIN,XTERM);
6784
6785         case KEY_keys:
6786             UNI(OP_KEYS);
6787
6788         case KEY_kill:
6789             LOP(OP_KILL,XTERM);
6790
6791         case KEY_last:
6792             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6793             LOOPX(OP_LAST);
6794
6795         case KEY_lc:
6796             UNI(OP_LC);
6797
6798         case KEY_lcfirst:
6799             UNI(OP_LCFIRST);
6800
6801         case KEY_local:
6802             pl_yylval.ival = 0;
6803             OPERATOR(LOCAL);
6804
6805         case KEY_length:
6806             UNI(OP_LENGTH);
6807
6808         case KEY_lt:
6809             Rop(OP_SLT);
6810
6811         case KEY_le:
6812             Rop(OP_SLE);
6813
6814         case KEY_localtime:
6815             UNI(OP_LOCALTIME);
6816
6817         case KEY_log:
6818             UNI(OP_LOG);
6819
6820         case KEY_link:
6821             LOP(OP_LINK,XTERM);
6822
6823         case KEY_listen:
6824             LOP(OP_LISTEN,XTERM);
6825
6826         case KEY_lock:
6827             UNI(OP_LOCK);
6828
6829         case KEY_lstat:
6830             UNI(OP_LSTAT);
6831
6832         case KEY_m:
6833             s = scan_pat(s,OP_MATCH);
6834             TERM(sublex_start());
6835
6836         case KEY_map:
6837             LOP(OP_MAPSTART, XREF);
6838
6839         case KEY_mkdir:
6840             LOP(OP_MKDIR,XTERM);
6841
6842         case KEY_msgctl:
6843             LOP(OP_MSGCTL,XTERM);
6844
6845         case KEY_msgget:
6846             LOP(OP_MSGGET,XTERM);
6847
6848         case KEY_msgrcv:
6849             LOP(OP_MSGRCV,XTERM);
6850
6851         case KEY_msgsnd:
6852             LOP(OP_MSGSND,XTERM);
6853
6854         case KEY_our:
6855         case KEY_my:
6856         case KEY_state:
6857             PL_in_my = (U16)tmp;
6858             s = SKIPSPACE1(s);
6859             if (isIDFIRST_lazy_if(s,UTF)) {
6860 #ifdef PERL_MAD
6861                 char* start = s;
6862 #endif
6863                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6864                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6865                     goto really_sub;
6866                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6867                 if (!PL_in_my_stash) {
6868                     char tmpbuf[1024];
6869                     PL_bufptr = s;
6870                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6871                     yyerror(tmpbuf);
6872                 }
6873 #ifdef PERL_MAD
6874                 if (PL_madskills) {     /* just add type to declarator token */
6875                     sv_catsv(PL_thistoken, PL_nextwhite);
6876                     PL_nextwhite = 0;
6877                     sv_catpvn(PL_thistoken, start, s - start);
6878                 }
6879 #endif
6880             }
6881             pl_yylval.ival = 1;
6882             OPERATOR(MY);
6883
6884         case KEY_next:
6885             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6886             LOOPX(OP_NEXT);
6887
6888         case KEY_ne:
6889             Eop(OP_SNE);
6890
6891         case KEY_no:
6892             s = tokenize_use(0, s);
6893             OPERATOR(USE);
6894
6895         case KEY_not:
6896             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6897                 FUN1(OP_NOT);
6898             else
6899                 OPERATOR(NOTOP);
6900
6901         case KEY_open:
6902             s = SKIPSPACE1(s);
6903             if (isIDFIRST_lazy_if(s,UTF)) {
6904                 const char *t;
6905                 for (d = s; isALNUM_lazy_if(d,UTF);)
6906                     d++;
6907                 for (t=d; isSPACE(*t);)
6908                     t++;
6909                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6910                     /* [perl #16184] */
6911                     && !(t[0] == '=' && t[1] == '>')
6912                 ) {
6913                     int parms_len = (int)(d-s);
6914                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6915                            "Precedence problem: open %.*s should be open(%.*s)",
6916                             parms_len, s, parms_len, s);
6917                 }
6918             }
6919             LOP(OP_OPEN,XTERM);
6920
6921         case KEY_or:
6922             pl_yylval.ival = OP_OR;
6923             OPERATOR(OROP);
6924
6925         case KEY_ord:
6926             UNI(OP_ORD);
6927
6928         case KEY_oct:
6929             UNI(OP_OCT);
6930
6931         case KEY_opendir:
6932             LOP(OP_OPEN_DIR,XTERM);
6933
6934         case KEY_print:
6935             checkcomma(s,PL_tokenbuf,"filehandle");
6936             LOP(OP_PRINT,XREF);
6937
6938         case KEY_printf:
6939             checkcomma(s,PL_tokenbuf,"filehandle");
6940             LOP(OP_PRTF,XREF);
6941
6942         case KEY_prototype:
6943             UNI(OP_PROTOTYPE);
6944
6945         case KEY_push:
6946             LOP(OP_PUSH,XTERM);
6947
6948         case KEY_pop:
6949             UNIDOR(OP_POP);
6950
6951         case KEY_pos:
6952             UNIDOR(OP_POS);
6953
6954         case KEY_pack:
6955             LOP(OP_PACK,XTERM);
6956
6957         case KEY_package:
6958             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6959             s = force_version(s, FALSE);
6960             OPERATOR(PACKAGE);
6961
6962         case KEY_pipe:
6963             LOP(OP_PIPE_OP,XTERM);
6964
6965         case KEY_q:
6966             s = scan_str(s,!!PL_madskills,FALSE);
6967             if (!s)
6968                 missingterm(NULL);
6969             pl_yylval.ival = OP_CONST;
6970             TERM(sublex_start());
6971
6972         case KEY_quotemeta:
6973             UNI(OP_QUOTEMETA);
6974
6975         case KEY_qw:
6976             s = scan_str(s,!!PL_madskills,FALSE);
6977             if (!s)
6978                 missingterm(NULL);
6979             PL_expect = XOPERATOR;
6980             force_next(')');
6981             if (SvCUR(PL_lex_stuff)) {
6982                 OP *words = NULL;
6983                 int warned = 0;
6984                 d = SvPV_force(PL_lex_stuff, len);
6985                 while (len) {
6986                     for (; isSPACE(*d) && len; --len, ++d)
6987                         /**/;
6988                     if (len) {
6989                         SV *sv;
6990                         const char *b = d;
6991                         if (!warned && ckWARN(WARN_QW)) {
6992                             for (; !isSPACE(*d) && len; --len, ++d) {
6993                                 if (*d == ',') {
6994                                     Perl_warner(aTHX_ packWARN(WARN_QW),
6995                                         "Possible attempt to separate words with commas");
6996                                     ++warned;
6997                                 }
6998                                 else if (*d == '#') {
6999                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7000                                         "Possible attempt to put comments in qw() list");
7001                                     ++warned;
7002                                 }
7003                             }
7004                         }
7005                         else {
7006                             for (; !isSPACE(*d) && len; --len, ++d)
7007                                 /**/;
7008                         }
7009                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7010                         words = append_elem(OP_LIST, words,
7011                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7012                     }
7013                 }
7014                 if (words) {
7015                     start_force(PL_curforce);
7016                     NEXTVAL_NEXTTOKE.opval = words;
7017                     force_next(THING);
7018                 }
7019             }
7020             if (PL_lex_stuff) {
7021                 SvREFCNT_dec(PL_lex_stuff);
7022                 PL_lex_stuff = NULL;
7023             }
7024             PL_expect = XTERM;
7025             TOKEN('(');
7026
7027         case KEY_qq:
7028             s = scan_str(s,!!PL_madskills,FALSE);
7029             if (!s)
7030                 missingterm(NULL);
7031             pl_yylval.ival = OP_STRINGIFY;
7032             if (SvIVX(PL_lex_stuff) == '\'')
7033                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7034             TERM(sublex_start());
7035
7036         case KEY_qr:
7037             s = scan_pat(s,OP_QR);
7038             TERM(sublex_start());
7039
7040         case KEY_qx:
7041             s = scan_str(s,!!PL_madskills,FALSE);
7042             if (!s)
7043                 missingterm(NULL);
7044             readpipe_override();
7045             TERM(sublex_start());
7046
7047         case KEY_return:
7048             OLDLOP(OP_RETURN);
7049
7050         case KEY_require:
7051             s = SKIPSPACE1(s);
7052             if (isDIGIT(*s)) {
7053                 s = force_version(s, FALSE);
7054             }
7055             else if (*s != 'v' || !isDIGIT(s[1])
7056                     || (s = force_version(s, TRUE), *s == 'v'))
7057             {
7058                 *PL_tokenbuf = '\0';
7059                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7060                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7061                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7062                 else if (*s == '<')
7063                     yyerror("<> should be quotes");
7064             }
7065             if (orig_keyword == KEY_require) {
7066                 orig_keyword = 0;
7067                 pl_yylval.ival = 1;
7068             }
7069             else
7070                 pl_yylval.ival = 0;
7071             PL_expect = XTERM;
7072             PL_bufptr = s;
7073             PL_last_uni = PL_oldbufptr;
7074             PL_last_lop_op = OP_REQUIRE;
7075             s = skipspace(s);
7076             return REPORT( (int)REQUIRE );
7077
7078         case KEY_reset:
7079             UNI(OP_RESET);
7080
7081         case KEY_redo:
7082             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7083             LOOPX(OP_REDO);
7084
7085         case KEY_rename:
7086             LOP(OP_RENAME,XTERM);
7087
7088         case KEY_rand:
7089             UNI(OP_RAND);
7090
7091         case KEY_rmdir:
7092             UNI(OP_RMDIR);
7093
7094         case KEY_rindex:
7095             LOP(OP_RINDEX,XTERM);
7096
7097         case KEY_read:
7098             LOP(OP_READ,XTERM);
7099
7100         case KEY_readdir:
7101             UNI(OP_READDIR);
7102
7103         case KEY_readline:
7104             UNIDOR(OP_READLINE);
7105
7106         case KEY_readpipe:
7107             UNIDOR(OP_BACKTICK);
7108
7109         case KEY_rewinddir:
7110             UNI(OP_REWINDDIR);
7111
7112         case KEY_recv:
7113             LOP(OP_RECV,XTERM);
7114
7115         case KEY_reverse:
7116             LOP(OP_REVERSE,XTERM);
7117
7118         case KEY_readlink:
7119             UNIDOR(OP_READLINK);
7120
7121         case KEY_ref:
7122             UNI(OP_REF);
7123
7124         case KEY_s:
7125             s = scan_subst(s);
7126             if (pl_yylval.opval)
7127                 TERM(sublex_start());
7128             else
7129                 TOKEN(1);       /* force error */
7130
7131         case KEY_say:
7132             checkcomma(s,PL_tokenbuf,"filehandle");
7133             LOP(OP_SAY,XREF);
7134
7135         case KEY_chomp:
7136             UNI(OP_CHOMP);
7137
7138         case KEY_scalar:
7139             UNI(OP_SCALAR);
7140
7141         case KEY_select:
7142             LOP(OP_SELECT,XTERM);
7143
7144         case KEY_seek:
7145             LOP(OP_SEEK,XTERM);
7146
7147         case KEY_semctl:
7148             LOP(OP_SEMCTL,XTERM);
7149
7150         case KEY_semget:
7151             LOP(OP_SEMGET,XTERM);
7152
7153         case KEY_semop:
7154             LOP(OP_SEMOP,XTERM);
7155
7156         case KEY_send:
7157             LOP(OP_SEND,XTERM);
7158
7159         case KEY_setpgrp:
7160             LOP(OP_SETPGRP,XTERM);
7161
7162         case KEY_setpriority:
7163             LOP(OP_SETPRIORITY,XTERM);
7164
7165         case KEY_sethostent:
7166             UNI(OP_SHOSTENT);
7167
7168         case KEY_setnetent:
7169             UNI(OP_SNETENT);
7170
7171         case KEY_setservent:
7172             UNI(OP_SSERVENT);
7173
7174         case KEY_setprotoent:
7175             UNI(OP_SPROTOENT);
7176
7177         case KEY_setpwent:
7178             FUN0(OP_SPWENT);
7179
7180         case KEY_setgrent:
7181             FUN0(OP_SGRENT);
7182
7183         case KEY_seekdir:
7184             LOP(OP_SEEKDIR,XTERM);
7185
7186         case KEY_setsockopt:
7187             LOP(OP_SSOCKOPT,XTERM);
7188
7189         case KEY_shift:
7190             UNIDOR(OP_SHIFT);
7191
7192         case KEY_shmctl:
7193             LOP(OP_SHMCTL,XTERM);
7194
7195         case KEY_shmget:
7196             LOP(OP_SHMGET,XTERM);
7197
7198         case KEY_shmread:
7199             LOP(OP_SHMREAD,XTERM);
7200
7201         case KEY_shmwrite:
7202             LOP(OP_SHMWRITE,XTERM);
7203
7204         case KEY_shutdown:
7205             LOP(OP_SHUTDOWN,XTERM);
7206
7207         case KEY_sin:
7208             UNI(OP_SIN);
7209
7210         case KEY_sleep:
7211             UNI(OP_SLEEP);
7212
7213         case KEY_socket:
7214             LOP(OP_SOCKET,XTERM);
7215
7216         case KEY_socketpair:
7217             LOP(OP_SOCKPAIR,XTERM);
7218
7219         case KEY_sort:
7220             checkcomma(s,PL_tokenbuf,"subroutine name");
7221             s = SKIPSPACE1(s);
7222             if (*s == ';' || *s == ')')         /* probably a close */
7223                 Perl_croak(aTHX_ "sort is now a reserved word");
7224             PL_expect = XTERM;
7225             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7226             LOP(OP_SORT,XREF);
7227
7228         case KEY_split:
7229             LOP(OP_SPLIT,XTERM);
7230
7231         case KEY_sprintf:
7232             LOP(OP_SPRINTF,XTERM);
7233
7234         case KEY_splice:
7235             LOP(OP_SPLICE,XTERM);
7236
7237         case KEY_sqrt:
7238             UNI(OP_SQRT);
7239
7240         case KEY_srand:
7241             UNI(OP_SRAND);
7242
7243         case KEY_stat:
7244             UNI(OP_STAT);
7245
7246         case KEY_study:
7247             UNI(OP_STUDY);
7248
7249         case KEY_substr:
7250             LOP(OP_SUBSTR,XTERM);
7251
7252         case KEY_format:
7253         case KEY_sub:
7254           really_sub:
7255             {
7256                 char tmpbuf[sizeof PL_tokenbuf];
7257                 SSize_t tboffset = 0;
7258                 expectation attrful;
7259                 bool have_name, have_proto;
7260                 const int key = tmp;
7261
7262 #ifdef PERL_MAD
7263                 SV *tmpwhite = 0;
7264
7265                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7266                 SV *subtoken = newSVpvn(tstart, s - tstart);
7267                 PL_thistoken = 0;
7268
7269                 d = s;
7270                 s = SKIPSPACE2(s,tmpwhite);
7271 #else
7272                 s = skipspace(s);
7273 #endif
7274
7275                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7276                     (*s == ':' && s[1] == ':'))
7277                 {
7278 #ifdef PERL_MAD
7279                     SV *nametoke = NULL;
7280 #endif
7281
7282                     PL_expect = XBLOCK;
7283                     attrful = XATTRBLOCK;
7284                     /* remember buffer pos'n for later force_word */
7285                     tboffset = s - PL_oldbufptr;
7286                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7287 #ifdef PERL_MAD
7288                     if (PL_madskills)
7289                         nametoke = newSVpvn(s, d - s);
7290 #endif
7291                     if (memchr(tmpbuf, ':', len))
7292                         sv_setpvn(PL_subname, tmpbuf, len);
7293                     else {
7294                         sv_setsv(PL_subname,PL_curstname);
7295                         sv_catpvs(PL_subname,"::");
7296                         sv_catpvn(PL_subname,tmpbuf,len);
7297                     }
7298                     have_name = TRUE;
7299
7300 #ifdef PERL_MAD
7301
7302                     start_force(0);
7303                     CURMAD('X', nametoke);
7304                     CURMAD('_', tmpwhite);
7305                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7306                                       FALSE, TRUE, TRUE);
7307
7308                     s = SKIPSPACE2(d,tmpwhite);
7309 #else
7310                     s = skipspace(d);
7311 #endif
7312                 }
7313                 else {
7314                     if (key == KEY_my)
7315                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7316                     PL_expect = XTERMBLOCK;
7317                     attrful = XATTRTERM;
7318                     sv_setpvs(PL_subname,"?");
7319                     have_name = FALSE;
7320                 }
7321
7322                 if (key == KEY_format) {
7323                     if (*s == '=')
7324                         PL_lex_formbrack = PL_lex_brackets + 1;
7325 #ifdef PERL_MAD
7326                     PL_thistoken = subtoken;
7327                     s = d;
7328 #else
7329                     if (have_name)
7330                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7331                                           FALSE, TRUE, TRUE);
7332 #endif
7333                     OPERATOR(FORMAT);
7334                 }
7335
7336                 /* Look for a prototype */
7337                 if (*s == '(') {
7338                     char *p;
7339                     bool bad_proto = FALSE;
7340                     bool in_brackets = FALSE;
7341                     char greedy_proto = ' ';
7342                     bool proto_after_greedy_proto = FALSE;
7343                     bool must_be_last = FALSE;
7344                     bool underscore = FALSE;
7345                     bool seen_underscore = FALSE;
7346                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
7347
7348                     s = scan_str(s,!!PL_madskills,FALSE);
7349                     if (!s)
7350                         Perl_croak(aTHX_ "Prototype not terminated");
7351                     /* strip spaces and check for bad characters */
7352                     d = SvPVX(PL_lex_stuff);
7353                     tmp = 0;
7354                     for (p = d; *p; ++p) {
7355                         if (!isSPACE(*p)) {
7356                             d[tmp++] = *p;
7357
7358                             if (warnsyntax) {
7359                                 if (must_be_last)
7360                                     proto_after_greedy_proto = TRUE;
7361                                 if (!strchr("$@%*;[]&\\_", *p)) {
7362                                     bad_proto = TRUE;
7363                                 }
7364                                 else {
7365                                     if ( underscore ) {
7366                                         if ( *p != ';' )
7367                                             bad_proto = TRUE;
7368                                         underscore = FALSE;
7369                                     }
7370                                     if ( *p == '[' ) {
7371                                         in_brackets = TRUE;
7372                                     }
7373                                     else if ( *p == ']' ) {
7374                                         in_brackets = FALSE;
7375                                     }
7376                                     else if ( (*p == '@' || *p == '%') &&
7377                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7378                                          !in_brackets ) {
7379                                         must_be_last = TRUE;
7380                                         greedy_proto = *p;
7381                                     }
7382                                     else if ( *p == '_' ) {
7383                                         underscore = seen_underscore = TRUE;
7384                                     }
7385                                 }
7386                             }
7387                         }
7388                     }
7389                     d[tmp] = '\0';
7390                     if (proto_after_greedy_proto)
7391                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7392                                     "Prototype after '%c' for %"SVf" : %s",
7393                                     greedy_proto, SVfARG(PL_subname), d);
7394                     if (bad_proto)
7395                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7396                                     "Illegal character %sin prototype for %"SVf" : %s",
7397                                     seen_underscore ? "after '_' " : "",
7398                                     SVfARG(PL_subname), d);
7399                     SvCUR_set(PL_lex_stuff, tmp);
7400                     have_proto = TRUE;
7401
7402 #ifdef PERL_MAD
7403                     start_force(0);
7404                     CURMAD('q', PL_thisopen);
7405                     CURMAD('_', tmpwhite);
7406                     CURMAD('=', PL_thisstuff);
7407                     CURMAD('Q', PL_thisclose);
7408                     NEXTVAL_NEXTTOKE.opval =
7409                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7410                     PL_lex_stuff = NULL;
7411                     force_next(THING);
7412
7413                     s = SKIPSPACE2(s,tmpwhite);
7414 #else
7415                     s = skipspace(s);
7416 #endif
7417                 }
7418                 else
7419                     have_proto = FALSE;
7420
7421                 if (*s == ':' && s[1] != ':')
7422                     PL_expect = attrful;
7423                 else if (*s != '{' && key == KEY_sub) {
7424                     if (!have_name)
7425                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7426                     else if (*s != ';')
7427                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7428                 }
7429
7430 #ifdef PERL_MAD
7431                 start_force(0);
7432                 if (tmpwhite) {
7433                     if (PL_madskills)
7434                         curmad('^', newSVpvs(""));
7435                     CURMAD('_', tmpwhite);
7436                 }
7437                 force_next(0);
7438
7439                 PL_thistoken = subtoken;
7440 #else
7441                 if (have_proto) {
7442                     NEXTVAL_NEXTTOKE.opval =
7443                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7444                     PL_lex_stuff = NULL;
7445                     force_next(THING);
7446                 }
7447 #endif
7448                 if (!have_name) {
7449                     if (PL_curstash)
7450                         sv_setpvs(PL_subname, "__ANON__");
7451                     else
7452                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7453                     TOKEN(ANONSUB);
7454                 }
7455 #ifndef PERL_MAD
7456                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7457                                   FALSE, TRUE, TRUE);
7458 #endif
7459                 if (key == KEY_my)
7460                     TOKEN(MYSUB);
7461                 TOKEN(SUB);
7462             }
7463
7464         case KEY_system:
7465             LOP(OP_SYSTEM,XREF);
7466
7467         case KEY_symlink:
7468             LOP(OP_SYMLINK,XTERM);
7469
7470         case KEY_syscall:
7471             LOP(OP_SYSCALL,XTERM);
7472
7473         case KEY_sysopen:
7474             LOP(OP_SYSOPEN,XTERM);
7475
7476         case KEY_sysseek:
7477             LOP(OP_SYSSEEK,XTERM);
7478
7479         case KEY_sysread:
7480             LOP(OP_SYSREAD,XTERM);
7481
7482         case KEY_syswrite:
7483             LOP(OP_SYSWRITE,XTERM);
7484
7485         case KEY_tr:
7486             s = scan_trans(s);
7487             TERM(sublex_start());
7488
7489         case KEY_tell:
7490             UNI(OP_TELL);
7491
7492         case KEY_telldir:
7493             UNI(OP_TELLDIR);
7494
7495         case KEY_tie:
7496             LOP(OP_TIE,XTERM);
7497
7498         case KEY_tied:
7499             UNI(OP_TIED);
7500
7501         case KEY_time:
7502             FUN0(OP_TIME);
7503
7504         case KEY_times:
7505             FUN0(OP_TMS);
7506
7507         case KEY_truncate:
7508             LOP(OP_TRUNCATE,XTERM);
7509
7510         case KEY_uc:
7511             UNI(OP_UC);
7512
7513         case KEY_ucfirst:
7514             UNI(OP_UCFIRST);
7515
7516         case KEY_untie:
7517             UNI(OP_UNTIE);
7518
7519         case KEY_until:
7520             pl_yylval.ival = CopLINE(PL_curcop);
7521             OPERATOR(UNTIL);
7522
7523         case KEY_unless:
7524             pl_yylval.ival = CopLINE(PL_curcop);
7525             OPERATOR(UNLESS);
7526
7527         case KEY_unlink:
7528             LOP(OP_UNLINK,XTERM);
7529
7530         case KEY_undef:
7531             UNIDOR(OP_UNDEF);
7532
7533         case KEY_unpack:
7534             LOP(OP_UNPACK,XTERM);
7535
7536         case KEY_utime:
7537             LOP(OP_UTIME,XTERM);
7538
7539         case KEY_umask:
7540             UNIDOR(OP_UMASK);
7541
7542         case KEY_unshift:
7543             LOP(OP_UNSHIFT,XTERM);
7544
7545         case KEY_use:
7546             s = tokenize_use(1, s);
7547             OPERATOR(USE);
7548
7549         case KEY_values:
7550             UNI(OP_VALUES);
7551
7552         case KEY_vec:
7553             LOP(OP_VEC,XTERM);
7554
7555         case KEY_when:
7556             pl_yylval.ival = CopLINE(PL_curcop);
7557             OPERATOR(WHEN);
7558
7559         case KEY_while:
7560             pl_yylval.ival = CopLINE(PL_curcop);
7561             OPERATOR(WHILE);
7562
7563         case KEY_warn:
7564             PL_hints |= HINT_BLOCK_SCOPE;
7565             LOP(OP_WARN,XTERM);
7566
7567         case KEY_wait:
7568             FUN0(OP_WAIT);
7569
7570         case KEY_waitpid:
7571             LOP(OP_WAITPID,XTERM);
7572
7573         case KEY_wantarray:
7574             FUN0(OP_WANTARRAY);
7575
7576         case KEY_write:
7577 #ifdef EBCDIC
7578         {
7579             char ctl_l[2];
7580             ctl_l[0] = toCTRL('L');
7581             ctl_l[1] = '\0';
7582             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7583         }
7584 #else
7585             /* Make sure $^L is defined */
7586             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7587 #endif
7588             UNI(OP_ENTERWRITE);
7589
7590         case KEY_x:
7591             if (PL_expect == XOPERATOR)
7592                 Mop(OP_REPEAT);
7593             check_uni();
7594             goto just_a_word;
7595
7596         case KEY_xor:
7597             pl_yylval.ival = OP_XOR;
7598             OPERATOR(OROP);
7599
7600         case KEY_y:
7601             s = scan_trans(s);
7602             TERM(sublex_start());
7603         }
7604     }}
7605 }
7606 #ifdef __SC__
7607 #pragma segment Main
7608 #endif
7609
7610 static int
7611 S_pending_ident(pTHX)
7612 {
7613     dVAR;
7614     register char *d;
7615     PADOFFSET tmp = 0;
7616     /* pit holds the identifier we read and pending_ident is reset */
7617     char pit = PL_pending_ident;
7618     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7619     /* All routes through this function want to know if there is a colon.  */
7620     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7621     PL_pending_ident = 0;
7622
7623     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7624     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7625           "### Pending identifier '%s'\n", PL_tokenbuf); });
7626
7627     /* if we're in a my(), we can't allow dynamics here.
7628        $foo'bar has already been turned into $foo::bar, so
7629        just check for colons.
7630
7631        if it's a legal name, the OP is a PADANY.
7632     */
7633     if (PL_in_my) {
7634         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7635             if (has_colon)
7636                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7637                                   "variable %s in \"our\"",
7638                                   PL_tokenbuf));
7639             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7640         }
7641         else {
7642             if (has_colon)
7643                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7644                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7645
7646             pl_yylval.opval = newOP(OP_PADANY, 0);
7647             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7648             return PRIVATEREF;
7649         }
7650     }
7651
7652     /*
7653        build the ops for accesses to a my() variable.
7654
7655        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7656        then used in a comparison.  This catches most, but not
7657        all cases.  For instance, it catches
7658            sort { my($a); $a <=> $b }
7659        but not
7660            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7661        (although why you'd do that is anyone's guess).
7662     */
7663
7664     if (!has_colon) {
7665         if (!PL_in_my)
7666             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7667         if (tmp != NOT_IN_PAD) {
7668             /* might be an "our" variable" */
7669             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7670                 /* build ops for a bareword */
7671                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7672                 HEK * const stashname = HvNAME_HEK(stash);
7673                 SV *  const sym = newSVhek(stashname);
7674                 sv_catpvs(sym, "::");
7675                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7676                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7677                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7678                 gv_fetchsv(sym,
7679                     (PL_in_eval
7680                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7681                         : GV_ADDMULTI
7682                     ),
7683                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7684                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7685                      : SVt_PVHV));
7686                 return WORD;
7687             }
7688
7689             /* if it's a sort block and they're naming $a or $b */
7690             if (PL_last_lop_op == OP_SORT &&
7691                 PL_tokenbuf[0] == '$' &&
7692                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7693                 && !PL_tokenbuf[2])
7694             {
7695                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7696                      d < PL_bufend && *d != '\n';
7697                      d++)
7698                 {
7699                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7700                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7701                               PL_tokenbuf);
7702                     }
7703                 }
7704             }
7705
7706             pl_yylval.opval = newOP(OP_PADANY, 0);
7707             pl_yylval.opval->op_targ = tmp;
7708             return PRIVATEREF;
7709         }
7710     }
7711
7712     /*
7713        Whine if they've said @foo in a doublequoted string,
7714        and @foo isn't a variable we can find in the symbol
7715        table.
7716     */
7717     if (ckWARN(WARN_AMBIGUOUS) &&
7718         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7719         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7720                                          SVt_PVAV);
7721         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7722                 /* DO NOT warn for @- and @+ */
7723                 && !( PL_tokenbuf[2] == '\0' &&
7724                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7725            )
7726         {
7727             /* Downgraded from fatal to warning 20000522 mjd */
7728             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7729                         "Possible unintended interpolation of %s in string",
7730                         PL_tokenbuf);
7731         }
7732     }
7733
7734     /* build ops for a bareword */
7735     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7736                                                       tokenbuf_len - 1));
7737     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7738     gv_fetchpvn_flags(
7739             PL_tokenbuf + 1, tokenbuf_len - 1,
7740             /* If the identifier refers to a stash, don't autovivify it.
7741              * Change 24660 had the side effect of causing symbol table
7742              * hashes to always be defined, even if they were freshly
7743              * created and the only reference in the entire program was
7744              * the single statement with the defined %foo::bar:: test.
7745              * It appears that all code in the wild doing this actually
7746              * wants to know whether sub-packages have been loaded, so
7747              * by avoiding auto-vivifying symbol tables, we ensure that
7748              * defined %foo::bar:: continues to be false, and the existing
7749              * tests still give the expected answers, even though what
7750              * they're actually testing has now changed subtly.
7751              */
7752             (*PL_tokenbuf == '%'
7753              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7754              && d[-1] == ':'
7755              ? 0
7756              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7757             ((PL_tokenbuf[0] == '$') ? SVt_PV
7758              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7759              : SVt_PVHV));
7760     return WORD;
7761 }
7762
7763 /*
7764  *  The following code was generated by perl_keyword.pl.
7765  */
7766
7767 I32
7768 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7769 {
7770     dVAR;
7771
7772     PERL_ARGS_ASSERT_KEYWORD;
7773
7774   switch (len)
7775   {
7776     case 1: /* 5 tokens of length 1 */
7777       switch (name[0])
7778       {
7779         case 'm':
7780           {                                       /* m          */
7781             return KEY_m;
7782           }
7783
7784         case 'q':
7785           {                                       /* q          */
7786             return KEY_q;
7787           }
7788
7789         case 's':
7790           {                                       /* s          */
7791             return KEY_s;
7792           }
7793
7794         case 'x':
7795           {                                       /* x          */
7796             return -KEY_x;
7797           }
7798
7799         case 'y':
7800           {                                       /* y          */
7801             return KEY_y;
7802           }
7803
7804         default:
7805           goto unknown;
7806       }
7807
7808     case 2: /* 18 tokens of length 2 */
7809       switch (name[0])
7810       {
7811         case 'd':
7812           if (name[1] == 'o')
7813           {                                       /* do         */
7814             return KEY_do;
7815           }
7816
7817           goto unknown;
7818
7819         case 'e':
7820           if (name[1] == 'q')
7821           {                                       /* eq         */
7822             return -KEY_eq;
7823           }
7824
7825           goto unknown;
7826
7827         case 'g':
7828           switch (name[1])
7829           {
7830             case 'e':
7831               {                                   /* ge         */
7832                 return -KEY_ge;
7833               }
7834
7835             case 't':
7836               {                                   /* gt         */
7837                 return -KEY_gt;
7838               }
7839
7840             default:
7841               goto unknown;
7842           }
7843
7844         case 'i':
7845           if (name[1] == 'f')
7846           {                                       /* if         */
7847             return KEY_if;
7848           }
7849
7850           goto unknown;
7851
7852         case 'l':
7853           switch (name[1])
7854           {
7855             case 'c':
7856               {                                   /* lc         */
7857                 return -KEY_lc;
7858               }
7859
7860             case 'e':
7861               {                                   /* le         */
7862                 return -KEY_le;
7863               }
7864
7865             case 't':
7866               {                                   /* lt         */
7867                 return -KEY_lt;
7868               }
7869
7870             default:
7871               goto unknown;
7872           }
7873
7874         case 'm':
7875           if (name[1] == 'y')
7876           {                                       /* my         */
7877             return KEY_my;
7878           }
7879
7880           goto unknown;
7881
7882         case 'n':
7883           switch (name[1])
7884           {
7885             case 'e':
7886               {                                   /* ne         */
7887                 return -KEY_ne;
7888               }
7889
7890             case 'o':
7891               {                                   /* no         */
7892                 return KEY_no;
7893               }
7894
7895             default:
7896               goto unknown;
7897           }
7898
7899         case 'o':
7900           if (name[1] == 'r')
7901           {                                       /* or         */
7902             return -KEY_or;
7903           }
7904
7905           goto unknown;
7906
7907         case 'q':
7908           switch (name[1])
7909           {
7910             case 'q':
7911               {                                   /* qq         */
7912                 return KEY_qq;
7913               }
7914
7915             case 'r':
7916               {                                   /* qr         */
7917                 return KEY_qr;
7918               }
7919
7920             case 'w':
7921               {                                   /* qw         */
7922                 return KEY_qw;
7923               }
7924
7925             case 'x':
7926               {                                   /* qx         */
7927                 return KEY_qx;
7928               }
7929
7930             default:
7931               goto unknown;
7932           }
7933
7934         case 't':
7935           if (name[1] == 'r')
7936           {                                       /* tr         */
7937             return KEY_tr;
7938           }
7939
7940           goto unknown;
7941
7942         case 'u':
7943           if (name[1] == 'c')
7944           {                                       /* uc         */
7945             return -KEY_uc;
7946           }
7947
7948           goto unknown;
7949
7950         default:
7951           goto unknown;
7952       }
7953
7954     case 3: /* 29 tokens of length 3 */
7955       switch (name[0])
7956       {
7957         case 'E':
7958           if (name[1] == 'N' &&
7959               name[2] == 'D')
7960           {                                       /* END        */
7961             return KEY_END;
7962           }
7963
7964           goto unknown;
7965
7966         case 'a':
7967           switch (name[1])
7968           {
7969             case 'b':
7970               if (name[2] == 's')
7971               {                                   /* abs        */
7972                 return -KEY_abs;
7973               }
7974
7975               goto unknown;
7976
7977             case 'n':
7978               if (name[2] == 'd')
7979               {                                   /* and        */
7980                 return -KEY_and;
7981               }
7982
7983               goto unknown;
7984
7985             default:
7986               goto unknown;
7987           }
7988
7989         case 'c':
7990           switch (name[1])
7991           {
7992             case 'h':
7993               if (name[2] == 'r')
7994               {                                   /* chr        */
7995                 return -KEY_chr;
7996               }
7997
7998               goto unknown;
7999
8000             case 'm':
8001               if (name[2] == 'p')
8002               {                                   /* cmp        */
8003                 return -KEY_cmp;
8004               }
8005
8006               goto unknown;
8007
8008             case 'o':
8009               if (name[2] == 's')
8010               {                                   /* cos        */
8011                 return -KEY_cos;
8012               }
8013
8014               goto unknown;
8015
8016             default:
8017               goto unknown;
8018           }
8019
8020         case 'd':
8021           if (name[1] == 'i' &&
8022               name[2] == 'e')
8023           {                                       /* die        */
8024             return -KEY_die;
8025           }
8026
8027           goto unknown;
8028
8029         case 'e':
8030           switch (name[1])
8031           {
8032             case 'o':
8033               if (name[2] == 'f')
8034               {                                   /* eof        */
8035                 return -KEY_eof;
8036               }
8037
8038               goto unknown;
8039
8040             case 'x':
8041               if (name[2] == 'p')
8042               {                                   /* exp        */
8043                 return -KEY_exp;
8044               }
8045
8046               goto unknown;
8047
8048             default:
8049               goto unknown;
8050           }
8051
8052         case 'f':
8053           if (name[1] == 'o' &&
8054               name[2] == 'r')
8055           {                                       /* for        */
8056             return KEY_for;
8057           }
8058
8059           goto unknown;
8060
8061         case 'h':
8062           if (name[1] == 'e' &&
8063               name[2] == 'x')
8064           {                                       /* hex        */
8065             return -KEY_hex;
8066           }
8067
8068           goto unknown;
8069
8070         case 'i':
8071           if (name[1] == 'n' &&
8072               name[2] == 't')
8073           {                                       /* int        */
8074             return -KEY_int;
8075           }
8076
8077           goto unknown;
8078
8079         case 'l':
8080           if (name[1] == 'o' &&
8081               name[2] == 'g')
8082           {                                       /* log        */
8083             return -KEY_log;
8084           }
8085
8086           goto unknown;
8087
8088         case 'm':
8089           if (name[1] == 'a' &&
8090               name[2] == 'p')
8091           {                                       /* map        */
8092             return KEY_map;
8093           }
8094
8095           goto unknown;
8096
8097         case 'n':
8098           if (name[1] == 'o' &&
8099               name[2] == 't')
8100           {                                       /* not        */
8101             return -KEY_not;
8102           }
8103
8104           goto unknown;
8105
8106         case 'o':
8107           switch (name[1])
8108           {
8109             case 'c':
8110               if (name[2] == 't')
8111               {                                   /* oct        */
8112                 return -KEY_oct;
8113               }
8114
8115               goto unknown;
8116
8117             case 'r':
8118               if (name[2] == 'd')
8119               {                                   /* ord        */
8120                 return -KEY_ord;
8121               }
8122
8123               goto unknown;
8124
8125             case 'u':
8126               if (name[2] == 'r')
8127               {                                   /* our        */
8128                 return KEY_our;
8129               }
8130
8131               goto unknown;
8132
8133             default:
8134               goto unknown;
8135           }
8136
8137         case 'p':
8138           if (name[1] == 'o')
8139           {
8140             switch (name[2])
8141             {
8142               case 'p':
8143                 {                                 /* pop        */
8144                   return -KEY_pop;
8145                 }
8146
8147               case 's':
8148                 {                                 /* pos        */
8149                   return KEY_pos;
8150                 }
8151
8152               default:
8153                 goto unknown;
8154             }
8155           }
8156
8157           goto unknown;
8158
8159         case 'r':
8160           if (name[1] == 'e' &&
8161               name[2] == 'f')
8162           {                                       /* ref        */
8163             return -KEY_ref;
8164           }
8165
8166           goto unknown;
8167
8168         case 's':
8169           switch (name[1])
8170           {
8171             case 'a':
8172               if (name[2] == 'y')
8173               {                                   /* say        */
8174                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8175               }
8176
8177               goto unknown;
8178
8179             case 'i':
8180               if (name[2] == 'n')
8181               {                                   /* sin        */
8182                 return -KEY_sin;
8183               }
8184
8185               goto unknown;
8186
8187             case 'u':
8188               if (name[2] == 'b')
8189               {                                   /* sub        */
8190                 return KEY_sub;
8191               }
8192
8193               goto unknown;
8194
8195             default:
8196               goto unknown;
8197           }
8198
8199         case 't':
8200           if (name[1] == 'i' &&
8201               name[2] == 'e')
8202           {                                       /* tie        */
8203             return KEY_tie;
8204           }
8205
8206           goto unknown;
8207
8208         case 'u':
8209           if (name[1] == 's' &&
8210               name[2] == 'e')
8211           {                                       /* use        */
8212             return KEY_use;
8213           }
8214
8215           goto unknown;
8216
8217         case 'v':
8218           if (name[1] == 'e' &&
8219               name[2] == 'c')
8220           {                                       /* vec        */
8221             return -KEY_vec;
8222           }
8223
8224           goto unknown;
8225
8226         case 'x':
8227           if (name[1] == 'o' &&
8228               name[2] == 'r')
8229           {                                       /* xor        */
8230             return -KEY_xor;
8231           }
8232
8233           goto unknown;
8234
8235         default:
8236           goto unknown;
8237       }
8238
8239     case 4: /* 41 tokens of length 4 */
8240       switch (name[0])
8241       {
8242         case 'C':
8243           if (name[1] == 'O' &&
8244               name[2] == 'R' &&
8245               name[3] == 'E')
8246           {                                       /* CORE       */
8247             return -KEY_CORE;
8248           }
8249
8250           goto unknown;
8251
8252         case 'I':
8253           if (name[1] == 'N' &&
8254               name[2] == 'I' &&
8255               name[3] == 'T')
8256           {                                       /* INIT       */
8257             return KEY_INIT;
8258           }
8259
8260           goto unknown;
8261
8262         case 'b':
8263           if (name[1] == 'i' &&
8264               name[2] == 'n' &&
8265               name[3] == 'd')
8266           {                                       /* bind       */
8267             return -KEY_bind;
8268           }
8269
8270           goto unknown;
8271
8272         case 'c':
8273           if (name[1] == 'h' &&
8274               name[2] == 'o' &&
8275               name[3] == 'p')
8276           {                                       /* chop       */
8277             return -KEY_chop;
8278           }
8279
8280           goto unknown;
8281
8282         case 'd':
8283           if (name[1] == 'u' &&
8284               name[2] == 'm' &&
8285               name[3] == 'p')
8286           {                                       /* dump       */
8287             return -KEY_dump;
8288           }
8289
8290           goto unknown;
8291
8292         case 'e':
8293           switch (name[1])
8294           {
8295             case 'a':
8296               if (name[2] == 'c' &&
8297                   name[3] == 'h')
8298               {                                   /* each       */
8299                 return -KEY_each;
8300               }
8301
8302               goto unknown;
8303
8304             case 'l':
8305               if (name[2] == 's' &&
8306                   name[3] == 'e')
8307               {                                   /* else       */
8308                 return KEY_else;
8309               }
8310
8311               goto unknown;
8312
8313             case 'v':
8314               if (name[2] == 'a' &&
8315                   name[3] == 'l')
8316               {                                   /* eval       */
8317                 return KEY_eval;
8318               }
8319
8320               goto unknown;
8321
8322             case 'x':
8323               switch (name[2])
8324               {
8325                 case 'e':
8326                   if (name[3] == 'c')
8327                   {                               /* exec       */
8328                     return -KEY_exec;
8329                   }
8330
8331                   goto unknown;
8332
8333                 case 'i':
8334                   if (name[3] == 't')
8335                   {                               /* exit       */
8336                     return -KEY_exit;
8337                   }
8338
8339                   goto unknown;
8340
8341                 default:
8342                   goto unknown;
8343               }
8344
8345             default:
8346               goto unknown;
8347           }
8348
8349         case 'f':
8350           if (name[1] == 'o' &&
8351               name[2] == 'r' &&
8352               name[3] == 'k')
8353           {                                       /* fork       */
8354             return -KEY_fork;
8355           }
8356
8357           goto unknown;
8358
8359         case 'g':
8360           switch (name[1])
8361           {
8362             case 'e':
8363               if (name[2] == 't' &&
8364                   name[3] == 'c')
8365               {                                   /* getc       */
8366                 return -KEY_getc;
8367               }
8368
8369               goto unknown;
8370
8371             case 'l':
8372               if (name[2] == 'o' &&
8373                   name[3] == 'b')
8374               {                                   /* glob       */
8375                 return KEY_glob;
8376               }
8377
8378               goto unknown;
8379
8380             case 'o':
8381               if (name[2] == 't' &&
8382                   name[3] == 'o')
8383               {                                   /* goto       */
8384                 return KEY_goto;
8385               }
8386
8387               goto unknown;
8388
8389             case 'r':
8390               if (name[2] == 'e' &&
8391                   name[3] == 'p')
8392               {                                   /* grep       */
8393                 return KEY_grep;
8394               }
8395
8396               goto unknown;
8397
8398             default:
8399               goto unknown;
8400           }
8401
8402         case 'j':
8403           if (name[1] == 'o' &&
8404               name[2] == 'i' &&
8405               name[3] == 'n')
8406           {                                       /* join       */
8407             return -KEY_join;
8408           }
8409
8410           goto unknown;
8411
8412         case 'k':
8413           switch (name[1])
8414           {
8415             case 'e':
8416               if (name[2] == 'y' &&
8417                   name[3] == 's')
8418               {                                   /* keys       */
8419                 return -KEY_keys;
8420               }
8421
8422               goto unknown;
8423
8424             case 'i':
8425               if (name[2] == 'l' &&
8426                   name[3] == 'l')
8427               {                                   /* kill       */
8428                 return -KEY_kill;
8429               }
8430
8431               goto unknown;
8432
8433             default:
8434               goto unknown;
8435           }
8436
8437         case 'l':
8438           switch (name[1])
8439           {
8440             case 'a':
8441               if (name[2] == 's' &&
8442                   name[3] == 't')
8443               {                                   /* last       */
8444                 return KEY_last;
8445               }
8446
8447               goto unknown;
8448
8449             case 'i':
8450               if (name[2] == 'n' &&
8451                   name[3] == 'k')
8452               {                                   /* link       */
8453                 return -KEY_link;
8454               }
8455
8456               goto unknown;
8457
8458             case 'o':
8459               if (name[2] == 'c' &&
8460                   name[3] == 'k')
8461               {                                   /* lock       */
8462                 return -KEY_lock;
8463               }
8464
8465               goto unknown;
8466
8467             default:
8468               goto unknown;
8469           }
8470
8471         case 'n':
8472           if (name[1] == 'e' &&
8473               name[2] == 'x' &&
8474               name[3] == 't')
8475           {                                       /* next       */
8476             return KEY_next;
8477           }
8478
8479           goto unknown;
8480
8481         case 'o':
8482           if (name[1] == 'p' &&
8483               name[2] == 'e' &&
8484               name[3] == 'n')
8485           {                                       /* open       */
8486             return -KEY_open;
8487           }
8488
8489           goto unknown;
8490
8491         case 'p':
8492           switch (name[1])
8493           {
8494             case 'a':
8495               if (name[2] == 'c' &&
8496                   name[3] == 'k')
8497               {                                   /* pack       */
8498                 return -KEY_pack;
8499               }
8500
8501               goto unknown;
8502
8503             case 'i':
8504               if (name[2] == 'p' &&
8505                   name[3] == 'e')
8506               {                                   /* pipe       */
8507                 return -KEY_pipe;
8508               }
8509
8510               goto unknown;
8511
8512             case 'u':
8513               if (name[2] == 's' &&
8514                   name[3] == 'h')
8515               {                                   /* push       */
8516                 return -KEY_push;
8517               }
8518
8519               goto unknown;
8520
8521             default:
8522               goto unknown;
8523           }
8524
8525         case 'r':
8526           switch (name[1])
8527           {
8528             case 'a':
8529               if (name[2] == 'n' &&
8530                   name[3] == 'd')
8531               {                                   /* rand       */
8532                 return -KEY_rand;
8533               }
8534
8535               goto unknown;
8536
8537             case 'e':
8538               switch (name[2])
8539               {
8540                 case 'a':
8541                   if (name[3] == 'd')
8542                   {                               /* read       */
8543                     return -KEY_read;
8544                   }
8545
8546                   goto unknown;
8547
8548                 case 'c':
8549                   if (name[3] == 'v')
8550                   {                               /* recv       */
8551                     return -KEY_recv;
8552                   }
8553
8554                   goto unknown;
8555
8556                 case 'd':
8557                   if (name[3] == 'o')
8558                   {                               /* redo       */
8559                     return KEY_redo;
8560                   }
8561
8562                   goto unknown;
8563
8564                 default:
8565                   goto unknown;
8566               }
8567
8568             default:
8569               goto unknown;
8570           }
8571
8572         case 's':
8573           switch (name[1])
8574           {
8575             case 'e':
8576               switch (name[2])
8577               {
8578                 case 'e':
8579                   if (name[3] == 'k')
8580                   {                               /* seek       */
8581                     return -KEY_seek;
8582                   }
8583
8584                   goto unknown;
8585
8586                 case 'n':
8587                   if (name[3] == 'd')
8588                   {                               /* send       */
8589                     return -KEY_send;
8590                   }
8591
8592                   goto unknown;
8593
8594                 default:
8595                   goto unknown;
8596               }
8597
8598             case 'o':
8599               if (name[2] == 'r' &&
8600                   name[3] == 't')
8601               {                                   /* sort       */
8602                 return KEY_sort;
8603               }
8604
8605               goto unknown;
8606
8607             case 'q':
8608               if (name[2] == 'r' &&
8609                   name[3] == 't')
8610               {                                   /* sqrt       */
8611                 return -KEY_sqrt;
8612               }
8613
8614               goto unknown;
8615
8616             case 't':
8617               if (name[2] == 'a' &&
8618                   name[3] == 't')
8619               {                                   /* stat       */
8620                 return -KEY_stat;
8621               }
8622
8623               goto unknown;
8624
8625             default:
8626               goto unknown;
8627           }
8628
8629         case 't':
8630           switch (name[1])
8631           {
8632             case 'e':
8633               if (name[2] == 'l' &&
8634                   name[3] == 'l')
8635               {                                   /* tell       */
8636                 return -KEY_tell;
8637               }
8638
8639               goto unknown;
8640
8641             case 'i':
8642               switch (name[2])
8643               {
8644                 case 'e':
8645                   if (name[3] == 'd')
8646                   {                               /* tied       */
8647                     return KEY_tied;
8648                   }
8649
8650                   goto unknown;
8651
8652                 case 'm':
8653                   if (name[3] == 'e')
8654                   {                               /* time       */
8655                     return -KEY_time;
8656                   }
8657
8658                   goto unknown;
8659
8660                 default:
8661                   goto unknown;
8662               }
8663
8664             default:
8665               goto unknown;
8666           }
8667
8668         case 'w':
8669           switch (name[1])
8670           {
8671             case 'a':
8672               switch (name[2])
8673               {
8674                 case 'i':
8675                   if (name[3] == 't')
8676                   {                               /* wait       */
8677                     return -KEY_wait;
8678                   }
8679
8680                   goto unknown;
8681
8682                 case 'r':
8683                   if (name[3] == 'n')
8684                   {                               /* warn       */
8685                     return -KEY_warn;
8686                   }
8687
8688                   goto unknown;
8689
8690                 default:
8691                   goto unknown;
8692               }
8693
8694             case 'h':
8695               if (name[2] == 'e' &&
8696                   name[3] == 'n')
8697               {                                   /* when       */
8698                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8699               }
8700
8701               goto unknown;
8702
8703             default:
8704               goto unknown;
8705           }
8706
8707         default:
8708           goto unknown;
8709       }
8710
8711     case 5: /* 39 tokens of length 5 */
8712       switch (name[0])
8713       {
8714         case 'B':
8715           if (name[1] == 'E' &&
8716               name[2] == 'G' &&
8717               name[3] == 'I' &&
8718               name[4] == 'N')
8719           {                                       /* BEGIN      */
8720             return KEY_BEGIN;
8721           }
8722
8723           goto unknown;
8724
8725         case 'C':
8726           if (name[1] == 'H' &&
8727               name[2] == 'E' &&
8728               name[3] == 'C' &&
8729               name[4] == 'K')
8730           {                                       /* CHECK      */
8731             return KEY_CHECK;
8732           }
8733
8734           goto unknown;
8735
8736         case 'a':
8737           switch (name[1])
8738           {
8739             case 'l':
8740               if (name[2] == 'a' &&
8741                   name[3] == 'r' &&
8742                   name[4] == 'm')
8743               {                                   /* alarm      */
8744                 return -KEY_alarm;
8745               }
8746
8747               goto unknown;
8748
8749             case 't':
8750               if (name[2] == 'a' &&
8751                   name[3] == 'n' &&
8752                   name[4] == '2')
8753               {                                   /* atan2      */
8754                 return -KEY_atan2;
8755               }
8756
8757               goto unknown;
8758
8759             default:
8760               goto unknown;
8761           }
8762
8763         case 'b':
8764           switch (name[1])
8765           {
8766             case 'l':
8767               if (name[2] == 'e' &&
8768                   name[3] == 's' &&
8769                   name[4] == 's')
8770               {                                   /* bless      */
8771                 return -KEY_bless;
8772               }
8773
8774               goto unknown;
8775
8776             case 'r':
8777               if (name[2] == 'e' &&
8778                   name[3] == 'a' &&
8779                   name[4] == 'k')
8780               {                                   /* break      */
8781                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8782               }
8783
8784               goto unknown;
8785
8786             default:
8787               goto unknown;
8788           }
8789
8790         case 'c':
8791           switch (name[1])
8792           {
8793             case 'h':
8794               switch (name[2])
8795               {
8796                 case 'd':
8797                   if (name[3] == 'i' &&
8798                       name[4] == 'r')
8799                   {                               /* chdir      */
8800                     return -KEY_chdir;
8801                   }
8802
8803                   goto unknown;
8804
8805                 case 'm':
8806                   if (name[3] == 'o' &&
8807                       name[4] == 'd')
8808                   {                               /* chmod      */
8809                     return -KEY_chmod;
8810                   }
8811
8812                   goto unknown;
8813
8814                 case 'o':
8815                   switch (name[3])
8816                   {
8817                     case 'm':
8818                       if (name[4] == 'p')
8819                       {                           /* chomp      */
8820                         return -KEY_chomp;
8821                       }
8822
8823                       goto unknown;
8824
8825                     case 'w':
8826                       if (name[4] == 'n')
8827                       {                           /* chown      */
8828                         return -KEY_chown;
8829                       }
8830
8831                       goto unknown;
8832
8833                     default:
8834                       goto unknown;
8835                   }
8836
8837                 default:
8838                   goto unknown;
8839               }
8840
8841             case 'l':
8842               if (name[2] == 'o' &&
8843                   name[3] == 's' &&
8844                   name[4] == 'e')
8845               {                                   /* close      */
8846                 return -KEY_close;
8847               }
8848
8849               goto unknown;
8850
8851             case 'r':
8852               if (name[2] == 'y' &&
8853                   name[3] == 'p' &&
8854                   name[4] == 't')
8855               {                                   /* crypt      */
8856                 return -KEY_crypt;
8857               }
8858
8859               goto unknown;
8860
8861             default:
8862               goto unknown;
8863           }
8864
8865         case 'e':
8866           if (name[1] == 'l' &&
8867               name[2] == 's' &&
8868               name[3] == 'i' &&
8869               name[4] == 'f')
8870           {                                       /* elsif      */
8871             return KEY_elsif;
8872           }
8873
8874           goto unknown;
8875
8876         case 'f':
8877           switch (name[1])
8878           {
8879             case 'c':
8880               if (name[2] == 'n' &&
8881                   name[3] == 't' &&
8882                   name[4] == 'l')
8883               {                                   /* fcntl      */
8884                 return -KEY_fcntl;
8885               }
8886
8887               goto unknown;
8888
8889             case 'l':
8890               if (name[2] == 'o' &&
8891                   name[3] == 'c' &&
8892                   name[4] == 'k')
8893               {                                   /* flock      */
8894                 return -KEY_flock;
8895               }
8896
8897               goto unknown;
8898
8899             default:
8900               goto unknown;
8901           }
8902
8903         case 'g':
8904           if (name[1] == 'i' &&
8905               name[2] == 'v' &&
8906               name[3] == 'e' &&
8907               name[4] == 'n')
8908           {                                       /* given      */
8909             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8910           }
8911
8912           goto unknown;
8913
8914         case 'i':
8915           switch (name[1])
8916           {
8917             case 'n':
8918               if (name[2] == 'd' &&
8919                   name[3] == 'e' &&
8920                   name[4] == 'x')
8921               {                                   /* index      */
8922                 return -KEY_index;
8923               }
8924
8925               goto unknown;
8926
8927             case 'o':
8928               if (name[2] == 'c' &&
8929                   name[3] == 't' &&
8930                   name[4] == 'l')
8931               {                                   /* ioctl      */
8932                 return -KEY_ioctl;
8933               }
8934
8935               goto unknown;
8936
8937             default:
8938               goto unknown;
8939           }
8940
8941         case 'l':
8942           switch (name[1])
8943           {
8944             case 'o':
8945               if (name[2] == 'c' &&
8946                   name[3] == 'a' &&
8947                   name[4] == 'l')
8948               {                                   /* local      */
8949                 return KEY_local;
8950               }
8951
8952               goto unknown;
8953
8954             case 's':
8955               if (name[2] == 't' &&
8956                   name[3] == 'a' &&
8957                   name[4] == 't')
8958               {                                   /* lstat      */
8959                 return -KEY_lstat;
8960               }
8961
8962               goto unknown;
8963
8964             default:
8965               goto unknown;
8966           }
8967
8968         case 'm':
8969           if (name[1] == 'k' &&
8970               name[2] == 'd' &&
8971               name[3] == 'i' &&
8972               name[4] == 'r')
8973           {                                       /* mkdir      */
8974             return -KEY_mkdir;
8975           }
8976
8977           goto unknown;
8978
8979         case 'p':
8980           if (name[1] == 'r' &&
8981               name[2] == 'i' &&
8982               name[3] == 'n' &&
8983               name[4] == 't')
8984           {                                       /* print      */
8985             return KEY_print;
8986           }
8987
8988           goto unknown;
8989
8990         case 'r':
8991           switch (name[1])
8992           {
8993             case 'e':
8994               if (name[2] == 's' &&
8995                   name[3] == 'e' &&
8996                   name[4] == 't')
8997               {                                   /* reset      */
8998                 return -KEY_reset;
8999               }
9000
9001               goto unknown;
9002
9003             case 'm':
9004               if (name[2] == 'd' &&
9005                   name[3] == 'i' &&
9006                   name[4] == 'r')
9007               {                                   /* rmdir      */
9008                 return -KEY_rmdir;
9009               }
9010
9011               goto unknown;
9012
9013             default:
9014               goto unknown;
9015           }
9016
9017         case 's':
9018           switch (name[1])
9019           {
9020             case 'e':
9021               if (name[2] == 'm' &&
9022                   name[3] == 'o' &&
9023                   name[4] == 'p')
9024               {                                   /* semop      */
9025                 return -KEY_semop;
9026               }
9027
9028               goto unknown;
9029
9030             case 'h':
9031               if (name[2] == 'i' &&
9032                   name[3] == 'f' &&
9033                   name[4] == 't')
9034               {                                   /* shift      */
9035                 return -KEY_shift;
9036               }
9037
9038               goto unknown;
9039
9040             case 'l':
9041               if (name[2] == 'e' &&
9042                   name[3] == 'e' &&
9043                   name[4] == 'p')
9044               {                                   /* sleep      */
9045                 return -KEY_sleep;
9046               }
9047
9048               goto unknown;
9049
9050             case 'p':
9051               if (name[2] == 'l' &&
9052                   name[3] == 'i' &&
9053                   name[4] == 't')
9054               {                                   /* split      */
9055                 return KEY_split;
9056               }
9057
9058               goto unknown;
9059
9060             case 'r':
9061               if (name[2] == 'a' &&
9062                   name[3] == 'n' &&
9063                   name[4] == 'd')
9064               {                                   /* srand      */
9065                 return -KEY_srand;
9066               }
9067
9068               goto unknown;
9069
9070             case 't':
9071               switch (name[2])
9072               {
9073                 case 'a':
9074                   if (name[3] == 't' &&
9075                       name[4] == 'e')
9076                   {                               /* state      */
9077                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9078                   }
9079
9080                   goto unknown;
9081
9082                 case 'u':
9083                   if (name[3] == 'd' &&
9084                       name[4] == 'y')
9085                   {                               /* study      */
9086                     return KEY_study;
9087                   }
9088
9089                   goto unknown;
9090
9091                 default:
9092                   goto unknown;
9093               }
9094
9095             default:
9096               goto unknown;
9097           }
9098
9099         case 't':
9100           if (name[1] == 'i' &&
9101               name[2] == 'm' &&
9102               name[3] == 'e' &&
9103               name[4] == 's')
9104           {                                       /* times      */
9105             return -KEY_times;
9106           }
9107
9108           goto unknown;
9109
9110         case 'u':
9111           switch (name[1])
9112           {
9113             case 'm':
9114               if (name[2] == 'a' &&
9115                   name[3] == 's' &&
9116                   name[4] == 'k')
9117               {                                   /* umask      */
9118                 return -KEY_umask;
9119               }
9120
9121               goto unknown;
9122
9123             case 'n':
9124               switch (name[2])
9125               {
9126                 case 'd':
9127                   if (name[3] == 'e' &&
9128                       name[4] == 'f')
9129                   {                               /* undef      */
9130                     return KEY_undef;
9131                   }
9132
9133                   goto unknown;
9134
9135                 case 't':
9136                   if (name[3] == 'i')
9137                   {
9138                     switch (name[4])
9139                     {
9140                       case 'e':
9141                         {                         /* untie      */
9142                           return KEY_untie;
9143                         }
9144
9145                       case 'l':
9146                         {                         /* until      */
9147                           return KEY_until;
9148                         }
9149
9150                       default:
9151                         goto unknown;
9152                     }
9153                   }
9154
9155                   goto unknown;
9156
9157                 default:
9158                   goto unknown;
9159               }
9160
9161             case 't':
9162               if (name[2] == 'i' &&
9163                   name[3] == 'm' &&
9164                   name[4] == 'e')
9165               {                                   /* utime      */
9166                 return -KEY_utime;
9167               }
9168
9169               goto unknown;
9170
9171             default:
9172               goto unknown;
9173           }
9174
9175         case 'w':
9176           switch (name[1])
9177           {
9178             case 'h':
9179               if (name[2] == 'i' &&
9180                   name[3] == 'l' &&
9181                   name[4] == 'e')
9182               {                                   /* while      */
9183                 return KEY_while;
9184               }
9185
9186               goto unknown;
9187
9188             case 'r':
9189               if (name[2] == 'i' &&
9190                   name[3] == 't' &&
9191                   name[4] == 'e')
9192               {                                   /* write      */
9193                 return -KEY_write;
9194               }
9195
9196               goto unknown;
9197
9198             default:
9199               goto unknown;
9200           }
9201
9202         default:
9203           goto unknown;
9204       }
9205
9206     case 6: /* 33 tokens of length 6 */
9207       switch (name[0])
9208       {
9209         case 'a':
9210           if (name[1] == 'c' &&
9211               name[2] == 'c' &&
9212               name[3] == 'e' &&
9213               name[4] == 'p' &&
9214               name[5] == 't')
9215           {                                       /* accept     */
9216             return -KEY_accept;
9217           }
9218
9219           goto unknown;
9220
9221         case 'c':
9222           switch (name[1])
9223           {
9224             case 'a':
9225               if (name[2] == 'l' &&
9226                   name[3] == 'l' &&
9227                   name[4] == 'e' &&
9228                   name[5] == 'r')
9229               {                                   /* caller     */
9230                 return -KEY_caller;
9231               }
9232
9233               goto unknown;
9234
9235             case 'h':
9236               if (name[2] == 'r' &&
9237                   name[3] == 'o' &&
9238                   name[4] == 'o' &&
9239                   name[5] == 't')
9240               {                                   /* chroot     */
9241                 return -KEY_chroot;
9242               }
9243
9244               goto unknown;
9245
9246             default:
9247               goto unknown;
9248           }
9249
9250         case 'd':
9251           if (name[1] == 'e' &&
9252               name[2] == 'l' &&
9253               name[3] == 'e' &&
9254               name[4] == 't' &&
9255               name[5] == 'e')
9256           {                                       /* delete     */
9257             return KEY_delete;
9258           }
9259
9260           goto unknown;
9261
9262         case 'e':
9263           switch (name[1])
9264           {
9265             case 'l':
9266               if (name[2] == 's' &&
9267                   name[3] == 'e' &&
9268                   name[4] == 'i' &&
9269                   name[5] == 'f')
9270               {                                   /* elseif     */
9271                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9272               }
9273
9274               goto unknown;
9275
9276             case 'x':
9277               if (name[2] == 'i' &&
9278                   name[3] == 's' &&
9279                   name[4] == 't' &&
9280                   name[5] == 's')
9281               {                                   /* exists     */
9282                 return KEY_exists;
9283               }
9284
9285               goto unknown;
9286
9287             default:
9288               goto unknown;
9289           }
9290
9291         case 'f':
9292           switch (name[1])
9293           {
9294             case 'i':
9295               if (name[2] == 'l' &&
9296                   name[3] == 'e' &&
9297                   name[4] == 'n' &&
9298                   name[5] == 'o')
9299               {                                   /* fileno     */
9300                 return -KEY_fileno;
9301               }
9302
9303               goto unknown;
9304
9305             case 'o':
9306               if (name[2] == 'r' &&
9307                   name[3] == 'm' &&
9308                   name[4] == 'a' &&
9309                   name[5] == 't')
9310               {                                   /* format     */
9311                 return KEY_format;
9312               }
9313
9314               goto unknown;
9315
9316             default:
9317               goto unknown;
9318           }
9319
9320         case 'g':
9321           if (name[1] == 'm' &&
9322               name[2] == 't' &&
9323               name[3] == 'i' &&
9324               name[4] == 'm' &&
9325               name[5] == 'e')
9326           {                                       /* gmtime     */
9327             return -KEY_gmtime;
9328           }
9329
9330           goto unknown;
9331
9332         case 'l':
9333           switch (name[1])
9334           {
9335             case 'e':
9336               if (name[2] == 'n' &&
9337                   name[3] == 'g' &&
9338                   name[4] == 't' &&
9339                   name[5] == 'h')
9340               {                                   /* length     */
9341                 return -KEY_length;
9342               }
9343
9344               goto unknown;
9345
9346             case 'i':
9347               if (name[2] == 's' &&
9348                   name[3] == 't' &&
9349                   name[4] == 'e' &&
9350                   name[5] == 'n')
9351               {                                   /* listen     */
9352                 return -KEY_listen;
9353               }
9354
9355               goto unknown;
9356
9357             default:
9358               goto unknown;
9359           }
9360
9361         case 'm':
9362           if (name[1] == 's' &&
9363               name[2] == 'g')
9364           {
9365             switch (name[3])
9366             {
9367               case 'c':
9368                 if (name[4] == 't' &&
9369                     name[5] == 'l')
9370                 {                                 /* msgctl     */
9371                   return -KEY_msgctl;
9372                 }
9373
9374                 goto unknown;
9375
9376               case 'g':
9377                 if (name[4] == 'e' &&
9378                     name[5] == 't')
9379                 {                                 /* msgget     */
9380                   return -KEY_msgget;
9381                 }
9382
9383                 goto unknown;
9384
9385               case 'r':
9386                 if (name[4] == 'c' &&
9387                     name[5] == 'v')
9388                 {                                 /* msgrcv     */
9389                   return -KEY_msgrcv;
9390                 }
9391
9392                 goto unknown;
9393
9394               case 's':
9395                 if (name[4] == 'n' &&
9396                     name[5] == 'd')
9397                 {                                 /* msgsnd     */
9398                   return -KEY_msgsnd;
9399                 }
9400
9401                 goto unknown;
9402
9403               default:
9404                 goto unknown;
9405             }
9406           }
9407
9408           goto unknown;
9409
9410         case 'p':
9411           if (name[1] == 'r' &&
9412               name[2] == 'i' &&
9413               name[3] == 'n' &&
9414               name[4] == 't' &&
9415               name[5] == 'f')
9416           {                                       /* printf     */
9417             return KEY_printf;
9418           }
9419
9420           goto unknown;
9421
9422         case 'r':
9423           switch (name[1])
9424           {
9425             case 'e':
9426               switch (name[2])
9427               {
9428                 case 'n':
9429                   if (name[3] == 'a' &&
9430                       name[4] == 'm' &&
9431                       name[5] == 'e')
9432                   {                               /* rename     */
9433                     return -KEY_rename;
9434                   }
9435
9436                   goto unknown;
9437
9438                 case 't':
9439                   if (name[3] == 'u' &&
9440                       name[4] == 'r' &&
9441                       name[5] == 'n')
9442                   {                               /* return     */
9443                     return KEY_return;
9444                   }
9445
9446                   goto unknown;
9447
9448                 default:
9449                   goto unknown;
9450               }
9451
9452             case 'i':
9453               if (name[2] == 'n' &&
9454                   name[3] == 'd' &&
9455                   name[4] == 'e' &&
9456                   name[5] == 'x')
9457               {                                   /* rindex     */
9458                 return -KEY_rindex;
9459               }
9460
9461               goto unknown;
9462
9463             default:
9464               goto unknown;
9465           }
9466
9467         case 's':
9468           switch (name[1])
9469           {
9470             case 'c':
9471               if (name[2] == 'a' &&
9472                   name[3] == 'l' &&
9473                   name[4] == 'a' &&
9474                   name[5] == 'r')
9475               {                                   /* scalar     */
9476                 return KEY_scalar;
9477               }
9478
9479               goto unknown;
9480
9481             case 'e':
9482               switch (name[2])
9483               {
9484                 case 'l':
9485                   if (name[3] == 'e' &&
9486                       name[4] == 'c' &&
9487                       name[5] == 't')
9488                   {                               /* select     */
9489                     return -KEY_select;
9490                   }
9491
9492                   goto unknown;
9493
9494                 case 'm':
9495                   switch (name[3])
9496                   {
9497                     case 'c':
9498                       if (name[4] == 't' &&
9499                           name[5] == 'l')
9500                       {                           /* semctl     */
9501                         return -KEY_semctl;
9502                       }
9503
9504                       goto unknown;
9505
9506                     case 'g':
9507                       if (name[4] == 'e' &&
9508                           name[5] == 't')
9509                       {                           /* semget     */
9510                         return -KEY_semget;
9511                       }
9512
9513                       goto unknown;
9514
9515                     default:
9516                       goto unknown;
9517                   }
9518
9519                 default:
9520                   goto unknown;
9521               }
9522
9523             case 'h':
9524               if (name[2] == 'm')
9525               {
9526                 switch (name[3])
9527                 {
9528                   case 'c':
9529                     if (name[4] == 't' &&
9530                         name[5] == 'l')
9531                     {                             /* shmctl     */
9532                       return -KEY_shmctl;
9533                     }
9534
9535                     goto unknown;
9536
9537                   case 'g':
9538                     if (name[4] == 'e' &&
9539                         name[5] == 't')
9540                     {                             /* shmget     */
9541                       return -KEY_shmget;
9542                     }
9543
9544                     goto unknown;
9545
9546                   default:
9547                     goto unknown;
9548                 }
9549               }
9550
9551               goto unknown;
9552
9553             case 'o':
9554               if (name[2] == 'c' &&
9555                   name[3] == 'k' &&
9556                   name[4] == 'e' &&
9557                   name[5] == 't')
9558               {                                   /* socket     */
9559                 return -KEY_socket;
9560               }
9561
9562               goto unknown;
9563
9564             case 'p':
9565               if (name[2] == 'l' &&
9566                   name[3] == 'i' &&
9567                   name[4] == 'c' &&
9568                   name[5] == 'e')
9569               {                                   /* splice     */
9570                 return -KEY_splice;
9571               }
9572
9573               goto unknown;
9574
9575             case 'u':
9576               if (name[2] == 'b' &&
9577                   name[3] == 's' &&
9578                   name[4] == 't' &&
9579                   name[5] == 'r')
9580               {                                   /* substr     */
9581                 return -KEY_substr;
9582               }
9583
9584               goto unknown;
9585
9586             case 'y':
9587               if (name[2] == 's' &&
9588                   name[3] == 't' &&
9589                   name[4] == 'e' &&
9590                   name[5] == 'm')
9591               {                                   /* system     */
9592                 return -KEY_system;
9593               }
9594
9595               goto unknown;
9596
9597             default:
9598               goto unknown;
9599           }
9600
9601         case 'u':
9602           if (name[1] == 'n')
9603           {
9604             switch (name[2])
9605             {
9606               case 'l':
9607                 switch (name[3])
9608                 {
9609                   case 'e':
9610                     if (name[4] == 's' &&
9611                         name[5] == 's')
9612                     {                             /* unless     */
9613                       return KEY_unless;
9614                     }
9615
9616                     goto unknown;
9617
9618                   case 'i':
9619                     if (name[4] == 'n' &&
9620                         name[5] == 'k')
9621                     {                             /* unlink     */
9622                       return -KEY_unlink;
9623                     }
9624
9625                     goto unknown;
9626
9627                   default:
9628                     goto unknown;
9629                 }
9630
9631               case 'p':
9632                 if (name[3] == 'a' &&
9633                     name[4] == 'c' &&
9634                     name[5] == 'k')
9635                 {                                 /* unpack     */
9636                   return -KEY_unpack;
9637                 }
9638
9639                 goto unknown;
9640
9641               default:
9642                 goto unknown;
9643             }
9644           }
9645
9646           goto unknown;
9647
9648         case 'v':
9649           if (name[1] == 'a' &&
9650               name[2] == 'l' &&
9651               name[3] == 'u' &&
9652               name[4] == 'e' &&
9653               name[5] == 's')
9654           {                                       /* values     */
9655             return -KEY_values;
9656           }
9657
9658           goto unknown;
9659
9660         default:
9661           goto unknown;
9662       }
9663
9664     case 7: /* 29 tokens of length 7 */
9665       switch (name[0])
9666       {
9667         case 'D':
9668           if (name[1] == 'E' &&
9669               name[2] == 'S' &&
9670               name[3] == 'T' &&
9671               name[4] == 'R' &&
9672               name[5] == 'O' &&
9673               name[6] == 'Y')
9674           {                                       /* DESTROY    */
9675             return KEY_DESTROY;
9676           }
9677
9678           goto unknown;
9679
9680         case '_':
9681           if (name[1] == '_' &&
9682               name[2] == 'E' &&
9683               name[3] == 'N' &&
9684               name[4] == 'D' &&
9685               name[5] == '_' &&
9686               name[6] == '_')
9687           {                                       /* __END__    */
9688             return KEY___END__;
9689           }
9690
9691           goto unknown;
9692
9693         case 'b':
9694           if (name[1] == 'i' &&
9695               name[2] == 'n' &&
9696               name[3] == 'm' &&
9697               name[4] == 'o' &&
9698               name[5] == 'd' &&
9699               name[6] == 'e')
9700           {                                       /* binmode    */
9701             return -KEY_binmode;
9702           }
9703
9704           goto unknown;
9705
9706         case 'c':
9707           if (name[1] == 'o' &&
9708               name[2] == 'n' &&
9709               name[3] == 'n' &&
9710               name[4] == 'e' &&
9711               name[5] == 'c' &&
9712               name[6] == 't')
9713           {                                       /* connect    */
9714             return -KEY_connect;
9715           }
9716
9717           goto unknown;
9718
9719         case 'd':
9720           switch (name[1])
9721           {
9722             case 'b':
9723               if (name[2] == 'm' &&
9724                   name[3] == 'o' &&
9725                   name[4] == 'p' &&
9726                   name[5] == 'e' &&
9727                   name[6] == 'n')
9728               {                                   /* dbmopen    */
9729                 return -KEY_dbmopen;
9730               }
9731
9732               goto unknown;
9733
9734             case 'e':
9735               if (name[2] == 'f')
9736               {
9737                 switch (name[3])
9738                 {
9739                   case 'a':
9740                     if (name[4] == 'u' &&
9741                         name[5] == 'l' &&
9742                         name[6] == 't')
9743                     {                             /* default    */
9744                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9745                     }
9746
9747                     goto unknown;
9748
9749                   case 'i':
9750                     if (name[4] == 'n' &&
9751                         name[5] == 'e' &&
9752                         name[6] == 'd')
9753                     {                             /* defined    */
9754                       return KEY_defined;
9755                     }
9756
9757                     goto unknown;
9758
9759                   default:
9760                     goto unknown;
9761                 }
9762               }
9763
9764               goto unknown;
9765
9766             default:
9767               goto unknown;
9768           }
9769
9770         case 'f':
9771           if (name[1] == 'o' &&
9772               name[2] == 'r' &&
9773               name[3] == 'e' &&
9774               name[4] == 'a' &&
9775               name[5] == 'c' &&
9776               name[6] == 'h')
9777           {                                       /* foreach    */
9778             return KEY_foreach;
9779           }
9780
9781           goto unknown;
9782
9783         case 'g':
9784           if (name[1] == 'e' &&
9785               name[2] == 't' &&
9786               name[3] == 'p')
9787           {
9788             switch (name[4])
9789             {
9790               case 'g':
9791                 if (name[5] == 'r' &&
9792                     name[6] == 'p')
9793                 {                                 /* getpgrp    */
9794                   return -KEY_getpgrp;
9795                 }
9796
9797                 goto unknown;
9798
9799               case 'p':
9800                 if (name[5] == 'i' &&
9801                     name[6] == 'd')
9802                 {                                 /* getppid    */
9803                   return -KEY_getppid;
9804                 }
9805
9806                 goto unknown;
9807
9808               default:
9809                 goto unknown;
9810             }
9811           }
9812
9813           goto unknown;
9814
9815         case 'l':
9816           if (name[1] == 'c' &&
9817               name[2] == 'f' &&
9818               name[3] == 'i' &&
9819               name[4] == 'r' &&
9820               name[5] == 's' &&
9821               name[6] == 't')
9822           {                                       /* lcfirst    */
9823             return -KEY_lcfirst;
9824           }
9825
9826           goto unknown;
9827
9828         case 'o':
9829           if (name[1] == 'p' &&
9830               name[2] == 'e' &&
9831               name[3] == 'n' &&
9832               name[4] == 'd' &&
9833               name[5] == 'i' &&
9834               name[6] == 'r')
9835           {                                       /* opendir    */
9836             return -KEY_opendir;
9837           }
9838
9839           goto unknown;
9840
9841         case 'p':
9842           if (name[1] == 'a' &&
9843               name[2] == 'c' &&
9844               name[3] == 'k' &&
9845               name[4] == 'a' &&
9846               name[5] == 'g' &&
9847               name[6] == 'e')
9848           {                                       /* package    */
9849             return KEY_package;
9850           }
9851
9852           goto unknown;
9853
9854         case 'r':
9855           if (name[1] == 'e')
9856           {
9857             switch (name[2])
9858             {
9859               case 'a':
9860                 if (name[3] == 'd' &&
9861                     name[4] == 'd' &&
9862                     name[5] == 'i' &&
9863                     name[6] == 'r')
9864                 {                                 /* readdir    */
9865                   return -KEY_readdir;
9866                 }
9867
9868                 goto unknown;
9869
9870               case 'q':
9871                 if (name[3] == 'u' &&
9872                     name[4] == 'i' &&
9873                     name[5] == 'r' &&
9874                     name[6] == 'e')
9875                 {                                 /* require    */
9876                   return KEY_require;
9877                 }
9878
9879                 goto unknown;
9880
9881               case 'v':
9882                 if (name[3] == 'e' &&
9883                     name[4] == 'r' &&
9884                     name[5] == 's' &&
9885                     name[6] == 'e')
9886                 {                                 /* reverse    */
9887                   return -KEY_reverse;
9888                 }
9889
9890                 goto unknown;
9891
9892               default:
9893                 goto unknown;
9894             }
9895           }
9896
9897           goto unknown;
9898
9899         case 's':
9900           switch (name[1])
9901           {
9902             case 'e':
9903               switch (name[2])
9904               {
9905                 case 'e':
9906                   if (name[3] == 'k' &&
9907                       name[4] == 'd' &&
9908                       name[5] == 'i' &&
9909                       name[6] == 'r')
9910                   {                               /* seekdir    */
9911                     return -KEY_seekdir;
9912                   }
9913
9914                   goto unknown;
9915
9916                 case 't':
9917                   if (name[3] == 'p' &&
9918                       name[4] == 'g' &&
9919                       name[5] == 'r' &&
9920                       name[6] == 'p')
9921                   {                               /* setpgrp    */
9922                     return -KEY_setpgrp;
9923                   }
9924
9925                   goto unknown;
9926
9927                 default:
9928                   goto unknown;
9929               }
9930
9931             case 'h':
9932               if (name[2] == 'm' &&
9933                   name[3] == 'r' &&
9934                   name[4] == 'e' &&
9935                   name[5] == 'a' &&
9936                   name[6] == 'd')
9937               {                                   /* shmread    */
9938                 return -KEY_shmread;
9939               }
9940
9941               goto unknown;
9942
9943             case 'p':
9944               if (name[2] == 'r' &&
9945                   name[3] == 'i' &&
9946                   name[4] == 'n' &&
9947                   name[5] == 't' &&
9948                   name[6] == 'f')
9949               {                                   /* sprintf    */
9950                 return -KEY_sprintf;
9951               }
9952
9953               goto unknown;
9954
9955             case 'y':
9956               switch (name[2])
9957               {
9958                 case 'm':
9959                   if (name[3] == 'l' &&
9960                       name[4] == 'i' &&
9961                       name[5] == 'n' &&
9962                       name[6] == 'k')
9963                   {                               /* symlink    */
9964                     return -KEY_symlink;
9965                   }
9966
9967                   goto unknown;
9968
9969                 case 's':
9970                   switch (name[3])
9971                   {
9972                     case 'c':
9973                       if (name[4] == 'a' &&
9974                           name[5] == 'l' &&
9975                           name[6] == 'l')
9976                       {                           /* syscall    */
9977                         return -KEY_syscall;
9978                       }
9979
9980                       goto unknown;
9981
9982                     case 'o':
9983                       if (name[4] == 'p' &&
9984                           name[5] == 'e' &&
9985                           name[6] == 'n')
9986                       {                           /* sysopen    */
9987                         return -KEY_sysopen;
9988                       }
9989
9990                       goto unknown;
9991
9992                     case 'r':
9993                       if (name[4] == 'e' &&
9994                           name[5] == 'a' &&
9995                           name[6] == 'd')
9996                       {                           /* sysread    */
9997                         return -KEY_sysread;
9998                       }
9999
10000                       goto unknown;
10001
10002                     case 's':
10003                       if (name[4] == 'e' &&
10004                           name[5] == 'e' &&
10005                           name[6] == 'k')
10006                       {                           /* sysseek    */
10007                         return -KEY_sysseek;
10008                       }
10009
10010                       goto unknown;
10011
10012                     default:
10013                       goto unknown;
10014                   }
10015
10016                 default:
10017                   goto unknown;
10018               }
10019
10020             default:
10021               goto unknown;
10022           }
10023
10024         case 't':
10025           if (name[1] == 'e' &&
10026               name[2] == 'l' &&
10027               name[3] == 'l' &&
10028               name[4] == 'd' &&
10029               name[5] == 'i' &&
10030               name[6] == 'r')
10031           {                                       /* telldir    */
10032             return -KEY_telldir;
10033           }
10034
10035           goto unknown;
10036
10037         case 'u':
10038           switch (name[1])
10039           {
10040             case 'c':
10041               if (name[2] == 'f' &&
10042                   name[3] == 'i' &&
10043                   name[4] == 'r' &&
10044                   name[5] == 's' &&
10045                   name[6] == 't')
10046               {                                   /* ucfirst    */
10047                 return -KEY_ucfirst;
10048               }
10049
10050               goto unknown;
10051
10052             case 'n':
10053               if (name[2] == 's' &&
10054                   name[3] == 'h' &&
10055                   name[4] == 'i' &&
10056                   name[5] == 'f' &&
10057                   name[6] == 't')
10058               {                                   /* unshift    */
10059                 return -KEY_unshift;
10060               }
10061
10062               goto unknown;
10063
10064             default:
10065               goto unknown;
10066           }
10067
10068         case 'w':
10069           if (name[1] == 'a' &&
10070               name[2] == 'i' &&
10071               name[3] == 't' &&
10072               name[4] == 'p' &&
10073               name[5] == 'i' &&
10074               name[6] == 'd')
10075           {                                       /* waitpid    */
10076             return -KEY_waitpid;
10077           }
10078
10079           goto unknown;
10080
10081         default:
10082           goto unknown;
10083       }
10084
10085     case 8: /* 26 tokens of length 8 */
10086       switch (name[0])
10087       {
10088         case 'A':
10089           if (name[1] == 'U' &&
10090               name[2] == 'T' &&
10091               name[3] == 'O' &&
10092               name[4] == 'L' &&
10093               name[5] == 'O' &&
10094               name[6] == 'A' &&
10095               name[7] == 'D')
10096           {                                       /* AUTOLOAD   */
10097             return KEY_AUTOLOAD;
10098           }
10099
10100           goto unknown;
10101
10102         case '_':
10103           if (name[1] == '_')
10104           {
10105             switch (name[2])
10106             {
10107               case 'D':
10108                 if (name[3] == 'A' &&
10109                     name[4] == 'T' &&
10110                     name[5] == 'A' &&
10111                     name[6] == '_' &&
10112                     name[7] == '_')
10113                 {                                 /* __DATA__   */
10114                   return KEY___DATA__;
10115                 }
10116
10117                 goto unknown;
10118
10119               case 'F':
10120                 if (name[3] == 'I' &&
10121                     name[4] == 'L' &&
10122                     name[5] == 'E' &&
10123                     name[6] == '_' &&
10124                     name[7] == '_')
10125                 {                                 /* __FILE__   */
10126                   return -KEY___FILE__;
10127                 }
10128
10129                 goto unknown;
10130
10131               case 'L':
10132                 if (name[3] == 'I' &&
10133                     name[4] == 'N' &&
10134                     name[5] == 'E' &&
10135                     name[6] == '_' &&
10136                     name[7] == '_')
10137                 {                                 /* __LINE__   */
10138                   return -KEY___LINE__;
10139                 }
10140
10141                 goto unknown;
10142
10143               default:
10144                 goto unknown;
10145             }
10146           }
10147
10148           goto unknown;
10149
10150         case 'c':
10151           switch (name[1])
10152           {
10153             case 'l':
10154               if (name[2] == 'o' &&
10155                   name[3] == 's' &&
10156                   name[4] == 'e' &&
10157                   name[5] == 'd' &&
10158                   name[6] == 'i' &&
10159                   name[7] == 'r')
10160               {                                   /* closedir   */
10161                 return -KEY_closedir;
10162               }
10163
10164               goto unknown;
10165
10166             case 'o':
10167               if (name[2] == 'n' &&
10168                   name[3] == 't' &&
10169                   name[4] == 'i' &&
10170                   name[5] == 'n' &&
10171                   name[6] == 'u' &&
10172                   name[7] == 'e')
10173               {                                   /* continue   */
10174                 return -KEY_continue;
10175               }
10176
10177               goto unknown;
10178
10179             default:
10180               goto unknown;
10181           }
10182
10183         case 'd':
10184           if (name[1] == 'b' &&
10185               name[2] == 'm' &&
10186               name[3] == 'c' &&
10187               name[4] == 'l' &&
10188               name[5] == 'o' &&
10189               name[6] == 's' &&
10190               name[7] == 'e')
10191           {                                       /* dbmclose   */
10192             return -KEY_dbmclose;
10193           }
10194
10195           goto unknown;
10196
10197         case 'e':
10198           if (name[1] == 'n' &&
10199               name[2] == 'd')
10200           {
10201             switch (name[3])
10202             {
10203               case 'g':
10204                 if (name[4] == 'r' &&
10205                     name[5] == 'e' &&
10206                     name[6] == 'n' &&
10207                     name[7] == 't')
10208                 {                                 /* endgrent   */
10209                   return -KEY_endgrent;
10210                 }
10211
10212                 goto unknown;
10213
10214               case 'p':
10215                 if (name[4] == 'w' &&
10216                     name[5] == 'e' &&
10217                     name[6] == 'n' &&
10218                     name[7] == 't')
10219                 {                                 /* endpwent   */
10220                   return -KEY_endpwent;
10221                 }
10222
10223                 goto unknown;
10224
10225               default:
10226                 goto unknown;
10227             }
10228           }
10229
10230           goto unknown;
10231
10232         case 'f':
10233           if (name[1] == 'o' &&
10234               name[2] == 'r' &&
10235               name[3] == 'm' &&
10236               name[4] == 'l' &&
10237               name[5] == 'i' &&
10238               name[6] == 'n' &&
10239               name[7] == 'e')
10240           {                                       /* formline   */
10241             return -KEY_formline;
10242           }
10243
10244           goto unknown;
10245
10246         case 'g':
10247           if (name[1] == 'e' &&
10248               name[2] == 't')
10249           {
10250             switch (name[3])
10251             {
10252               case 'g':
10253                 if (name[4] == 'r')
10254                 {
10255                   switch (name[5])
10256                   {
10257                     case 'e':
10258                       if (name[6] == 'n' &&
10259                           name[7] == 't')
10260                       {                           /* getgrent   */
10261                         return -KEY_getgrent;
10262                       }
10263
10264                       goto unknown;
10265
10266                     case 'g':
10267                       if (name[6] == 'i' &&
10268                           name[7] == 'd')
10269                       {                           /* getgrgid   */
10270                         return -KEY_getgrgid;
10271                       }
10272
10273                       goto unknown;
10274
10275                     case 'n':
10276                       if (name[6] == 'a' &&
10277                           name[7] == 'm')
10278                       {                           /* getgrnam   */
10279                         return -KEY_getgrnam;
10280                       }
10281
10282                       goto unknown;
10283
10284                     default:
10285                       goto unknown;
10286                   }
10287                 }
10288
10289                 goto unknown;
10290
10291               case 'l':
10292                 if (name[4] == 'o' &&
10293                     name[5] == 'g' &&
10294                     name[6] == 'i' &&
10295                     name[7] == 'n')
10296                 {                                 /* getlogin   */
10297                   return -KEY_getlogin;
10298                 }
10299
10300                 goto unknown;
10301
10302               case 'p':
10303                 if (name[4] == 'w')
10304                 {
10305                   switch (name[5])
10306                   {
10307                     case 'e':
10308                       if (name[6] == 'n' &&
10309                           name[7] == 't')
10310                       {                           /* getpwent   */
10311                         return -KEY_getpwent;
10312                       }
10313
10314                       goto unknown;
10315
10316                     case 'n':
10317                       if (name[6] == 'a' &&
10318                           name[7] == 'm')
10319                       {                           /* getpwnam   */
10320                         return -KEY_getpwnam;
10321                       }
10322
10323                       goto unknown;
10324
10325                     case 'u':
10326                       if (name[6] == 'i' &&
10327                           name[7] == 'd')
10328                       {                           /* getpwuid   */
10329                         return -KEY_getpwuid;
10330                       }
10331
10332                       goto unknown;
10333
10334                     default:
10335                       goto unknown;
10336                   }
10337                 }
10338
10339                 goto unknown;
10340
10341               default:
10342                 goto unknown;
10343             }
10344           }
10345
10346           goto unknown;
10347
10348         case 'r':
10349           if (name[1] == 'e' &&
10350               name[2] == 'a' &&
10351               name[3] == 'd')
10352           {
10353             switch (name[4])
10354             {
10355               case 'l':
10356                 if (name[5] == 'i' &&
10357                     name[6] == 'n')
10358                 {
10359                   switch (name[7])
10360                   {
10361                     case 'e':
10362                       {                           /* readline   */
10363                         return -KEY_readline;
10364                       }
10365
10366                     case 'k':
10367                       {                           /* readlink   */
10368                         return -KEY_readlink;
10369                       }
10370
10371                     default:
10372                       goto unknown;
10373                   }
10374                 }
10375
10376                 goto unknown;
10377
10378               case 'p':
10379                 if (name[5] == 'i' &&
10380                     name[6] == 'p' &&
10381                     name[7] == 'e')
10382                 {                                 /* readpipe   */
10383                   return -KEY_readpipe;
10384                 }
10385
10386                 goto unknown;
10387
10388               default:
10389                 goto unknown;
10390             }
10391           }
10392
10393           goto unknown;
10394
10395         case 's':
10396           switch (name[1])
10397           {
10398             case 'e':
10399               if (name[2] == 't')
10400               {
10401                 switch (name[3])
10402                 {
10403                   case 'g':
10404                     if (name[4] == 'r' &&
10405                         name[5] == 'e' &&
10406                         name[6] == 'n' &&
10407                         name[7] == 't')
10408                     {                             /* setgrent   */
10409                       return -KEY_setgrent;
10410                     }
10411
10412                     goto unknown;
10413
10414                   case 'p':
10415                     if (name[4] == 'w' &&
10416                         name[5] == 'e' &&
10417                         name[6] == 'n' &&
10418                         name[7] == 't')
10419                     {                             /* setpwent   */
10420                       return -KEY_setpwent;
10421                     }
10422
10423                     goto unknown;
10424
10425                   default:
10426                     goto unknown;
10427                 }
10428               }
10429
10430               goto unknown;
10431
10432             case 'h':
10433               switch (name[2])
10434               {
10435                 case 'm':
10436                   if (name[3] == 'w' &&
10437                       name[4] == 'r' &&
10438                       name[5] == 'i' &&
10439                       name[6] == 't' &&
10440                       name[7] == 'e')
10441                   {                               /* shmwrite   */
10442                     return -KEY_shmwrite;
10443                   }
10444
10445                   goto unknown;
10446
10447                 case 'u':
10448                   if (name[3] == 't' &&
10449                       name[4] == 'd' &&
10450                       name[5] == 'o' &&
10451                       name[6] == 'w' &&
10452                       name[7] == 'n')
10453                   {                               /* shutdown   */
10454                     return -KEY_shutdown;
10455                   }
10456
10457                   goto unknown;
10458
10459                 default:
10460                   goto unknown;
10461               }
10462
10463             case 'y':
10464               if (name[2] == 's' &&
10465                   name[3] == 'w' &&
10466                   name[4] == 'r' &&
10467                   name[5] == 'i' &&
10468                   name[6] == 't' &&
10469                   name[7] == 'e')
10470               {                                   /* syswrite   */
10471                 return -KEY_syswrite;
10472               }
10473
10474               goto unknown;
10475
10476             default:
10477               goto unknown;
10478           }
10479
10480         case 't':
10481           if (name[1] == 'r' &&
10482               name[2] == 'u' &&
10483               name[3] == 'n' &&
10484               name[4] == 'c' &&
10485               name[5] == 'a' &&
10486               name[6] == 't' &&
10487               name[7] == 'e')
10488           {                                       /* truncate   */
10489             return -KEY_truncate;
10490           }
10491
10492           goto unknown;
10493
10494         default:
10495           goto unknown;
10496       }
10497
10498     case 9: /* 9 tokens of length 9 */
10499       switch (name[0])
10500       {
10501         case 'U':
10502           if (name[1] == 'N' &&
10503               name[2] == 'I' &&
10504               name[3] == 'T' &&
10505               name[4] == 'C' &&
10506               name[5] == 'H' &&
10507               name[6] == 'E' &&
10508               name[7] == 'C' &&
10509               name[8] == 'K')
10510           {                                       /* UNITCHECK  */
10511             return KEY_UNITCHECK;
10512           }
10513
10514           goto unknown;
10515
10516         case 'e':
10517           if (name[1] == 'n' &&
10518               name[2] == 'd' &&
10519               name[3] == 'n' &&
10520               name[4] == 'e' &&
10521               name[5] == 't' &&
10522               name[6] == 'e' &&
10523               name[7] == 'n' &&
10524               name[8] == 't')
10525           {                                       /* endnetent  */
10526             return -KEY_endnetent;
10527           }
10528
10529           goto unknown;
10530
10531         case 'g':
10532           if (name[1] == 'e' &&
10533               name[2] == 't' &&
10534               name[3] == 'n' &&
10535               name[4] == 'e' &&
10536               name[5] == 't' &&
10537               name[6] == 'e' &&
10538               name[7] == 'n' &&
10539               name[8] == 't')
10540           {                                       /* getnetent  */
10541             return -KEY_getnetent;
10542           }
10543
10544           goto unknown;
10545
10546         case 'l':
10547           if (name[1] == 'o' &&
10548               name[2] == 'c' &&
10549               name[3] == 'a' &&
10550               name[4] == 'l' &&
10551               name[5] == 't' &&
10552               name[6] == 'i' &&
10553               name[7] == 'm' &&
10554               name[8] == 'e')
10555           {                                       /* localtime  */
10556             return -KEY_localtime;
10557           }
10558
10559           goto unknown;
10560
10561         case 'p':
10562           if (name[1] == 'r' &&
10563               name[2] == 'o' &&
10564               name[3] == 't' &&
10565               name[4] == 'o' &&
10566               name[5] == 't' &&
10567               name[6] == 'y' &&
10568               name[7] == 'p' &&
10569               name[8] == 'e')
10570           {                                       /* prototype  */
10571             return KEY_prototype;
10572           }
10573
10574           goto unknown;
10575
10576         case 'q':
10577           if (name[1] == 'u' &&
10578               name[2] == 'o' &&
10579               name[3] == 't' &&
10580               name[4] == 'e' &&
10581               name[5] == 'm' &&
10582               name[6] == 'e' &&
10583               name[7] == 't' &&
10584               name[8] == 'a')
10585           {                                       /* quotemeta  */
10586             return -KEY_quotemeta;
10587           }
10588
10589           goto unknown;
10590
10591         case 'r':
10592           if (name[1] == 'e' &&
10593               name[2] == 'w' &&
10594               name[3] == 'i' &&
10595               name[4] == 'n' &&
10596               name[5] == 'd' &&
10597               name[6] == 'd' &&
10598               name[7] == 'i' &&
10599               name[8] == 'r')
10600           {                                       /* rewinddir  */
10601             return -KEY_rewinddir;
10602           }
10603
10604           goto unknown;
10605
10606         case 's':
10607           if (name[1] == 'e' &&
10608               name[2] == 't' &&
10609               name[3] == 'n' &&
10610               name[4] == 'e' &&
10611               name[5] == 't' &&
10612               name[6] == 'e' &&
10613               name[7] == 'n' &&
10614               name[8] == 't')
10615           {                                       /* setnetent  */
10616             return -KEY_setnetent;
10617           }
10618
10619           goto unknown;
10620
10621         case 'w':
10622           if (name[1] == 'a' &&
10623               name[2] == 'n' &&
10624               name[3] == 't' &&
10625               name[4] == 'a' &&
10626               name[5] == 'r' &&
10627               name[6] == 'r' &&
10628               name[7] == 'a' &&
10629               name[8] == 'y')
10630           {                                       /* wantarray  */
10631             return -KEY_wantarray;
10632           }
10633
10634           goto unknown;
10635
10636         default:
10637           goto unknown;
10638       }
10639
10640     case 10: /* 9 tokens of length 10 */
10641       switch (name[0])
10642       {
10643         case 'e':
10644           if (name[1] == 'n' &&
10645               name[2] == 'd')
10646           {
10647             switch (name[3])
10648             {
10649               case 'h':
10650                 if (name[4] == 'o' &&
10651                     name[5] == 's' &&
10652                     name[6] == 't' &&
10653                     name[7] == 'e' &&
10654                     name[8] == 'n' &&
10655                     name[9] == 't')
10656                 {                                 /* endhostent */
10657                   return -KEY_endhostent;
10658                 }
10659
10660                 goto unknown;
10661
10662               case 's':
10663                 if (name[4] == 'e' &&
10664                     name[5] == 'r' &&
10665                     name[6] == 'v' &&
10666                     name[7] == 'e' &&
10667                     name[8] == 'n' &&
10668                     name[9] == 't')
10669                 {                                 /* endservent */
10670                   return -KEY_endservent;
10671                 }
10672
10673                 goto unknown;
10674
10675               default:
10676                 goto unknown;
10677             }
10678           }
10679
10680           goto unknown;
10681
10682         case 'g':
10683           if (name[1] == 'e' &&
10684               name[2] == 't')
10685           {
10686             switch (name[3])
10687             {
10688               case 'h':
10689                 if (name[4] == 'o' &&
10690                     name[5] == 's' &&
10691                     name[6] == 't' &&
10692                     name[7] == 'e' &&
10693                     name[8] == 'n' &&
10694                     name[9] == 't')
10695                 {                                 /* gethostent */
10696                   return -KEY_gethostent;
10697                 }
10698
10699                 goto unknown;
10700
10701               case 's':
10702                 switch (name[4])
10703                 {
10704                   case 'e':
10705                     if (name[5] == 'r' &&
10706                         name[6] == 'v' &&
10707                         name[7] == 'e' &&
10708                         name[8] == 'n' &&
10709                         name[9] == 't')
10710                     {                             /* getservent */
10711                       return -KEY_getservent;
10712                     }
10713
10714                     goto unknown;
10715
10716                   case 'o':
10717                     if (name[5] == 'c' &&
10718                         name[6] == 'k' &&
10719                         name[7] == 'o' &&
10720                         name[8] == 'p' &&
10721                         name[9] == 't')
10722                     {                             /* getsockopt */
10723                       return -KEY_getsockopt;
10724                     }
10725
10726                     goto unknown;
10727
10728                   default:
10729                     goto unknown;
10730                 }
10731
10732               default:
10733                 goto unknown;
10734             }
10735           }
10736
10737           goto unknown;
10738
10739         case 's':
10740           switch (name[1])
10741           {
10742             case 'e':
10743               if (name[2] == 't')
10744               {
10745                 switch (name[3])
10746                 {
10747                   case 'h':
10748                     if (name[4] == 'o' &&
10749                         name[5] == 's' &&
10750                         name[6] == 't' &&
10751                         name[7] == 'e' &&
10752                         name[8] == 'n' &&
10753                         name[9] == 't')
10754                     {                             /* sethostent */
10755                       return -KEY_sethostent;
10756                     }
10757
10758                     goto unknown;
10759
10760                   case 's':
10761                     switch (name[4])
10762                     {
10763                       case 'e':
10764                         if (name[5] == 'r' &&
10765                             name[6] == 'v' &&
10766                             name[7] == 'e' &&
10767                             name[8] == 'n' &&
10768                             name[9] == 't')
10769                         {                         /* setservent */
10770                           return -KEY_setservent;
10771                         }
10772
10773                         goto unknown;
10774
10775                       case 'o':
10776                         if (name[5] == 'c' &&
10777                             name[6] == 'k' &&
10778                             name[7] == 'o' &&
10779                             name[8] == 'p' &&
10780                             name[9] == 't')
10781                         {                         /* setsockopt */
10782                           return -KEY_setsockopt;
10783                         }
10784
10785                         goto unknown;
10786
10787                       default:
10788                         goto unknown;
10789                     }
10790
10791                   default:
10792                     goto unknown;
10793                 }
10794               }
10795
10796               goto unknown;
10797
10798             case 'o':
10799               if (name[2] == 'c' &&
10800                   name[3] == 'k' &&
10801                   name[4] == 'e' &&
10802                   name[5] == 't' &&
10803                   name[6] == 'p' &&
10804                   name[7] == 'a' &&
10805                   name[8] == 'i' &&
10806                   name[9] == 'r')
10807               {                                   /* socketpair */
10808                 return -KEY_socketpair;
10809               }
10810
10811               goto unknown;
10812
10813             default:
10814               goto unknown;
10815           }
10816
10817         default:
10818           goto unknown;
10819       }
10820
10821     case 11: /* 8 tokens of length 11 */
10822       switch (name[0])
10823       {
10824         case '_':
10825           if (name[1] == '_' &&
10826               name[2] == 'P' &&
10827               name[3] == 'A' &&
10828               name[4] == 'C' &&
10829               name[5] == 'K' &&
10830               name[6] == 'A' &&
10831               name[7] == 'G' &&
10832               name[8] == 'E' &&
10833               name[9] == '_' &&
10834               name[10] == '_')
10835           {                                       /* __PACKAGE__ */
10836             return -KEY___PACKAGE__;
10837           }
10838
10839           goto unknown;
10840
10841         case 'e':
10842           if (name[1] == 'n' &&
10843               name[2] == 'd' &&
10844               name[3] == 'p' &&
10845               name[4] == 'r' &&
10846               name[5] == 'o' &&
10847               name[6] == 't' &&
10848               name[7] == 'o' &&
10849               name[8] == 'e' &&
10850               name[9] == 'n' &&
10851               name[10] == 't')
10852           {                                       /* endprotoent */
10853             return -KEY_endprotoent;
10854           }
10855
10856           goto unknown;
10857
10858         case 'g':
10859           if (name[1] == 'e' &&
10860               name[2] == 't')
10861           {
10862             switch (name[3])
10863             {
10864               case 'p':
10865                 switch (name[4])
10866                 {
10867                   case 'e':
10868                     if (name[5] == 'e' &&
10869                         name[6] == 'r' &&
10870                         name[7] == 'n' &&
10871                         name[8] == 'a' &&
10872                         name[9] == 'm' &&
10873                         name[10] == 'e')
10874                     {                             /* getpeername */
10875                       return -KEY_getpeername;
10876                     }
10877
10878                     goto unknown;
10879
10880                   case 'r':
10881                     switch (name[5])
10882                     {
10883                       case 'i':
10884                         if (name[6] == 'o' &&
10885                             name[7] == 'r' &&
10886                             name[8] == 'i' &&
10887                             name[9] == 't' &&
10888                             name[10] == 'y')
10889                         {                         /* getpriority */
10890                           return -KEY_getpriority;
10891                         }
10892
10893                         goto unknown;
10894
10895                       case 'o':
10896                         if (name[6] == 't' &&
10897                             name[7] == 'o' &&
10898                             name[8] == 'e' &&
10899                             name[9] == 'n' &&
10900                             name[10] == 't')
10901                         {                         /* getprotoent */
10902                           return -KEY_getprotoent;
10903                         }
10904
10905                         goto unknown;
10906
10907                       default:
10908                         goto unknown;
10909                     }
10910
10911                   default:
10912                     goto unknown;
10913                 }
10914
10915               case 's':
10916                 if (name[4] == 'o' &&
10917                     name[5] == 'c' &&
10918                     name[6] == 'k' &&
10919                     name[7] == 'n' &&
10920                     name[8] == 'a' &&
10921                     name[9] == 'm' &&
10922                     name[10] == 'e')
10923                 {                                 /* getsockname */
10924                   return -KEY_getsockname;
10925                 }
10926
10927                 goto unknown;
10928
10929               default:
10930                 goto unknown;
10931             }
10932           }
10933
10934           goto unknown;
10935
10936         case 's':
10937           if (name[1] == 'e' &&
10938               name[2] == 't' &&
10939               name[3] == 'p' &&
10940               name[4] == 'r')
10941           {
10942             switch (name[5])
10943             {
10944               case 'i':
10945                 if (name[6] == 'o' &&
10946                     name[7] == 'r' &&
10947                     name[8] == 'i' &&
10948                     name[9] == 't' &&
10949                     name[10] == 'y')
10950                 {                                 /* setpriority */
10951                   return -KEY_setpriority;
10952                 }
10953
10954                 goto unknown;
10955
10956               case 'o':
10957                 if (name[6] == 't' &&
10958                     name[7] == 'o' &&
10959                     name[8] == 'e' &&
10960                     name[9] == 'n' &&
10961                     name[10] == 't')
10962                 {                                 /* setprotoent */
10963                   return -KEY_setprotoent;
10964                 }
10965
10966                 goto unknown;
10967
10968               default:
10969                 goto unknown;
10970             }
10971           }
10972
10973           goto unknown;
10974
10975         default:
10976           goto unknown;
10977       }
10978
10979     case 12: /* 2 tokens of length 12 */
10980       if (name[0] == 'g' &&
10981           name[1] == 'e' &&
10982           name[2] == 't' &&
10983           name[3] == 'n' &&
10984           name[4] == 'e' &&
10985           name[5] == 't' &&
10986           name[6] == 'b' &&
10987           name[7] == 'y')
10988       {
10989         switch (name[8])
10990         {
10991           case 'a':
10992             if (name[9] == 'd' &&
10993                 name[10] == 'd' &&
10994                 name[11] == 'r')
10995             {                                     /* getnetbyaddr */
10996               return -KEY_getnetbyaddr;
10997             }
10998
10999             goto unknown;
11000
11001           case 'n':
11002             if (name[9] == 'a' &&
11003                 name[10] == 'm' &&
11004                 name[11] == 'e')
11005             {                                     /* getnetbyname */
11006               return -KEY_getnetbyname;
11007             }
11008
11009             goto unknown;
11010
11011           default:
11012             goto unknown;
11013         }
11014       }
11015
11016       goto unknown;
11017
11018     case 13: /* 4 tokens of length 13 */
11019       if (name[0] == 'g' &&
11020           name[1] == 'e' &&
11021           name[2] == 't')
11022       {
11023         switch (name[3])
11024         {
11025           case 'h':
11026             if (name[4] == 'o' &&
11027                 name[5] == 's' &&
11028                 name[6] == 't' &&
11029                 name[7] == 'b' &&
11030                 name[8] == 'y')
11031             {
11032               switch (name[9])
11033               {
11034                 case 'a':
11035                   if (name[10] == 'd' &&
11036                       name[11] == 'd' &&
11037                       name[12] == 'r')
11038                   {                               /* gethostbyaddr */
11039                     return -KEY_gethostbyaddr;
11040                   }
11041
11042                   goto unknown;
11043
11044                 case 'n':
11045                   if (name[10] == 'a' &&
11046                       name[11] == 'm' &&
11047                       name[12] == 'e')
11048                   {                               /* gethostbyname */
11049                     return -KEY_gethostbyname;
11050                   }
11051
11052                   goto unknown;
11053
11054                 default:
11055                   goto unknown;
11056               }
11057             }
11058
11059             goto unknown;
11060
11061           case 's':
11062             if (name[4] == 'e' &&
11063                 name[5] == 'r' &&
11064                 name[6] == 'v' &&
11065                 name[7] == 'b' &&
11066                 name[8] == 'y')
11067             {
11068               switch (name[9])
11069               {
11070                 case 'n':
11071                   if (name[10] == 'a' &&
11072                       name[11] == 'm' &&
11073                       name[12] == 'e')
11074                   {                               /* getservbyname */
11075                     return -KEY_getservbyname;
11076                   }
11077
11078                   goto unknown;
11079
11080                 case 'p':
11081                   if (name[10] == 'o' &&
11082                       name[11] == 'r' &&
11083                       name[12] == 't')
11084                   {                               /* getservbyport */
11085                     return -KEY_getservbyport;
11086                   }
11087
11088                   goto unknown;
11089
11090                 default:
11091                   goto unknown;
11092               }
11093             }
11094
11095             goto unknown;
11096
11097           default:
11098             goto unknown;
11099         }
11100       }
11101
11102       goto unknown;
11103
11104     case 14: /* 1 tokens of length 14 */
11105       if (name[0] == 'g' &&
11106           name[1] == 'e' &&
11107           name[2] == 't' &&
11108           name[3] == 'p' &&
11109           name[4] == 'r' &&
11110           name[5] == 'o' &&
11111           name[6] == 't' &&
11112           name[7] == 'o' &&
11113           name[8] == 'b' &&
11114           name[9] == 'y' &&
11115           name[10] == 'n' &&
11116           name[11] == 'a' &&
11117           name[12] == 'm' &&
11118           name[13] == 'e')
11119       {                                           /* getprotobyname */
11120         return -KEY_getprotobyname;
11121       }
11122
11123       goto unknown;
11124
11125     case 16: /* 1 tokens of length 16 */
11126       if (name[0] == 'g' &&
11127           name[1] == 'e' &&
11128           name[2] == 't' &&
11129           name[3] == 'p' &&
11130           name[4] == 'r' &&
11131           name[5] == 'o' &&
11132           name[6] == 't' &&
11133           name[7] == 'o' &&
11134           name[8] == 'b' &&
11135           name[9] == 'y' &&
11136           name[10] == 'n' &&
11137           name[11] == 'u' &&
11138           name[12] == 'm' &&
11139           name[13] == 'b' &&
11140           name[14] == 'e' &&
11141           name[15] == 'r')
11142       {                                           /* getprotobynumber */
11143         return -KEY_getprotobynumber;
11144       }
11145
11146       goto unknown;
11147
11148     default:
11149       goto unknown;
11150   }
11151
11152 unknown:
11153   return 0;
11154 }
11155
11156 STATIC void
11157 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11158 {
11159     dVAR;
11160
11161     PERL_ARGS_ASSERT_CHECKCOMMA;
11162
11163     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11164         if (ckWARN(WARN_SYNTAX)) {
11165             int level = 1;
11166             const char *w;
11167             for (w = s+2; *w && level; w++) {
11168                 if (*w == '(')
11169                     ++level;
11170                 else if (*w == ')')
11171                     --level;
11172             }
11173             while (isSPACE(*w))
11174                 ++w;
11175             /* the list of chars below is for end of statements or
11176              * block / parens, boolean operators (&&, ||, //) and branch
11177              * constructs (or, and, if, until, unless, while, err, for).
11178              * Not a very solid hack... */
11179             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11180                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11181                             "%s (...) interpreted as function",name);
11182         }
11183     }
11184     while (s < PL_bufend && isSPACE(*s))
11185         s++;
11186     if (*s == '(')
11187         s++;
11188     while (s < PL_bufend && isSPACE(*s))
11189         s++;
11190     if (isIDFIRST_lazy_if(s,UTF)) {
11191         const char * const w = s++;
11192         while (isALNUM_lazy_if(s,UTF))
11193             s++;
11194         while (s < PL_bufend && isSPACE(*s))
11195             s++;
11196         if (*s == ',') {
11197             GV* gv;
11198             if (keyword(w, s - w, 0))
11199                 return;
11200
11201             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11202             if (gv && GvCVu(gv))
11203                 return;
11204             Perl_croak(aTHX_ "No comma allowed after %s", what);
11205         }
11206     }
11207 }
11208
11209 /* Either returns sv, or mortalizes sv and returns a new SV*.
11210    Best used as sv=new_constant(..., sv, ...).
11211    If s, pv are NULL, calls subroutine with one argument,
11212    and type is used with error messages only. */
11213
11214 STATIC SV *
11215 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11216                SV *sv, SV *pv, const char *type, STRLEN typelen)
11217 {
11218     dVAR; dSP;
11219     HV * const table = GvHV(PL_hintgv);          /* ^H */
11220     SV *res;
11221     SV **cvp;
11222     SV *cv, *typesv;
11223     const char *why1 = "", *why2 = "", *why3 = "";
11224
11225     PERL_ARGS_ASSERT_NEW_CONSTANT;
11226
11227     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11228         SV *msg;
11229
11230         why2 = (const char *)
11231             (strEQ(key,"charnames")
11232              ? "(possibly a missing \"use charnames ...\")"
11233              : "");
11234         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11235                             (type ? type: "undef"), why2);
11236
11237         /* This is convoluted and evil ("goto considered harmful")
11238          * but I do not understand the intricacies of all the different
11239          * failure modes of %^H in here.  The goal here is to make
11240          * the most probable error message user-friendly. --jhi */
11241
11242         goto msgdone;
11243
11244     report:
11245         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11246                             (type ? type: "undef"), why1, why2, why3);
11247     msgdone:
11248         yyerror(SvPVX_const(msg));
11249         SvREFCNT_dec(msg);
11250         return sv;
11251     }
11252     cvp = hv_fetch(table, key, keylen, FALSE);
11253     if (!cvp || !SvOK(*cvp)) {
11254         why1 = "$^H{";
11255         why2 = key;
11256         why3 = "} is not defined";
11257         goto report;
11258     }
11259     sv_2mortal(sv);                     /* Parent created it permanently */
11260     cv = *cvp;
11261     if (!pv && s)
11262         pv = newSVpvn_flags(s, len, SVs_TEMP);
11263     if (type && pv)
11264         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11265     else
11266         typesv = &PL_sv_undef;
11267
11268     PUSHSTACKi(PERLSI_OVERLOAD);
11269     ENTER ;
11270     SAVETMPS;
11271
11272     PUSHMARK(SP) ;
11273     EXTEND(sp, 3);
11274     if (pv)
11275         PUSHs(pv);
11276     PUSHs(sv);
11277     if (pv)
11278         PUSHs(typesv);
11279     PUTBACK;
11280     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11281
11282     SPAGAIN ;
11283
11284     /* Check the eval first */
11285     if (!PL_in_eval && SvTRUE(ERRSV)) {
11286         sv_catpvs(ERRSV, "Propagated");
11287         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11288         (void)POPs;
11289         res = SvREFCNT_inc_simple(sv);
11290     }
11291     else {
11292         res = POPs;
11293         SvREFCNT_inc_simple_void(res);
11294     }
11295
11296     PUTBACK ;
11297     FREETMPS ;
11298     LEAVE ;
11299     POPSTACK;
11300
11301     if (!SvOK(res)) {
11302         why1 = "Call to &{$^H{";
11303         why2 = key;
11304         why3 = "}} did not return a defined value";
11305         sv = res;
11306         goto report;
11307     }
11308
11309     return res;
11310 }
11311
11312 /* Returns a NUL terminated string, with the length of the string written to
11313    *slp
11314    */
11315 STATIC char *
11316 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11317 {
11318     dVAR;
11319     register char *d = dest;
11320     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11321
11322     PERL_ARGS_ASSERT_SCAN_WORD;
11323
11324     for (;;) {
11325         if (d >= e)
11326             Perl_croak(aTHX_ ident_too_long);
11327         if (isALNUM(*s))        /* UTF handled below */
11328             *d++ = *s++;
11329         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11330             *d++ = ':';
11331             *d++ = ':';
11332             s++;
11333         }
11334         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11335             *d++ = *s++;
11336             *d++ = *s++;
11337         }
11338         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11339             char *t = s + UTF8SKIP(s);
11340             size_t len;
11341             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11342                 t += UTF8SKIP(t);
11343             len = t - s;
11344             if (d + len > e)
11345                 Perl_croak(aTHX_ ident_too_long);
11346             Copy(s, d, len, char);
11347             d += len;
11348             s = t;
11349         }
11350         else {
11351             *d = '\0';
11352             *slp = d - dest;
11353             return s;
11354         }
11355     }
11356 }
11357
11358 STATIC char *
11359 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11360 {
11361     dVAR;
11362     char *bracket = NULL;
11363     char funny = *s++;
11364     register char *d = dest;
11365     register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
11366
11367     PERL_ARGS_ASSERT_SCAN_IDENT;
11368
11369     if (isSPACE(*s))
11370         s = PEEKSPACE(s);
11371     if (isDIGIT(*s)) {
11372         while (isDIGIT(*s)) {
11373             if (d >= e)
11374                 Perl_croak(aTHX_ ident_too_long);
11375             *d++ = *s++;
11376         }
11377     }
11378     else {
11379         for (;;) {
11380             if (d >= e)
11381                 Perl_croak(aTHX_ ident_too_long);
11382             if (isALNUM(*s))    /* UTF handled below */
11383                 *d++ = *s++;
11384             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11385                 *d++ = ':';
11386                 *d++ = ':';
11387                 s++;
11388             }
11389             else if (*s == ':' && s[1] == ':') {
11390                 *d++ = *s++;
11391                 *d++ = *s++;
11392             }
11393             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11394                 char *t = s + UTF8SKIP(s);
11395                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11396                     t += UTF8SKIP(t);
11397                 if (d + (t - s) > e)
11398                     Perl_croak(aTHX_ ident_too_long);
11399                 Copy(s, d, t - s, char);
11400                 d += t - s;
11401                 s = t;
11402             }
11403             else
11404                 break;
11405         }
11406     }
11407     *d = '\0';
11408     d = dest;
11409     if (*d) {
11410         if (PL_lex_state != LEX_NORMAL)
11411             PL_lex_state = LEX_INTERPENDMAYBE;
11412         return s;
11413     }
11414     if (*s == '$' && s[1] &&
11415         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11416     {
11417         return s;
11418     }
11419     if (*s == '{') {
11420         bracket = s;
11421         s++;
11422     }
11423     else if (ck_uni)
11424         check_uni();
11425     if (s < send)
11426         *d = *s++;
11427     d[1] = '\0';
11428     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11429         *d = toCTRL(*s);
11430         s++;
11431     }
11432     if (bracket) {
11433         if (isSPACE(s[-1])) {
11434             while (s < send) {
11435                 const char ch = *s++;
11436                 if (!SPACE_OR_TAB(ch)) {
11437                     *d = ch;
11438                     break;
11439                 }
11440             }
11441         }
11442         if (isIDFIRST_lazy_if(d,UTF)) {
11443             d++;
11444             if (UTF) {
11445                 char *end = s;
11446                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11447                     end += UTF8SKIP(end);
11448                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11449                         end += UTF8SKIP(end);
11450                 }
11451                 Copy(s, d, end - s, char);
11452                 d += end - s;
11453                 s = end;
11454             }
11455             else {
11456                 while ((isALNUM(*s) || *s == ':') && d < e)
11457                     *d++ = *s++;
11458                 if (d >= e)
11459                     Perl_croak(aTHX_ ident_too_long);
11460             }
11461             *d = '\0';
11462             while (s < send && SPACE_OR_TAB(*s))
11463                 s++;
11464             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11465                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11466                     const char * const brack =
11467                         (const char *)
11468                         ((*s == '[') ? "[...]" : "{...}");
11469                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11470                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11471                         funny, dest, brack, funny, dest, brack);
11472                 }
11473                 bracket++;
11474                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11475                 return s;
11476             }
11477         }
11478         /* Handle extended ${^Foo} variables
11479          * 1999-02-27 mjd-perl-patch@plover.com */
11480         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11481                  && isALNUM(*s))
11482         {
11483             d++;
11484             while (isALNUM(*s) && d < e) {
11485                 *d++ = *s++;
11486             }
11487             if (d >= e)
11488                 Perl_croak(aTHX_ ident_too_long);
11489             *d = '\0';
11490         }
11491         if (*s == '}') {
11492             s++;
11493             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11494                 PL_lex_state = LEX_INTERPEND;
11495                 PL_expect = XREF;
11496             }
11497             if (PL_lex_state == LEX_NORMAL) {
11498                 if (ckWARN(WARN_AMBIGUOUS) &&
11499                     (keyword(dest, d - dest, 0)
11500                      || get_cvn_flags(dest, d - dest, 0)))
11501                 {
11502                     if (funny == '#')
11503                         funny = '@';
11504                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11505                         "Ambiguous use of %c{%s} resolved to %c%s",
11506                         funny, dest, funny, dest);
11507                 }
11508             }
11509         }
11510         else {
11511             s = bracket;                /* let the parser handle it */
11512             *dest = '\0';
11513         }
11514     }
11515     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11516         PL_lex_state = LEX_INTERPEND;
11517     return s;
11518 }
11519
11520 static U32
11521 S_pmflag(U32 pmfl, const char ch) {
11522     switch (ch) {
11523         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11524     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11525     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11526     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11527     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11528     }
11529     return pmfl;
11530 }
11531
11532 void
11533 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11534 {
11535     PERL_ARGS_ASSERT_PMFLAG;
11536
11537     Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11538                      "Perl_pmflag() is deprecated, and will be removed from the XS API");
11539
11540     if (ch<256) {
11541         *pmfl = S_pmflag(*pmfl, (char)ch);
11542     }
11543 }
11544
11545 STATIC char *
11546 S_scan_pat(pTHX_ char *start, I32 type)
11547 {
11548     dVAR;
11549     PMOP *pm;
11550     char *s = scan_str(start,!!PL_madskills,FALSE);
11551     const char * const valid_flags =
11552         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11553 #ifdef PERL_MAD
11554     char *modstart;
11555 #endif
11556
11557     PERL_ARGS_ASSERT_SCAN_PAT;
11558
11559     if (!s) {
11560         const char * const delimiter = skipspace(start);
11561         Perl_croak(aTHX_
11562                    (const char *)
11563                    (*delimiter == '?'
11564                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11565                     : "Search pattern not terminated" ));
11566     }
11567
11568     pm = (PMOP*)newPMOP(type, 0);
11569     if (PL_multi_open == '?') {
11570         /* This is the only point in the code that sets PMf_ONCE:  */
11571         pm->op_pmflags |= PMf_ONCE;
11572
11573         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11574            allows us to restrict the list needed by reset to just the ??
11575            matches.  */
11576         assert(type != OP_TRANS);
11577         if (PL_curstash) {
11578             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11579             U32 elements;
11580             if (!mg) {
11581                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11582                                  0);
11583             }
11584             elements = mg->mg_len / sizeof(PMOP**);
11585             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11586             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11587             mg->mg_len = elements * sizeof(PMOP**);
11588             PmopSTASH_set(pm,PL_curstash);
11589         }
11590     }
11591 #ifdef PERL_MAD
11592     modstart = s;
11593 #endif
11594     while (*s && strchr(valid_flags, *s))
11595         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11596 #ifdef PERL_MAD
11597     if (PL_madskills && modstart != s) {
11598         SV* tmptoken = newSVpvn(modstart, s - modstart);
11599         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11600     }
11601 #endif
11602     /* issue a warning if /c is specified,but /g is not */
11603     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11604     {
11605         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11606                        "Use of /c modifier is meaningless without /g" );
11607     }
11608
11609     PL_lex_op = (OP*)pm;
11610     pl_yylval.ival = OP_MATCH;
11611     return s;
11612 }
11613
11614 STATIC char *
11615 S_scan_subst(pTHX_ char *start)
11616 {
11617     dVAR;
11618     register char *s;
11619     register PMOP *pm;
11620     I32 first_start;
11621     I32 es = 0;
11622 #ifdef PERL_MAD
11623     char *modstart;
11624 #endif
11625
11626     PERL_ARGS_ASSERT_SCAN_SUBST;
11627
11628     pl_yylval.ival = OP_NULL;
11629
11630     s = scan_str(start,!!PL_madskills,FALSE);
11631
11632     if (!s)
11633         Perl_croak(aTHX_ "Substitution pattern not terminated");
11634
11635     if (s[-1] == PL_multi_open)
11636         s--;
11637 #ifdef PERL_MAD
11638     if (PL_madskills) {
11639         CURMAD('q', PL_thisopen);
11640         CURMAD('_', PL_thiswhite);
11641         CURMAD('E', PL_thisstuff);
11642         CURMAD('Q', PL_thisclose);
11643         PL_realtokenstart = s - SvPVX(PL_linestr);
11644     }
11645 #endif
11646
11647     first_start = PL_multi_start;
11648     s = scan_str(s,!!PL_madskills,FALSE);
11649     if (!s) {
11650         if (PL_lex_stuff) {
11651             SvREFCNT_dec(PL_lex_stuff);
11652             PL_lex_stuff = NULL;
11653         }
11654         Perl_croak(aTHX_ "Substitution replacement not terminated");
11655     }
11656     PL_multi_start = first_start;       /* so whole substitution is taken together */
11657
11658     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11659
11660 #ifdef PERL_MAD
11661     if (PL_madskills) {
11662         CURMAD('z', PL_thisopen);
11663         CURMAD('R', PL_thisstuff);
11664         CURMAD('Z', PL_thisclose);
11665     }
11666     modstart = s;
11667 #endif
11668
11669     while (*s) {
11670         if (*s == EXEC_PAT_MOD) {
11671             s++;
11672             es++;
11673         }
11674         else if (strchr(S_PAT_MODS, *s))
11675             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11676         else
11677             break;
11678     }
11679
11680 #ifdef PERL_MAD
11681     if (PL_madskills) {
11682         if (modstart != s)
11683             curmad('m', newSVpvn(modstart, s - modstart));
11684         append_madprops(PL_thismad, (OP*)pm, 0);
11685         PL_thismad = 0;
11686     }
11687 #endif
11688     if ((pm->op_pmflags & PMf_CONTINUE)) {
11689         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11690     }
11691
11692     if (es) {
11693         SV * const repl = newSVpvs("");
11694
11695         PL_sublex_info.super_bufptr = s;
11696         PL_sublex_info.super_bufend = PL_bufend;
11697         PL_multi_end = 0;
11698         pm->op_pmflags |= PMf_EVAL;
11699         while (es-- > 0) {
11700             if (es)
11701                 sv_catpvs(repl, "eval ");
11702             else
11703                 sv_catpvs(repl, "do ");
11704         }
11705         sv_catpvs(repl, "{");
11706         sv_catsv(repl, PL_lex_repl);
11707         if (strchr(SvPVX(PL_lex_repl), '#'))
11708             sv_catpvs(repl, "\n");
11709         sv_catpvs(repl, "}");
11710         SvEVALED_on(repl);
11711         SvREFCNT_dec(PL_lex_repl);
11712         PL_lex_repl = repl;
11713     }
11714
11715     PL_lex_op = (OP*)pm;
11716     pl_yylval.ival = OP_SUBST;
11717     return s;
11718 }
11719
11720 STATIC char *
11721 S_scan_trans(pTHX_ char *start)
11722 {
11723     dVAR;
11724     register char* s;
11725     OP *o;
11726     short *tbl;
11727     U8 squash;
11728     U8 del;
11729     U8 complement;
11730 #ifdef PERL_MAD
11731     char *modstart;
11732 #endif
11733
11734     PERL_ARGS_ASSERT_SCAN_TRANS;
11735
11736     pl_yylval.ival = OP_NULL;
11737
11738     s = scan_str(start,!!PL_madskills,FALSE);
11739     if (!s)
11740         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11741
11742     if (s[-1] == PL_multi_open)
11743         s--;
11744 #ifdef PERL_MAD
11745     if (PL_madskills) {
11746         CURMAD('q', PL_thisopen);
11747         CURMAD('_', PL_thiswhite);
11748         CURMAD('E', PL_thisstuff);
11749         CURMAD('Q', PL_thisclose);
11750         PL_realtokenstart = s - SvPVX(PL_linestr);
11751     }
11752 #endif
11753
11754     s = scan_str(s,!!PL_madskills,FALSE);
11755     if (!s) {
11756         if (PL_lex_stuff) {
11757             SvREFCNT_dec(PL_lex_stuff);
11758             PL_lex_stuff = NULL;
11759         }
11760         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11761     }
11762     if (PL_madskills) {
11763         CURMAD('z', PL_thisopen);
11764         CURMAD('R', PL_thisstuff);
11765         CURMAD('Z', PL_thisclose);
11766     }
11767
11768     complement = del = squash = 0;
11769 #ifdef PERL_MAD
11770     modstart = s;
11771 #endif
11772     while (1) {
11773         switch (*s) {
11774         case 'c':
11775             complement = OPpTRANS_COMPLEMENT;
11776             break;
11777         case 'd':
11778             del = OPpTRANS_DELETE;
11779             break;
11780         case 's':
11781             squash = OPpTRANS_SQUASH;
11782             break;
11783         default:
11784             goto no_more;
11785         }
11786         s++;
11787     }
11788   no_more:
11789
11790     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11791     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11792     o->op_private &= ~OPpTRANS_ALL;
11793     o->op_private |= del|squash|complement|
11794       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11795       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11796
11797     PL_lex_op = o;
11798     pl_yylval.ival = OP_TRANS;
11799
11800 #ifdef PERL_MAD
11801     if (PL_madskills) {
11802         if (modstart != s)
11803             curmad('m', newSVpvn(modstart, s - modstart));
11804         append_madprops(PL_thismad, o, 0);
11805         PL_thismad = 0;
11806     }
11807 #endif
11808
11809     return s;
11810 }
11811
11812 STATIC char *
11813 S_scan_heredoc(pTHX_ register char *s)
11814 {
11815     dVAR;
11816     SV *herewas;
11817     I32 op_type = OP_SCALAR;
11818     I32 len;
11819     SV *tmpstr;
11820     char term;
11821     const char *found_newline;
11822     register char *d;
11823     register char *e;
11824     char *peek;
11825     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11826 #ifdef PERL_MAD
11827     I32 stuffstart = s - SvPVX(PL_linestr);
11828     char *tstart;
11829
11830     PL_realtokenstart = -1;
11831 #endif
11832
11833     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11834
11835     s += 2;
11836     d = PL_tokenbuf;
11837     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11838     if (!outer)
11839         *d++ = '\n';
11840     peek = s;
11841     while (SPACE_OR_TAB(*peek))
11842         peek++;
11843     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11844         s = peek;
11845         term = *s++;
11846         s = delimcpy(d, e, s, PL_bufend, term, &len);
11847         d += len;
11848         if (s < PL_bufend)
11849             s++;
11850     }
11851     else {
11852         if (*s == '\\')
11853             s++, term = '\'';
11854         else
11855             term = '"';
11856         if (!isALNUM_lazy_if(s,UTF))
11857             deprecate("bare << to mean <<\"\"");
11858         for (; isALNUM_lazy_if(s,UTF); s++) {
11859             if (d < e)
11860                 *d++ = *s;
11861         }
11862     }
11863     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11864         Perl_croak(aTHX_ "Delimiter for here document is too long");
11865     *d++ = '\n';
11866     *d = '\0';
11867     len = d - PL_tokenbuf;
11868
11869 #ifdef PERL_MAD
11870     if (PL_madskills) {
11871         tstart = PL_tokenbuf + !outer;
11872         PL_thisclose = newSVpvn(tstart, len - !outer);
11873         tstart = SvPVX(PL_linestr) + stuffstart;
11874         PL_thisopen = newSVpvn(tstart, s - tstart);
11875         stuffstart = s - SvPVX(PL_linestr);
11876     }
11877 #endif
11878 #ifndef PERL_STRICT_CR
11879     d = strchr(s, '\r');
11880     if (d) {
11881         char * const olds = s;
11882         s = d;
11883         while (s < PL_bufend) {
11884             if (*s == '\r') {
11885                 *d++ = '\n';
11886                 if (*++s == '\n')
11887                     s++;
11888             }
11889             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11890                 *d++ = *s++;
11891                 s++;
11892             }
11893             else
11894                 *d++ = *s++;
11895         }
11896         *d = '\0';
11897         PL_bufend = d;
11898         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11899         s = olds;
11900     }
11901 #endif
11902 #ifdef PERL_MAD
11903     found_newline = 0;
11904 #endif
11905     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11906         herewas = newSVpvn(s,PL_bufend-s);
11907     }
11908     else {
11909 #ifdef PERL_MAD
11910         herewas = newSVpvn(s-1,found_newline-s+1);
11911 #else
11912         s--;
11913         herewas = newSVpvn(s,found_newline-s);
11914 #endif
11915     }
11916 #ifdef PERL_MAD
11917     if (PL_madskills) {
11918         tstart = SvPVX(PL_linestr) + stuffstart;
11919         if (PL_thisstuff)
11920             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11921         else
11922             PL_thisstuff = newSVpvn(tstart, s - tstart);
11923     }
11924 #endif
11925     s += SvCUR(herewas);
11926
11927 #ifdef PERL_MAD
11928     stuffstart = s - SvPVX(PL_linestr);
11929
11930     if (found_newline)
11931         s--;
11932 #endif
11933
11934     tmpstr = newSV_type(SVt_PVIV);
11935     SvGROW(tmpstr, 80);
11936     if (term == '\'') {
11937         op_type = OP_CONST;
11938         SvIV_set(tmpstr, -1);
11939     }
11940     else if (term == '`') {
11941         op_type = OP_BACKTICK;
11942         SvIV_set(tmpstr, '\\');
11943     }
11944
11945     CLINE;
11946     PL_multi_start = CopLINE(PL_curcop);
11947     PL_multi_open = PL_multi_close = '<';
11948     term = *PL_tokenbuf;
11949     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11950         char * const bufptr = PL_sublex_info.super_bufptr;
11951         char * const bufend = PL_sublex_info.super_bufend;
11952         char * const olds = s - SvCUR(herewas);
11953         s = strchr(bufptr, '\n');
11954         if (!s)
11955             s = bufend;
11956         d = s;
11957         while (s < bufend &&
11958           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11959             if (*s++ == '\n')
11960                 CopLINE_inc(PL_curcop);
11961         }
11962         if (s >= bufend) {
11963             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11964             missingterm(PL_tokenbuf);
11965         }
11966         sv_setpvn(herewas,bufptr,d-bufptr+1);
11967         sv_setpvn(tmpstr,d+1,s-d);
11968         s += len - 1;
11969         sv_catpvn(herewas,s,bufend-s);
11970         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11971
11972         s = olds;
11973         goto retval;
11974     }
11975     else if (!outer) {
11976         d = s;
11977         while (s < PL_bufend &&
11978           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11979             if (*s++ == '\n')
11980                 CopLINE_inc(PL_curcop);
11981         }
11982         if (s >= PL_bufend) {
11983             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11984             missingterm(PL_tokenbuf);
11985         }
11986         sv_setpvn(tmpstr,d+1,s-d);
11987 #ifdef PERL_MAD
11988         if (PL_madskills) {
11989             if (PL_thisstuff)
11990                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11991             else
11992                 PL_thisstuff = newSVpvn(d + 1, s - d);
11993             stuffstart = s - SvPVX(PL_linestr);
11994         }
11995 #endif
11996         s += len - 1;
11997         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11998
11999         sv_catpvn(herewas,s,PL_bufend-s);
12000         sv_setsv(PL_linestr,herewas);
12001         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12002         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12003         PL_last_lop = PL_last_uni = NULL;
12004     }
12005     else
12006         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12007     while (s >= PL_bufend) {    /* multiple line string? */
12008 #ifdef PERL_MAD
12009         if (PL_madskills) {
12010             tstart = SvPVX(PL_linestr) + stuffstart;
12011             if (PL_thisstuff)
12012                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12013             else
12014                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12015         }
12016 #endif
12017         PL_bufptr = s;
12018         if (!outer || !lex_next_chunk(0)) {
12019             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12020             missingterm(PL_tokenbuf);
12021         }
12022         s = PL_bufptr;
12023 #ifdef PERL_MAD
12024         stuffstart = s - SvPVX(PL_linestr);
12025 #endif
12026         CopLINE_inc(PL_curcop);
12027         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12028         PL_last_lop = PL_last_uni = NULL;
12029 #ifndef PERL_STRICT_CR
12030         if (PL_bufend - PL_linestart >= 2) {
12031             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12032                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12033             {
12034                 PL_bufend[-2] = '\n';
12035                 PL_bufend--;
12036                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12037             }
12038             else if (PL_bufend[-1] == '\r')
12039                 PL_bufend[-1] = '\n';
12040         }
12041         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12042             PL_bufend[-1] = '\n';
12043 #endif
12044         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
12045             update_debugger_info(PL_linestr, NULL, 0);
12046         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12047             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12048             *(SvPVX(PL_linestr) + off ) = ' ';
12049             sv_catsv(PL_linestr,herewas);
12050             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12051             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12052         }
12053         else {
12054             s = PL_bufend;
12055             sv_catsv(tmpstr,PL_linestr);
12056         }
12057     }
12058     s++;
12059 retval:
12060     PL_multi_end = CopLINE(PL_curcop);
12061     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12062         SvPV_shrink_to_cur(tmpstr);
12063     }
12064     SvREFCNT_dec(herewas);
12065     if (!IN_BYTES) {
12066         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12067             SvUTF8_on(tmpstr);
12068         else if (PL_encoding)
12069             sv_recode_to_utf8(tmpstr, PL_encoding);
12070     }
12071     PL_lex_stuff = tmpstr;
12072     pl_yylval.ival = op_type;
12073     return s;
12074 }
12075
12076 /* scan_inputsymbol
12077    takes: current position in input buffer
12078    returns: new position in input buffer
12079    side-effects: pl_yylval and lex_op are set.
12080
12081    This code handles:
12082
12083    <>           read from ARGV
12084    <FH>         read from filehandle
12085    <pkg::FH>    read from package qualified filehandle
12086    <pkg'FH>     read from package qualified filehandle
12087    <$fh>        read from filehandle in $fh
12088    <*.h>        filename glob
12089
12090 */
12091
12092 STATIC char *
12093 S_scan_inputsymbol(pTHX_ char *start)
12094 {
12095     dVAR;
12096     register char *s = start;           /* current position in buffer */
12097     char *end;
12098     I32 len;
12099     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12100     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12101
12102     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12103
12104     end = strchr(s, '\n');
12105     if (!end)
12106         end = PL_bufend;
12107     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12108
12109     /* die if we didn't have space for the contents of the <>,
12110        or if it didn't end, or if we see a newline
12111     */
12112
12113     if (len >= (I32)sizeof PL_tokenbuf)
12114         Perl_croak(aTHX_ "Excessively long <> operator");
12115     if (s >= end)
12116         Perl_croak(aTHX_ "Unterminated <> operator");
12117
12118     s++;
12119
12120     /* check for <$fh>
12121        Remember, only scalar variables are interpreted as filehandles by
12122        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12123        treated as a glob() call.
12124        This code makes use of the fact that except for the $ at the front,
12125        a scalar variable and a filehandle look the same.
12126     */
12127     if (*d == '$' && d[1]) d++;
12128
12129     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12130     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12131         d++;
12132
12133     /* If we've tried to read what we allow filehandles to look like, and
12134        there's still text left, then it must be a glob() and not a getline.
12135        Use scan_str to pull out the stuff between the <> and treat it
12136        as nothing more than a string.
12137     */
12138
12139     if (d - PL_tokenbuf != len) {
12140         pl_yylval.ival = OP_GLOB;
12141         s = scan_str(start,!!PL_madskills,FALSE);
12142         if (!s)
12143            Perl_croak(aTHX_ "Glob not terminated");
12144         return s;
12145     }
12146     else {
12147         bool readline_overriden = FALSE;
12148         GV *gv_readline;
12149         GV **gvp;
12150         /* we're in a filehandle read situation */
12151         d = PL_tokenbuf;
12152
12153         /* turn <> into <ARGV> */
12154         if (!len)
12155             Copy("ARGV",d,5,char);
12156
12157         /* Check whether readline() is overriden */
12158         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12159         if ((gv_readline
12160                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12161                 ||
12162                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12163                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12164                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12165             readline_overriden = TRUE;
12166
12167         /* if <$fh>, create the ops to turn the variable into a
12168            filehandle
12169         */
12170         if (*d == '$') {
12171             /* try to find it in the pad for this block, otherwise find
12172                add symbol table ops
12173             */
12174             const PADOFFSET tmp = pad_findmy(d, len, 0);
12175             if (tmp != NOT_IN_PAD) {
12176                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12177                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12178                     HEK * const stashname = HvNAME_HEK(stash);
12179                     SV * const sym = sv_2mortal(newSVhek(stashname));
12180                     sv_catpvs(sym, "::");
12181                     sv_catpv(sym, d+1);
12182                     d = SvPVX(sym);
12183                     goto intro_sym;
12184                 }
12185                 else {
12186                     OP * const o = newOP(OP_PADSV, 0);
12187                     o->op_targ = tmp;
12188                     PL_lex_op = readline_overriden
12189                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12190                                 append_elem(OP_LIST, o,
12191                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12192                         : (OP*)newUNOP(OP_READLINE, 0, o);
12193                 }
12194             }
12195             else {
12196                 GV *gv;
12197                 ++d;
12198 intro_sym:
12199                 gv = gv_fetchpv(d,
12200                                 (PL_in_eval
12201                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12202                                  : GV_ADDMULTI),
12203                                 SVt_PV);
12204                 PL_lex_op = readline_overriden
12205                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12206                             append_elem(OP_LIST,
12207                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12208                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12209                     : (OP*)newUNOP(OP_READLINE, 0,
12210                             newUNOP(OP_RV2SV, 0,
12211                                 newGVOP(OP_GV, 0, gv)));
12212             }
12213             if (!readline_overriden)
12214                 PL_lex_op->op_flags |= OPf_SPECIAL;
12215             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12216             pl_yylval.ival = OP_NULL;
12217         }
12218
12219         /* If it's none of the above, it must be a literal filehandle
12220            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12221         else {
12222             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12223             PL_lex_op = readline_overriden
12224                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12225                         append_elem(OP_LIST,
12226                             newGVOP(OP_GV, 0, gv),
12227                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12228                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12229             pl_yylval.ival = OP_NULL;
12230         }
12231     }
12232
12233     return s;
12234 }
12235
12236
12237 /* scan_str
12238    takes: start position in buffer
12239           keep_quoted preserve \ on the embedded delimiter(s)
12240           keep_delims preserve the delimiters around the string
12241    returns: position to continue reading from buffer
12242    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12243         updates the read buffer.
12244
12245    This subroutine pulls a string out of the input.  It is called for:
12246         q               single quotes           q(literal text)
12247         '               single quotes           'literal text'
12248         qq              double quotes           qq(interpolate $here please)
12249         "               double quotes           "interpolate $here please"
12250         qx              backticks               qx(/bin/ls -l)
12251         `               backticks               `/bin/ls -l`
12252         qw              quote words             @EXPORT_OK = qw( func() $spam )
12253         m//             regexp match            m/this/
12254         s///            regexp substitute       s/this/that/
12255         tr///           string transliterate    tr/this/that/
12256         y///            string transliterate    y/this/that/
12257         ($*@)           sub prototypes          sub foo ($)
12258         (stuff)         sub attr parameters     sub foo : attr(stuff)
12259         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12260
12261    In most of these cases (all but <>, patterns and transliterate)
12262    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12263    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12264    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12265    calls scan_str().
12266
12267    It skips whitespace before the string starts, and treats the first
12268    character as the delimiter.  If the delimiter is one of ([{< then
12269    the corresponding "close" character )]}> is used as the closing
12270    delimiter.  It allows quoting of delimiters, and if the string has
12271    balanced delimiters ([{<>}]) it allows nesting.
12272
12273    On success, the SV with the resulting string is put into lex_stuff or,
12274    if that is already non-NULL, into lex_repl. The second case occurs only
12275    when parsing the RHS of the special constructs s/// and tr/// (y///).
12276    For convenience, the terminating delimiter character is stuffed into
12277    SvIVX of the SV.
12278 */
12279
12280 STATIC char *
12281 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12282 {
12283     dVAR;
12284     SV *sv;                             /* scalar value: string */
12285     const char *tmps;                   /* temp string, used for delimiter matching */
12286     register char *s = start;           /* current position in the buffer */
12287     register char term;                 /* terminating character */
12288     register char *to;                  /* current position in the sv's data */
12289     I32 brackets = 1;                   /* bracket nesting level */
12290     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12291     I32 termcode;                       /* terminating char. code */
12292     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12293     STRLEN termlen;                     /* length of terminating string */
12294     int last_off = 0;                   /* last position for nesting bracket */
12295 #ifdef PERL_MAD
12296     int stuffstart;
12297     char *tstart;
12298 #endif
12299
12300     PERL_ARGS_ASSERT_SCAN_STR;
12301
12302     /* skip space before the delimiter */
12303     if (isSPACE(*s)) {
12304         s = PEEKSPACE(s);
12305     }
12306
12307 #ifdef PERL_MAD
12308     if (PL_realtokenstart >= 0) {
12309         stuffstart = PL_realtokenstart;
12310         PL_realtokenstart = -1;
12311     }
12312     else
12313         stuffstart = start - SvPVX(PL_linestr);
12314 #endif
12315     /* mark where we are, in case we need to report errors */
12316     CLINE;
12317
12318     /* after skipping whitespace, the next character is the terminator */
12319     term = *s;
12320     if (!UTF) {
12321         termcode = termstr[0] = term;
12322         termlen = 1;
12323     }
12324     else {
12325         termcode = utf8_to_uvchr((U8*)s, &termlen);
12326         Copy(s, termstr, termlen, U8);
12327         if (!UTF8_IS_INVARIANT(term))
12328             has_utf8 = TRUE;
12329     }
12330
12331     /* mark where we are */
12332     PL_multi_start = CopLINE(PL_curcop);
12333     PL_multi_open = term;
12334
12335     /* find corresponding closing delimiter */
12336     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12337         termcode = termstr[0] = term = tmps[5];
12338
12339     PL_multi_close = term;
12340
12341     /* create a new SV to hold the contents.  79 is the SV's initial length.
12342        What a random number. */
12343     sv = newSV_type(SVt_PVIV);
12344     SvGROW(sv, 80);
12345     SvIV_set(sv, termcode);
12346     (void)SvPOK_only(sv);               /* validate pointer */
12347
12348     /* move past delimiter and try to read a complete string */
12349     if (keep_delims)
12350         sv_catpvn(sv, s, termlen);
12351     s += termlen;
12352 #ifdef PERL_MAD
12353     tstart = SvPVX(PL_linestr) + stuffstart;
12354     if (!PL_thisopen && !keep_delims) {
12355         PL_thisopen = newSVpvn(tstart, s - tstart);
12356         stuffstart = s - SvPVX(PL_linestr);
12357     }
12358 #endif
12359     for (;;) {
12360         if (PL_encoding && !UTF) {
12361             bool cont = TRUE;
12362
12363             while (cont) {
12364                 int offset = s - SvPVX_const(PL_linestr);
12365                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12366                                            &offset, (char*)termstr, termlen);
12367                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12368                 char * const svlast = SvEND(sv) - 1;
12369
12370                 for (; s < ns; s++) {
12371                     if (*s == '\n' && !PL_rsfp)
12372                         CopLINE_inc(PL_curcop);
12373                 }
12374                 if (!found)
12375                     goto read_more_line;
12376                 else {
12377                     /* handle quoted delimiters */
12378                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12379                         const char *t;
12380                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12381                             t--;
12382                         if ((svlast-1 - t) % 2) {
12383                             if (!keep_quoted) {
12384                                 *(svlast-1) = term;
12385                                 *svlast = '\0';
12386                                 SvCUR_set(sv, SvCUR(sv) - 1);
12387                             }
12388                             continue;
12389                         }
12390                     }
12391                     if (PL_multi_open == PL_multi_close) {
12392                         cont = FALSE;
12393                     }
12394                     else {
12395                         const char *t;
12396                         char *w;
12397                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12398                             /* At here, all closes are "was quoted" one,
12399                                so we don't check PL_multi_close. */
12400                             if (*t == '\\') {
12401                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12402                                     t++;
12403                                 else
12404                                     *w++ = *t++;
12405                             }
12406                             else if (*t == PL_multi_open)
12407                                 brackets++;
12408
12409                             *w = *t;
12410                         }
12411                         if (w < t) {
12412                             *w++ = term;
12413                             *w = '\0';
12414                             SvCUR_set(sv, w - SvPVX_const(sv));
12415                         }
12416                         last_off = w - SvPVX(sv);
12417                         if (--brackets <= 0)
12418                             cont = FALSE;
12419                     }
12420                 }
12421             }
12422             if (!keep_delims) {
12423                 SvCUR_set(sv, SvCUR(sv) - 1);
12424                 *SvEND(sv) = '\0';
12425             }
12426             break;
12427         }
12428
12429         /* extend sv if need be */
12430         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12431         /* set 'to' to the next character in the sv's string */
12432         to = SvPVX(sv)+SvCUR(sv);
12433
12434         /* if open delimiter is the close delimiter read unbridle */
12435         if (PL_multi_open == PL_multi_close) {
12436             for (; s < PL_bufend; s++,to++) {
12437                 /* embedded newlines increment the current line number */
12438                 if (*s == '\n' && !PL_rsfp)
12439                     CopLINE_inc(PL_curcop);
12440                 /* handle quoted delimiters */
12441                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12442                     if (!keep_quoted && s[1] == term)
12443                         s++;
12444                 /* any other quotes are simply copied straight through */
12445                     else
12446                         *to++ = *s++;
12447                 }
12448                 /* terminate when run out of buffer (the for() condition), or
12449                    have found the terminator */
12450                 else if (*s == term) {
12451                     if (termlen == 1)
12452                         break;
12453                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12454                         break;
12455                 }
12456                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12457                     has_utf8 = TRUE;
12458                 *to = *s;
12459             }
12460         }
12461
12462         /* if the terminator isn't the same as the start character (e.g.,
12463            matched brackets), we have to allow more in the quoting, and
12464            be prepared for nested brackets.
12465         */
12466         else {
12467             /* read until we run out of string, or we find the terminator */
12468             for (; s < PL_bufend; s++,to++) {
12469                 /* embedded newlines increment the line count */
12470                 if (*s == '\n' && !PL_rsfp)
12471                     CopLINE_inc(PL_curcop);
12472                 /* backslashes can escape the open or closing characters */
12473                 if (*s == '\\' && s+1 < PL_bufend) {
12474                     if (!keep_quoted &&
12475                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12476                         s++;
12477                     else
12478                         *to++ = *s++;
12479                 }
12480                 /* allow nested opens and closes */
12481                 else if (*s == PL_multi_close && --brackets <= 0)
12482                     break;
12483                 else if (*s == PL_multi_open)
12484                     brackets++;
12485                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12486                     has_utf8 = TRUE;
12487                 *to = *s;
12488             }
12489         }
12490         /* terminate the copied string and update the sv's end-of-string */
12491         *to = '\0';
12492         SvCUR_set(sv, to - SvPVX_const(sv));
12493
12494         /*
12495          * this next chunk reads more into the buffer if we're not done yet
12496          */
12497
12498         if (s < PL_bufend)
12499             break;              /* handle case where we are done yet :-) */
12500
12501 #ifndef PERL_STRICT_CR
12502         if (to - SvPVX_const(sv) >= 2) {
12503             if ((to[-2] == '\r' && to[-1] == '\n') ||
12504                 (to[-2] == '\n' && to[-1] == '\r'))
12505             {
12506                 to[-2] = '\n';
12507                 to--;
12508                 SvCUR_set(sv, to - SvPVX_const(sv));
12509             }
12510             else if (to[-1] == '\r')
12511                 to[-1] = '\n';
12512         }
12513         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12514             to[-1] = '\n';
12515 #endif
12516
12517      read_more_line:
12518         /* if we're out of file, or a read fails, bail and reset the current
12519            line marker so we can report where the unterminated string began
12520         */
12521 #ifdef PERL_MAD
12522         if (PL_madskills) {
12523             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12524             if (PL_thisstuff)
12525                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12526             else
12527                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12528         }
12529 #endif
12530         CopLINE_inc(PL_curcop);
12531         PL_bufptr = PL_bufend;
12532         if (!lex_next_chunk(0)) {
12533             sv_free(sv);
12534             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12535             return NULL;
12536         }
12537         s = PL_bufptr;
12538 #ifdef PERL_MAD
12539         stuffstart = 0;
12540 #endif
12541     }
12542
12543     /* at this point, we have successfully read the delimited string */
12544
12545     if (!PL_encoding || UTF) {
12546 #ifdef PERL_MAD
12547         if (PL_madskills) {
12548             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12549             const int len = s - tstart;
12550             if (PL_thisstuff)
12551                 sv_catpvn(PL_thisstuff, tstart, len);
12552             else
12553                 PL_thisstuff = newSVpvn(tstart, len);
12554             if (!PL_thisclose && !keep_delims)
12555                 PL_thisclose = newSVpvn(s,termlen);
12556         }
12557 #endif
12558
12559         if (keep_delims)
12560             sv_catpvn(sv, s, termlen);
12561         s += termlen;
12562     }
12563 #ifdef PERL_MAD
12564     else {
12565         if (PL_madskills) {
12566             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12567             const int len = s - tstart - termlen;
12568             if (PL_thisstuff)
12569                 sv_catpvn(PL_thisstuff, tstart, len);
12570             else
12571                 PL_thisstuff = newSVpvn(tstart, len);
12572             if (!PL_thisclose && !keep_delims)
12573                 PL_thisclose = newSVpvn(s - termlen,termlen);
12574         }
12575     }
12576 #endif
12577     if (has_utf8 || PL_encoding)
12578         SvUTF8_on(sv);
12579
12580     PL_multi_end = CopLINE(PL_curcop);
12581
12582     /* if we allocated too much space, give some back */
12583     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12584         SvLEN_set(sv, SvCUR(sv) + 1);
12585         SvPV_renew(sv, SvLEN(sv));
12586     }
12587
12588     /* decide whether this is the first or second quoted string we've read
12589        for this op
12590     */
12591
12592     if (PL_lex_stuff)
12593         PL_lex_repl = sv;
12594     else
12595         PL_lex_stuff = sv;
12596     return s;
12597 }
12598
12599 /*
12600   scan_num
12601   takes: pointer to position in buffer
12602   returns: pointer to new position in buffer
12603   side-effects: builds ops for the constant in pl_yylval.op
12604
12605   Read a number in any of the formats that Perl accepts:
12606
12607   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12608   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12609   0b[01](_?[01])*
12610   0[0-7](_?[0-7])*
12611   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12612
12613   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12614   thing it reads.
12615
12616   If it reads a number without a decimal point or an exponent, it will
12617   try converting the number to an integer and see if it can do so
12618   without loss of precision.
12619 */
12620
12621 char *
12622 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12623 {
12624     dVAR;
12625     register const char *s = start;     /* current position in buffer */
12626     register char *d;                   /* destination in temp buffer */
12627     register char *e;                   /* end of temp buffer */
12628     NV nv;                              /* number read, as a double */
12629     SV *sv = NULL;                      /* place to put the converted number */
12630     bool floatit;                       /* boolean: int or float? */
12631     const char *lastub = NULL;          /* position of last underbar */
12632     static char const number_too_long[] = "Number too long";
12633
12634     PERL_ARGS_ASSERT_SCAN_NUM;
12635
12636     /* We use the first character to decide what type of number this is */
12637
12638     switch (*s) {
12639     default:
12640       Perl_croak(aTHX_ "panic: scan_num");
12641
12642     /* if it starts with a 0, it could be an octal number, a decimal in
12643        0.13 disguise, or a hexadecimal number, or a binary number. */
12644     case '0':
12645         {
12646           /* variables:
12647              u          holds the "number so far"
12648              shift      the power of 2 of the base
12649                         (hex == 4, octal == 3, binary == 1)
12650              overflowed was the number more than we can hold?
12651
12652              Shift is used when we add a digit.  It also serves as an "are
12653              we in octal/hex/binary?" indicator to disallow hex characters
12654              when in octal mode.
12655            */
12656             NV n = 0.0;
12657             UV u = 0;
12658             I32 shift;
12659             bool overflowed = FALSE;
12660             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12661             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12662             static const char* const bases[5] =
12663               { "", "binary", "", "octal", "hexadecimal" };
12664             static const char* const Bases[5] =
12665               { "", "Binary", "", "Octal", "Hexadecimal" };
12666             static const char* const maxima[5] =
12667               { "",
12668                 "0b11111111111111111111111111111111",
12669                 "",
12670                 "037777777777",
12671                 "0xffffffff" };
12672             const char *base, *Base, *max;
12673
12674             /* check for hex */
12675             if (s[1] == 'x') {
12676                 shift = 4;
12677                 s += 2;
12678                 just_zero = FALSE;
12679             } else if (s[1] == 'b') {
12680                 shift = 1;
12681                 s += 2;
12682                 just_zero = FALSE;
12683             }
12684             /* check for a decimal in disguise */
12685             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12686                 goto decimal;
12687             /* so it must be octal */
12688             else {
12689                 shift = 3;
12690                 s++;
12691             }
12692
12693             if (*s == '_') {
12694                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12695                                "Misplaced _ in number");
12696                lastub = s++;
12697             }
12698
12699             base = bases[shift];
12700             Base = Bases[shift];
12701             max  = maxima[shift];
12702
12703             /* read the rest of the number */
12704             for (;;) {
12705                 /* x is used in the overflow test,
12706                    b is the digit we're adding on. */
12707                 UV x, b;
12708
12709                 switch (*s) {
12710
12711                 /* if we don't mention it, we're done */
12712                 default:
12713                     goto out;
12714
12715                 /* _ are ignored -- but warned about if consecutive */
12716                 case '_':
12717                     if (lastub && s == lastub + 1)
12718                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12719                                        "Misplaced _ in number");
12720                     lastub = s++;
12721                     break;
12722
12723                 /* 8 and 9 are not octal */
12724                 case '8': case '9':
12725                     if (shift == 3)
12726                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12727                     /* FALL THROUGH */
12728
12729                 /* octal digits */
12730                 case '2': case '3': case '4':
12731                 case '5': case '6': case '7':
12732                     if (shift == 1)
12733                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12734                     /* FALL THROUGH */
12735
12736                 case '0': case '1':
12737                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12738                     goto digit;
12739
12740                 /* hex digits */
12741                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12742                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12743                     /* make sure they said 0x */
12744                     if (shift != 4)
12745                         goto out;
12746                     b = (*s++ & 7) + 9;
12747
12748                     /* Prepare to put the digit we have onto the end
12749                        of the number so far.  We check for overflows.
12750                     */
12751
12752                   digit:
12753                     just_zero = FALSE;
12754                     if (!overflowed) {
12755                         x = u << shift; /* make room for the digit */
12756
12757                         if ((x >> shift) != u
12758                             && !(PL_hints & HINT_NEW_BINARY)) {
12759                             overflowed = TRUE;
12760                             n = (NV) u;
12761                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12762                                              "Integer overflow in %s number",
12763                                              base);
12764                         } else
12765                             u = x | b;          /* add the digit to the end */
12766                     }
12767                     if (overflowed) {
12768                         n *= nvshift[shift];
12769                         /* If an NV has not enough bits in its
12770                          * mantissa to represent an UV this summing of
12771                          * small low-order numbers is a waste of time
12772                          * (because the NV cannot preserve the
12773                          * low-order bits anyway): we could just
12774                          * remember when did we overflow and in the
12775                          * end just multiply n by the right
12776                          * amount. */
12777                         n += (NV) b;
12778                     }
12779                     break;
12780                 }
12781             }
12782
12783           /* if we get here, we had success: make a scalar value from
12784              the number.
12785           */
12786           out:
12787
12788             /* final misplaced underbar check */
12789             if (s[-1] == '_') {
12790                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12791             }
12792
12793             sv = newSV(0);
12794             if (overflowed) {
12795                 if (n > 4294967295.0)
12796                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12797                                    "%s number > %s non-portable",
12798                                    Base, max);
12799                 sv_setnv(sv, n);
12800             }
12801             else {
12802 #if UVSIZE > 4
12803                 if (u > 0xffffffff)
12804                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12805                                    "%s number > %s non-portable",
12806                                    Base, max);
12807 #endif
12808                 sv_setuv(sv, u);
12809             }
12810             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12811                 sv = new_constant(start, s - start, "integer",
12812                                   sv, NULL, NULL, 0);
12813             else if (PL_hints & HINT_NEW_BINARY)
12814                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12815         }
12816         break;
12817
12818     /*
12819       handle decimal numbers.
12820       we're also sent here when we read a 0 as the first digit
12821     */
12822     case '1': case '2': case '3': case '4': case '5':
12823     case '6': case '7': case '8': case '9': case '.':
12824       decimal:
12825         d = PL_tokenbuf;
12826         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12827         floatit = FALSE;
12828
12829         /* read next group of digits and _ and copy into d */
12830         while (isDIGIT(*s) || *s == '_') {
12831             /* skip underscores, checking for misplaced ones
12832                if -w is on
12833             */
12834             if (*s == '_') {
12835                 if (lastub && s == lastub + 1)
12836                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12837                                    "Misplaced _ in number");
12838                 lastub = s++;
12839             }
12840             else {
12841                 /* check for end of fixed-length buffer */
12842                 if (d >= e)
12843                     Perl_croak(aTHX_ number_too_long);
12844                 /* if we're ok, copy the character */
12845                 *d++ = *s++;
12846             }
12847         }
12848
12849         /* final misplaced underbar check */
12850         if (lastub && s == lastub + 1) {
12851             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12852         }
12853
12854         /* read a decimal portion if there is one.  avoid
12855            3..5 being interpreted as the number 3. followed
12856            by .5
12857         */
12858         if (*s == '.' && s[1] != '.') {
12859             floatit = TRUE;
12860             *d++ = *s++;
12861
12862             if (*s == '_') {
12863                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12864                                "Misplaced _ in number");
12865                 lastub = s;
12866             }
12867
12868             /* copy, ignoring underbars, until we run out of digits.
12869             */
12870             for (; isDIGIT(*s) || *s == '_'; s++) {
12871                 /* fixed length buffer check */
12872                 if (d >= e)
12873                     Perl_croak(aTHX_ number_too_long);
12874                 if (*s == '_') {
12875                    if (lastub && s == lastub + 1)
12876                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12877                                       "Misplaced _ in number");
12878                    lastub = s;
12879                 }
12880                 else
12881                     *d++ = *s;
12882             }
12883             /* fractional part ending in underbar? */
12884             if (s[-1] == '_') {
12885                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12886                                "Misplaced _ in number");
12887             }
12888             if (*s == '.' && isDIGIT(s[1])) {
12889                 /* oops, it's really a v-string, but without the "v" */
12890                 s = start;
12891                 goto vstring;
12892             }
12893         }
12894
12895         /* read exponent part, if present */
12896         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12897             floatit = TRUE;
12898             s++;
12899
12900             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12901             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12902
12903             /* stray preinitial _ */
12904             if (*s == '_') {
12905                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12906                                "Misplaced _ in number");
12907                 lastub = s++;
12908             }
12909
12910             /* allow positive or negative exponent */
12911             if (*s == '+' || *s == '-')
12912                 *d++ = *s++;
12913
12914             /* stray initial _ */
12915             if (*s == '_') {
12916                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12917                                "Misplaced _ in number");
12918                 lastub = s++;
12919             }
12920
12921             /* read digits of exponent */
12922             while (isDIGIT(*s) || *s == '_') {
12923                 if (isDIGIT(*s)) {
12924                     if (d >= e)
12925                         Perl_croak(aTHX_ number_too_long);
12926                     *d++ = *s++;
12927                 }
12928                 else {
12929                    if (((lastub && s == lastub + 1) ||
12930                         (!isDIGIT(s[1]) && s[1] != '_')))
12931                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12932                                       "Misplaced _ in number");
12933                    lastub = s++;
12934                 }
12935             }
12936         }
12937
12938
12939         /* make an sv from the string */
12940         sv = newSV(0);
12941
12942         /*
12943            We try to do an integer conversion first if no characters
12944            indicating "float" have been found.
12945          */
12946
12947         if (!floatit) {
12948             UV uv;
12949             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12950
12951             if (flags == IS_NUMBER_IN_UV) {
12952               if (uv <= IV_MAX)
12953                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12954               else
12955                 sv_setuv(sv, uv);
12956             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12957               if (uv <= (UV) IV_MIN)
12958                 sv_setiv(sv, -(IV)uv);
12959               else
12960                 floatit = TRUE;
12961             } else
12962               floatit = TRUE;
12963         }
12964         if (floatit) {
12965             /* terminate the string */
12966             *d = '\0';
12967             nv = Atof(PL_tokenbuf);
12968             sv_setnv(sv, nv);
12969         }
12970
12971         if ( floatit
12972              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12973             const char *const key = floatit ? "float" : "integer";
12974             const STRLEN keylen = floatit ? 5 : 7;
12975             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12976                                 key, keylen, sv, NULL, NULL, 0);
12977         }
12978         break;
12979
12980     /* if it starts with a v, it could be a v-string */
12981     case 'v':
12982 vstring:
12983                 sv = newSV(5); /* preallocate storage space */
12984                 s = scan_vstring(s, PL_bufend, sv);
12985         break;
12986     }
12987
12988     /* make the op for the constant and return */
12989
12990     if (sv)
12991         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12992     else
12993         lvalp->opval = NULL;
12994
12995     return (char *)s;
12996 }
12997
12998 STATIC char *
12999 S_scan_formline(pTHX_ register char *s)
13000 {
13001     dVAR;
13002     register char *eol;
13003     register char *t;
13004     SV * const stuff = newSVpvs("");
13005     bool needargs = FALSE;
13006     bool eofmt = FALSE;
13007 #ifdef PERL_MAD
13008     char *tokenstart = s;
13009     SV* savewhite = NULL;
13010
13011     if (PL_madskills) {
13012         savewhite = PL_thiswhite;
13013         PL_thiswhite = 0;
13014     }
13015 #endif
13016
13017     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13018
13019     while (!needargs) {
13020         if (*s == '.') {
13021             t = s+1;
13022 #ifdef PERL_STRICT_CR
13023             while (SPACE_OR_TAB(*t))
13024                 t++;
13025 #else
13026             while (SPACE_OR_TAB(*t) || *t == '\r')
13027                 t++;
13028 #endif
13029             if (*t == '\n' || t == PL_bufend) {
13030                 eofmt = TRUE;
13031                 break;
13032             }
13033         }
13034         if (PL_in_eval && !PL_rsfp) {
13035             eol = (char *) memchr(s,'\n',PL_bufend-s);
13036             if (!eol++)
13037                 eol = PL_bufend;
13038         }
13039         else
13040             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13041         if (*s != '#') {
13042             for (t = s; t < eol; t++) {
13043                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13044                     needargs = FALSE;
13045                     goto enough;        /* ~~ must be first line in formline */
13046                 }
13047                 if (*t == '@' || *t == '^')
13048                     needargs = TRUE;
13049             }
13050             if (eol > s) {
13051                 sv_catpvn(stuff, s, eol-s);
13052 #ifndef PERL_STRICT_CR
13053                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13054                     char *end = SvPVX(stuff) + SvCUR(stuff);
13055                     end[-2] = '\n';
13056                     end[-1] = '\0';
13057                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13058                 }
13059 #endif
13060             }
13061             else
13062               break;
13063         }
13064         s = (char*)eol;
13065         if (PL_rsfp) {
13066             bool got_some;
13067 #ifdef PERL_MAD
13068             if (PL_madskills) {
13069                 if (PL_thistoken)
13070                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13071                 else
13072                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13073             }
13074 #endif
13075             PL_bufptr = PL_bufend;
13076             CopLINE_inc(PL_curcop);
13077             got_some = lex_next_chunk(0);
13078             CopLINE_dec(PL_curcop);
13079             s = PL_bufptr;
13080 #ifdef PERL_MAD
13081             tokenstart = PL_bufptr;
13082 #endif
13083             if (!got_some)
13084                 break;
13085         }
13086         incline(s);
13087     }
13088   enough:
13089     if (SvCUR(stuff)) {
13090         PL_expect = XTERM;
13091         if (needargs) {
13092             PL_lex_state = LEX_NORMAL;
13093             start_force(PL_curforce);
13094             NEXTVAL_NEXTTOKE.ival = 0;
13095             force_next(',');
13096         }
13097         else
13098             PL_lex_state = LEX_FORMLINE;
13099         if (!IN_BYTES) {
13100             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13101                 SvUTF8_on(stuff);
13102             else if (PL_encoding)
13103                 sv_recode_to_utf8(stuff, PL_encoding);
13104         }
13105         start_force(PL_curforce);
13106         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13107         force_next(THING);
13108         start_force(PL_curforce);
13109         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13110         force_next(LSTOP);
13111     }
13112     else {
13113         SvREFCNT_dec(stuff);
13114         if (eofmt)
13115             PL_lex_formbrack = 0;
13116         PL_bufptr = s;
13117     }
13118 #ifdef PERL_MAD
13119     if (PL_madskills) {
13120         if (PL_thistoken)
13121             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13122         else
13123             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13124         PL_thiswhite = savewhite;
13125     }
13126 #endif
13127     return s;
13128 }
13129
13130 I32
13131 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13132 {
13133     dVAR;
13134     const I32 oldsavestack_ix = PL_savestack_ix;
13135     CV* const outsidecv = PL_compcv;
13136
13137     if (PL_compcv) {
13138         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13139     }
13140     SAVEI32(PL_subline);
13141     save_item(PL_subname);
13142     SAVESPTR(PL_compcv);
13143
13144     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13145     CvFLAGS(PL_compcv) |= flags;
13146
13147     PL_subline = CopLINE(PL_curcop);
13148     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13149     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13150     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13151
13152     return oldsavestack_ix;
13153 }
13154
13155 #ifdef __SC__
13156 #pragma segment Perl_yylex
13157 #endif
13158 static int
13159 S_yywarn(pTHX_ const char *const s)
13160 {
13161     dVAR;
13162
13163     PERL_ARGS_ASSERT_YYWARN;
13164
13165     PL_in_eval |= EVAL_WARNONLY;
13166     yyerror(s);
13167     PL_in_eval &= ~EVAL_WARNONLY;
13168     return 0;
13169 }
13170
13171 int
13172 Perl_yyerror(pTHX_ const char *const s)
13173 {
13174     dVAR;
13175     const char *where = NULL;
13176     const char *context = NULL;
13177     int contlen = -1;
13178     SV *msg;
13179     int yychar  = PL_parser->yychar;
13180
13181     PERL_ARGS_ASSERT_YYERROR;
13182
13183     if (!yychar || (yychar == ';' && !PL_rsfp))
13184         where = "at EOF";
13185     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13186       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13187       PL_oldbufptr != PL_bufptr) {
13188         /*
13189                 Only for NetWare:
13190                 The code below is removed for NetWare because it abends/crashes on NetWare
13191                 when the script has error such as not having the closing quotes like:
13192                     if ($var eq "value)
13193                 Checking of white spaces is anyway done in NetWare code.
13194         */
13195 #ifndef NETWARE
13196         while (isSPACE(*PL_oldoldbufptr))
13197             PL_oldoldbufptr++;
13198 #endif
13199         context = PL_oldoldbufptr;
13200         contlen = PL_bufptr - PL_oldoldbufptr;
13201     }
13202     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13203       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13204         /*
13205                 Only for NetWare:
13206                 The code below is removed for NetWare because it abends/crashes on NetWare
13207                 when the script has error such as not having the closing quotes like:
13208                     if ($var eq "value)
13209                 Checking of white spaces is anyway done in NetWare code.
13210         */
13211 #ifndef NETWARE
13212         while (isSPACE(*PL_oldbufptr))
13213             PL_oldbufptr++;
13214 #endif
13215         context = PL_oldbufptr;
13216         contlen = PL_bufptr - PL_oldbufptr;
13217     }
13218     else if (yychar > 255)
13219         where = "next token ???";
13220     else if (yychar == -2) { /* YYEMPTY */
13221         if (PL_lex_state == LEX_NORMAL ||
13222            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13223             where = "at end of line";
13224         else if (PL_lex_inpat)
13225             where = "within pattern";
13226         else
13227             where = "within string";
13228     }
13229     else {
13230         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13231         if (yychar < 32)
13232             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13233         else if (isPRINT_LC(yychar)) {
13234             const char string = yychar;
13235             sv_catpvn(where_sv, &string, 1);
13236         }
13237         else
13238             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13239         where = SvPVX_const(where_sv);
13240     }
13241     msg = sv_2mortal(newSVpv(s, 0));
13242     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13243         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13244     if (context)
13245         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13246     else
13247         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13248     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13249         Perl_sv_catpvf(aTHX_ msg,
13250         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13251                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13252         PL_multi_end = 0;
13253     }
13254     if (PL_in_eval & EVAL_WARNONLY) {
13255         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13256     }
13257     else
13258         qerror(msg);
13259     if (PL_error_count >= 10) {
13260         if (PL_in_eval && SvCUR(ERRSV))
13261             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13262                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13263         else
13264             Perl_croak(aTHX_ "%s has too many errors.\n",
13265             OutCopFILE(PL_curcop));
13266     }
13267     PL_in_my = 0;
13268     PL_in_my_stash = NULL;
13269     return 0;
13270 }
13271 #ifdef __SC__
13272 #pragma segment Main
13273 #endif
13274
13275 STATIC char*
13276 S_swallow_bom(pTHX_ U8 *s)
13277 {
13278     dVAR;
13279     const STRLEN slen = SvCUR(PL_linestr);
13280
13281     PERL_ARGS_ASSERT_SWALLOW_BOM;
13282
13283     switch (s[0]) {
13284     case 0xFF:
13285         if (s[1] == 0xFE) {
13286             /* UTF-16 little-endian? (or UTF32-LE?) */
13287             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13288                 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
13289 #ifndef PERL_NO_UTF16_FILTER
13290             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
13291             s += 2;
13292             if (PL_bufend > (char*)s) {
13293                 s = add_utf16_textfilter(s, TRUE);
13294             }
13295 #else
13296             Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
13297 #endif
13298         }
13299         break;
13300     case 0xFE:
13301         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13302 #ifndef PERL_NO_UTF16_FILTER
13303             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13304             s += 2;
13305             if (PL_bufend > (char *)s) {
13306                 s = add_utf16_textfilter(s, FALSE);
13307             }
13308 #else
13309             Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
13310 #endif
13311         }
13312         break;
13313     case 0xEF:
13314         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13315             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13316             s += 3;                      /* UTF-8 */
13317         }
13318         break;
13319     case 0:
13320         if (slen > 3) {
13321              if (s[1] == 0) {
13322                   if (s[2] == 0xFE && s[3] == 0xFF) {
13323                        /* UTF-32 big-endian */
13324                        Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
13325                   }
13326              }
13327              else if (s[2] == 0 && s[3] != 0) {
13328                   /* Leading bytes
13329                    * 00 xx 00 xx
13330                    * are a good indicator of UTF-16BE. */
13331                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13332                 s = add_utf16_textfilter(s, FALSE);
13333              }
13334         }
13335 #ifdef EBCDIC
13336     case 0xDD:
13337         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13338             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13339             s += 4;                      /* UTF-8 */
13340         }
13341         break;
13342 #endif
13343
13344     default:
13345          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13346                   /* Leading bytes
13347                    * xx 00 xx 00
13348                    * are a good indicator of UTF-16LE. */
13349               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13350               s = add_utf16_textfilter(s, TRUE);
13351          }
13352     }
13353     return (char*)s;
13354 }
13355
13356
13357 #ifndef PERL_NO_UTF16_FILTER
13358 static I32
13359 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13360 {
13361     dVAR;
13362     SV *const filter = FILTER_DATA(idx);
13363     /* We re-use this each time round, throwing the contents away before we
13364        return.  */
13365     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13366     SV *const utf8_buffer = filter;
13367     IV status = IoPAGE(filter);
13368     const bool reverse = (bool) IoLINES(filter);
13369     I32 retval;
13370
13371     /* As we're automatically added, at the lowest level, and hence only called
13372        from this file, we can be sure that we're not called in block mode. Hence
13373        don't bother writing code to deal with block mode.  */
13374     if (maxlen) {
13375         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13376     }
13377     if (status < 0) {
13378         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13379     }
13380     DEBUG_P(PerlIO_printf(Perl_debug_log,
13381                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13382                           FPTR2DPTR(void *, S_utf16_textfilter),
13383                           reverse ? 'l' : 'b', idx, maxlen, status,
13384                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13385
13386     while (1) {
13387         STRLEN chars;
13388         STRLEN have;
13389         I32 newlen;
13390         U8 *end;
13391         /* First, look in our buffer of existing UTF-8 data:  */
13392         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13393
13394         if (nl) {
13395             ++nl;
13396         } else if (status == 0) {
13397             /* EOF */
13398             IoPAGE(filter) = 0;
13399             nl = SvEND(utf8_buffer);
13400         }
13401         if (nl) {
13402             STRLEN got = nl - SvPVX(utf8_buffer);
13403             /* Did we have anything to append?  */
13404             retval = got != 0;
13405             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13406             /* Everything else in this code works just fine if SVp_POK isn't
13407                set.  This, however, needs it, and we need it to work, else
13408                we loop infinitely because the buffer is never consumed.  */
13409             sv_chop(utf8_buffer, nl);
13410             break;
13411         }
13412
13413         /* OK, not a complete line there, so need to read some more UTF-16.
13414            Read an extra octect if the buffer currently has an odd number. */
13415         while (1) {
13416             if (status <= 0)
13417                 break;
13418             if (SvCUR(utf16_buffer) >= 2) {
13419                 /* Location of the high octet of the last complete code point.
13420                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13421                    *coupled* with all the benefits of partial reads and
13422                    endianness.  */
13423                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13424                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13425
13426                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13427                     break;
13428                 }
13429
13430                 /* We have the first half of a surrogate. Read more.  */
13431                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13432             }
13433
13434             status = FILTER_READ(idx + 1, utf16_buffer,
13435                                  160 + (SvCUR(utf16_buffer) & 1));
13436             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13437             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13438             if (status < 0) {
13439                 /* Error */
13440                 IoPAGE(filter) = status;
13441                 return status;
13442             }
13443         }
13444
13445         chars = SvCUR(utf16_buffer) >> 1;
13446         have = SvCUR(utf8_buffer);
13447         SvGROW(utf8_buffer, have + chars * 3 + 1);
13448
13449         if (reverse) {
13450             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13451                                          (U8*)SvPVX_const(utf8_buffer) + have,
13452                                          chars * 2, &newlen);
13453         } else {
13454             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13455                                 (U8*)SvPVX_const(utf8_buffer) + have,
13456                                 chars * 2, &newlen);
13457         }
13458         SvCUR_set(utf8_buffer, have + newlen);
13459         *end = '\0';
13460
13461         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13462            it's private to us, and utf16_to_utf8{,reversed} take a
13463            (pointer,length) pair, rather than a NUL-terminated string.  */
13464         if(SvCUR(utf16_buffer) & 1) {
13465             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13466             SvCUR_set(utf16_buffer, 1);
13467         } else {
13468             SvCUR_set(utf16_buffer, 0);
13469         }
13470     }
13471     DEBUG_P(PerlIO_printf(Perl_debug_log,
13472                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13473                           status,
13474                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13475     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13476     return retval;
13477 }
13478
13479 static U8 *
13480 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13481 {
13482     SV *filter = filter_add(S_utf16_textfilter, NULL);
13483
13484     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13485     sv_setpvs(filter, "");
13486     IoLINES(filter) = reversed;
13487     IoPAGE(filter) = 1; /* Not EOF */
13488
13489     /* Sadly, we have to return a valid pointer, come what may, so we have to
13490        ignore any error return from this.  */
13491     SvCUR_set(PL_linestr, 0);
13492     if (FILTER_READ(0, PL_linestr, 0)) {
13493         SvUTF8_on(PL_linestr);
13494     } else {
13495         SvUTF8_on(PL_linestr);
13496     }
13497     PL_bufend = SvEND(PL_linestr);
13498     return (U8*)SvPVX(PL_linestr);
13499 }
13500 #endif
13501
13502 /*
13503 Returns a pointer to the next character after the parsed
13504 vstring, as well as updating the passed in sv.
13505
13506 Function must be called like
13507
13508         sv = newSV(5);
13509         s = scan_vstring(s,e,sv);
13510
13511 where s and e are the start and end of the string.
13512 The sv should already be large enough to store the vstring
13513 passed in, for performance reasons.
13514
13515 */
13516
13517 char *
13518 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13519 {
13520     dVAR;
13521     const char *pos = s;
13522     const char *start = s;
13523
13524     PERL_ARGS_ASSERT_SCAN_VSTRING;
13525
13526     if (*pos == 'v') pos++;  /* get past 'v' */
13527     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13528         pos++;
13529     if ( *pos != '.') {
13530         /* this may not be a v-string if followed by => */
13531         const char *next = pos;
13532         while (next < e && isSPACE(*next))
13533             ++next;
13534         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13535             /* return string not v-string */
13536             sv_setpvn(sv,(char *)s,pos-s);
13537             return (char *)pos;
13538         }
13539     }
13540
13541     if (!isALPHA(*pos)) {
13542         U8 tmpbuf[UTF8_MAXBYTES+1];
13543
13544         if (*s == 'v')
13545             s++;  /* get past 'v' */
13546
13547         sv_setpvs(sv, "");
13548
13549         for (;;) {
13550             /* this is atoi() that tolerates underscores */
13551             U8 *tmpend;
13552             UV rev = 0;
13553             const char *end = pos;
13554             UV mult = 1;
13555             while (--end >= s) {
13556                 if (*end != '_') {
13557                     const UV orev = rev;
13558                     rev += (*end - '0') * mult;
13559                     mult *= 10;
13560                     if (orev > rev)
13561                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13562                                          "Integer overflow in decimal number");
13563                 }
13564             }
13565 #ifdef EBCDIC
13566             if (rev > 0x7FFFFFFF)
13567                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13568 #endif
13569             /* Append native character for the rev point */
13570             tmpend = uvchr_to_utf8(tmpbuf, rev);
13571             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13572             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13573                  SvUTF8_on(sv);
13574             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13575                  s = ++pos;
13576             else {
13577                  s = pos;
13578                  break;
13579             }
13580             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13581                  pos++;
13582         }
13583         SvPOK_on(sv);
13584         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13585         SvRMAGICAL_on(sv);
13586     }
13587     return (char *)s;
13588 }
13589
13590 int
13591 Perl_keyword_plugin_standard(pTHX_
13592         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13593 {
13594     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13595     PERL_UNUSED_CONTEXT;
13596     PERL_UNUSED_ARG(keyword_ptr);
13597     PERL_UNUSED_ARG(keyword_len);
13598     PERL_UNUSED_ARG(op_ptr);
13599     return KEYWORD_PLUGIN_DECLINE;
13600 }
13601
13602 /*
13603  * Local variables:
13604  * c-indentation-style: bsd
13605  * c-basic-offset: 4
13606  * indent-tabs-mode: t
13607  * End:
13608  *
13609  * ex: set ts=8 sts=4 sw=4 noet:
13610  */