toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("switch")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 940 {
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             PL_parser->bufend += len+highhalf;
 959             for (p = pv; p != e; p++) {
 960                 U8 c = (U8)*p;
 961                 if (c & 0x80) {
 962                     *bufptr++ = (char)(0xc0 | (c >> 6));
 963                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 964                 } else {
 965                     *bufptr++ = (char)c;
 966                 }
 967             }
 968         }
 969     } else {
 970         if (flags & LEX_STUFF_UTF8) {
 971             STRLEN highhalf = 0;
 972             char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 U8 c = (U8)*p;
 975                 if (c >= 0xc4) {
 976                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 977                                 "non-Latin-1 character into Latin-1 input");
 978                 } else if (c >= 0xc2 && p+1 != e &&
 979                             (((U8)p[1]) & 0xc0) == 0x80) {
 980                     p++;
 981                     highhalf++;
 982                 } else if (c >= 0x80) {
 983                     /* malformed UTF-8 */
 984                     ENTER;
 985                     SAVESPTR(PL_warnhook);
 986                     PL_warnhook = PERL_WARNHOOK_FATAL;
 987                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 988                     LEAVE;
 989                 }
 990             }
 991             if (!highhalf)
 992                 goto plain_copy;
 993             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 994             bufptr = PL_parser->bufptr;
 995             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 996             PL_parser->bufend += len-highhalf;
 997             for (p = pv; p != e; p++) {
 998                 U8 c = (U8)*p;
 999                 if (c & 0x80) {
1000                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001                     p++;
1002                 } else {
1003                     *bufptr++ = (char)c;
1004                 }
1005             }
1006         } else {
1007             plain_copy:
1008             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009             bufptr = PL_parser->bufptr;
1010             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some_for_debugger = 0;
1201     bool got_some;
1202     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1204     linestr = PL_parser->linestr;
1205     buf = SvPVX(linestr);
1206     if (!(flags & LEX_KEEP_PREVIOUS) &&
1207             PL_parser->bufptr == PL_parser->bufend) {
1208         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209         linestart_pos = 0;
1210         if (PL_parser->last_uni != PL_parser->bufend)
1211             PL_parser->last_uni = NULL;
1212         if (PL_parser->last_lop != PL_parser->bufend)
1213             PL_parser->last_lop = NULL;
1214         last_uni_pos = last_lop_pos = 0;
1215         *buf = 0;
1216         SvCUR(linestr) = 0;
1217     } else {
1218         old_bufend_pos = PL_parser->bufend - buf;
1219         bufptr_pos = PL_parser->bufptr - buf;
1220         oldbufptr_pos = PL_parser->oldbufptr - buf;
1221         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222         linestart_pos = PL_parser->linestart - buf;
1223         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225     }
1226     if (flags & LEX_FAKE_EOF) {
1227         goto eof;
1228     } else if (!PL_parser->rsfp) {
1229         got_some = 0;
1230     } else if (filter_gets(linestr, old_bufend_pos)) {
1231         got_some = 1;
1232         got_some_for_debugger = 1;
1233     } else {
1234         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1235             sv_setpvs(linestr, "");
1236         eof:
1237         /* End of real input.  Close filehandle (unless it was STDIN),
1238          * then add implicit termination.
1239          */
1240         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241             PerlIO_clearerr(PL_parser->rsfp);
1242         else if (PL_parser->rsfp)
1243             (void)PerlIO_close(PL_parser->rsfp);
1244         PL_parser->rsfp = NULL;
1245         PL_doextract = FALSE;
1246 #ifdef PERL_MAD
1247         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248             PL_faketokens = 1;
1249 #endif
1250         if (!PL_in_eval && PL_minus_p) {
1251             sv_catpvs(linestr,
1252                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253             PL_minus_n = PL_minus_p = 0;
1254         } else if (!PL_in_eval && PL_minus_n) {
1255             sv_catpvs(linestr, /*{*/";}");
1256             PL_minus_n = 0;
1257         } else
1258             sv_catpvs(linestr, ";");
1259         got_some = 1;
1260     }
1261     buf = SvPVX(linestr);
1262     new_bufend_pos = SvCUR(linestr);
1263     PL_parser->bufend = buf + new_bufend_pos;
1264     PL_parser->bufptr = buf + bufptr_pos;
1265     PL_parser->oldbufptr = buf + oldbufptr_pos;
1266     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267     PL_parser->linestart = buf + linestart_pos;
1268     if (PL_parser->last_uni)
1269         PL_parser->last_uni = buf + last_uni_pos;
1270     if (PL_parser->last_lop)
1271         PL_parser->last_lop = buf + last_lop_pos;
1272     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1273             PL_curstash != PL_debstash) {
1274         /* debugger active and we're not compiling the debugger code,
1275          * so store the line into the debugger's array of lines
1276          */
1277         update_debugger_info(NULL, buf+old_bufend_pos,
1278             new_bufend_pos-old_bufend_pos);
1279     }
1280     return got_some;
1281 }
1282
1283 /*
1284 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286 Looks ahead one (Unicode) character in the text currently being lexed.
1287 Returns the codepoint (unsigned integer value) of the next character,
1288 or -1 if lexing has reached the end of the input text.  To consume the
1289 peeked character, use L</lex_read_unichar>.
1290
1291 If the next character is in (or extends into) the next chunk of input
1292 text, the next chunk will be read in.  Normally the current chunk will be
1293 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294 then the current chunk will not be discarded.
1295
1296 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297 is encountered, an exception is generated.
1298
1299 =cut
1300 */
1301
1302 I32
1303 Perl_lex_peek_unichar(pTHX_ U32 flags)
1304 {
1305     char *s, *bufend;
1306     if (flags & ~(LEX_KEEP_PREVIOUS))
1307         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308     s = PL_parser->bufptr;
1309     bufend = PL_parser->bufend;
1310     if (UTF) {
1311         U8 head;
1312         I32 unichar;
1313         STRLEN len, retlen;
1314         if (s == bufend) {
1315             if (!lex_next_chunk(flags))
1316                 return -1;
1317             s = PL_parser->bufptr;
1318             bufend = PL_parser->bufend;
1319         }
1320         head = (U8)*s;
1321         if (!(head & 0x80))
1322             return head;
1323         if (head & 0x40) {
1324             len = PL_utf8skip[head];
1325             while ((STRLEN)(bufend-s) < len) {
1326                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327                     break;
1328                 s = PL_parser->bufptr;
1329                 bufend = PL_parser->bufend;
1330             }
1331         }
1332         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333         if (retlen == (STRLEN)-1) {
1334             /* malformed UTF-8 */
1335             ENTER;
1336             SAVESPTR(PL_warnhook);
1337             PL_warnhook = PERL_WARNHOOK_FATAL;
1338             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339             LEAVE;
1340         }
1341         return unichar;
1342     } else {
1343         if (s == bufend) {
1344             if (!lex_next_chunk(flags))
1345                 return -1;
1346             s = PL_parser->bufptr;
1347         }
1348         return (U8)*s;
1349     }
1350 }
1351
1352 /*
1353 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355 Reads the next (Unicode) character in the text currently being lexed.
1356 Returns the codepoint (unsigned integer value) of the character read,
1357 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358 if lexing has reached the end of the input text.  To non-destructively
1359 examine the next character, use L</lex_peek_unichar> instead.
1360
1361 If the next character is in (or extends into) the next chunk of input
1362 text, the next chunk will be read in.  Normally the current chunk will be
1363 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364 then the current chunk will not be discarded.
1365
1366 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367 is encountered, an exception is generated.
1368
1369 =cut
1370 */
1371
1372 I32
1373 Perl_lex_read_unichar(pTHX_ U32 flags)
1374 {
1375     I32 c;
1376     if (flags & ~(LEX_KEEP_PREVIOUS))
1377         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378     c = lex_peek_unichar(flags);
1379     if (c != -1) {
1380         if (c == '\n')
1381             CopLINE_inc(PL_curcop);
1382         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383     }
1384     return c;
1385 }
1386
1387 /*
1388 =for apidoc Amx|void|lex_read_space|U32 flags
1389
1390 Reads optional spaces, in Perl style, in the text currently being
1391 lexed.  The spaces may include ordinary whitespace characters and
1392 Perl-style comments.  C<#line> directives are processed if encountered.
1393 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394 at a non-space character (or the end of the input text).
1395
1396 If spaces extend into the next chunk of input text, the next chunk will
1397 be read in.  Normally the current chunk will be discarded at the same
1398 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399 chunk will not be discarded.
1400
1401 =cut
1402 */
1403
1404 void
1405 Perl_lex_read_space(pTHX_ U32 flags)
1406 {
1407     char *s, *bufend;
1408     bool need_incline = 0;
1409     if (flags & ~(LEX_KEEP_PREVIOUS))
1410         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1411 #ifdef PERL_MAD
1412     if (PL_skipwhite) {
1413         sv_free(PL_skipwhite);
1414         PL_skipwhite = NULL;
1415     }
1416     if (PL_madskills)
1417         PL_skipwhite = newSVpvs("");
1418 #endif /* PERL_MAD */
1419     s = PL_parser->bufptr;
1420     bufend = PL_parser->bufend;
1421     while (1) {
1422         char c = *s;
1423         if (c == '#') {
1424             do {
1425                 c = *++s;
1426             } while (!(c == '\n' || (c == 0 && s == bufend)));
1427         } else if (c == '\n') {
1428             s++;
1429             PL_parser->linestart = s;
1430             if (s == bufend)
1431                 need_incline = 1;
1432             else
1433                 incline(s);
1434         } else if (isSPACE(c)) {
1435             s++;
1436         } else if (c == 0 && s == bufend) {
1437             bool got_more;
1438 #ifdef PERL_MAD
1439             if (PL_madskills)
1440                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1441 #endif /* PERL_MAD */
1442             PL_parser->bufptr = s;
1443             CopLINE_inc(PL_curcop);
1444             got_more = lex_next_chunk(flags);
1445             CopLINE_dec(PL_curcop);
1446             s = PL_parser->bufptr;
1447             bufend = PL_parser->bufend;
1448             if (!got_more)
1449                 break;
1450             if (need_incline && PL_parser->rsfp) {
1451                 incline(s);
1452                 need_incline = 0;
1453             }
1454         } else {
1455             break;
1456         }
1457     }
1458 #ifdef PERL_MAD
1459     if (PL_madskills)
1460         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1461 #endif /* PERL_MAD */
1462     PL_parser->bufptr = s;
1463 }
1464
1465 /*
1466  * S_incline
1467  * This subroutine has nothing to do with tilting, whether at windmills
1468  * or pinball tables.  Its name is short for "increment line".  It
1469  * increments the current line number in CopLINE(PL_curcop) and checks
1470  * to see whether the line starts with a comment of the form
1471  *    # line 500 "foo.pm"
1472  * If so, it sets the current line number and file to the values in the comment.
1473  */
1474
1475 STATIC void
1476 S_incline(pTHX_ const char *s)
1477 {
1478     dVAR;
1479     const char *t;
1480     const char *n;
1481     const char *e;
1482
1483     PERL_ARGS_ASSERT_INCLINE;
1484
1485     CopLINE_inc(PL_curcop);
1486     if (*s++ != '#')
1487         return;
1488     while (SPACE_OR_TAB(*s))
1489         s++;
1490     if (strnEQ(s, "line", 4))
1491         s += 4;
1492     else
1493         return;
1494     if (SPACE_OR_TAB(*s))
1495         s++;
1496     else
1497         return;
1498     while (SPACE_OR_TAB(*s))
1499         s++;
1500     if (!isDIGIT(*s))
1501         return;
1502
1503     n = s;
1504     while (isDIGIT(*s))
1505         s++;
1506     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1507         return;
1508     while (SPACE_OR_TAB(*s))
1509         s++;
1510     if (*s == '"' && (t = strchr(s+1, '"'))) {
1511         s++;
1512         e = t + 1;
1513     }
1514     else {
1515         t = s;
1516         while (!isSPACE(*t))
1517             t++;
1518         e = t;
1519     }
1520     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1521         e++;
1522     if (*e != '\n' && *e != '\0')
1523         return;         /* false alarm */
1524
1525     if (t - s > 0) {
1526         const STRLEN len = t - s;
1527 #ifndef USE_ITHREADS
1528         SV *const temp_sv = CopFILESV(PL_curcop);
1529         const char *cf;
1530         STRLEN tmplen;
1531
1532         if (temp_sv) {
1533             cf = SvPVX(temp_sv);
1534             tmplen = SvCUR(temp_sv);
1535         } else {
1536             cf = NULL;
1537             tmplen = 0;
1538         }
1539
1540         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1541             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1542              * to *{"::_<newfilename"} */
1543             /* However, the long form of evals is only turned on by the
1544                debugger - usually they're "(eval %lu)" */
1545             char smallbuf[128];
1546             char *tmpbuf;
1547             GV **gvp;
1548             STRLEN tmplen2 = len;
1549             if (tmplen + 2 <= sizeof smallbuf)
1550                 tmpbuf = smallbuf;
1551             else
1552                 Newx(tmpbuf, tmplen + 2, char);
1553             tmpbuf[0] = '_';
1554             tmpbuf[1] = '<';
1555             memcpy(tmpbuf + 2, cf, tmplen);
1556             tmplen += 2;
1557             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1558             if (gvp) {
1559                 char *tmpbuf2;
1560                 GV *gv2;
1561
1562                 if (tmplen2 + 2 <= sizeof smallbuf)
1563                     tmpbuf2 = smallbuf;
1564                 else
1565                     Newx(tmpbuf2, tmplen2 + 2, char);
1566
1567                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1568                     /* Either they malloc'd it, or we malloc'd it,
1569                        so no prefix is present in ours.  */
1570                     tmpbuf2[0] = '_';
1571                     tmpbuf2[1] = '<';
1572                 }
1573
1574                 memcpy(tmpbuf2 + 2, s, tmplen2);
1575                 tmplen2 += 2;
1576
1577                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1578                 if (!isGV(gv2)) {
1579                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1580                     /* adjust ${"::_<newfilename"} to store the new file name */
1581                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1582                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1583                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1584                 }
1585
1586                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1587             }
1588             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1589         }
1590 #endif
1591         CopFILE_free(PL_curcop);
1592         CopFILE_setn(PL_curcop, s, len);
1593     }
1594     CopLINE_set(PL_curcop, atoi(n)-1);
1595 }
1596
1597 #ifdef PERL_MAD
1598 /* skip space before PL_thistoken */
1599
1600 STATIC char *
1601 S_skipspace0(pTHX_ register char *s)
1602 {
1603     PERL_ARGS_ASSERT_SKIPSPACE0;
1604
1605     s = skipspace(s);
1606     if (!PL_madskills)
1607         return s;
1608     if (PL_skipwhite) {
1609         if (!PL_thiswhite)
1610             PL_thiswhite = newSVpvs("");
1611         sv_catsv(PL_thiswhite, PL_skipwhite);
1612         sv_free(PL_skipwhite);
1613         PL_skipwhite = 0;
1614     }
1615     PL_realtokenstart = s - SvPVX(PL_linestr);
1616     return s;
1617 }
1618
1619 /* skip space after PL_thistoken */
1620
1621 STATIC char *
1622 S_skipspace1(pTHX_ register char *s)
1623 {
1624     const char *start = s;
1625     I32 startoff = start - SvPVX(PL_linestr);
1626
1627     PERL_ARGS_ASSERT_SKIPSPACE1;
1628
1629     s = skipspace(s);
1630     if (!PL_madskills)
1631         return s;
1632     start = SvPVX(PL_linestr) + startoff;
1633     if (!PL_thistoken && PL_realtokenstart >= 0) {
1634         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1635         PL_thistoken = newSVpvn(tstart, start - tstart);
1636     }
1637     PL_realtokenstart = -1;
1638     if (PL_skipwhite) {
1639         if (!PL_nextwhite)
1640             PL_nextwhite = newSVpvs("");
1641         sv_catsv(PL_nextwhite, PL_skipwhite);
1642         sv_free(PL_skipwhite);
1643         PL_skipwhite = 0;
1644     }
1645     return s;
1646 }
1647
1648 STATIC char *
1649 S_skipspace2(pTHX_ register char *s, SV **svp)
1650 {
1651     char *start;
1652     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1653     const I32 startoff = s - SvPVX(PL_linestr);
1654
1655     PERL_ARGS_ASSERT_SKIPSPACE2;
1656
1657     s = skipspace(s);
1658     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1659     if (!PL_madskills || !svp)
1660         return s;
1661     start = SvPVX(PL_linestr) + startoff;
1662     if (!PL_thistoken && PL_realtokenstart >= 0) {
1663         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1664         PL_thistoken = newSVpvn(tstart, start - tstart);
1665         PL_realtokenstart = -1;
1666     }
1667     if (PL_skipwhite) {
1668         if (!*svp)
1669             *svp = newSVpvs("");
1670         sv_setsv(*svp, PL_skipwhite);
1671         sv_free(PL_skipwhite);
1672         PL_skipwhite = 0;
1673     }
1674
1675     return s;
1676 }
1677 #endif
1678
1679 STATIC void
1680 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1681 {
1682     AV *av = CopFILEAVx(PL_curcop);
1683     if (av) {
1684         SV * const sv = newSV_type(SVt_PVMG);
1685         if (orig_sv)
1686             sv_setsv(sv, orig_sv);
1687         else
1688             sv_setpvn(sv, buf, len);
1689         (void)SvIOK_on(sv);
1690         SvIV_set(sv, 0);
1691         av_store(av, (I32)CopLINE(PL_curcop), sv);
1692     }
1693 }
1694
1695 /*
1696  * S_skipspace
1697  * Called to gobble the appropriate amount and type of whitespace.
1698  * Skips comments as well.
1699  */
1700
1701 STATIC char *
1702 S_skipspace(pTHX_ register char *s)
1703 {
1704 #ifdef PERL_MAD
1705     char *start = s;
1706 #endif /* PERL_MAD */
1707     PERL_ARGS_ASSERT_SKIPSPACE;
1708 #ifdef PERL_MAD
1709     if (PL_skipwhite) {
1710         sv_free(PL_skipwhite);
1711         PL_skipwhite = NULL;
1712     }
1713 #endif /* PERL_MAD */
1714     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1715         while (s < PL_bufend && SPACE_OR_TAB(*s))
1716             s++;
1717     } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1718         while (isSPACE(*s) && *s != '\n')
1719             s++;
1720         if (*s == '#') {
1721             do {
1722                 s++;
1723             } while (s != PL_bufend && *s != '\n');
1724         }
1725         if (*s == '\n')
1726             s++;
1727     } else {
1728         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1729         PL_bufptr = s;
1730         lex_read_space(LEX_KEEP_PREVIOUS);
1731         s = PL_bufptr;
1732         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1733         if (PL_linestart > PL_bufptr)
1734             PL_bufptr = PL_linestart;
1735         return s;
1736     }
1737 #ifdef PERL_MAD
1738     if (PL_madskills)
1739         PL_skipwhite = newSVpvn(start, s-start);
1740 #endif /* PERL_MAD */
1741     return s;
1742 }
1743
1744 /*
1745  * S_check_uni
1746  * Check the unary operators to ensure there's no ambiguity in how they're
1747  * used.  An ambiguous piece of code would be:
1748  *     rand + 5
1749  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1750  * the +5 is its argument.
1751  */
1752
1753 STATIC void
1754 S_check_uni(pTHX)
1755 {
1756     dVAR;
1757     const char *s;
1758     const char *t;
1759
1760     if (PL_oldoldbufptr != PL_last_uni)
1761         return;
1762     while (isSPACE(*PL_last_uni))
1763         PL_last_uni++;
1764     s = PL_last_uni;
1765     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1766         s++;
1767     if ((t = strchr(s, '(')) && t < PL_bufptr)
1768         return;
1769
1770     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1771                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1772                      (int)(s - PL_last_uni), PL_last_uni);
1773 }
1774
1775 /*
1776  * LOP : macro to build a list operator.  Its behaviour has been replaced
1777  * with a subroutine, S_lop() for which LOP is just another name.
1778  */
1779
1780 #define LOP(f,x) return lop(f,x,s)
1781
1782 /*
1783  * S_lop
1784  * Build a list operator (or something that might be one).  The rules:
1785  *  - if we have a next token, then it's a list operator [why?]
1786  *  - if the next thing is an opening paren, then it's a function
1787  *  - else it's a list operator
1788  */
1789
1790 STATIC I32
1791 S_lop(pTHX_ I32 f, int x, char *s)
1792 {
1793     dVAR;
1794
1795     PERL_ARGS_ASSERT_LOP;
1796
1797     pl_yylval.ival = f;
1798     CLINE;
1799     PL_expect = x;
1800     PL_bufptr = s;
1801     PL_last_lop = PL_oldbufptr;
1802     PL_last_lop_op = (OPCODE)f;
1803 #ifdef PERL_MAD
1804     if (PL_lasttoke)
1805         return REPORT(LSTOP);
1806 #else
1807     if (PL_nexttoke)
1808         return REPORT(LSTOP);
1809 #endif
1810     if (*s == '(')
1811         return REPORT(FUNC);
1812     s = PEEKSPACE(s);
1813     if (*s == '(')
1814         return REPORT(FUNC);
1815     else
1816         return REPORT(LSTOP);
1817 }
1818
1819 #ifdef PERL_MAD
1820  /*
1821  * S_start_force
1822  * Sets up for an eventual force_next().  start_force(0) basically does
1823  * an unshift, while start_force(-1) does a push.  yylex removes items
1824  * on the "pop" end.
1825  */
1826
1827 STATIC void
1828 S_start_force(pTHX_ int where)
1829 {
1830     int i;
1831
1832     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1833         where = PL_lasttoke;
1834     assert(PL_curforce < 0 || PL_curforce == where);
1835     if (PL_curforce != where) {
1836         for (i = PL_lasttoke; i > where; --i) {
1837             PL_nexttoke[i] = PL_nexttoke[i-1];
1838         }
1839         PL_lasttoke++;
1840     }
1841     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1842         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1843     PL_curforce = where;
1844     if (PL_nextwhite) {
1845         if (PL_madskills)
1846             curmad('^', newSVpvs(""));
1847         CURMAD('_', PL_nextwhite);
1848     }
1849 }
1850
1851 STATIC void
1852 S_curmad(pTHX_ char slot, SV *sv)
1853 {
1854     MADPROP **where;
1855
1856     if (!sv)
1857         return;
1858     if (PL_curforce < 0)
1859         where = &PL_thismad;
1860     else
1861         where = &PL_nexttoke[PL_curforce].next_mad;
1862
1863     if (PL_faketokens)
1864         sv_setpvs(sv, "");
1865     else {
1866         if (!IN_BYTES) {
1867             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1868                 SvUTF8_on(sv);
1869             else if (PL_encoding) {
1870                 sv_recode_to_utf8(sv, PL_encoding);
1871             }
1872         }
1873     }
1874
1875     /* keep a slot open for the head of the list? */
1876     if (slot != '_' && *where && (*where)->mad_key == '^') {
1877         (*where)->mad_key = slot;
1878         sv_free(MUTABLE_SV(((*where)->mad_val)));
1879         (*where)->mad_val = (void*)sv;
1880     }
1881     else
1882         addmad(newMADsv(slot, sv), where, 0);
1883 }
1884 #else
1885 #  define start_force(where)    NOOP
1886 #  define curmad(slot, sv)      NOOP
1887 #endif
1888
1889 /*
1890  * S_force_next
1891  * When the lexer realizes it knows the next token (for instance,
1892  * it is reordering tokens for the parser) then it can call S_force_next
1893  * to know what token to return the next time the lexer is called.  Caller
1894  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1895  * and possibly PL_expect to ensure the lexer handles the token correctly.
1896  */
1897
1898 STATIC void
1899 S_force_next(pTHX_ I32 type)
1900 {
1901     dVAR;
1902 #ifdef DEBUGGING
1903     if (DEBUG_T_TEST) {
1904         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1905         tokereport(type, &NEXTVAL_NEXTTOKE);
1906     }
1907 #endif
1908 #ifdef PERL_MAD
1909     if (PL_curforce < 0)
1910         start_force(PL_lasttoke);
1911     PL_nexttoke[PL_curforce].next_type = type;
1912     if (PL_lex_state != LEX_KNOWNEXT)
1913         PL_lex_defer = PL_lex_state;
1914     PL_lex_state = LEX_KNOWNEXT;
1915     PL_lex_expect = PL_expect;
1916     PL_curforce = -1;
1917 #else
1918     PL_nexttype[PL_nexttoke] = type;
1919     PL_nexttoke++;
1920     if (PL_lex_state != LEX_KNOWNEXT) {
1921         PL_lex_defer = PL_lex_state;
1922         PL_lex_expect = PL_expect;
1923         PL_lex_state = LEX_KNOWNEXT;
1924     }
1925 #endif
1926 }
1927
1928 STATIC SV *
1929 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1930 {
1931     dVAR;
1932     SV * const sv = newSVpvn_utf8(start, len,
1933                                   !IN_BYTES
1934                                   && UTF
1935                                   && !is_ascii_string((const U8*)start, len)
1936                                   && is_utf8_string((const U8*)start, len));
1937     return sv;
1938 }
1939
1940 /*
1941  * S_force_word
1942  * When the lexer knows the next thing is a word (for instance, it has
1943  * just seen -> and it knows that the next char is a word char, then
1944  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1945  * lookahead.
1946  *
1947  * Arguments:
1948  *   char *start : buffer position (must be within PL_linestr)
1949  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1950  *   int check_keyword : if true, Perl checks to make sure the word isn't
1951  *       a keyword (do this if the word is a label, e.g. goto FOO)
1952  *   int allow_pack : if true, : characters will also be allowed (require,
1953  *       use, etc. do this)
1954  *   int allow_initial_tick : used by the "sub" lexer only.
1955  */
1956
1957 STATIC char *
1958 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1959 {
1960     dVAR;
1961     register char *s;
1962     STRLEN len;
1963
1964     PERL_ARGS_ASSERT_FORCE_WORD;
1965
1966     start = SKIPSPACE1(start);
1967     s = start;
1968     if (isIDFIRST_lazy_if(s,UTF) ||
1969         (allow_pack && *s == ':') ||
1970         (allow_initial_tick && *s == '\'') )
1971     {
1972         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1973         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1974             return start;
1975         start_force(PL_curforce);
1976         if (PL_madskills)
1977             curmad('X', newSVpvn(start,s-start));
1978         if (token == METHOD) {
1979             s = SKIPSPACE1(s);
1980             if (*s == '(')
1981                 PL_expect = XTERM;
1982             else {
1983                 PL_expect = XOPERATOR;
1984             }
1985         }
1986         if (PL_madskills)
1987             curmad('g', newSVpvs( "forced" ));
1988         NEXTVAL_NEXTTOKE.opval
1989             = (OP*)newSVOP(OP_CONST,0,
1990                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1991         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1992         force_next(token);
1993     }
1994     return s;
1995 }
1996
1997 /*
1998  * S_force_ident
1999  * Called when the lexer wants $foo *foo &foo etc, but the program
2000  * text only contains the "foo" portion.  The first argument is a pointer
2001  * to the "foo", and the second argument is the type symbol to prefix.
2002  * Forces the next token to be a "WORD".
2003  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2004  */
2005
2006 STATIC void
2007 S_force_ident(pTHX_ register const char *s, int kind)
2008 {
2009     dVAR;
2010
2011     PERL_ARGS_ASSERT_FORCE_IDENT;
2012
2013     if (*s) {
2014         const STRLEN len = strlen(s);
2015         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2016         start_force(PL_curforce);
2017         NEXTVAL_NEXTTOKE.opval = o;
2018         force_next(WORD);
2019         if (kind) {
2020             o->op_private = OPpCONST_ENTERED;
2021             /* XXX see note in pp_entereval() for why we forgo typo
2022                warnings if the symbol must be introduced in an eval.
2023                GSAR 96-10-12 */
2024             gv_fetchpvn_flags(s, len,
2025                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2026                               : GV_ADD,
2027                               kind == '$' ? SVt_PV :
2028                               kind == '@' ? SVt_PVAV :
2029                               kind == '%' ? SVt_PVHV :
2030                               SVt_PVGV
2031                               );
2032         }
2033     }
2034 }
2035
2036 NV
2037 Perl_str_to_version(pTHX_ SV *sv)
2038 {
2039     NV retval = 0.0;
2040     NV nshift = 1.0;
2041     STRLEN len;
2042     const char *start = SvPV_const(sv,len);
2043     const char * const end = start + len;
2044     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2045
2046     PERL_ARGS_ASSERT_STR_TO_VERSION;
2047
2048     while (start < end) {
2049         STRLEN skip;
2050         UV n;
2051         if (utf)
2052             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2053         else {
2054             n = *(U8*)start;
2055             skip = 1;
2056         }
2057         retval += ((NV)n)/nshift;
2058         start += skip;
2059         nshift *= 1000;
2060     }
2061     return retval;
2062 }
2063
2064 /*
2065  * S_force_version
2066  * Forces the next token to be a version number.
2067  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2068  * and if "guessing" is TRUE, then no new token is created (and the caller
2069  * must use an alternative parsing method).
2070  */
2071
2072 STATIC char *
2073 S_force_version(pTHX_ char *s, int guessing)
2074 {
2075     dVAR;
2076     OP *version = NULL;
2077     char *d;
2078 #ifdef PERL_MAD
2079     I32 startoff = s - SvPVX(PL_linestr);
2080 #endif
2081
2082     PERL_ARGS_ASSERT_FORCE_VERSION;
2083
2084     s = SKIPSPACE1(s);
2085
2086     d = s;
2087     if (*d == 'v')
2088         d++;
2089     if (isDIGIT(*d)) {
2090         while (isDIGIT(*d) || *d == '_' || *d == '.')
2091             d++;
2092 #ifdef PERL_MAD
2093         if (PL_madskills) {
2094             start_force(PL_curforce);
2095             curmad('X', newSVpvn(s,d-s));
2096         }
2097 #endif
2098         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2099             SV *ver;
2100             s = scan_num(s, &pl_yylval);
2101             version = pl_yylval.opval;
2102             ver = cSVOPx(version)->op_sv;
2103             if (SvPOK(ver) && !SvNIOK(ver)) {
2104                 SvUPGRADE(ver, SVt_PVNV);
2105                 SvNV_set(ver, str_to_version(ver));
2106                 SvNOK_on(ver);          /* hint that it is a version */
2107             }
2108         }
2109         else if (guessing) {
2110 #ifdef PERL_MAD
2111             if (PL_madskills) {
2112                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2113                 PL_nextwhite = 0;
2114                 s = SvPVX(PL_linestr) + startoff;
2115             }
2116 #endif
2117             return s;
2118         }
2119     }
2120
2121 #ifdef PERL_MAD
2122     if (PL_madskills && !version) {
2123         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2124         PL_nextwhite = 0;
2125         s = SvPVX(PL_linestr) + startoff;
2126     }
2127 #endif
2128     /* NOTE: The parser sees the package name and the VERSION swapped */
2129     start_force(PL_curforce);
2130     NEXTVAL_NEXTTOKE.opval = version;
2131     force_next(WORD);
2132
2133     return s;
2134 }
2135
2136 /*
2137  * S_tokeq
2138  * Tokenize a quoted string passed in as an SV.  It finds the next
2139  * chunk, up to end of string or a backslash.  It may make a new
2140  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2141  * turns \\ into \.
2142  */
2143
2144 STATIC SV *
2145 S_tokeq(pTHX_ SV *sv)
2146 {
2147     dVAR;
2148     register char *s;
2149     register char *send;
2150     register char *d;
2151     STRLEN len = 0;
2152     SV *pv = sv;
2153
2154     PERL_ARGS_ASSERT_TOKEQ;
2155
2156     if (!SvLEN(sv))
2157         goto finish;
2158
2159     s = SvPV_force(sv, len);
2160     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2161         goto finish;
2162     send = s + len;
2163     while (s < send && *s != '\\')
2164         s++;
2165     if (s == send)
2166         goto finish;
2167     d = s;
2168     if ( PL_hints & HINT_NEW_STRING ) {
2169         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2170     }
2171     while (s < send) {
2172         if (*s == '\\') {
2173             if (s + 1 < send && (s[1] == '\\'))
2174                 s++;            /* all that, just for this */
2175         }
2176         *d++ = *s++;
2177     }
2178     *d = '\0';
2179     SvCUR_set(sv, d - SvPVX_const(sv));
2180   finish:
2181     if ( PL_hints & HINT_NEW_STRING )
2182        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2183     return sv;
2184 }
2185
2186 /*
2187  * Now come three functions related to double-quote context,
2188  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2189  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2190  * interact with PL_lex_state, and create fake ( ... ) argument lists
2191  * to handle functions and concatenation.
2192  * They assume that whoever calls them will be setting up a fake
2193  * join call, because each subthing puts a ',' after it.  This lets
2194  *   "lower \luPpEr"
2195  * become
2196  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2197  *
2198  * (I'm not sure whether the spurious commas at the end of lcfirst's
2199  * arguments and join's arguments are created or not).
2200  */
2201
2202 /*
2203  * S_sublex_start
2204  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2205  *
2206  * Pattern matching will set PL_lex_op to the pattern-matching op to
2207  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2208  *
2209  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2210  *
2211  * Everything else becomes a FUNC.
2212  *
2213  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2214  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2215  * call to S_sublex_push().
2216  */
2217
2218 STATIC I32
2219 S_sublex_start(pTHX)
2220 {
2221     dVAR;
2222     register const I32 op_type = pl_yylval.ival;
2223
2224     if (op_type == OP_NULL) {
2225         pl_yylval.opval = PL_lex_op;
2226         PL_lex_op = NULL;
2227         return THING;
2228     }
2229     if (op_type == OP_CONST || op_type == OP_READLINE) {
2230         SV *sv = tokeq(PL_lex_stuff);
2231
2232         if (SvTYPE(sv) == SVt_PVIV) {
2233             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2234             STRLEN len;
2235             const char * const p = SvPV_const(sv, len);
2236             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2237             SvREFCNT_dec(sv);
2238             sv = nsv;
2239         }
2240         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2241         PL_lex_stuff = NULL;
2242         /* Allow <FH> // "foo" */
2243         if (op_type == OP_READLINE)
2244             PL_expect = XTERMORDORDOR;
2245         return THING;
2246     }
2247     else if (op_type == OP_BACKTICK && PL_lex_op) {
2248         /* readpipe() vas overriden */
2249         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2250         pl_yylval.opval = PL_lex_op;
2251         PL_lex_op = NULL;
2252         PL_lex_stuff = NULL;
2253         return THING;
2254     }
2255
2256     PL_sublex_info.super_state = PL_lex_state;
2257     PL_sublex_info.sub_inwhat = (U16)op_type;
2258     PL_sublex_info.sub_op = PL_lex_op;
2259     PL_lex_state = LEX_INTERPPUSH;
2260
2261     PL_expect = XTERM;
2262     if (PL_lex_op) {
2263         pl_yylval.opval = PL_lex_op;
2264         PL_lex_op = NULL;
2265         return PMFUNC;
2266     }
2267     else
2268         return FUNC;
2269 }
2270
2271 /*
2272  * S_sublex_push
2273  * Create a new scope to save the lexing state.  The scope will be
2274  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2275  * to the uc, lc, etc. found before.
2276  * Sets PL_lex_state to LEX_INTERPCONCAT.
2277  */
2278
2279 STATIC I32
2280 S_sublex_push(pTHX)
2281 {
2282     dVAR;
2283     ENTER;
2284
2285     PL_lex_state = PL_sublex_info.super_state;
2286     SAVEBOOL(PL_lex_dojoin);
2287     SAVEI32(PL_lex_brackets);
2288     SAVEI32(PL_lex_casemods);
2289     SAVEI32(PL_lex_starts);
2290     SAVEI8(PL_lex_state);
2291     SAVEVPTR(PL_lex_inpat);
2292     SAVEI16(PL_lex_inwhat);
2293     SAVECOPLINE(PL_curcop);
2294     SAVEPPTR(PL_bufptr);
2295     SAVEPPTR(PL_bufend);
2296     SAVEPPTR(PL_oldbufptr);
2297     SAVEPPTR(PL_oldoldbufptr);
2298     SAVEPPTR(PL_last_lop);
2299     SAVEPPTR(PL_last_uni);
2300     SAVEPPTR(PL_linestart);
2301     SAVESPTR(PL_linestr);
2302     SAVEGENERICPV(PL_lex_brackstack);
2303     SAVEGENERICPV(PL_lex_casestack);
2304
2305     PL_linestr = PL_lex_stuff;
2306     PL_lex_stuff = NULL;
2307
2308     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2309         = SvPVX(PL_linestr);
2310     PL_bufend += SvCUR(PL_linestr);
2311     PL_last_lop = PL_last_uni = NULL;
2312     SAVEFREESV(PL_linestr);
2313
2314     PL_lex_dojoin = FALSE;
2315     PL_lex_brackets = 0;
2316     Newx(PL_lex_brackstack, 120, char);
2317     Newx(PL_lex_casestack, 12, char);
2318     PL_lex_casemods = 0;
2319     *PL_lex_casestack = '\0';
2320     PL_lex_starts = 0;
2321     PL_lex_state = LEX_INTERPCONCAT;
2322     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2323
2324     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2325     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2326         PL_lex_inpat = PL_sublex_info.sub_op;
2327     else
2328         PL_lex_inpat = NULL;
2329
2330     return '(';
2331 }
2332
2333 /*
2334  * S_sublex_done
2335  * Restores lexer state after a S_sublex_push.
2336  */
2337
2338 STATIC I32
2339 S_sublex_done(pTHX)
2340 {
2341     dVAR;
2342     if (!PL_lex_starts++) {
2343         SV * const sv = newSVpvs("");
2344         if (SvUTF8(PL_linestr))
2345             SvUTF8_on(sv);
2346         PL_expect = XOPERATOR;
2347         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2348         return THING;
2349     }
2350
2351     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2352         PL_lex_state = LEX_INTERPCASEMOD;
2353         return yylex();
2354     }
2355
2356     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2357     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2358         PL_linestr = PL_lex_repl;
2359         PL_lex_inpat = 0;
2360         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2361         PL_bufend += SvCUR(PL_linestr);
2362         PL_last_lop = PL_last_uni = NULL;
2363         SAVEFREESV(PL_linestr);
2364         PL_lex_dojoin = FALSE;
2365         PL_lex_brackets = 0;
2366         PL_lex_casemods = 0;
2367         *PL_lex_casestack = '\0';
2368         PL_lex_starts = 0;
2369         if (SvEVALED(PL_lex_repl)) {
2370             PL_lex_state = LEX_INTERPNORMAL;
2371             PL_lex_starts++;
2372             /*  we don't clear PL_lex_repl here, so that we can check later
2373                 whether this is an evalled subst; that means we rely on the
2374                 logic to ensure sublex_done() is called again only via the
2375                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2376         }
2377         else {
2378             PL_lex_state = LEX_INTERPCONCAT;
2379             PL_lex_repl = NULL;
2380         }
2381         return ',';
2382     }
2383     else {
2384 #ifdef PERL_MAD
2385         if (PL_madskills) {
2386             if (PL_thiswhite) {
2387                 if (!PL_endwhite)
2388                     PL_endwhite = newSVpvs("");
2389                 sv_catsv(PL_endwhite, PL_thiswhite);
2390                 PL_thiswhite = 0;
2391             }
2392             if (PL_thistoken)
2393                 sv_setpvs(PL_thistoken,"");
2394             else
2395                 PL_realtokenstart = -1;
2396         }
2397 #endif
2398         LEAVE;
2399         PL_bufend = SvPVX(PL_linestr);
2400         PL_bufend += SvCUR(PL_linestr);
2401         PL_expect = XOPERATOR;
2402         PL_sublex_info.sub_inwhat = 0;
2403         return ')';
2404     }
2405 }
2406
2407 /*
2408   scan_const
2409
2410   Extracts a pattern, double-quoted string, or transliteration.  This
2411   is terrifying code.
2412
2413   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2414   processing a pattern (PL_lex_inpat is true), a transliteration
2415   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2416
2417   Returns a pointer to the character scanned up to. If this is
2418   advanced from the start pointer supplied (i.e. if anything was
2419   successfully parsed), will leave an OP for the substring scanned
2420   in pl_yylval. Caller must intuit reason for not parsing further
2421   by looking at the next characters herself.
2422
2423   In patterns:
2424     backslashes:
2425       double-quoted style: \r and \n
2426       regexp special ones: \D \s
2427       constants: \x31
2428       backrefs: \1
2429       case and quoting: \U \Q \E
2430     stops on @ and $, but not for $ as tail anchor
2431
2432   In transliterations:
2433     characters are VERY literal, except for - not at the start or end
2434     of the string, which indicates a range. If the range is in bytes,
2435     scan_const expands the range to the full set of intermediate
2436     characters. If the range is in utf8, the hyphen is replaced with
2437     a certain range mark which will be handled by pmtrans() in op.c.
2438
2439   In double-quoted strings:
2440     backslashes:
2441       double-quoted style: \r and \n
2442       constants: \x31
2443       deprecated backrefs: \1 (in substitution replacements)
2444       case and quoting: \U \Q \E
2445     stops on @ and $
2446
2447   scan_const does *not* construct ops to handle interpolated strings.
2448   It stops processing as soon as it finds an embedded $ or @ variable
2449   and leaves it to the caller to work out what's going on.
2450
2451   embedded arrays (whether in pattern or not) could be:
2452       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2453
2454   $ in double-quoted strings must be the symbol of an embedded scalar.
2455
2456   $ in pattern could be $foo or could be tail anchor.  Assumption:
2457   it's a tail anchor if $ is the last thing in the string, or if it's
2458   followed by one of "()| \r\n\t"
2459
2460   \1 (backreferences) are turned into $1
2461
2462   The structure of the code is
2463       while (there's a character to process) {
2464           handle transliteration ranges
2465           skip regexp comments /(?#comment)/ and codes /(?{code})/
2466           skip #-initiated comments in //x patterns
2467           check for embedded arrays
2468           check for embedded scalars
2469           if (backslash) {
2470               leave intact backslashes from leaveit (below)
2471               deprecate \1 in substitution replacements
2472               handle string-changing backslashes \l \U \Q \E, etc.
2473               switch (what was escaped) {
2474                   handle \- in a transliteration (becomes a literal -)
2475                   handle \132 (octal characters)
2476                   handle \x15 and \x{1234} (hex characters)
2477                   handle \N{name} (named characters)
2478                   handle \cV (control characters)
2479                   handle printf-style backslashes (\f, \r, \n, etc)
2480               } (end switch)
2481               continue
2482           } (end if backslash)
2483           handle regular character
2484     } (end while character to read)
2485
2486 */
2487
2488 STATIC char *
2489 S_scan_const(pTHX_ char *start)
2490 {
2491     dVAR;
2492     register char *send = PL_bufend;            /* end of the constant */
2493     SV *sv = newSV(send - start);               /* sv for the constant.  See
2494                                                    note below on sizing. */
2495     register char *s = start;                   /* start of the constant */
2496     register char *d = SvPVX(sv);               /* destination for copies */
2497     bool dorange = FALSE;                       /* are we in a translit range? */
2498     bool didrange = FALSE;                      /* did we just finish a range? */
2499     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2500     I32  this_utf8 = UTF;                       /* Is the source string assumed
2501                                                    to be UTF8?  But, this can
2502                                                    show as true when the source
2503                                                    isn't utf8, as for example
2504                                                    when it is entirely composed
2505                                                    of hex constants */
2506
2507     /* Note on sizing:  The scanned constant is placed into sv, which is
2508      * initialized by newSV() assuming one byte of output for every byte of
2509      * input.  This routine expects newSV() to allocate an extra byte for a
2510      * trailing NUL, which this routine will append if it gets to the end of
2511      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2512      * CAPITAL LETTER A}), or more output than input if the constant ends up
2513      * recoded to utf8, but each time a construct is found that might increase
2514      * the needed size, SvGROW() is called.  Its size parameter each time is
2515      * based on the best guess estimate at the time, namely the length used so
2516      * far, plus the length the current construct will occupy, plus room for
2517      * the trailing NUL, plus one byte for every input byte still unscanned */
2518
2519     UV uv;
2520 #ifdef EBCDIC
2521     UV literal_endpoint = 0;
2522     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2523 #endif
2524
2525     PERL_ARGS_ASSERT_SCAN_CONST;
2526
2527     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2528         /* If we are doing a trans and we know we want UTF8 set expectation */
2529         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2530         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2531     }
2532
2533
2534     while (s < send || dorange) {
2535         /* get transliterations out of the way (they're most literal) */
2536         if (PL_lex_inwhat == OP_TRANS) {
2537             /* expand a range A-Z to the full set of characters.  AIE! */
2538             if (dorange) {
2539                 I32 i;                          /* current expanded character */
2540                 I32 min;                        /* first character in range */
2541                 I32 max;                        /* last character in range */
2542
2543 #ifdef EBCDIC
2544                 UV uvmax = 0;
2545 #endif
2546
2547                 if (has_utf8
2548 #ifdef EBCDIC
2549                     && !native_range
2550 #endif
2551                     ) {
2552                     char * const c = (char*)utf8_hop((U8*)d, -1);
2553                     char *e = d++;
2554                     while (e-- > c)
2555                         *(e + 1) = *e;
2556                     *c = (char)UTF_TO_NATIVE(0xff);
2557                     /* mark the range as done, and continue */
2558                     dorange = FALSE;
2559                     didrange = TRUE;
2560                     continue;
2561                 }
2562
2563                 i = d - SvPVX_const(sv);                /* remember current offset */
2564 #ifdef EBCDIC
2565                 SvGROW(sv,
2566                        SvLEN(sv) + (has_utf8 ?
2567                                     (512 - UTF_CONTINUATION_MARK +
2568                                      UNISKIP(0x100))
2569                                     : 256));
2570                 /* How many two-byte within 0..255: 128 in UTF-8,
2571                  * 96 in UTF-8-mod. */
2572 #else
2573                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2574 #endif
2575                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2576 #ifdef EBCDIC
2577                 if (has_utf8) {
2578                     int j;
2579                     for (j = 0; j <= 1; j++) {
2580                         char * const c = (char*)utf8_hop((U8*)d, -1);
2581                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2582                         if (j)
2583                             min = (U8)uv;
2584                         else if (uv < 256)
2585                             max = (U8)uv;
2586                         else {
2587                             max = (U8)0xff; /* only to \xff */
2588                             uvmax = uv; /* \x{100} to uvmax */
2589                         }
2590                         d = c; /* eat endpoint chars */
2591                      }
2592                 }
2593                else {
2594 #endif
2595                    d -= 2;              /* eat the first char and the - */
2596                    min = (U8)*d;        /* first char in range */
2597                    max = (U8)d[1];      /* last char in range  */
2598 #ifdef EBCDIC
2599                }
2600 #endif
2601
2602                 if (min > max) {
2603                     Perl_croak(aTHX_
2604                                "Invalid range \"%c-%c\" in transliteration operator",
2605                                (char)min, (char)max);
2606                 }
2607
2608 #ifdef EBCDIC
2609                 if (literal_endpoint == 2 &&
2610                     ((isLOWER(min) && isLOWER(max)) ||
2611                      (isUPPER(min) && isUPPER(max)))) {
2612                     if (isLOWER(min)) {
2613                         for (i = min; i <= max; i++)
2614                             if (isLOWER(i))
2615                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2616                     } else {
2617                         for (i = min; i <= max; i++)
2618                             if (isUPPER(i))
2619                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2620                     }
2621                 }
2622                 else
2623 #endif
2624                     for (i = min; i <= max; i++)
2625 #ifdef EBCDIC
2626                         if (has_utf8) {
2627                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2628                             if (UNI_IS_INVARIANT(ch))
2629                                 *d++ = (U8)i;
2630                             else {
2631                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2632                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2633                             }
2634                         }
2635                         else
2636 #endif
2637                             *d++ = (char)i;
2638
2639 #ifdef EBCDIC
2640                 if (uvmax) {
2641                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2642                     if (uvmax > 0x101)
2643                         *d++ = (char)UTF_TO_NATIVE(0xff);
2644                     if (uvmax > 0x100)
2645                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2646                 }
2647 #endif
2648
2649                 /* mark the range as done, and continue */
2650                 dorange = FALSE;
2651                 didrange = TRUE;
2652 #ifdef EBCDIC
2653                 literal_endpoint = 0;
2654 #endif
2655                 continue;
2656             }
2657
2658             /* range begins (ignore - as first or last char) */
2659             else if (*s == '-' && s+1 < send  && s != start) {
2660                 if (didrange) {
2661                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2662                 }
2663                 if (has_utf8
2664 #ifdef EBCDIC
2665                     && !native_range
2666 #endif
2667                     ) {
2668                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2669                     s++;
2670                     continue;
2671                 }
2672                 dorange = TRUE;
2673                 s++;
2674             }
2675             else {
2676                 didrange = FALSE;
2677 #ifdef EBCDIC
2678                 literal_endpoint = 0;
2679                 native_range = TRUE;
2680 #endif
2681             }
2682         }
2683
2684         /* if we get here, we're not doing a transliteration */
2685
2686         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2687            except for the last char, which will be done separately. */
2688         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2689             if (s[2] == '#') {
2690                 while (s+1 < send && *s != ')')
2691                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2692             }
2693             else if (s[2] == '{' /* This should match regcomp.c */
2694                     || (s[2] == '?' && s[3] == '{'))
2695             {
2696                 I32 count = 1;
2697                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2698                 char c;
2699
2700                 while (count && (c = *regparse)) {
2701                     if (c == '\\' && regparse[1])
2702                         regparse++;
2703                     else if (c == '{')
2704                         count++;
2705                     else if (c == '}')
2706                         count--;
2707                     regparse++;
2708                 }
2709                 if (*regparse != ')')
2710                     regparse--;         /* Leave one char for continuation. */
2711                 while (s < regparse)
2712                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2713             }
2714         }
2715
2716         /* likewise skip #-initiated comments in //x patterns */
2717         else if (*s == '#' && PL_lex_inpat &&
2718           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2719             while (s+1 < send && *s != '\n')
2720                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2721         }
2722
2723         /* check for embedded arrays
2724            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2725            */
2726         else if (*s == '@' && s[1]) {
2727             if (isALNUM_lazy_if(s+1,UTF))
2728                 break;
2729             if (strchr(":'{$", s[1]))
2730                 break;
2731             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2732                 break; /* in regexp, neither @+ nor @- are interpolated */
2733         }
2734
2735         /* check for embedded scalars.  only stop if we're sure it's a
2736            variable.
2737         */
2738         else if (*s == '$') {
2739             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2740                 break;
2741             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2742                 if (s[1] == '\\') {
2743                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2744                                    "Possible unintended interpolation of $\\ in regex");
2745                 }
2746                 break;          /* in regexp, $ might be tail anchor */
2747             }
2748         }
2749
2750         /* End of else if chain - OP_TRANS rejoin rest */
2751
2752         /* backslashes */
2753         if (*s == '\\' && s+1 < send) {
2754             s++;
2755
2756             /* deprecate \1 in strings and substitution replacements */
2757             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2758                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2759             {
2760                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2761                 *--s = '$';
2762                 break;
2763             }
2764
2765             /* string-change backslash escapes */
2766             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2767                 --s;
2768                 break;
2769             }
2770             /* skip any other backslash escapes in a pattern */
2771             else if (PL_lex_inpat) {
2772                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2773                 goto default_action;
2774             }
2775
2776             /* if we get here, it's either a quoted -, or a digit */
2777             switch (*s) {
2778
2779             /* quoted - in transliterations */
2780             case '-':
2781                 if (PL_lex_inwhat == OP_TRANS) {
2782                     *d++ = *s++;
2783                     continue;
2784                 }
2785                 /* FALL THROUGH */
2786             default:
2787                 {
2788                     if ((isALPHA(*s) || isDIGIT(*s)))
2789                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2790                                        "Unrecognized escape \\%c passed through",
2791                                        *s);
2792                     /* default action is to copy the quoted character */
2793                     goto default_action;
2794                 }
2795
2796             /* eg. \132 indicates the octal constant 0x132 */
2797             case '0': case '1': case '2': case '3':
2798             case '4': case '5': case '6': case '7':
2799                 {
2800                     I32 flags = 0;
2801                     STRLEN len = 3;
2802                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2803                     s += len;
2804                 }
2805                 goto NUM_ESCAPE_INSERT;
2806
2807             /* eg. \x24 indicates the hex constant 0x24 */
2808             case 'x':
2809                 ++s;
2810                 if (*s == '{') {
2811                     char* const e = strchr(s, '}');
2812                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2813                       PERL_SCAN_DISALLOW_PREFIX;
2814                     STRLEN len;
2815
2816                     ++s;
2817                     if (!e) {
2818                         yyerror("Missing right brace on \\x{}");
2819                         continue;
2820                     }
2821                     len = e - s;
2822                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2823                     s = e + 1;
2824                 }
2825                 else {
2826                     {
2827                         STRLEN len = 2;
2828                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2829                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2830                         s += len;
2831                     }
2832                 }
2833
2834               NUM_ESCAPE_INSERT:
2835                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2836                  * always be enough room in sv since such escapes will be
2837                  * longer than any UTF-8 sequence they can end up as, except if
2838                  * they force us to recode the rest of the string into utf8 */
2839
2840                 /* Here uv is the ordinal of the next character being added in
2841                  * unicode (converted from native).  (It has to be done before
2842                  * here because \N is interpreted as unicode, and oct and hex
2843                  * as native.) */
2844                 if (!UNI_IS_INVARIANT(uv)) {
2845                     if (!has_utf8 && uv > 255) {
2846                         /* Might need to recode whatever we have accumulated so
2847                          * far if it contains any chars variant in utf8 or
2848                          * utf-ebcdic. */
2849
2850                         SvCUR_set(sv, d - SvPVX_const(sv));
2851                         SvPOK_on(sv);
2852                         *d = '\0';
2853                         /* See Note on sizing above.  */
2854                         sv_utf8_upgrade_flags_grow(sv,
2855                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2856                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2857                         d = SvPVX(sv) + SvCUR(sv);
2858                         has_utf8 = TRUE;
2859                     }
2860
2861                     if (has_utf8) {
2862                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2863                         if (PL_lex_inwhat == OP_TRANS &&
2864                             PL_sublex_info.sub_op) {
2865                             PL_sublex_info.sub_op->op_private |=
2866                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2867                                              : OPpTRANS_TO_UTF);
2868                         }
2869 #ifdef EBCDIC
2870                         if (uv > 255 && !dorange)
2871                             native_range = FALSE;
2872 #endif
2873                     }
2874                     else {
2875                         *d++ = (char)uv;
2876                     }
2877                 }
2878                 else {
2879                     *d++ = (char) uv;
2880                 }
2881                 continue;
2882
2883             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2884              * \N{U+0041} */
2885             case 'N':
2886                 ++s;
2887                 if (*s == '{') {
2888                     char* e = strchr(s, '}');
2889                     SV *res;
2890                     STRLEN len;
2891                     const char *str;
2892
2893                     if (!e) {
2894                         yyerror("Missing right brace on \\N{}");
2895                         e = s - 1;
2896                         goto cont_scan;
2897                     }
2898                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2899                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2900                          * machines */
2901                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2902                           PERL_SCAN_DISALLOW_PREFIX;
2903                         s += 3;
2904                         len = e - s;
2905                         uv = grok_hex(s, &len, &flags, NULL);
2906                         if ( e > s && len != (STRLEN)(e - s) ) {
2907                             uv = 0xFFFD;
2908                         }
2909                         s = e + 1;
2910                         goto NUM_ESCAPE_INSERT;
2911                     }
2912                     res = newSVpvn(s + 1, e - s - 1);
2913                     res = new_constant( NULL, 0, "charnames",
2914                                         res, NULL, s - 2, e - s + 3 );
2915                     if (has_utf8)
2916                         sv_utf8_upgrade(res);
2917                     str = SvPV_const(res,len);
2918 #ifdef EBCDIC_NEVER_MIND
2919                     /* charnames uses pack U and that has been
2920                      * recently changed to do the below uni->native
2921                      * mapping, so this would be redundant (and wrong,
2922                      * the code point would be doubly converted).
2923                      * But leave this in just in case the pack U change
2924                      * gets revoked, but the semantics is still
2925                      * desireable for charnames. --jhi */
2926                     {
2927                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2928
2929                          if (uv < 0x100) {
2930                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2931
2932                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2933                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2934                               str = SvPV_const(res, len);
2935                          }
2936                     }
2937 #endif
2938                     /* If destination is not in utf8 but this new character is,
2939                      * recode the dest to utf8 */
2940                     if (!has_utf8 && SvUTF8(res)) {
2941                         SvCUR_set(sv, d - SvPVX_const(sv));
2942                         SvPOK_on(sv);
2943                         *d = '\0';
2944                         /* See Note on sizing above.  */
2945                         sv_utf8_upgrade_flags_grow(sv,
2946                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2947                                             len + (STRLEN)(send - s) + 1);
2948                         d = SvPVX(sv) + SvCUR(sv);
2949                         has_utf8 = TRUE;
2950                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2951
2952                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2953                          * correctly here). */
2954                         const STRLEN off = d - SvPVX_const(sv);
2955                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2956                     }
2957 #ifdef EBCDIC
2958                     if (!dorange)
2959                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2960 #endif
2961                     Copy(str, d, len, char);
2962                     d += len;
2963                     SvREFCNT_dec(res);
2964                   cont_scan:
2965                     s = e + 1;
2966                 }
2967                 else
2968                     yyerror("Missing braces on \\N{}");
2969                 continue;
2970
2971             /* \c is a control character */
2972             case 'c':
2973                 s++;
2974                 if (s < send) {
2975                     U8 c = *s++;
2976 #ifdef EBCDIC
2977                     if (isLOWER(c))
2978                         c = toUPPER(c);
2979 #endif
2980                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2981                 }
2982                 else {
2983                     yyerror("Missing control char name in \\c");
2984                 }
2985                 continue;
2986
2987             /* printf-style backslashes, formfeeds, newlines, etc */
2988             case 'b':
2989                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2990                 break;
2991             case 'n':
2992                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2993                 break;
2994             case 'r':
2995                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2996                 break;
2997             case 'f':
2998                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2999                 break;
3000             case 't':
3001                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3002                 break;
3003             case 'e':
3004                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3005                 break;
3006             case 'a':
3007                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3008                 break;
3009             } /* end switch */
3010
3011             s++;
3012             continue;
3013         } /* end if (backslash) */
3014 #ifdef EBCDIC
3015         else
3016             literal_endpoint++;
3017 #endif
3018
3019     default_action:
3020         /* If we started with encoded form, or already know we want it,
3021            then encode the next character */
3022         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3023             STRLEN len  = 1;
3024
3025
3026             /* One might think that it is wasted effort in the case of the
3027              * source being utf8 (this_utf8 == TRUE) to take the next character
3028              * in the source, convert it to an unsigned value, and then convert
3029              * it back again.  But the source has not been validated here.  The
3030              * routine that does the conversion checks for errors like
3031              * malformed utf8 */
3032
3033             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3034             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3035             if (!has_utf8) {
3036                 SvCUR_set(sv, d - SvPVX_const(sv));
3037                 SvPOK_on(sv);
3038                 *d = '\0';
3039                 /* See Note on sizing above.  */
3040                 sv_utf8_upgrade_flags_grow(sv,
3041                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042                                         need + (STRLEN)(send - s) + 1);
3043                 d = SvPVX(sv) + SvCUR(sv);
3044                 has_utf8 = TRUE;
3045             } else if (need > len) {
3046                 /* encoded value larger than old, may need extra space (NOTE:
3047                  * SvCUR() is not set correctly here).   See Note on sizing
3048                  * above.  */
3049                 const STRLEN off = d - SvPVX_const(sv);
3050                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3051             }
3052             s += len;
3053
3054             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3055 #ifdef EBCDIC
3056             if (uv > 255 && !dorange)
3057                 native_range = FALSE;
3058 #endif
3059         }
3060         else {
3061             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3062         }
3063     } /* while loop to process each character */
3064
3065     /* terminate the string and set up the sv */
3066     *d = '\0';
3067     SvCUR_set(sv, d - SvPVX_const(sv));
3068     if (SvCUR(sv) >= SvLEN(sv))
3069         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3070
3071     SvPOK_on(sv);
3072     if (PL_encoding && !has_utf8) {
3073         sv_recode_to_utf8(sv, PL_encoding);
3074         if (SvUTF8(sv))
3075             has_utf8 = TRUE;
3076     }
3077     if (has_utf8) {
3078         SvUTF8_on(sv);
3079         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3080             PL_sublex_info.sub_op->op_private |=
3081                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3082         }
3083     }
3084
3085     /* shrink the sv if we allocated more than we used */
3086     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3087         SvPV_shrink_to_cur(sv);
3088     }
3089
3090     /* return the substring (via pl_yylval) only if we parsed anything */
3091     if (s > PL_bufptr) {
3092         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3093             const char *const key = PL_lex_inpat ? "qr" : "q";
3094             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3095             const char *type;
3096             STRLEN typelen;
3097
3098             if (PL_lex_inwhat == OP_TRANS) {
3099                 type = "tr";
3100                 typelen = 2;
3101             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3102                 type = "s";
3103                 typelen = 1;
3104             } else  {
3105                 type = "qq";
3106                 typelen = 2;
3107             }
3108
3109             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3110                                 type, typelen);
3111         }
3112         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3113     } else
3114         SvREFCNT_dec(sv);
3115     return s;
3116 }
3117
3118 /* S_intuit_more
3119  * Returns TRUE if there's more to the expression (e.g., a subscript),
3120  * FALSE otherwise.
3121  *
3122  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3123  *
3124  * ->[ and ->{ return TRUE
3125  * { and [ outside a pattern are always subscripts, so return TRUE
3126  * if we're outside a pattern and it's not { or [, then return FALSE
3127  * if we're in a pattern and the first char is a {
3128  *   {4,5} (any digits around the comma) returns FALSE
3129  * if we're in a pattern and the first char is a [
3130  *   [] returns FALSE
3131  *   [SOMETHING] has a funky algorithm to decide whether it's a
3132  *      character class or not.  It has to deal with things like
3133  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3134  * anything else returns TRUE
3135  */
3136
3137 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3138
3139 STATIC int
3140 S_intuit_more(pTHX_ register char *s)
3141 {
3142     dVAR;
3143
3144     PERL_ARGS_ASSERT_INTUIT_MORE;
3145
3146     if (PL_lex_brackets)
3147         return TRUE;
3148     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3149         return TRUE;
3150     if (*s != '{' && *s != '[')
3151         return FALSE;
3152     if (!PL_lex_inpat)
3153         return TRUE;
3154
3155     /* In a pattern, so maybe we have {n,m}. */
3156     if (*s == '{') {
3157         s++;
3158         if (!isDIGIT(*s))
3159             return TRUE;
3160         while (isDIGIT(*s))
3161             s++;
3162         if (*s == ',')
3163             s++;
3164         while (isDIGIT(*s))
3165             s++;
3166         if (*s == '}')
3167             return FALSE;
3168         return TRUE;
3169
3170     }
3171
3172     /* On the other hand, maybe we have a character class */
3173
3174     s++;
3175     if (*s == ']' || *s == '^')
3176         return FALSE;
3177     else {
3178         /* this is terrifying, and it works */
3179         int weight = 2;         /* let's weigh the evidence */
3180         char seen[256];
3181         unsigned char un_char = 255, last_un_char;
3182         const char * const send = strchr(s,']');
3183         char tmpbuf[sizeof PL_tokenbuf * 4];
3184
3185         if (!send)              /* has to be an expression */
3186             return TRUE;
3187
3188         Zero(seen,256,char);
3189         if (*s == '$')
3190             weight -= 3;
3191         else if (isDIGIT(*s)) {
3192             if (s[1] != ']') {
3193                 if (isDIGIT(s[1]) && s[2] == ']')
3194                     weight -= 10;
3195             }
3196             else
3197                 weight -= 100;
3198         }
3199         for (; s < send; s++) {
3200             last_un_char = un_char;
3201             un_char = (unsigned char)*s;
3202             switch (*s) {
3203             case '@':
3204             case '&':
3205             case '$':
3206                 weight -= seen[un_char] * 10;
3207                 if (isALNUM_lazy_if(s+1,UTF)) {
3208                     int len;
3209                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3210                     len = (int)strlen(tmpbuf);
3211                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3212                         weight -= 100;
3213                     else
3214                         weight -= 10;
3215                 }
3216                 else if (*s == '$' && s[1] &&
3217                   strchr("[#!%*<>()-=",s[1])) {
3218                     if (/*{*/ strchr("])} =",s[2]))
3219                         weight -= 10;
3220                     else
3221                         weight -= 1;
3222                 }
3223                 break;
3224             case '\\':
3225                 un_char = 254;
3226                 if (s[1]) {
3227                     if (strchr("wds]",s[1]))
3228                         weight += 100;
3229                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3230                         weight += 1;
3231                     else if (strchr("rnftbxcav",s[1]))
3232                         weight += 40;
3233                     else if (isDIGIT(s[1])) {
3234                         weight += 40;
3235                         while (s[1] && isDIGIT(s[1]))
3236                             s++;
3237                     }
3238                 }
3239                 else
3240                     weight += 100;
3241                 break;
3242             case '-':
3243                 if (s[1] == '\\')
3244                     weight += 50;
3245                 if (strchr("aA01! ",last_un_char))
3246                     weight += 30;
3247                 if (strchr("zZ79~",s[1]))
3248                     weight += 30;
3249                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3250                     weight -= 5;        /* cope with negative subscript */
3251                 break;
3252             default:
3253                 if (!isALNUM(last_un_char)
3254                     && !(last_un_char == '$' || last_un_char == '@'
3255                          || last_un_char == '&')
3256                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3257                     char *d = tmpbuf;
3258                     while (isALPHA(*s))
3259                         *d++ = *s++;
3260                     *d = '\0';
3261                     if (keyword(tmpbuf, d - tmpbuf, 0))
3262                         weight -= 150;
3263                 }
3264                 if (un_char == last_un_char + 1)
3265                     weight += 5;
3266                 weight -= seen[un_char];
3267                 break;
3268             }
3269             seen[un_char]++;
3270         }
3271         if (weight >= 0)        /* probably a character class */
3272             return FALSE;
3273     }
3274
3275     return TRUE;
3276 }
3277
3278 /*
3279  * S_intuit_method
3280  *
3281  * Does all the checking to disambiguate
3282  *   foo bar
3283  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3284  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3285  *
3286  * First argument is the stuff after the first token, e.g. "bar".
3287  *
3288  * Not a method if bar is a filehandle.
3289  * Not a method if foo is a subroutine prototyped to take a filehandle.
3290  * Not a method if it's really "Foo $bar"
3291  * Method if it's "foo $bar"
3292  * Not a method if it's really "print foo $bar"
3293  * Method if it's really "foo package::" (interpreted as package->foo)
3294  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3295  * Not a method if bar is a filehandle or package, but is quoted with
3296  *   =>
3297  */
3298
3299 STATIC int
3300 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3301 {
3302     dVAR;
3303     char *s = start + (*start == '$');
3304     char tmpbuf[sizeof PL_tokenbuf];
3305     STRLEN len;
3306     GV* indirgv;
3307 #ifdef PERL_MAD
3308     int soff;
3309 #endif
3310
3311     PERL_ARGS_ASSERT_INTUIT_METHOD;
3312
3313     if (gv) {
3314         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3315             return 0;
3316         if (cv) {
3317             if (SvPOK(cv)) {
3318                 const char *proto = SvPVX_const(cv);
3319                 if (proto) {
3320                     if (*proto == ';')
3321                         proto++;
3322                     if (*proto == '*')
3323                         return 0;
3324                 }
3325             }
3326         } else
3327             gv = NULL;
3328     }
3329     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3330     /* start is the beginning of the possible filehandle/object,
3331      * and s is the end of it
3332      * tmpbuf is a copy of it
3333      */
3334
3335     if (*start == '$') {
3336         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3337                 isUPPER(*PL_tokenbuf))
3338             return 0;
3339 #ifdef PERL_MAD
3340         len = start - SvPVX(PL_linestr);
3341 #endif
3342         s = PEEKSPACE(s);
3343 #ifdef PERL_MAD
3344         start = SvPVX(PL_linestr) + len;
3345 #endif
3346         PL_bufptr = start;
3347         PL_expect = XREF;
3348         return *s == '(' ? FUNCMETH : METHOD;
3349     }
3350     if (!keyword(tmpbuf, len, 0)) {
3351         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3352             len -= 2;
3353             tmpbuf[len] = '\0';
3354 #ifdef PERL_MAD
3355             soff = s - SvPVX(PL_linestr);
3356 #endif
3357             goto bare_package;
3358         }
3359         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3360         if (indirgv && GvCVu(indirgv))
3361             return 0;
3362         /* filehandle or package name makes it a method */
3363         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3364 #ifdef PERL_MAD
3365             soff = s - SvPVX(PL_linestr);
3366 #endif
3367             s = PEEKSPACE(s);
3368             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3369                 return 0;       /* no assumptions -- "=>" quotes bearword */
3370       bare_package:
3371             start_force(PL_curforce);
3372             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3373                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3374             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3375             if (PL_madskills)
3376                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3377             PL_expect = XTERM;
3378             force_next(WORD);
3379             PL_bufptr = s;
3380 #ifdef PERL_MAD
3381             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3382 #endif
3383             return *s == '(' ? FUNCMETH : METHOD;
3384         }
3385     }
3386     return 0;
3387 }
3388
3389 /* Encoded script support. filter_add() effectively inserts a
3390  * 'pre-processing' function into the current source input stream.
3391  * Note that the filter function only applies to the current source file
3392  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3393  *
3394  * The datasv parameter (which may be NULL) can be used to pass
3395  * private data to this instance of the filter. The filter function
3396  * can recover the SV using the FILTER_DATA macro and use it to
3397  * store private buffers and state information.
3398  *
3399  * The supplied datasv parameter is upgraded to a PVIO type
3400  * and the IoDIRP/IoANY field is used to store the function pointer,
3401  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3402  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3403  * private use must be set using malloc'd pointers.
3404  */
3405
3406 SV *
3407 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3408 {
3409     dVAR;
3410     if (!funcp)
3411         return NULL;
3412
3413     if (!PL_parser)
3414         return NULL;
3415
3416     if (!PL_rsfp_filters)
3417         PL_rsfp_filters = newAV();
3418     if (!datasv)
3419         datasv = newSV(0);
3420     SvUPGRADE(datasv, SVt_PVIO);
3421     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3422     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3423     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3424                           FPTR2DPTR(void *, IoANY(datasv)),
3425                           SvPV_nolen(datasv)));
3426     av_unshift(PL_rsfp_filters, 1);
3427     av_store(PL_rsfp_filters, 0, datasv) ;
3428     return(datasv);
3429 }
3430
3431
3432 /* Delete most recently added instance of this filter function. */
3433 void
3434 Perl_filter_del(pTHX_ filter_t funcp)
3435 {
3436     dVAR;
3437     SV *datasv;
3438
3439     PERL_ARGS_ASSERT_FILTER_DEL;
3440
3441 #ifdef DEBUGGING
3442     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3443                           FPTR2DPTR(void*, funcp)));
3444 #endif
3445     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3446         return;
3447     /* if filter is on top of stack (usual case) just pop it off */
3448     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3449     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3450         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3451         IoANY(datasv) = (void *)NULL;
3452         sv_free(av_pop(PL_rsfp_filters));
3453
3454         return;
3455     }
3456     /* we need to search for the correct entry and clear it     */
3457     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3458 }
3459
3460
3461 /* Invoke the idxth filter function for the current rsfp.        */
3462 /* maxlen 0 = read one text line */
3463 I32
3464 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3465 {
3466     dVAR;
3467     filter_t funcp;
3468     SV *datasv = NULL;
3469     /* This API is bad. It should have been using unsigned int for maxlen.
3470        Not sure if we want to change the API, but if not we should sanity
3471        check the value here.  */
3472     const unsigned int correct_length
3473         = maxlen < 0 ?
3474 #ifdef PERL_MICRO
3475         0x7FFFFFFF
3476 #else
3477         INT_MAX
3478 #endif
3479         : maxlen;
3480
3481     PERL_ARGS_ASSERT_FILTER_READ;
3482
3483     if (!PL_parser || !PL_rsfp_filters)
3484         return -1;
3485     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3486         /* Provide a default input filter to make life easy.    */
3487         /* Note that we append to the line. This is handy.      */
3488         DEBUG_P(PerlIO_printf(Perl_debug_log,
3489                               "filter_read %d: from rsfp\n", idx));
3490         if (correct_length) {
3491             /* Want a block */
3492             int len ;
3493             const int old_len = SvCUR(buf_sv);
3494
3495             /* ensure buf_sv is large enough */
3496             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3497             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3498                                    correct_length)) <= 0) {
3499                 if (PerlIO_error(PL_rsfp))
3500                     return -1;          /* error */
3501                 else
3502                     return 0 ;          /* end of file */
3503             }
3504             SvCUR_set(buf_sv, old_len + len) ;
3505             SvPVX(buf_sv)[old_len + len] = '\0';
3506         } else {
3507             /* Want a line */
3508             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3509                 if (PerlIO_error(PL_rsfp))
3510                     return -1;          /* error */
3511                 else
3512                     return 0 ;          /* end of file */
3513             }
3514         }
3515         return SvCUR(buf_sv);
3516     }
3517     /* Skip this filter slot if filter has been deleted */
3518     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3519         DEBUG_P(PerlIO_printf(Perl_debug_log,
3520                               "filter_read %d: skipped (filter deleted)\n",
3521                               idx));
3522         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3523     }
3524     /* Get function pointer hidden within datasv        */
3525     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3526     DEBUG_P(PerlIO_printf(Perl_debug_log,
3527                           "filter_read %d: via function %p (%s)\n",
3528                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3529     /* Call function. The function is expected to       */
3530     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3531     /* Return: <0:error, =0:eof, >0:not eof             */
3532     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3533 }
3534
3535 STATIC char *
3536 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3537 {
3538     dVAR;
3539
3540     PERL_ARGS_ASSERT_FILTER_GETS;
3541
3542 #ifdef PERL_CR_FILTER
3543     if (!PL_rsfp_filters) {
3544         filter_add(S_cr_textfilter,NULL);
3545     }
3546 #endif
3547     if (PL_rsfp_filters) {
3548         if (!append)
3549             SvCUR_set(sv, 0);   /* start with empty line        */
3550         if (FILTER_READ(0, sv, 0) > 0)
3551             return ( SvPVX(sv) ) ;
3552         else
3553             return NULL ;
3554     }
3555     else
3556         return (sv_gets(sv, PL_rsfp, append));
3557 }
3558
3559 STATIC HV *
3560 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3561 {
3562     dVAR;
3563     GV *gv;
3564
3565     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3566
3567     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3568         return PL_curstash;
3569
3570     if (len > 2 &&
3571         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3572         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3573     {
3574         return GvHV(gv);                        /* Foo:: */
3575     }
3576
3577     /* use constant CLASS => 'MyClass' */
3578     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3579     if (gv && GvCV(gv)) {
3580         SV * const sv = cv_const_sv(GvCV(gv));
3581         if (sv)
3582             pkgname = SvPV_const(sv, len);
3583     }
3584
3585     return gv_stashpvn(pkgname, len, 0);
3586 }
3587
3588 /*
3589  * S_readpipe_override
3590  * Check whether readpipe() is overriden, and generates the appropriate
3591  * optree, provided sublex_start() is called afterwards.
3592  */
3593 STATIC void
3594 S_readpipe_override(pTHX)
3595 {
3596     GV **gvp;
3597     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3598     pl_yylval.ival = OP_BACKTICK;
3599     if ((gv_readpipe
3600                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3601             ||
3602             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3603              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3604              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3605     {
3606         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3607             append_elem(OP_LIST,
3608                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3609                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3610     }
3611 }
3612
3613 #ifdef PERL_MAD
3614  /*
3615  * Perl_madlex
3616  * The intent of this yylex wrapper is to minimize the changes to the
3617  * tokener when we aren't interested in collecting madprops.  It remains
3618  * to be seen how successful this strategy will be...
3619  */
3620
3621 int
3622 Perl_madlex(pTHX)
3623 {
3624     int optype;
3625     char *s = PL_bufptr;
3626
3627     /* make sure PL_thiswhite is initialized */
3628     PL_thiswhite = 0;
3629     PL_thismad = 0;
3630
3631     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3632     if (PL_pending_ident)
3633         return S_pending_ident(aTHX);
3634
3635     /* previous token ate up our whitespace? */
3636     if (!PL_lasttoke && PL_nextwhite) {
3637         PL_thiswhite = PL_nextwhite;
3638         PL_nextwhite = 0;
3639     }
3640
3641     /* isolate the token, and figure out where it is without whitespace */
3642     PL_realtokenstart = -1;
3643     PL_thistoken = 0;
3644     optype = yylex();
3645     s = PL_bufptr;
3646     assert(PL_curforce < 0);
3647
3648     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3649         if (!PL_thistoken) {
3650             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3651                 PL_thistoken = newSVpvs("");
3652             else {
3653                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3654                 PL_thistoken = newSVpvn(tstart, s - tstart);
3655             }
3656         }
3657         if (PL_thismad) /* install head */
3658             CURMAD('X', PL_thistoken);
3659     }
3660
3661     /* last whitespace of a sublex? */
3662     if (optype == ')' && PL_endwhite) {
3663         CURMAD('X', PL_endwhite);
3664     }
3665
3666     if (!PL_thismad) {
3667
3668         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3669         if (!PL_thiswhite && !PL_endwhite && !optype) {
3670             sv_free(PL_thistoken);
3671             PL_thistoken = 0;
3672             return 0;
3673         }
3674
3675         /* put off final whitespace till peg */
3676         if (optype == ';' && !PL_rsfp) {
3677             PL_nextwhite = PL_thiswhite;
3678             PL_thiswhite = 0;
3679         }
3680         else if (PL_thisopen) {
3681             CURMAD('q', PL_thisopen);
3682             if (PL_thistoken)
3683                 sv_free(PL_thistoken);
3684             PL_thistoken = 0;
3685         }
3686         else {
3687             /* Store actual token text as madprop X */
3688             CURMAD('X', PL_thistoken);
3689         }
3690
3691         if (PL_thiswhite) {
3692             /* add preceding whitespace as madprop _ */
3693             CURMAD('_', PL_thiswhite);
3694         }
3695
3696         if (PL_thisstuff) {
3697             /* add quoted material as madprop = */
3698             CURMAD('=', PL_thisstuff);
3699         }
3700
3701         if (PL_thisclose) {
3702             /* add terminating quote as madprop Q */
3703             CURMAD('Q', PL_thisclose);
3704         }
3705     }
3706
3707     /* special processing based on optype */
3708
3709     switch (optype) {
3710
3711     /* opval doesn't need a TOKEN since it can already store mp */
3712     case WORD:
3713     case METHOD:
3714     case FUNCMETH:
3715     case THING:
3716     case PMFUNC:
3717     case PRIVATEREF:
3718     case FUNC0SUB:
3719     case UNIOPSUB:
3720     case LSTOPSUB:
3721         if (pl_yylval.opval)
3722             append_madprops(PL_thismad, pl_yylval.opval, 0);
3723         PL_thismad = 0;
3724         return optype;
3725
3726     /* fake EOF */
3727     case 0:
3728         optype = PEG;
3729         if (PL_endwhite) {
3730             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3731             PL_endwhite = 0;
3732         }
3733         break;
3734
3735     case ']':
3736     case '}':
3737         if (PL_faketokens)
3738             break;
3739         /* remember any fake bracket that lexer is about to discard */
3740         if (PL_lex_brackets == 1 &&
3741             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3742         {
3743             s = PL_bufptr;
3744             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3745                 s++;
3746             if (*s == '}') {
3747                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3748                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3749                 PL_thiswhite = 0;
3750                 PL_bufptr = s - 1;
3751                 break;  /* don't bother looking for trailing comment */
3752             }
3753             else
3754                 s = PL_bufptr;
3755         }
3756         if (optype == ']')
3757             break;
3758         /* FALLTHROUGH */
3759
3760     /* attach a trailing comment to its statement instead of next token */
3761     case ';':
3762         if (PL_faketokens)
3763             break;
3764         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3765             s = PL_bufptr;
3766             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3767                 s++;
3768             if (*s == '\n' || *s == '#') {
3769                 while (s < PL_bufend && *s != '\n')
3770                     s++;
3771                 if (s < PL_bufend)
3772                     s++;
3773                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3774                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3775                 PL_thiswhite = 0;
3776                 PL_bufptr = s;
3777             }
3778         }
3779         break;
3780
3781     /* pval */
3782     case LABEL:
3783         break;
3784
3785     /* ival */
3786     default:
3787         break;
3788
3789     }
3790
3791     /* Create new token struct.  Note: opvals return early above. */
3792     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3793     PL_thismad = 0;
3794     return optype;
3795 }
3796 #endif
3797
3798 STATIC char *
3799 S_tokenize_use(pTHX_ int is_use, char *s) {
3800     dVAR;
3801
3802     PERL_ARGS_ASSERT_TOKENIZE_USE;
3803
3804     if (PL_expect != XSTATE)
3805         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3806                     is_use ? "use" : "no"));
3807     s = SKIPSPACE1(s);
3808     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3809         s = force_version(s, TRUE);
3810         if (*s == ';' || *s == '}'
3811                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
3812             start_force(PL_curforce);
3813             NEXTVAL_NEXTTOKE.opval = NULL;
3814             force_next(WORD);
3815         }
3816         else if (*s == 'v') {
3817             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3818             s = force_version(s, FALSE);
3819         }
3820     }
3821     else {
3822         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3823         s = force_version(s, FALSE);
3824     }
3825     pl_yylval.ival = is_use;
3826     return s;
3827 }
3828 #ifdef DEBUGGING
3829     static const char* const exp_name[] =
3830         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3831           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3832         };
3833 #endif
3834
3835 /*
3836   yylex
3837
3838   Works out what to call the token just pulled out of the input
3839   stream.  The yacc parser takes care of taking the ops we return and
3840   stitching them into a tree.
3841
3842   Returns:
3843     PRIVATEREF
3844
3845   Structure:
3846       if read an identifier
3847           if we're in a my declaration
3848               croak if they tried to say my($foo::bar)
3849               build the ops for a my() declaration
3850           if it's an access to a my() variable
3851               are we in a sort block?
3852                   croak if my($a); $a <=> $b
3853               build ops for access to a my() variable
3854           if in a dq string, and they've said @foo and we can't find @foo
3855               croak
3856           build ops for a bareword
3857       if we already built the token before, use it.
3858 */
3859
3860
3861 #ifdef __SC__
3862 #pragma segment Perl_yylex
3863 #endif
3864 int
3865 Perl_yylex(pTHX)
3866 {
3867     dVAR;
3868     register char *s = PL_bufptr;
3869     register char *d;
3870     STRLEN len;
3871     bool bof = FALSE;
3872     U32 fake_eof = 0;
3873
3874     /* orig_keyword, gvp, and gv are initialized here because
3875      * jump to the label just_a_word_zero can bypass their
3876      * initialization later. */
3877     I32 orig_keyword = 0;
3878     GV *gv = NULL;
3879     GV **gvp = NULL;
3880
3881     DEBUG_T( {
3882         SV* tmp = newSVpvs("");
3883         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3884             (IV)CopLINE(PL_curcop),
3885             lex_state_names[PL_lex_state],
3886             exp_name[PL_expect],
3887             pv_display(tmp, s, strlen(s), 0, 60));
3888         SvREFCNT_dec(tmp);
3889     } );
3890     /* check if there's an identifier for us to look at */
3891     if (PL_pending_ident)
3892         return REPORT(S_pending_ident(aTHX));
3893
3894     /* no identifier pending identification */
3895
3896     switch (PL_lex_state) {
3897 #ifdef COMMENTARY
3898     case LEX_NORMAL:            /* Some compilers will produce faster */
3899     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3900         break;
3901 #endif
3902
3903     /* when we've already built the next token, just pull it out of the queue */
3904     case LEX_KNOWNEXT:
3905 #ifdef PERL_MAD
3906         PL_lasttoke--;
3907         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3908         if (PL_madskills) {
3909             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3910             PL_nexttoke[PL_lasttoke].next_mad = 0;
3911             if (PL_thismad && PL_thismad->mad_key == '_') {
3912                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3913                 PL_thismad->mad_val = 0;
3914                 mad_free(PL_thismad);
3915                 PL_thismad = 0;
3916             }
3917         }
3918         if (!PL_lasttoke) {
3919             PL_lex_state = PL_lex_defer;
3920             PL_expect = PL_lex_expect;
3921             PL_lex_defer = LEX_NORMAL;
3922             if (!PL_nexttoke[PL_lasttoke].next_type)
3923                 return yylex();
3924         }
3925 #else
3926         PL_nexttoke--;
3927         pl_yylval = PL_nextval[PL_nexttoke];
3928         if (!PL_nexttoke) {
3929             PL_lex_state = PL_lex_defer;
3930             PL_expect = PL_lex_expect;
3931             PL_lex_defer = LEX_NORMAL;
3932         }
3933 #endif
3934 #ifdef PERL_MAD
3935         /* FIXME - can these be merged?  */
3936         return(PL_nexttoke[PL_lasttoke].next_type);
3937 #else
3938         return REPORT(PL_nexttype[PL_nexttoke]);
3939 #endif
3940
3941     /* interpolated case modifiers like \L \U, including \Q and \E.
3942        when we get here, PL_bufptr is at the \
3943     */
3944     case LEX_INTERPCASEMOD:
3945 #ifdef DEBUGGING
3946         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3947             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3948 #endif
3949         /* handle \E or end of string */
3950         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3951             /* if at a \E */
3952             if (PL_lex_casemods) {
3953                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3954                 PL_lex_casestack[PL_lex_casemods] = '\0';
3955
3956                 if (PL_bufptr != PL_bufend
3957                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3958                     PL_bufptr += 2;
3959                     PL_lex_state = LEX_INTERPCONCAT;
3960 #ifdef PERL_MAD
3961                     if (PL_madskills)
3962                         PL_thistoken = newSVpvs("\\E");
3963 #endif
3964                 }
3965                 return REPORT(')');
3966             }
3967 #ifdef PERL_MAD
3968             while (PL_bufptr != PL_bufend &&
3969               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3970                 if (!PL_thiswhite)
3971                     PL_thiswhite = newSVpvs("");
3972                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3973                 PL_bufptr += 2;
3974             }
3975 #else
3976             if (PL_bufptr != PL_bufend)
3977                 PL_bufptr += 2;
3978 #endif
3979             PL_lex_state = LEX_INTERPCONCAT;
3980             return yylex();
3981         }
3982         else {
3983             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3984               "### Saw case modifier\n"); });
3985             s = PL_bufptr + 1;
3986             if (s[1] == '\\' && s[2] == 'E') {
3987 #ifdef PERL_MAD
3988                 if (!PL_thiswhite)
3989                     PL_thiswhite = newSVpvs("");
3990                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3991 #endif
3992                 PL_bufptr = s + 3;
3993                 PL_lex_state = LEX_INTERPCONCAT;
3994                 return yylex();
3995             }
3996             else {
3997                 I32 tmp;
3998                 if (!PL_madskills) /* when just compiling don't need correct */
3999                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4000                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4001                 if ((*s == 'L' || *s == 'U') &&
4002                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4003                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4004                     return REPORT(')');
4005                 }
4006                 if (PL_lex_casemods > 10)
4007                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4008                 PL_lex_casestack[PL_lex_casemods++] = *s;
4009                 PL_lex_casestack[PL_lex_casemods] = '\0';
4010                 PL_lex_state = LEX_INTERPCONCAT;
4011                 start_force(PL_curforce);
4012                 NEXTVAL_NEXTTOKE.ival = 0;
4013                 force_next('(');
4014                 start_force(PL_curforce);
4015                 if (*s == 'l')
4016                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4017                 else if (*s == 'u')
4018                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4019                 else if (*s == 'L')
4020                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4021                 else if (*s == 'U')
4022                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4023                 else if (*s == 'Q')
4024                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4025                 else
4026                     Perl_croak(aTHX_ "panic: yylex");
4027                 if (PL_madskills) {
4028                     SV* const tmpsv = newSVpvs("\\ ");
4029                     /* replace the space with the character we want to escape
4030                      */
4031                     SvPVX(tmpsv)[1] = *s;
4032                     curmad('_', tmpsv);
4033                 }
4034                 PL_bufptr = s + 1;
4035             }
4036             force_next(FUNC);
4037             if (PL_lex_starts) {
4038                 s = PL_bufptr;
4039                 PL_lex_starts = 0;
4040 #ifdef PERL_MAD
4041                 if (PL_madskills) {
4042                     if (PL_thistoken)
4043                         sv_free(PL_thistoken);
4044                     PL_thistoken = newSVpvs("");
4045                 }
4046 #endif
4047                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4048                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4049                     OPERATOR(',');
4050                 else
4051                     Aop(OP_CONCAT);
4052             }
4053             else
4054                 return yylex();
4055         }
4056
4057     case LEX_INTERPPUSH:
4058         return REPORT(sublex_push());
4059
4060     case LEX_INTERPSTART:
4061         if (PL_bufptr == PL_bufend)
4062             return REPORT(sublex_done());
4063         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4064               "### Interpolated variable\n"); });
4065         PL_expect = XTERM;
4066         PL_lex_dojoin = (*PL_bufptr == '@');
4067         PL_lex_state = LEX_INTERPNORMAL;
4068         if (PL_lex_dojoin) {
4069             start_force(PL_curforce);
4070             NEXTVAL_NEXTTOKE.ival = 0;
4071             force_next(',');
4072             start_force(PL_curforce);
4073             force_ident("\"", '$');
4074             start_force(PL_curforce);
4075             NEXTVAL_NEXTTOKE.ival = 0;
4076             force_next('$');
4077             start_force(PL_curforce);
4078             NEXTVAL_NEXTTOKE.ival = 0;
4079             force_next('(');
4080             start_force(PL_curforce);
4081             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4082             force_next(FUNC);
4083         }
4084         if (PL_lex_starts++) {
4085             s = PL_bufptr;
4086 #ifdef PERL_MAD
4087             if (PL_madskills) {
4088                 if (PL_thistoken)
4089                     sv_free(PL_thistoken);
4090                 PL_thistoken = newSVpvs("");
4091             }
4092 #endif
4093             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4094             if (!PL_lex_casemods && PL_lex_inpat)
4095                 OPERATOR(',');
4096             else
4097                 Aop(OP_CONCAT);
4098         }
4099         return yylex();
4100
4101     case LEX_INTERPENDMAYBE:
4102         if (intuit_more(PL_bufptr)) {
4103             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4104             break;
4105         }
4106         /* FALL THROUGH */
4107
4108     case LEX_INTERPEND:
4109         if (PL_lex_dojoin) {
4110             PL_lex_dojoin = FALSE;
4111             PL_lex_state = LEX_INTERPCONCAT;
4112 #ifdef PERL_MAD
4113             if (PL_madskills) {
4114                 if (PL_thistoken)
4115                     sv_free(PL_thistoken);
4116                 PL_thistoken = newSVpvs("");
4117             }
4118 #endif
4119             return REPORT(')');
4120         }
4121         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4122             && SvEVALED(PL_lex_repl))
4123         {
4124             if (PL_bufptr != PL_bufend)
4125                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4126             PL_lex_repl = NULL;
4127         }
4128         /* FALLTHROUGH */
4129     case LEX_INTERPCONCAT:
4130 #ifdef DEBUGGING
4131         if (PL_lex_brackets)
4132             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4133 #endif
4134         if (PL_bufptr == PL_bufend)
4135             return REPORT(sublex_done());
4136
4137         if (SvIVX(PL_linestr) == '\'') {
4138             SV *sv = newSVsv(PL_linestr);
4139             if (!PL_lex_inpat)
4140                 sv = tokeq(sv);
4141             else if ( PL_hints & HINT_NEW_RE )
4142                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4143             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4144             s = PL_bufend;
4145         }
4146         else {
4147             s = scan_const(PL_bufptr);
4148             if (*s == '\\')
4149                 PL_lex_state = LEX_INTERPCASEMOD;
4150             else
4151                 PL_lex_state = LEX_INTERPSTART;
4152         }
4153
4154         if (s != PL_bufptr) {
4155             start_force(PL_curforce);
4156             if (PL_madskills) {
4157                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4158             }
4159             NEXTVAL_NEXTTOKE = pl_yylval;
4160             PL_expect = XTERM;
4161             force_next(THING);
4162             if (PL_lex_starts++) {
4163 #ifdef PERL_MAD
4164                 if (PL_madskills) {
4165                     if (PL_thistoken)
4166                         sv_free(PL_thistoken);
4167                     PL_thistoken = newSVpvs("");
4168                 }
4169 #endif
4170                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4171                 if (!PL_lex_casemods && PL_lex_inpat)
4172                     OPERATOR(',');
4173                 else
4174                     Aop(OP_CONCAT);
4175             }
4176             else {
4177                 PL_bufptr = s;
4178                 return yylex();
4179             }
4180         }
4181
4182         return yylex();
4183     case LEX_FORMLINE:
4184         PL_lex_state = LEX_NORMAL;
4185         s = scan_formline(PL_bufptr);
4186         if (!PL_lex_formbrack)
4187             goto rightbracket;
4188         OPERATOR(';');
4189     }
4190
4191     s = PL_bufptr;
4192     PL_oldoldbufptr = PL_oldbufptr;
4193     PL_oldbufptr = s;
4194
4195   retry:
4196 #ifdef PERL_MAD
4197     if (PL_thistoken) {
4198         sv_free(PL_thistoken);
4199         PL_thistoken = 0;
4200     }
4201     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4202 #endif
4203     switch (*s) {
4204     default:
4205         if (isIDFIRST_lazy_if(s,UTF))
4206             goto keylookup;
4207         {
4208         unsigned char c = *s;
4209         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4210         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4211             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4212         } else {
4213             d = PL_linestart;
4214         }
4215         *s = '\0';
4216         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4217     }
4218     case 4:
4219     case 26:
4220         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4221     case 0:
4222 #ifdef PERL_MAD
4223         if (PL_madskills)
4224             PL_faketokens = 0;
4225 #endif
4226         if (!PL_rsfp) {
4227             PL_last_uni = 0;
4228             PL_last_lop = 0;
4229             if (PL_lex_brackets) {
4230                 yyerror((const char *)
4231                         (PL_lex_formbrack
4232                          ? "Format not terminated"
4233                          : "Missing right curly or square bracket"));
4234             }
4235             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4236                         "### Tokener got EOF\n");
4237             } );
4238             TOKEN(0);
4239         }
4240         if (s++ < PL_bufend)
4241             goto retry;                 /* ignore stray nulls */
4242         PL_last_uni = 0;
4243         PL_last_lop = 0;
4244         if (!PL_in_eval && !PL_preambled) {
4245             PL_preambled = TRUE;
4246 #ifdef PERL_MAD
4247             if (PL_madskills)
4248                 PL_faketokens = 1;
4249 #endif
4250             if (PL_perldb) {
4251                 /* Generate a string of Perl code to load the debugger.
4252                  * If PERL5DB is set, it will return the contents of that,
4253                  * otherwise a compile-time require of perl5db.pl.  */
4254
4255                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4256
4257                 if (pdb) {
4258                     sv_setpv(PL_linestr, pdb);
4259                     sv_catpvs(PL_linestr,";");
4260                 } else {
4261                     SETERRNO(0,SS_NORMAL);
4262                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4263                 }
4264             } else
4265                 sv_setpvs(PL_linestr,"");
4266             if (PL_preambleav) {
4267                 SV **svp = AvARRAY(PL_preambleav);
4268                 SV **const end = svp + AvFILLp(PL_preambleav);
4269                 while(svp <= end) {
4270                     sv_catsv(PL_linestr, *svp);
4271                     ++svp;
4272                     sv_catpvs(PL_linestr, ";");
4273                 }
4274                 sv_free(MUTABLE_SV(PL_preambleav));
4275                 PL_preambleav = NULL;
4276             }
4277             if (PL_minus_E)
4278                 sv_catpvs(PL_linestr,
4279                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4280             if (PL_minus_n || PL_minus_p) {
4281                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4282                 if (PL_minus_l)
4283                     sv_catpvs(PL_linestr,"chomp;");
4284                 if (PL_minus_a) {
4285                     if (PL_minus_F) {
4286                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4287                              || *PL_splitstr == '"')
4288                               && strchr(PL_splitstr + 1, *PL_splitstr))
4289                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4290                         else {
4291                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4292                                bytes can be used as quoting characters.  :-) */
4293                             const char *splits = PL_splitstr;
4294                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4295                             do {
4296                                 /* Need to \ \s  */
4297                                 if (*splits == '\\')
4298                                     sv_catpvn(PL_linestr, splits, 1);
4299                                 sv_catpvn(PL_linestr, splits, 1);
4300                             } while (*splits++);
4301                             /* This loop will embed the trailing NUL of
4302                                PL_linestr as the last thing it does before
4303                                terminating.  */
4304                             sv_catpvs(PL_linestr, ");");
4305                         }
4306                     }
4307                     else
4308                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4309                 }
4310             }
4311             sv_catpvs(PL_linestr, "\n");
4312             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4313             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4314             PL_last_lop = PL_last_uni = NULL;
4315             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4316                 update_debugger_info(PL_linestr, NULL, 0);
4317             goto retry;
4318         }
4319         do {
4320             fake_eof = 0;
4321             bof = PL_rsfp ? TRUE : FALSE;
4322             if (0) {
4323               fake_eof:
4324                 fake_eof = LEX_FAKE_EOF;
4325             }
4326             PL_bufptr = PL_bufend;
4327             CopLINE_inc(PL_curcop);
4328             if (!lex_next_chunk(fake_eof)) {
4329                 CopLINE_dec(PL_curcop);
4330                 s = PL_bufptr;
4331                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4332             }
4333             CopLINE_dec(PL_curcop);
4334 #ifdef PERL_MAD
4335             if (!PL_rsfp)
4336                 PL_realtokenstart = -1;
4337 #endif
4338             s = PL_bufptr;
4339             /* If it looks like the start of a BOM or raw UTF-16,
4340              * check if it in fact is. */
4341             if (bof && PL_rsfp &&
4342                      (*s == 0 ||
4343                       *(U8*)s == 0xEF ||
4344                       *(U8*)s >= 0xFE ||
4345                       s[1] == 0)) {
4346                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4347                 if (bof) {
4348                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4349                     s = swallow_bom((U8*)s);
4350                 }
4351             }
4352             if (PL_doextract) {
4353                 /* Incest with pod. */
4354 #ifdef PERL_MAD
4355                 if (PL_madskills)
4356                     sv_catsv(PL_thiswhite, PL_linestr);
4357 #endif
4358                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4359                     sv_setpvs(PL_linestr, "");
4360                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4361                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4362                     PL_last_lop = PL_last_uni = NULL;
4363                     PL_doextract = FALSE;
4364                 }
4365             }
4366             if (PL_rsfp)
4367                 incline(s);
4368         } while (PL_doextract);
4369         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4370         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4371         PL_last_lop = PL_last_uni = NULL;
4372         if (CopLINE(PL_curcop) == 1) {
4373             while (s < PL_bufend && isSPACE(*s))
4374                 s++;
4375             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4376                 s++;
4377 #ifdef PERL_MAD
4378             if (PL_madskills)
4379                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4380 #endif
4381             d = NULL;
4382             if (!PL_in_eval) {
4383                 if (*s == '#' && *(s+1) == '!')
4384                     d = s + 2;
4385 #ifdef ALTERNATE_SHEBANG
4386                 else {
4387                     static char const as[] = ALTERNATE_SHEBANG;
4388                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4389                         d = s + (sizeof(as) - 1);
4390                 }
4391 #endif /* ALTERNATE_SHEBANG */
4392             }
4393             if (d) {
4394                 char *ipath;
4395                 char *ipathend;
4396
4397                 while (isSPACE(*d))
4398                     d++;
4399                 ipath = d;
4400                 while (*d && !isSPACE(*d))
4401                     d++;
4402                 ipathend = d;
4403
4404 #ifdef ARG_ZERO_IS_SCRIPT
4405                 if (ipathend > ipath) {
4406                     /*
4407                      * HP-UX (at least) sets argv[0] to the script name,
4408                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4409                      * at least, set argv[0] to the basename of the Perl
4410                      * interpreter. So, having found "#!", we'll set it right.
4411                      */
4412                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4413                                                     SVt_PV)); /* $^X */
4414                     assert(SvPOK(x) || SvGMAGICAL(x));
4415                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4416                         sv_setpvn(x, ipath, ipathend - ipath);
4417                         SvSETMAGIC(x);
4418                     }
4419                     else {
4420                         STRLEN blen;
4421                         STRLEN llen;
4422                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4423                         const char * const lstart = SvPV_const(x,llen);
4424                         if (llen < blen) {
4425                             bstart += blen - llen;
4426                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4427                                 sv_setpvn(x, ipath, ipathend - ipath);
4428                                 SvSETMAGIC(x);
4429                             }
4430                         }
4431                     }
4432                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4433                 }
4434 #endif /* ARG_ZERO_IS_SCRIPT */
4435
4436                 /*
4437                  * Look for options.
4438                  */
4439                 d = instr(s,"perl -");
4440                 if (!d) {
4441                     d = instr(s,"perl");
4442 #if defined(DOSISH)
4443                     /* avoid getting into infinite loops when shebang
4444                      * line contains "Perl" rather than "perl" */
4445                     if (!d) {
4446                         for (d = ipathend-4; d >= ipath; --d) {
4447                             if ((*d == 'p' || *d == 'P')
4448                                 && !ibcmp(d, "perl", 4))
4449                             {
4450                                 break;
4451                             }
4452                         }
4453                         if (d < ipath)
4454                             d = NULL;
4455                     }
4456 #endif
4457                 }
4458 #ifdef ALTERNATE_SHEBANG
4459                 /*
4460                  * If the ALTERNATE_SHEBANG on this system starts with a
4461                  * character that can be part of a Perl expression, then if
4462                  * we see it but not "perl", we're probably looking at the
4463                  * start of Perl code, not a request to hand off to some
4464                  * other interpreter.  Similarly, if "perl" is there, but
4465                  * not in the first 'word' of the line, we assume the line
4466                  * contains the start of the Perl program.
4467                  */
4468                 if (d && *s != '#') {
4469                     const char *c = ipath;
4470                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4471                         c++;
4472                     if (c < d)
4473                         d = NULL;       /* "perl" not in first word; ignore */
4474                     else
4475                         *s = '#';       /* Don't try to parse shebang line */
4476                 }
4477 #endif /* ALTERNATE_SHEBANG */
4478                 if (!d &&
4479                     *s == '#' &&
4480                     ipathend > ipath &&
4481                     !PL_minus_c &&
4482                     !instr(s,"indir") &&
4483                     instr(PL_origargv[0],"perl"))
4484                 {
4485                     dVAR;
4486                     char **newargv;
4487
4488                     *ipathend = '\0';
4489                     s = ipathend + 1;
4490                     while (s < PL_bufend && isSPACE(*s))
4491                         s++;
4492                     if (s < PL_bufend) {
4493                         Newx(newargv,PL_origargc+3,char*);
4494                         newargv[1] = s;
4495                         while (s < PL_bufend && !isSPACE(*s))
4496                             s++;
4497                         *s = '\0';
4498                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4499                     }
4500                     else
4501                         newargv = PL_origargv;
4502                     newargv[0] = ipath;
4503                     PERL_FPU_PRE_EXEC
4504                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4505                     PERL_FPU_POST_EXEC
4506                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4507                 }
4508                 if (d) {
4509                     while (*d && !isSPACE(*d))
4510                         d++;
4511                     while (SPACE_OR_TAB(*d))
4512                         d++;
4513
4514                     if (*d++ == '-') {
4515                         const bool switches_done = PL_doswitches;
4516                         const U32 oldpdb = PL_perldb;
4517                         const bool oldn = PL_minus_n;
4518                         const bool oldp = PL_minus_p;
4519                         const char *d1 = d;
4520
4521                         do {
4522                             bool baduni = FALSE;
4523                             if (*d1 == 'C') {
4524                                 const char *d2 = d1 + 1;
4525                                 if (parse_unicode_opts((const char **)&d2)
4526                                     != PL_unicode)
4527                                     baduni = TRUE;
4528                             }
4529                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4530                                 const char * const m = d1;
4531                                 while (*d1 && !isSPACE(*d1))
4532                                     d1++;
4533                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4534                                       (int)(d1 - m), m);
4535                             }
4536                             d1 = moreswitches(d1);
4537                         } while (d1);
4538                         if (PL_doswitches && !switches_done) {
4539                             int argc = PL_origargc;
4540                             char **argv = PL_origargv;
4541                             do {
4542                                 argc--,argv++;
4543                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4544                             init_argv_symbols(argc,argv);
4545                         }
4546                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4547                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4548                               /* if we have already added "LINE: while (<>) {",
4549                                  we must not do it again */
4550                         {
4551                             sv_setpvs(PL_linestr, "");
4552                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4553                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4554                             PL_last_lop = PL_last_uni = NULL;
4555                             PL_preambled = FALSE;
4556                             if (PERLDB_LINE || PERLDB_SAVESRC)
4557                                 (void)gv_fetchfile(PL_origfilename);
4558                             goto retry;
4559                         }
4560                     }
4561                 }
4562             }
4563         }
4564         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4565             PL_bufptr = s;
4566             PL_lex_state = LEX_FORMLINE;
4567             return yylex();
4568         }
4569         goto retry;
4570     case '\r':
4571 #ifdef PERL_STRICT_CR
4572         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4573         Perl_croak(aTHX_
4574       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4575 #endif
4576     case ' ': case '\t': case '\f': case 013:
4577 #ifdef PERL_MAD
4578         PL_realtokenstart = -1;
4579         if (!PL_thiswhite)
4580             PL_thiswhite = newSVpvs("");
4581         sv_catpvn(PL_thiswhite, s, 1);
4582 #endif
4583         s++;
4584         goto retry;
4585     case '#':
4586     case '\n':
4587 #ifdef PERL_MAD
4588         PL_realtokenstart = -1;
4589         if (PL_madskills)
4590             PL_faketokens = 0;
4591 #endif
4592         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4593             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4594                 /* handle eval qq[#line 1 "foo"\n ...] */
4595                 CopLINE_dec(PL_curcop);
4596                 incline(s);
4597             }
4598             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4599                 s = SKIPSPACE0(s);
4600                 if (!PL_in_eval || PL_rsfp)
4601                     incline(s);
4602             }
4603             else {
4604                 d = s;
4605                 while (d < PL_bufend && *d != '\n')
4606                     d++;
4607                 if (d < PL_bufend)
4608                     d++;
4609                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4610                   Perl_croak(aTHX_ "panic: input overflow");
4611 #ifdef PERL_MAD
4612                 if (PL_madskills)
4613                     PL_thiswhite = newSVpvn(s, d - s);
4614 #endif
4615                 s = d;
4616                 incline(s);
4617             }
4618             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4619                 PL_bufptr = s;
4620                 PL_lex_state = LEX_FORMLINE;
4621                 return yylex();
4622             }
4623         }
4624         else {
4625 #ifdef PERL_MAD
4626             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4627                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4628                     PL_faketokens = 0;
4629                     s = SKIPSPACE0(s);
4630                     TOKEN(PEG); /* make sure any #! line is accessible */
4631                 }
4632                 s = SKIPSPACE0(s);
4633             }
4634             else {
4635 /*              if (PL_madskills && PL_lex_formbrack) { */
4636                     d = s;
4637                     while (d < PL_bufend && *d != '\n')
4638                         d++;
4639                     if (d < PL_bufend)
4640                         d++;
4641                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4642                       Perl_croak(aTHX_ "panic: input overflow");
4643                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4644                         if (!PL_thiswhite)
4645                             PL_thiswhite = newSVpvs("");
4646                         if (CopLINE(PL_curcop) == 1) {
4647                             sv_setpvs(PL_thiswhite, "");
4648                             PL_faketokens = 0;
4649                         }
4650                         sv_catpvn(PL_thiswhite, s, d - s);
4651                     }
4652                     s = d;
4653 /*              }
4654                 *s = '\0';
4655                 PL_bufend = s; */
4656             }
4657 #else
4658             *s = '\0';
4659             PL_bufend = s;
4660 #endif
4661         }
4662         goto retry;
4663     case '-':
4664         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4665             I32 ftst = 0;
4666             char tmp;
4667
4668             s++;
4669             PL_bufptr = s;
4670             tmp = *s++;
4671
4672             while (s < PL_bufend && SPACE_OR_TAB(*s))
4673                 s++;
4674
4675             if (strnEQ(s,"=>",2)) {
4676                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4677                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4678                 OPERATOR('-');          /* unary minus */
4679             }
4680             PL_last_uni = PL_oldbufptr;
4681             switch (tmp) {
4682             case 'r': ftst = OP_FTEREAD;        break;
4683             case 'w': ftst = OP_FTEWRITE;       break;
4684             case 'x': ftst = OP_FTEEXEC;        break;
4685             case 'o': ftst = OP_FTEOWNED;       break;
4686             case 'R': ftst = OP_FTRREAD;        break;
4687             case 'W': ftst = OP_FTRWRITE;       break;
4688             case 'X': ftst = OP_FTREXEC;        break;
4689             case 'O': ftst = OP_FTROWNED;       break;
4690             case 'e': ftst = OP_FTIS;           break;
4691             case 'z': ftst = OP_FTZERO;         break;
4692             case 's': ftst = OP_FTSIZE;         break;
4693             case 'f': ftst = OP_FTFILE;         break;
4694             case 'd': ftst = OP_FTDIR;          break;
4695             case 'l': ftst = OP_FTLINK;         break;
4696             case 'p': ftst = OP_FTPIPE;         break;
4697             case 'S': ftst = OP_FTSOCK;         break;
4698             case 'u': ftst = OP_FTSUID;         break;
4699             case 'g': ftst = OP_FTSGID;         break;
4700             case 'k': ftst = OP_FTSVTX;         break;
4701             case 'b': ftst = OP_FTBLK;          break;
4702             case 'c': ftst = OP_FTCHR;          break;
4703             case 't': ftst = OP_FTTTY;          break;
4704             case 'T': ftst = OP_FTTEXT;         break;
4705             case 'B': ftst = OP_FTBINARY;       break;
4706             case 'M': case 'A': case 'C':
4707                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4708                 switch (tmp) {
4709                 case 'M': ftst = OP_FTMTIME;    break;
4710                 case 'A': ftst = OP_FTATIME;    break;
4711                 case 'C': ftst = OP_FTCTIME;    break;
4712                 default:                        break;
4713                 }
4714                 break;
4715             default:
4716                 break;
4717             }
4718             if (ftst) {
4719                 PL_last_lop_op = (OPCODE)ftst;
4720                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4721                         "### Saw file test %c\n", (int)tmp);
4722                 } );
4723                 FTST(ftst);
4724             }
4725             else {
4726                 /* Assume it was a minus followed by a one-letter named
4727                  * subroutine call (or a -bareword), then. */
4728                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4729                         "### '-%c' looked like a file test but was not\n",
4730                         (int) tmp);
4731                 } );
4732                 s = --PL_bufptr;
4733             }
4734         }
4735         {
4736             const char tmp = *s++;
4737             if (*s == tmp) {
4738                 s++;
4739                 if (PL_expect == XOPERATOR)
4740                     TERM(POSTDEC);
4741                 else
4742                     OPERATOR(PREDEC);
4743             }
4744             else if (*s == '>') {
4745                 s++;
4746                 s = SKIPSPACE1(s);
4747                 if (isIDFIRST_lazy_if(s,UTF)) {
4748                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4749                     TOKEN(ARROW);
4750                 }
4751                 else if (*s == '$')
4752                     OPERATOR(ARROW);
4753                 else
4754                     TERM(ARROW);
4755             }
4756             if (PL_expect == XOPERATOR)
4757                 Aop(OP_SUBTRACT);
4758             else {
4759                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4760                     check_uni();
4761                 OPERATOR('-');          /* unary minus */
4762             }
4763         }
4764
4765     case '+':
4766         {
4767             const char tmp = *s++;
4768             if (*s == tmp) {
4769                 s++;
4770                 if (PL_expect == XOPERATOR)
4771                     TERM(POSTINC);
4772                 else
4773                     OPERATOR(PREINC);
4774             }
4775             if (PL_expect == XOPERATOR)
4776                 Aop(OP_ADD);
4777             else {
4778                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4779                     check_uni();
4780                 OPERATOR('+');
4781             }
4782         }
4783
4784     case '*':
4785         if (PL_expect != XOPERATOR) {
4786             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4787             PL_expect = XOPERATOR;
4788             force_ident(PL_tokenbuf, '*');
4789             if (!*PL_tokenbuf)
4790                 PREREF('*');
4791             TERM('*');
4792         }
4793         s++;
4794         if (*s == '*') {
4795             s++;
4796             PWop(OP_POW);
4797         }
4798         Mop(OP_MULTIPLY);
4799
4800     case '%':
4801         if (PL_expect == XOPERATOR) {
4802             ++s;
4803             Mop(OP_MODULO);
4804         }
4805         PL_tokenbuf[0] = '%';
4806         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4807                 sizeof PL_tokenbuf - 1, FALSE);
4808         if (!PL_tokenbuf[1]) {
4809             PREREF('%');
4810         }
4811         PL_pending_ident = '%';
4812         TERM('%');
4813
4814     case '^':
4815         s++;
4816         BOop(OP_BIT_XOR);
4817     case '[':
4818         PL_lex_brackets++;
4819         {
4820             const char tmp = *s++;
4821             OPERATOR(tmp);
4822         }
4823     case '~':
4824         if (s[1] == '~'
4825             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4826         {
4827             s += 2;
4828             Eop(OP_SMARTMATCH);
4829         }
4830     case ',':
4831         {
4832             const char tmp = *s++;
4833             OPERATOR(tmp);
4834         }
4835     case ':':
4836         if (s[1] == ':') {
4837             len = 0;
4838             goto just_a_word_zero_gv;
4839         }
4840         s++;
4841         switch (PL_expect) {
4842             OP *attrs;
4843 #ifdef PERL_MAD
4844             I32 stuffstart;
4845 #endif
4846         case XOPERATOR:
4847             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4848                 break;
4849             PL_bufptr = s;      /* update in case we back off */
4850             if (*s == '=') {
4851                 deprecate(":= for an empty attribute list");
4852             }
4853             goto grabattrs;
4854         case XATTRBLOCK:
4855             PL_expect = XBLOCK;
4856             goto grabattrs;
4857         case XATTRTERM:
4858             PL_expect = XTERMBLOCK;
4859          grabattrs:
4860 #ifdef PERL_MAD
4861             stuffstart = s - SvPVX(PL_linestr) - 1;
4862 #endif
4863             s = PEEKSPACE(s);
4864             attrs = NULL;
4865             while (isIDFIRST_lazy_if(s,UTF)) {
4866                 I32 tmp;
4867                 SV *sv;
4868                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4869                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4870                     if (tmp < 0) tmp = -tmp;
4871                     switch (tmp) {
4872                     case KEY_or:
4873                     case KEY_and:
4874                     case KEY_for:
4875                     case KEY_foreach:
4876                     case KEY_unless:
4877                     case KEY_if:
4878                     case KEY_while:
4879                     case KEY_until:
4880                         goto got_attrs;
4881                     default:
4882                         break;
4883                     }
4884                 }
4885                 sv = newSVpvn(s, len);
4886                 if (*d == '(') {
4887                     d = scan_str(d,TRUE,TRUE);
4888                     if (!d) {
4889                         /* MUST advance bufptr here to avoid bogus
4890                            "at end of line" context messages from yyerror().
4891                          */
4892                         PL_bufptr = s + len;
4893                         yyerror("Unterminated attribute parameter in attribute list");
4894                         if (attrs)
4895                             op_free(attrs);
4896                         sv_free(sv);
4897                         return REPORT(0);       /* EOF indicator */
4898                     }
4899                 }
4900                 if (PL_lex_stuff) {
4901                     sv_catsv(sv, PL_lex_stuff);
4902                     attrs = append_elem(OP_LIST, attrs,
4903                                         newSVOP(OP_CONST, 0, sv));
4904                     SvREFCNT_dec(PL_lex_stuff);
4905                     PL_lex_stuff = NULL;
4906                 }
4907                 else {
4908                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4909                         sv_free(sv);
4910                         if (PL_in_my == KEY_our) {
4911                             deprecate(":unique");
4912                         }
4913                         else
4914                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4915                     }
4916
4917                     /* NOTE: any CV attrs applied here need to be part of
4918                        the CVf_BUILTIN_ATTRS define in cv.h! */
4919                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4920                         sv_free(sv);
4921                         CvLVALUE_on(PL_compcv);
4922                     }
4923                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4924                         sv_free(sv);
4925                         deprecate(":locked");
4926                     }
4927                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4928                         sv_free(sv);
4929                         CvMETHOD_on(PL_compcv);
4930                     }
4931                     /* After we've set the flags, it could be argued that
4932                        we don't need to do the attributes.pm-based setting
4933                        process, and shouldn't bother appending recognized
4934                        flags.  To experiment with that, uncomment the
4935                        following "else".  (Note that's already been
4936                        uncommented.  That keeps the above-applied built-in
4937                        attributes from being intercepted (and possibly
4938                        rejected) by a package's attribute routines, but is
4939                        justified by the performance win for the common case
4940                        of applying only built-in attributes.) */
4941                     else
4942                         attrs = append_elem(OP_LIST, attrs,
4943                                             newSVOP(OP_CONST, 0,
4944                                                     sv));
4945                 }
4946                 s = PEEKSPACE(d);
4947                 if (*s == ':' && s[1] != ':')
4948                     s = PEEKSPACE(s+1);
4949                 else if (s == d)
4950                     break;      /* require real whitespace or :'s */
4951                 /* XXX losing whitespace on sequential attributes here */
4952             }
4953             {
4954                 const char tmp
4955                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4956                 if (*s != ';' && *s != '}' && *s != tmp
4957                     && (tmp != '=' || *s != ')')) {
4958                     const char q = ((*s == '\'') ? '"' : '\'');
4959                     /* If here for an expression, and parsed no attrs, back
4960                        off. */
4961                     if (tmp == '=' && !attrs) {
4962                         s = PL_bufptr;
4963                         break;
4964                     }
4965                     /* MUST advance bufptr here to avoid bogus "at end of line"
4966                        context messages from yyerror().
4967                     */
4968                     PL_bufptr = s;
4969                     yyerror( (const char *)
4970                              (*s
4971                               ? Perl_form(aTHX_ "Invalid separator character "
4972                                           "%c%c%c in attribute list", q, *s, q)
4973                               : "Unterminated attribute list" ) );
4974                     if (attrs)
4975                         op_free(attrs);
4976                     OPERATOR(':');
4977                 }
4978             }
4979         got_attrs:
4980             if (attrs) {
4981                 start_force(PL_curforce);
4982                 NEXTVAL_NEXTTOKE.opval = attrs;
4983                 CURMAD('_', PL_nextwhite);
4984                 force_next(THING);
4985             }
4986 #ifdef PERL_MAD
4987             if (PL_madskills) {
4988                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4989                                      (s - SvPVX(PL_linestr)) - stuffstart);
4990             }
4991 #endif
4992             TOKEN(COLONATTR);
4993         }
4994         OPERATOR(':');
4995     case '(':
4996         s++;
4997         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4998             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
4999         else
5000             PL_expect = XTERM;
5001         s = SKIPSPACE1(s);
5002         TOKEN('(');
5003     case ';':
5004         CLINE;
5005         {
5006             const char tmp = *s++;
5007             OPERATOR(tmp);
5008         }
5009     case ')':
5010         {
5011             const char tmp = *s++;
5012             s = SKIPSPACE1(s);
5013             if (*s == '{')
5014                 PREBLOCK(tmp);
5015             TERM(tmp);
5016         }
5017     case ']':
5018         s++;
5019         if (PL_lex_brackets <= 0)
5020             yyerror("Unmatched right square bracket");
5021         else
5022             --PL_lex_brackets;
5023         if (PL_lex_state == LEX_INTERPNORMAL) {
5024             if (PL_lex_brackets == 0) {
5025                 if (*s == '-' && s[1] == '>')
5026                     PL_lex_state = LEX_INTERPENDMAYBE;
5027                 else if (*s != '[' && *s != '{')
5028                     PL_lex_state = LEX_INTERPEND;
5029             }
5030         }
5031         TERM(']');
5032     case '{':
5033       leftbracket:
5034         s++;
5035         if (PL_lex_brackets > 100) {
5036             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5037         }
5038         switch (PL_expect) {
5039         case XTERM:
5040             if (PL_lex_formbrack) {
5041                 s--;
5042                 PRETERMBLOCK(DO);
5043             }
5044             if (PL_oldoldbufptr == PL_last_lop)
5045                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5046             else
5047                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5048             OPERATOR(HASHBRACK);
5049         case XOPERATOR:
5050             while (s < PL_bufend && SPACE_OR_TAB(*s))
5051                 s++;
5052             d = s;
5053             PL_tokenbuf[0] = '\0';
5054             if (d < PL_bufend && *d == '-') {
5055                 PL_tokenbuf[0] = '-';
5056                 d++;
5057                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5058                     d++;
5059             }
5060             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5061                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5062                               FALSE, &len);
5063                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5064                     d++;
5065                 if (*d == '}') {
5066                     const char minus = (PL_tokenbuf[0] == '-');
5067                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5068                     if (minus)
5069                         force_next('-');
5070                 }
5071             }
5072             /* FALL THROUGH */
5073         case XATTRBLOCK:
5074         case XBLOCK:
5075             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5076             PL_expect = XSTATE;
5077             break;
5078         case XATTRTERM:
5079         case XTERMBLOCK:
5080             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5081             PL_expect = XSTATE;
5082             break;
5083         default: {
5084                 const char *t;
5085                 if (PL_oldoldbufptr == PL_last_lop)
5086                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5087                 else
5088                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5089                 s = SKIPSPACE1(s);
5090                 if (*s == '}') {
5091                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5092                         PL_expect = XTERM;
5093                         /* This hack is to get the ${} in the message. */
5094                         PL_bufptr = s+1;
5095                         yyerror("syntax error");
5096                         break;
5097                     }
5098                     OPERATOR(HASHBRACK);
5099                 }
5100                 /* This hack serves to disambiguate a pair of curlies
5101                  * as being a block or an anon hash.  Normally, expectation
5102                  * determines that, but in cases where we're not in a
5103                  * position to expect anything in particular (like inside
5104                  * eval"") we have to resolve the ambiguity.  This code
5105                  * covers the case where the first term in the curlies is a
5106                  * quoted string.  Most other cases need to be explicitly
5107                  * disambiguated by prepending a "+" before the opening
5108                  * curly in order to force resolution as an anon hash.
5109                  *
5110                  * XXX should probably propagate the outer expectation
5111                  * into eval"" to rely less on this hack, but that could
5112                  * potentially break current behavior of eval"".
5113                  * GSAR 97-07-21
5114                  */
5115                 t = s;
5116                 if (*s == '\'' || *s == '"' || *s == '`') {
5117                     /* common case: get past first string, handling escapes */
5118                     for (t++; t < PL_bufend && *t != *s;)
5119                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5120                             t++;
5121                     t++;
5122                 }
5123                 else if (*s == 'q') {
5124                     if (++t < PL_bufend
5125                         && (!isALNUM(*t)
5126                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5127                                 && !isALNUM(*t))))
5128                     {
5129                         /* skip q//-like construct */
5130                         const char *tmps;
5131                         char open, close, term;
5132                         I32 brackets = 1;
5133
5134                         while (t < PL_bufend && isSPACE(*t))
5135                             t++;
5136                         /* check for q => */
5137                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5138                             OPERATOR(HASHBRACK);
5139                         }
5140                         term = *t;
5141                         open = term;
5142                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5143                             term = tmps[5];
5144                         close = term;
5145                         if (open == close)
5146                             for (t++; t < PL_bufend; t++) {
5147                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5148                                     t++;
5149                                 else if (*t == open)
5150                                     break;
5151                             }
5152                         else {
5153                             for (t++; t < PL_bufend; t++) {
5154                                 if (*t == '\\' && t+1 < PL_bufend)
5155                                     t++;
5156                                 else if (*t == close && --brackets <= 0)
5157                                     break;
5158                                 else if (*t == open)
5159                                     brackets++;
5160                             }
5161                         }
5162                         t++;
5163                     }
5164                     else
5165                         /* skip plain q word */
5166                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5167                              t += UTF8SKIP(t);
5168                 }
5169                 else if (isALNUM_lazy_if(t,UTF)) {
5170                     t += UTF8SKIP(t);
5171                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5172                          t += UTF8SKIP(t);
5173                 }
5174                 while (t < PL_bufend && isSPACE(*t))
5175                     t++;
5176                 /* if comma follows first term, call it an anon hash */
5177                 /* XXX it could be a comma expression with loop modifiers */
5178                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5179                                    || (*t == '=' && t[1] == '>')))
5180                     OPERATOR(HASHBRACK);
5181                 if (PL_expect == XREF)
5182                     PL_expect = XTERM;
5183                 else {
5184                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5185                     PL_expect = XSTATE;
5186                 }
5187             }
5188             break;
5189         }
5190         pl_yylval.ival = CopLINE(PL_curcop);
5191         if (isSPACE(*s) || *s == '#')
5192             PL_copline = NOLINE;   /* invalidate current command line number */
5193         TOKEN('{');
5194     case '}':
5195       rightbracket:
5196         s++;
5197         if (PL_lex_brackets <= 0)
5198             yyerror("Unmatched right curly bracket");
5199         else
5200             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5201         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5202             PL_lex_formbrack = 0;
5203         if (PL_lex_state == LEX_INTERPNORMAL) {
5204             if (PL_lex_brackets == 0) {
5205                 if (PL_expect & XFAKEBRACK) {
5206                     PL_expect &= XENUMMASK;
5207                     PL_lex_state = LEX_INTERPEND;
5208                     PL_bufptr = s;
5209 #if 0
5210                     if (PL_madskills) {
5211                         if (!PL_thiswhite)
5212                             PL_thiswhite = newSVpvs("");
5213                         sv_catpvs(PL_thiswhite,"}");
5214                     }
5215 #endif
5216                     return yylex();     /* ignore fake brackets */
5217                 }
5218                 if (*s == '-' && s[1] == '>')
5219                     PL_lex_state = LEX_INTERPENDMAYBE;
5220                 else if (*s != '[' && *s != '{')
5221                     PL_lex_state = LEX_INTERPEND;
5222             }
5223         }
5224         if (PL_expect & XFAKEBRACK) {
5225             PL_expect &= XENUMMASK;
5226             PL_bufptr = s;
5227             return yylex();             /* ignore fake brackets */
5228         }
5229         start_force(PL_curforce);
5230         if (PL_madskills) {
5231             curmad('X', newSVpvn(s-1,1));
5232             CURMAD('_', PL_thiswhite);
5233         }
5234         force_next('}');
5235 #ifdef PERL_MAD
5236         if (!PL_thistoken)
5237             PL_thistoken = newSVpvs("");
5238 #endif
5239         TOKEN(';');
5240     case '&':
5241         s++;
5242         if (*s++ == '&')
5243             AOPERATOR(ANDAND);
5244         s--;
5245         if (PL_expect == XOPERATOR) {
5246             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5247                 && isIDFIRST_lazy_if(s,UTF))
5248             {
5249                 CopLINE_dec(PL_curcop);
5250                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5251                 CopLINE_inc(PL_curcop);
5252             }
5253             BAop(OP_BIT_AND);
5254         }
5255
5256         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5257         if (*PL_tokenbuf) {
5258             PL_expect = XOPERATOR;
5259             force_ident(PL_tokenbuf, '&');
5260         }
5261         else
5262             PREREF('&');
5263         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5264         TERM('&');
5265
5266     case '|':
5267         s++;
5268         if (*s++ == '|')
5269             AOPERATOR(OROR);
5270         s--;
5271         BOop(OP_BIT_OR);
5272     case '=':
5273         s++;
5274         {
5275             const char tmp = *s++;
5276             if (tmp == '=')
5277                 Eop(OP_EQ);
5278             if (tmp == '>')
5279                 OPERATOR(',');
5280             if (tmp == '~')
5281                 PMop(OP_MATCH);
5282             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5283                 && strchr("+-*/%.^&|<",tmp))
5284                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5285                             "Reversed %c= operator",(int)tmp);
5286             s--;
5287             if (PL_expect == XSTATE && isALPHA(tmp) &&
5288                 (s == PL_linestart+1 || s[-2] == '\n') )
5289                 {
5290                     if (PL_in_eval && !PL_rsfp) {
5291                         d = PL_bufend;
5292                         while (s < d) {
5293                             if (*s++ == '\n') {
5294                                 incline(s);
5295                                 if (strnEQ(s,"=cut",4)) {
5296                                     s = strchr(s,'\n');
5297                                     if (s)
5298                                         s++;
5299                                     else
5300                                         s = d;
5301                                     incline(s);
5302                                     goto retry;
5303                                 }
5304                             }
5305                         }
5306                         goto retry;
5307                     }
5308 #ifdef PERL_MAD
5309                     if (PL_madskills) {
5310                         if (!PL_thiswhite)
5311                             PL_thiswhite = newSVpvs("");
5312                         sv_catpvn(PL_thiswhite, PL_linestart,
5313                                   PL_bufend - PL_linestart);
5314                     }
5315 #endif
5316                     s = PL_bufend;
5317                     PL_doextract = TRUE;
5318                     goto retry;
5319                 }
5320         }
5321         if (PL_lex_brackets < PL_lex_formbrack) {
5322             const char *t = s;
5323 #ifdef PERL_STRICT_CR
5324             while (SPACE_OR_TAB(*t))
5325 #else
5326             while (SPACE_OR_TAB(*t) || *t == '\r')
5327 #endif
5328                 t++;
5329             if (*t == '\n' || *t == '#') {
5330                 s--;
5331                 PL_expect = XBLOCK;
5332                 goto leftbracket;
5333             }
5334         }
5335         pl_yylval.ival = 0;
5336         OPERATOR(ASSIGNOP);
5337     case '!':
5338         s++;
5339         {
5340             const char tmp = *s++;
5341             if (tmp == '=') {
5342                 /* was this !=~ where !~ was meant?
5343                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5344
5345                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5346                     const char *t = s+1;
5347
5348                     while (t < PL_bufend && isSPACE(*t))
5349                         ++t;
5350
5351                     if (*t == '/' || *t == '?' ||
5352                         ((*t == 'm' || *t == 's' || *t == 'y')
5353                          && !isALNUM(t[1])) ||
5354                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5355                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5356                                     "!=~ should be !~");
5357                 }
5358                 Eop(OP_NE);
5359             }
5360             if (tmp == '~')
5361                 PMop(OP_NOT);
5362         }
5363         s--;
5364         OPERATOR('!');
5365     case '<':
5366         if (PL_expect != XOPERATOR) {
5367             if (s[1] != '<' && !strchr(s,'>'))
5368                 check_uni();
5369             if (s[1] == '<')
5370                 s = scan_heredoc(s);
5371             else
5372                 s = scan_inputsymbol(s);
5373             TERM(sublex_start());
5374         }
5375         s++;
5376         {
5377             char tmp = *s++;
5378             if (tmp == '<')
5379                 SHop(OP_LEFT_SHIFT);
5380             if (tmp == '=') {
5381                 tmp = *s++;
5382                 if (tmp == '>')
5383                     Eop(OP_NCMP);
5384                 s--;
5385                 Rop(OP_LE);
5386             }
5387         }
5388         s--;
5389         Rop(OP_LT);
5390     case '>':
5391         s++;
5392         {
5393             const char tmp = *s++;
5394             if (tmp == '>')
5395                 SHop(OP_RIGHT_SHIFT);
5396             else if (tmp == '=')
5397                 Rop(OP_GE);
5398         }
5399         s--;
5400         Rop(OP_GT);
5401
5402     case '$':
5403         CLINE;
5404
5405         if (PL_expect == XOPERATOR) {
5406             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5407                 return deprecate_commaless_var_list();
5408             }
5409         }
5410
5411         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5412             PL_tokenbuf[0] = '@';
5413             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5414                            sizeof PL_tokenbuf - 1, FALSE);
5415             if (PL_expect == XOPERATOR)
5416                 no_op("Array length", s);
5417             if (!PL_tokenbuf[1])
5418                 PREREF(DOLSHARP);
5419             PL_expect = XOPERATOR;
5420             PL_pending_ident = '#';
5421             TOKEN(DOLSHARP);
5422         }
5423
5424         PL_tokenbuf[0] = '$';
5425         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5426                        sizeof PL_tokenbuf - 1, FALSE);
5427         if (PL_expect == XOPERATOR)
5428             no_op("Scalar", s);
5429         if (!PL_tokenbuf[1]) {
5430             if (s == PL_bufend)
5431                 yyerror("Final $ should be \\$ or $name");
5432             PREREF('$');
5433         }
5434
5435         /* This kludge not intended to be bulletproof. */
5436         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5437             pl_yylval.opval = newSVOP(OP_CONST, 0,
5438                                    newSViv(CopARYBASE_get(&PL_compiling)));
5439             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5440             TERM(THING);
5441         }
5442
5443         d = s;
5444         {
5445             const char tmp = *s;
5446             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5447                 s = SKIPSPACE1(s);
5448
5449             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5450                 && intuit_more(s)) {
5451                 if (*s == '[') {
5452                     PL_tokenbuf[0] = '@';
5453                     if (ckWARN(WARN_SYNTAX)) {
5454                         char *t = s+1;
5455
5456                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5457                             t++;
5458                         if (*t++ == ',') {
5459                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5460                             while (t < PL_bufend && *t != ']')
5461                                 t++;
5462                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5463                                         "Multidimensional syntax %.*s not supported",
5464                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5465                         }
5466                     }
5467                 }
5468                 else if (*s == '{') {
5469                     char *t;
5470                     PL_tokenbuf[0] = '%';
5471                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5472                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5473                         {
5474                             char tmpbuf[sizeof PL_tokenbuf];
5475                             do {
5476                                 t++;
5477                             } while (isSPACE(*t));
5478                             if (isIDFIRST_lazy_if(t,UTF)) {
5479                                 STRLEN len;
5480                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5481                                               &len);
5482                                 while (isSPACE(*t))
5483                                     t++;
5484                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5485                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5486                                                 "You need to quote \"%s\"",
5487                                                 tmpbuf);
5488                             }
5489                         }
5490                 }
5491             }
5492
5493             PL_expect = XOPERATOR;
5494             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5495                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5496                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5497                     PL_expect = XOPERATOR;
5498                 else if (strchr("$@\"'`q", *s))
5499                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5500                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5501                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5502                 else if (isIDFIRST_lazy_if(s,UTF)) {
5503                     char tmpbuf[sizeof PL_tokenbuf];
5504                     int t2;
5505                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5506                     if ((t2 = keyword(tmpbuf, len, 0))) {
5507                         /* binary operators exclude handle interpretations */
5508                         switch (t2) {
5509                         case -KEY_x:
5510                         case -KEY_eq:
5511                         case -KEY_ne:
5512                         case -KEY_gt:
5513                         case -KEY_lt:
5514                         case -KEY_ge:
5515                         case -KEY_le:
5516                         case -KEY_cmp:
5517                             break;
5518                         default:
5519                             PL_expect = XTERM;  /* e.g. print $fh length() */
5520                             break;
5521                         }
5522                     }
5523                     else {
5524                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5525                     }
5526                 }
5527                 else if (isDIGIT(*s))
5528                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5529                 else if (*s == '.' && isDIGIT(s[1]))
5530                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5531                 else if ((*s == '?' || *s == '-' || *s == '+')
5532                          && !isSPACE(s[1]) && s[1] != '=')
5533                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5534                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5535                          && s[1] != '/')
5536                     PL_expect = XTERM;          /* e.g. print $fh /.../
5537                                                    XXX except DORDOR operator
5538                                                 */
5539                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5540                          && s[2] != '=')
5541                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5542             }
5543         }
5544         PL_pending_ident = '$';
5545         TOKEN('$');
5546
5547     case '@':
5548         if (PL_expect == XOPERATOR)
5549             no_op("Array", s);
5550         PL_tokenbuf[0] = '@';
5551         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5552         if (!PL_tokenbuf[1]) {
5553             PREREF('@');
5554         }
5555         if (PL_lex_state == LEX_NORMAL)
5556             s = SKIPSPACE1(s);
5557         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5558             if (*s == '{')
5559                 PL_tokenbuf[0] = '%';
5560
5561             /* Warn about @ where they meant $. */
5562             if (*s == '[' || *s == '{') {
5563                 if (ckWARN(WARN_SYNTAX)) {
5564                     const char *t = s + 1;
5565                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5566                         t++;
5567                     if (*t == '}' || *t == ']') {
5568                         t++;
5569                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5570                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5571                             "Scalar value %.*s better written as $%.*s",
5572                             (int)(t-PL_bufptr), PL_bufptr,
5573                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5574                     }
5575                 }
5576             }
5577         }
5578         PL_pending_ident = '@';
5579         TERM('@');
5580
5581      case '/':                  /* may be division, defined-or, or pattern */
5582         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5583             s += 2;
5584             AOPERATOR(DORDOR);
5585         }
5586      case '?':                  /* may either be conditional or pattern */
5587         if (PL_expect == XOPERATOR) {
5588              char tmp = *s++;
5589              if(tmp == '?') {
5590                 OPERATOR('?');
5591              }
5592              else {
5593                  tmp = *s++;
5594                  if(tmp == '/') {
5595                      /* A // operator. */
5596                     AOPERATOR(DORDOR);
5597                  }
5598                  else {
5599                      s--;
5600                      Mop(OP_DIVIDE);
5601                  }
5602              }
5603          }
5604          else {
5605              /* Disable warning on "study /blah/" */
5606              if (PL_oldoldbufptr == PL_last_uni
5607               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5608                   || memNE(PL_last_uni, "study", 5)
5609                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5610               ))
5611                  check_uni();
5612              s = scan_pat(s,OP_MATCH);
5613              TERM(sublex_start());
5614          }
5615
5616     case '.':
5617         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5618 #ifdef PERL_STRICT_CR
5619             && s[1] == '\n'
5620 #else
5621             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5622 #endif
5623             && (s == PL_linestart || s[-1] == '\n') )
5624         {
5625             PL_lex_formbrack = 0;
5626             PL_expect = XSTATE;
5627             goto rightbracket;
5628         }
5629         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5630             s += 3;
5631             OPERATOR(YADAYADA);
5632         }
5633         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5634             char tmp = *s++;
5635             if (*s == tmp) {
5636                 s++;
5637                 if (*s == tmp) {
5638                     s++;
5639                     pl_yylval.ival = OPf_SPECIAL;
5640                 }
5641                 else
5642                     pl_yylval.ival = 0;
5643                 OPERATOR(DOTDOT);
5644             }
5645             if (PL_expect != XOPERATOR)
5646                 check_uni();
5647             Aop(OP_CONCAT);
5648         }
5649         /* FALL THROUGH */
5650     case '0': case '1': case '2': case '3': case '4':
5651     case '5': case '6': case '7': case '8': case '9':
5652         s = scan_num(s, &pl_yylval);
5653         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5654         if (PL_expect == XOPERATOR)
5655             no_op("Number",s);
5656         TERM(THING);
5657
5658     case '\'':
5659         s = scan_str(s,!!PL_madskills,FALSE);
5660         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5661         if (PL_expect == XOPERATOR) {
5662             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5663                 return deprecate_commaless_var_list();
5664             }
5665             else
5666                 no_op("String",s);
5667         }
5668         if (!s)
5669             missingterm(NULL);
5670         pl_yylval.ival = OP_CONST;
5671         TERM(sublex_start());
5672
5673     case '"':
5674         s = scan_str(s,!!PL_madskills,FALSE);
5675         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5676         if (PL_expect == XOPERATOR) {
5677             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5678                 return deprecate_commaless_var_list();
5679             }
5680             else
5681                 no_op("String",s);
5682         }
5683         if (!s)
5684             missingterm(NULL);
5685         pl_yylval.ival = OP_CONST;
5686         /* FIXME. I think that this can be const if char *d is replaced by
5687            more localised variables.  */
5688         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5689             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5690                 pl_yylval.ival = OP_STRINGIFY;
5691                 break;
5692             }
5693         }
5694         TERM(sublex_start());
5695
5696     case '`':
5697         s = scan_str(s,!!PL_madskills,FALSE);
5698         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5699         if (PL_expect == XOPERATOR)
5700             no_op("Backticks",s);
5701         if (!s)
5702             missingterm(NULL);
5703         readpipe_override();
5704         TERM(sublex_start());
5705
5706     case '\\':
5707         s++;
5708         if (PL_lex_inwhat && isDIGIT(*s))
5709             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5710                            *s, *s);
5711         if (PL_expect == XOPERATOR)
5712             no_op("Backslash",s);
5713         OPERATOR(REFGEN);
5714
5715     case 'v':
5716         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5717             char *start = s + 2;
5718             while (isDIGIT(*start) || *start == '_')
5719                 start++;
5720             if (*start == '.' && isDIGIT(start[1])) {
5721                 s = scan_num(s, &pl_yylval);
5722                 TERM(THING);
5723             }
5724             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5725             else if (!isALPHA(*start) && (PL_expect == XTERM
5726                         || PL_expect == XREF || PL_expect == XSTATE
5727                         || PL_expect == XTERMORDORDOR)) {
5728                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5729                 if (!gv) {
5730                     s = scan_num(s, &pl_yylval);
5731                     TERM(THING);
5732                 }
5733             }
5734         }
5735         goto keylookup;
5736     case 'x':
5737         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5738             s++;
5739             Mop(OP_REPEAT);
5740         }
5741         goto keylookup;
5742
5743     case '_':
5744     case 'a': case 'A':
5745     case 'b': case 'B':
5746     case 'c': case 'C':
5747     case 'd': case 'D':
5748     case 'e': case 'E':
5749     case 'f': case 'F':
5750     case 'g': case 'G':
5751     case 'h': case 'H':
5752     case 'i': case 'I':
5753     case 'j': case 'J':
5754     case 'k': case 'K':
5755     case 'l': case 'L':
5756     case 'm': case 'M':
5757     case 'n': case 'N':
5758     case 'o': case 'O':
5759     case 'p': case 'P':
5760     case 'q': case 'Q':
5761     case 'r': case 'R':
5762     case 's': case 'S':
5763     case 't': case 'T':
5764     case 'u': case 'U':
5765               case 'V':
5766     case 'w': case 'W':
5767               case 'X':
5768     case 'y': case 'Y':
5769     case 'z': case 'Z':
5770
5771       keylookup: {
5772         bool anydelim;
5773         I32 tmp;
5774
5775         orig_keyword = 0;
5776         gv = NULL;
5777         gvp = NULL;
5778
5779         PL_bufptr = s;
5780         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5781
5782         /* Some keywords can be followed by any delimiter, including ':' */
5783         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5784                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5785                              (PL_tokenbuf[0] == 'q' &&
5786                               strchr("qwxr", PL_tokenbuf[1])))));
5787
5788         /* x::* is just a word, unless x is "CORE" */
5789         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5790             goto just_a_word;
5791
5792         d = s;
5793         while (d < PL_bufend && isSPACE(*d))
5794                 d++;    /* no comments skipped here, or s### is misparsed */
5795
5796         /* Is this a word before a => operator? */
5797         if (*d == '=' && d[1] == '>') {
5798             CLINE;
5799             pl_yylval.opval
5800                 = (OP*)newSVOP(OP_CONST, 0,
5801                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5802             pl_yylval.opval->op_private = OPpCONST_BARE;
5803             TERM(WORD);
5804         }
5805
5806         /* Check for plugged-in keyword */
5807         {
5808             OP *o;
5809             int result;
5810             char *saved_bufptr = PL_bufptr;
5811             PL_bufptr = s;
5812             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5813             s = PL_bufptr;
5814             if (result == KEYWORD_PLUGIN_DECLINE) {
5815                 /* not a plugged-in keyword */
5816                 PL_bufptr = saved_bufptr;
5817             } else if (result == KEYWORD_PLUGIN_STMT) {
5818                 pl_yylval.opval = o;
5819                 CLINE;
5820                 PL_expect = XSTATE;
5821                 return REPORT(PLUGSTMT);
5822             } else if (result == KEYWORD_PLUGIN_EXPR) {
5823                 pl_yylval.opval = o;
5824                 CLINE;
5825                 PL_expect = XOPERATOR;
5826                 return REPORT(PLUGEXPR);
5827             } else {
5828                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5829                                         PL_tokenbuf);
5830             }
5831         }
5832
5833         /* Check for built-in keyword */
5834         tmp = keyword(PL_tokenbuf, len, 0);
5835
5836         /* Is this a label? */
5837         if (!anydelim && PL_expect == XSTATE
5838               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5839             if (tmp)
5840                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5841             s = d + 1;
5842             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5843             CLINE;
5844             TOKEN(LABEL);
5845         }
5846
5847         if (tmp < 0) {                  /* second-class keyword? */
5848             GV *ogv = NULL;     /* override (winner) */
5849             GV *hgv = NULL;     /* hidden (loser) */
5850             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5851                 CV *cv;
5852                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5853                     (cv = GvCVu(gv)))
5854                 {
5855                     if (GvIMPORTED_CV(gv))
5856                         ogv = gv;
5857                     else if (! CvMETHOD(cv))
5858                         hgv = gv;
5859                 }
5860                 if (!ogv &&
5861                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5862                     (gv = *gvp) && isGV_with_GP(gv) &&
5863                     GvCVu(gv) && GvIMPORTED_CV(gv))
5864                 {
5865                     ogv = gv;
5866                 }
5867             }
5868             if (ogv) {
5869                 orig_keyword = tmp;
5870                 tmp = 0;                /* overridden by import or by GLOBAL */
5871             }
5872             else if (gv && !gvp
5873                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5874                      && GvCVu(gv))
5875             {
5876                 tmp = 0;                /* any sub overrides "weak" keyword */
5877             }
5878             else {                      /* no override */
5879                 tmp = -tmp;
5880                 if (tmp == KEY_dump) {
5881                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5882                                    "dump() better written as CORE::dump()");
5883                 }
5884                 gv = NULL;
5885                 gvp = 0;
5886                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5887                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5888                                    "Ambiguous call resolved as CORE::%s(), %s",
5889                                    GvENAME(hgv), "qualify as such or use &");
5890             }
5891         }
5892
5893       reserved_word:
5894         switch (tmp) {
5895
5896         default:                        /* not a keyword */
5897             /* Trade off - by using this evil construction we can pull the
5898                variable gv into the block labelled keylookup. If not, then
5899                we have to give it function scope so that the goto from the
5900                earlier ':' case doesn't bypass the initialisation.  */
5901             if (0) {
5902             just_a_word_zero_gv:
5903                 gv = NULL;
5904                 gvp = NULL;
5905                 orig_keyword = 0;
5906             }
5907           just_a_word: {
5908                 SV *sv;
5909                 int pkgname = 0;
5910                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5911                 OP *rv2cv_op;
5912                 CV *cv;
5913 #ifdef PERL_MAD
5914                 SV *nextPL_nextwhite = 0;
5915 #endif
5916
5917
5918                 /* Get the rest if it looks like a package qualifier */
5919
5920                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5921                     STRLEN morelen;
5922                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5923                                   TRUE, &morelen);
5924                     if (!morelen)
5925                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5926                                 *s == '\'' ? "'" : "::");
5927                     len += morelen;
5928                     pkgname = 1;
5929                 }
5930
5931                 if (PL_expect == XOPERATOR) {
5932                     if (PL_bufptr == PL_linestart) {
5933                         CopLINE_dec(PL_curcop);
5934                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5935                         CopLINE_inc(PL_curcop);
5936                     }
5937                     else
5938                         no_op("Bareword",s);
5939                 }
5940
5941                 /* Look for a subroutine with this name in current package,
5942                    unless name is "Foo::", in which case Foo is a bearword
5943                    (and a package name). */
5944
5945                 if (len > 2 && !PL_madskills &&
5946                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5947                 {
5948                     if (ckWARN(WARN_BAREWORD)
5949                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5950                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5951                             "Bareword \"%s\" refers to nonexistent package",
5952                              PL_tokenbuf);
5953                     len -= 2;
5954                     PL_tokenbuf[len] = '\0';
5955                     gv = NULL;
5956                     gvp = 0;
5957                 }
5958                 else {
5959                     if (!gv) {
5960                         /* Mustn't actually add anything to a symbol table.
5961                            But also don't want to "initialise" any placeholder
5962                            constants that might already be there into full
5963                            blown PVGVs with attached PVCV.  */
5964                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5965                                                GV_NOADD_NOINIT, SVt_PVCV);
5966                     }
5967                     len = 0;
5968                 }
5969
5970                 /* if we saw a global override before, get the right name */
5971
5972                 if (gvp) {
5973                     sv = newSVpvs("CORE::GLOBAL::");
5974                     sv_catpv(sv,PL_tokenbuf);
5975                 }
5976                 else {
5977                     /* If len is 0, newSVpv does strlen(), which is correct.
5978                        If len is non-zero, then it will be the true length,
5979                        and so the scalar will be created correctly.  */
5980                     sv = newSVpv(PL_tokenbuf,len);
5981                 }
5982 #ifdef PERL_MAD
5983                 if (PL_madskills && !PL_thistoken) {
5984                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5985                     PL_thistoken = newSVpvn(start,s - start);
5986                     PL_realtokenstart = s - SvPVX(PL_linestr);
5987                 }
5988 #endif
5989
5990                 /* Presume this is going to be a bareword of some sort. */
5991
5992                 CLINE;
5993                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5994                 pl_yylval.opval->op_private = OPpCONST_BARE;
5995                 /* UTF-8 package name? */
5996                 if (UTF && !IN_BYTES &&
5997                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5998                     SvUTF8_on(sv);
5999
6000                 /* And if "Foo::", then that's what it certainly is. */
6001
6002                 if (len)
6003                     goto safe_bareword;
6004
6005                 cv = NULL;
6006                 {
6007                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6008                     const_op->op_private = OPpCONST_BARE;
6009                     rv2cv_op = newCVREF(0, const_op);
6010                 }
6011                 if (rv2cv_op->op_type == OP_RV2CV &&
6012                         (rv2cv_op->op_flags & OPf_KIDS)) {
6013                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6014                     switch (rv_op->op_type) {
6015                         case OP_CONST: {
6016                             SV *sv = cSVOPx_sv(rv_op);
6017                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6018                                 cv = (CV*)SvRV(sv);
6019                         } break;
6020                         case OP_GV: {
6021                             GV *gv = cGVOPx_gv(rv_op);
6022                             CV *maybe_cv = GvCVu(gv);
6023                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6024                                 cv = maybe_cv;
6025                         } break;
6026                     }
6027                 }
6028
6029                 /* See if it's the indirect object for a list operator. */
6030
6031                 if (PL_oldoldbufptr &&
6032                     PL_oldoldbufptr < PL_bufptr &&
6033                     (PL_oldoldbufptr == PL_last_lop
6034                      || PL_oldoldbufptr == PL_last_uni) &&
6035                     /* NO SKIPSPACE BEFORE HERE! */
6036                     (PL_expect == XREF ||
6037                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6038                 {
6039                     bool immediate_paren = *s == '(';
6040
6041                     /* (Now we can afford to cross potential line boundary.) */
6042                     s = SKIPSPACE2(s,nextPL_nextwhite);
6043 #ifdef PERL_MAD
6044                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6045 #endif
6046
6047                     /* Two barewords in a row may indicate method call. */
6048
6049                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6050                         (tmp = intuit_method(s, gv, cv))) {
6051                         op_free(rv2cv_op);
6052                         return REPORT(tmp);
6053                     }
6054
6055                     /* If not a declared subroutine, it's an indirect object. */
6056                     /* (But it's an indir obj regardless for sort.) */
6057                     /* Also, if "_" follows a filetest operator, it's a bareword */
6058
6059                     if (
6060                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6061                          (!cv &&
6062                         (PL_last_lop_op != OP_MAPSTART &&
6063                          PL_last_lop_op != OP_GREPSTART))))
6064                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6065                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6066                        )
6067                     {
6068                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6069                         goto bareword;
6070                     }
6071                 }
6072
6073                 PL_expect = XOPERATOR;
6074 #ifdef PERL_MAD
6075                 if (isSPACE(*s))
6076                     s = SKIPSPACE2(s,nextPL_nextwhite);
6077                 PL_nextwhite = nextPL_nextwhite;
6078 #else
6079                 s = skipspace(s);
6080 #endif
6081
6082                 /* Is this a word before a => operator? */
6083                 if (*s == '=' && s[1] == '>' && !pkgname) {
6084                     op_free(rv2cv_op);
6085                     CLINE;
6086                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6087                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6088                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6089                     TERM(WORD);
6090                 }
6091
6092                 /* If followed by a paren, it's certainly a subroutine. */
6093                 if (*s == '(') {
6094                     CLINE;
6095                     if (cv) {
6096                         d = s + 1;
6097                         while (SPACE_OR_TAB(*d))
6098                             d++;
6099                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6100                             s = d + 1;
6101                             goto its_constant;
6102                         }
6103                     }
6104 #ifdef PERL_MAD
6105                     if (PL_madskills) {
6106                         PL_nextwhite = PL_thiswhite;
6107                         PL_thiswhite = 0;
6108                     }
6109                     start_force(PL_curforce);
6110 #endif
6111                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6112                     PL_expect = XOPERATOR;
6113 #ifdef PERL_MAD
6114                     if (PL_madskills) {
6115                         PL_nextwhite = nextPL_nextwhite;
6116                         curmad('X', PL_thistoken);
6117                         PL_thistoken = newSVpvs("");
6118                     }
6119 #endif
6120                     op_free(rv2cv_op);
6121                     force_next(WORD);
6122                     pl_yylval.ival = 0;
6123                     TOKEN('&');
6124                 }
6125
6126                 /* If followed by var or block, call it a method (unless sub) */
6127
6128                 if ((*s == '$' || *s == '{') && !cv) {
6129                     op_free(rv2cv_op);
6130                     PL_last_lop = PL_oldbufptr;
6131                     PL_last_lop_op = OP_METHOD;
6132                     PREBLOCK(METHOD);
6133                 }
6134
6135                 /* If followed by a bareword, see if it looks like indir obj. */
6136
6137                 if (!orig_keyword
6138                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6139                         && (tmp = intuit_method(s, gv, cv))) {
6140                     op_free(rv2cv_op);
6141                     return REPORT(tmp);
6142                 }
6143
6144                 /* Not a method, so call it a subroutine (if defined) */
6145
6146                 if (cv) {
6147                     if (lastchar == '-')
6148                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6149                                          "Ambiguous use of -%s resolved as -&%s()",
6150                                          PL_tokenbuf, PL_tokenbuf);
6151                     /* Check for a constant sub */
6152                     if ((sv = cv_const_sv(cv))) {
6153                   its_constant:
6154                         op_free(rv2cv_op);
6155                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6156                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6157                         pl_yylval.opval->op_private = 0;
6158                         TOKEN(WORD);
6159                     }
6160
6161                     op_free(pl_yylval.opval);
6162                     pl_yylval.opval = rv2cv_op;
6163                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6164                     PL_last_lop = PL_oldbufptr;
6165                     PL_last_lop_op = OP_ENTERSUB;
6166                     /* Is there a prototype? */
6167                     if (
6168 #ifdef PERL_MAD
6169                         cv &&
6170 #endif
6171                         SvPOK(cv))
6172                     {
6173                         STRLEN protolen;
6174                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6175                         if (!protolen)
6176                             TERM(FUNC0SUB);
6177                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6178                             OPERATOR(UNIOPSUB);
6179                         while (*proto == ';')
6180                             proto++;
6181                         if (*proto == '&' && *s == '{') {
6182                             if (PL_curstash)
6183                                 sv_setpvs(PL_subname, "__ANON__");
6184                             else
6185                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6186                             PREBLOCK(LSTOPSUB);
6187                         }
6188                     }
6189 #ifdef PERL_MAD
6190                     {
6191                         if (PL_madskills) {
6192                             PL_nextwhite = PL_thiswhite;
6193                             PL_thiswhite = 0;
6194                         }
6195                         start_force(PL_curforce);
6196                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6197                         PL_expect = XTERM;
6198                         if (PL_madskills) {
6199                             PL_nextwhite = nextPL_nextwhite;
6200                             curmad('X', PL_thistoken);
6201                             PL_thistoken = newSVpvs("");
6202                         }
6203                         force_next(WORD);
6204                         TOKEN(NOAMP);
6205                     }
6206                 }
6207
6208                 /* Guess harder when madskills require "best effort". */
6209                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6210                     int probable_sub = 0;
6211                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6212                         probable_sub = 1;
6213                     else if (isALPHA(*s)) {
6214                         char tmpbuf[1024];
6215                         STRLEN tmplen;
6216                         d = s;
6217                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6218                         if (!keyword(tmpbuf, tmplen, 0))
6219                             probable_sub = 1;
6220                         else {
6221                             while (d < PL_bufend && isSPACE(*d))
6222                                 d++;
6223                             if (*d == '=' && d[1] == '>')
6224                                 probable_sub = 1;
6225                         }
6226                     }
6227                     if (probable_sub) {
6228                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6229                         op_free(pl_yylval.opval);
6230                         pl_yylval.opval = rv2cv_op;
6231                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6232                         PL_last_lop = PL_oldbufptr;
6233                         PL_last_lop_op = OP_ENTERSUB;
6234                         PL_nextwhite = PL_thiswhite;
6235                         PL_thiswhite = 0;
6236                         start_force(PL_curforce);
6237                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6238                         PL_expect = XTERM;
6239                         PL_nextwhite = nextPL_nextwhite;
6240                         curmad('X', PL_thistoken);
6241                         PL_thistoken = newSVpvs("");
6242                         force_next(WORD);
6243                         TOKEN(NOAMP);
6244                     }
6245 #else
6246                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6247                     PL_expect = XTERM;
6248                     force_next(WORD);
6249                     TOKEN(NOAMP);
6250 #endif
6251                 }
6252
6253                 /* Call it a bare word */
6254
6255                 if (PL_hints & HINT_STRICT_SUBS)
6256                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6257                 else {
6258                 bareword:
6259                     /* after "print" and similar functions (corresponding to
6260                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6261                      * a filehandle should be subject to "strict subs".
6262                      * Likewise for the optional indirect-object argument to system
6263                      * or exec, which can't be a bareword */
6264                     if ((PL_last_lop_op == OP_PRINT
6265                             || PL_last_lop_op == OP_PRTF
6266                             || PL_last_lop_op == OP_SAY
6267                             || PL_last_lop_op == OP_SYSTEM
6268                             || PL_last_lop_op == OP_EXEC)
6269                             && (PL_hints & HINT_STRICT_SUBS))
6270                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6271                     if (lastchar != '-') {
6272                         if (ckWARN(WARN_RESERVED)) {
6273                             d = PL_tokenbuf;
6274                             while (isLOWER(*d))
6275                                 d++;
6276                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6277                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6278                                        PL_tokenbuf);
6279                         }
6280                     }
6281                 }
6282                 op_free(rv2cv_op);
6283
6284             safe_bareword:
6285                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6286                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6287                                      "Operator or semicolon missing before %c%s",
6288                                      lastchar, PL_tokenbuf);
6289                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6290                                      "Ambiguous use of %c resolved as operator %c",
6291                                      lastchar, lastchar);
6292                 }
6293                 TOKEN(WORD);
6294             }
6295
6296         case KEY___FILE__:
6297             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6298                                         newSVpv(CopFILE(PL_curcop),0));
6299             TERM(THING);
6300
6301         case KEY___LINE__:
6302             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6303                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6304             TERM(THING);
6305
6306         case KEY___PACKAGE__:
6307             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6308                                         (PL_curstash
6309                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6310                                          : &PL_sv_undef));
6311             TERM(THING);
6312
6313         case KEY___DATA__:
6314         case KEY___END__: {
6315             GV *gv;
6316             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6317                 const char *pname = "main";
6318                 if (PL_tokenbuf[2] == 'D')
6319                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6320                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6321                                 SVt_PVIO);
6322                 GvMULTI_on(gv);
6323                 if (!GvIO(gv))
6324                     GvIOp(gv) = newIO();
6325                 IoIFP(GvIOp(gv)) = PL_rsfp;
6326 #if defined(HAS_FCNTL) && defined(F_SETFD)
6327                 {
6328                     const int fd = PerlIO_fileno(PL_rsfp);
6329                     fcntl(fd,F_SETFD,fd >= 3);
6330                 }
6331 #endif
6332                 /* Mark this internal pseudo-handle as clean */
6333                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6334                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6335                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6336                 else
6337                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6338 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6339                 /* if the script was opened in binmode, we need to revert
6340                  * it to text mode for compatibility; but only iff it has CRs
6341                  * XXX this is a questionable hack at best. */
6342                 if (PL_bufend-PL_bufptr > 2
6343                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6344                 {
6345                     Off_t loc = 0;
6346                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6347                         loc = PerlIO_tell(PL_rsfp);
6348                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6349                     }
6350 #ifdef NETWARE
6351                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6352 #else
6353                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6354 #endif  /* NETWARE */
6355 #ifdef PERLIO_IS_STDIO /* really? */
6356 #  if defined(__BORLANDC__)
6357                         /* XXX see note in do_binmode() */
6358                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6359 #  endif
6360 #endif
6361                         if (loc > 0)
6362                             PerlIO_seek(PL_rsfp, loc, 0);
6363                     }
6364                 }
6365 #endif
6366 #ifdef PERLIO_LAYERS
6367                 if (!IN_BYTES) {
6368                     if (UTF)
6369                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6370                     else if (PL_encoding) {
6371                         SV *name;
6372                         dSP;
6373                         ENTER;
6374                         SAVETMPS;
6375                         PUSHMARK(sp);
6376                         EXTEND(SP, 1);
6377                         XPUSHs(PL_encoding);
6378                         PUTBACK;
6379                         call_method("name", G_SCALAR);
6380                         SPAGAIN;
6381                         name = POPs;
6382                         PUTBACK;
6383                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6384                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6385                                                       SVfARG(name)));
6386                         FREETMPS;
6387                         LEAVE;
6388                     }
6389                 }
6390 #endif
6391 #ifdef PERL_MAD
6392                 if (PL_madskills) {
6393                     if (PL_realtokenstart >= 0) {
6394                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6395                         if (!PL_endwhite)
6396                             PL_endwhite = newSVpvs("");
6397                         sv_catsv(PL_endwhite, PL_thiswhite);
6398                         PL_thiswhite = 0;
6399                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6400                         PL_realtokenstart = -1;
6401                     }
6402                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6403                            != NULL) ;
6404                 }
6405 #endif
6406                 PL_rsfp = NULL;
6407             }
6408             goto fake_eof;
6409         }
6410
6411         case KEY_AUTOLOAD:
6412         case KEY_DESTROY:
6413         case KEY_BEGIN:
6414         case KEY_UNITCHECK:
6415         case KEY_CHECK:
6416         case KEY_INIT:
6417         case KEY_END:
6418             if (PL_expect == XSTATE) {
6419                 s = PL_bufptr;
6420                 goto really_sub;
6421             }
6422             goto just_a_word;
6423
6424         case KEY_CORE:
6425             if (*s == ':' && s[1] == ':') {
6426                 s += 2;
6427                 d = s;
6428                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6429                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6430                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6431                 if (tmp < 0)
6432                     tmp = -tmp;
6433                 else if (tmp == KEY_require || tmp == KEY_do)
6434                     /* that's a way to remember we saw "CORE::" */
6435                     orig_keyword = tmp;
6436                 goto reserved_word;
6437             }
6438             goto just_a_word;
6439
6440         case KEY_abs:
6441             UNI(OP_ABS);
6442
6443         case KEY_alarm:
6444             UNI(OP_ALARM);
6445
6446         case KEY_accept:
6447             LOP(OP_ACCEPT,XTERM);
6448
6449         case KEY_and:
6450             OPERATOR(ANDOP);
6451
6452         case KEY_atan2:
6453             LOP(OP_ATAN2,XTERM);
6454
6455         case KEY_bind:
6456             LOP(OP_BIND,XTERM);
6457
6458         case KEY_binmode:
6459             LOP(OP_BINMODE,XTERM);
6460
6461         case KEY_bless:
6462             LOP(OP_BLESS,XTERM);
6463
6464         case KEY_break:
6465             FUN0(OP_BREAK);
6466
6467         case KEY_chop:
6468             UNI(OP_CHOP);
6469
6470         case KEY_continue:
6471             /* When 'use switch' is in effect, continue has a dual
6472                life as a control operator. */
6473             {
6474                 if (!FEATURE_IS_ENABLED("switch"))
6475                     PREBLOCK(CONTINUE);
6476                 else {
6477                     /* We have to disambiguate the two senses of
6478                       "continue". If the next token is a '{' then
6479                       treat it as the start of a continue block;
6480                       otherwise treat it as a control operator.
6481                      */
6482                     s = skipspace(s);
6483                     if (*s == '{')
6484             PREBLOCK(CONTINUE);
6485                     else
6486                         FUN0(OP_CONTINUE);
6487                 }
6488             }
6489
6490         case KEY_chdir:
6491             /* may use HOME */
6492             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6493             UNI(OP_CHDIR);
6494
6495         case KEY_close:
6496             UNI(OP_CLOSE);
6497
6498         case KEY_closedir:
6499             UNI(OP_CLOSEDIR);
6500
6501         case KEY_cmp:
6502             Eop(OP_SCMP);
6503
6504         case KEY_caller:
6505             UNI(OP_CALLER);
6506
6507         case KEY_crypt:
6508 #ifdef FCRYPT
6509             if (!PL_cryptseen) {
6510                 PL_cryptseen = TRUE;
6511                 init_des();
6512             }
6513 #endif
6514             LOP(OP_CRYPT,XTERM);
6515
6516         case KEY_chmod:
6517             LOP(OP_CHMOD,XTERM);
6518
6519         case KEY_chown:
6520             LOP(OP_CHOWN,XTERM);
6521
6522         case KEY_connect:
6523             LOP(OP_CONNECT,XTERM);
6524
6525         case KEY_chr:
6526             UNI(OP_CHR);
6527
6528         case KEY_cos:
6529             UNI(OP_COS);
6530
6531         case KEY_chroot:
6532             UNI(OP_CHROOT);
6533
6534         case KEY_default:
6535             PREBLOCK(DEFAULT);
6536
6537         case KEY_do:
6538             s = SKIPSPACE1(s);
6539             if (*s == '{')
6540                 PRETERMBLOCK(DO);
6541             if (*s != '\'')
6542                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6543             if (orig_keyword == KEY_do) {
6544                 orig_keyword = 0;
6545                 pl_yylval.ival = 1;
6546             }
6547             else
6548                 pl_yylval.ival = 0;
6549             OPERATOR(DO);
6550
6551         case KEY_die:
6552             PL_hints |= HINT_BLOCK_SCOPE;
6553             LOP(OP_DIE,XTERM);
6554
6555         case KEY_defined:
6556             UNI(OP_DEFINED);
6557
6558         case KEY_delete:
6559             UNI(OP_DELETE);
6560
6561         case KEY_dbmopen:
6562             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6563             LOP(OP_DBMOPEN,XTERM);
6564
6565         case KEY_dbmclose:
6566             UNI(OP_DBMCLOSE);
6567
6568         case KEY_dump:
6569             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6570             LOOPX(OP_DUMP);
6571
6572         case KEY_else:
6573             PREBLOCK(ELSE);
6574
6575         case KEY_elsif:
6576             pl_yylval.ival = CopLINE(PL_curcop);
6577             OPERATOR(ELSIF);
6578
6579         case KEY_eq:
6580             Eop(OP_SEQ);
6581
6582         case KEY_exists:
6583             UNI(OP_EXISTS);
6584
6585         case KEY_exit:
6586             if (PL_madskills)
6587                 UNI(OP_INT);
6588             UNI(OP_EXIT);
6589
6590         case KEY_eval:
6591             s = SKIPSPACE1(s);
6592             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6593             UNIBRACK(OP_ENTEREVAL);
6594
6595         case KEY_eof:
6596             UNI(OP_EOF);
6597
6598         case KEY_exp:
6599             UNI(OP_EXP);
6600
6601         case KEY_each:
6602             UNI(OP_EACH);
6603
6604         case KEY_exec:
6605             LOP(OP_EXEC,XREF);
6606
6607         case KEY_endhostent:
6608             FUN0(OP_EHOSTENT);
6609
6610         case KEY_endnetent:
6611             FUN0(OP_ENETENT);
6612
6613         case KEY_endservent:
6614             FUN0(OP_ESERVENT);
6615
6616         case KEY_endprotoent:
6617             FUN0(OP_EPROTOENT);
6618
6619         case KEY_endpwent:
6620             FUN0(OP_EPWENT);
6621
6622         case KEY_endgrent:
6623             FUN0(OP_EGRENT);
6624
6625         case KEY_for:
6626         case KEY_foreach:
6627             pl_yylval.ival = CopLINE(PL_curcop);
6628             s = SKIPSPACE1(s);
6629             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6630                 char *p = s;
6631 #ifdef PERL_MAD
6632                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6633 #endif
6634
6635                 if ((PL_bufend - p) >= 3 &&
6636                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6637                     p += 2;
6638                 else if ((PL_bufend - p) >= 4 &&
6639                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6640                     p += 3;
6641                 p = PEEKSPACE(p);
6642                 if (isIDFIRST_lazy_if(p,UTF)) {
6643                     p = scan_ident(p, PL_bufend,
6644                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6645                     p = PEEKSPACE(p);
6646                 }
6647                 if (*p != '$')
6648                     Perl_croak(aTHX_ "Missing $ on loop variable");
6649 #ifdef PERL_MAD
6650                 s = SvPVX(PL_linestr) + soff;
6651 #endif
6652             }
6653             OPERATOR(FOR);
6654
6655         case KEY_formline:
6656             LOP(OP_FORMLINE,XTERM);
6657
6658         case KEY_fork:
6659             FUN0(OP_FORK);
6660
6661         case KEY_fcntl:
6662             LOP(OP_FCNTL,XTERM);
6663
6664         case KEY_fileno:
6665             UNI(OP_FILENO);
6666
6667         case KEY_flock:
6668             LOP(OP_FLOCK,XTERM);
6669
6670         case KEY_gt:
6671             Rop(OP_SGT);
6672
6673         case KEY_ge:
6674             Rop(OP_SGE);
6675
6676         case KEY_grep:
6677             LOP(OP_GREPSTART, XREF);
6678
6679         case KEY_goto:
6680             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6681             LOOPX(OP_GOTO);
6682
6683         case KEY_gmtime:
6684             UNI(OP_GMTIME);
6685
6686         case KEY_getc:
6687             UNIDOR(OP_GETC);
6688
6689         case KEY_getppid:
6690             FUN0(OP_GETPPID);
6691
6692         case KEY_getpgrp:
6693             UNI(OP_GETPGRP);
6694
6695         case KEY_getpriority:
6696             LOP(OP_GETPRIORITY,XTERM);
6697
6698         case KEY_getprotobyname:
6699             UNI(OP_GPBYNAME);
6700
6701         case KEY_getprotobynumber:
6702             LOP(OP_GPBYNUMBER,XTERM);
6703
6704         case KEY_getprotoent:
6705             FUN0(OP_GPROTOENT);
6706
6707         case KEY_getpwent:
6708             FUN0(OP_GPWENT);
6709
6710         case KEY_getpwnam:
6711             UNI(OP_GPWNAM);
6712
6713         case KEY_getpwuid:
6714             UNI(OP_GPWUID);
6715
6716         case KEY_getpeername:
6717             UNI(OP_GETPEERNAME);
6718
6719         case KEY_gethostbyname:
6720             UNI(OP_GHBYNAME);
6721
6722         case KEY_gethostbyaddr:
6723             LOP(OP_GHBYADDR,XTERM);
6724
6725         case KEY_gethostent:
6726             FUN0(OP_GHOSTENT);
6727
6728         case KEY_getnetbyname:
6729             UNI(OP_GNBYNAME);
6730
6731         case KEY_getnetbyaddr:
6732             LOP(OP_GNBYADDR,XTERM);
6733
6734         case KEY_getnetent:
6735             FUN0(OP_GNETENT);
6736
6737         case KEY_getservbyname:
6738             LOP(OP_GSBYNAME,XTERM);
6739
6740         case KEY_getservbyport:
6741             LOP(OP_GSBYPORT,XTERM);
6742
6743         case KEY_getservent:
6744             FUN0(OP_GSERVENT);
6745
6746         case KEY_getsockname:
6747             UNI(OP_GETSOCKNAME);
6748
6749         case KEY_getsockopt:
6750             LOP(OP_GSOCKOPT,XTERM);
6751
6752         case KEY_getgrent:
6753             FUN0(OP_GGRENT);
6754
6755         case KEY_getgrnam:
6756             UNI(OP_GGRNAM);
6757
6758         case KEY_getgrgid:
6759             UNI(OP_GGRGID);
6760
6761         case KEY_getlogin:
6762             FUN0(OP_GETLOGIN);
6763
6764         case KEY_given:
6765             pl_yylval.ival = CopLINE(PL_curcop);
6766             OPERATOR(GIVEN);
6767
6768         case KEY_glob:
6769             LOP(OP_GLOB,XTERM);
6770
6771         case KEY_hex:
6772             UNI(OP_HEX);
6773
6774         case KEY_if:
6775             pl_yylval.ival = CopLINE(PL_curcop);
6776             OPERATOR(IF);
6777
6778         case KEY_index:
6779             LOP(OP_INDEX,XTERM);
6780
6781         case KEY_int:
6782             UNI(OP_INT);
6783
6784         case KEY_ioctl:
6785             LOP(OP_IOCTL,XTERM);
6786
6787         case KEY_join:
6788             LOP(OP_JOIN,XTERM);
6789
6790         case KEY_keys:
6791             UNI(OP_KEYS);
6792
6793         case KEY_kill:
6794             LOP(OP_KILL,XTERM);
6795
6796         case KEY_last:
6797             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6798             LOOPX(OP_LAST);
6799
6800         case KEY_lc:
6801             UNI(OP_LC);
6802
6803         case KEY_lcfirst:
6804             UNI(OP_LCFIRST);
6805
6806         case KEY_local:
6807             pl_yylval.ival = 0;
6808             OPERATOR(LOCAL);
6809
6810         case KEY_length:
6811             UNI(OP_LENGTH);
6812
6813         case KEY_lt:
6814             Rop(OP_SLT);
6815
6816         case KEY_le:
6817             Rop(OP_SLE);
6818
6819         case KEY_localtime:
6820             UNI(OP_LOCALTIME);
6821
6822         case KEY_log:
6823             UNI(OP_LOG);
6824
6825         case KEY_link:
6826             LOP(OP_LINK,XTERM);
6827
6828         case KEY_listen:
6829             LOP(OP_LISTEN,XTERM);
6830
6831         case KEY_lock:
6832             UNI(OP_LOCK);
6833
6834         case KEY_lstat:
6835             UNI(OP_LSTAT);
6836
6837         case KEY_m:
6838             s = scan_pat(s,OP_MATCH);
6839             TERM(sublex_start());
6840
6841         case KEY_map:
6842             LOP(OP_MAPSTART, XREF);
6843
6844         case KEY_mkdir:
6845             LOP(OP_MKDIR,XTERM);
6846
6847         case KEY_msgctl:
6848             LOP(OP_MSGCTL,XTERM);
6849
6850         case KEY_msgget:
6851             LOP(OP_MSGGET,XTERM);
6852
6853         case KEY_msgrcv:
6854             LOP(OP_MSGRCV,XTERM);
6855
6856         case KEY_msgsnd:
6857             LOP(OP_MSGSND,XTERM);
6858
6859         case KEY_our:
6860         case KEY_my:
6861         case KEY_state:
6862             PL_in_my = (U16)tmp;
6863             s = SKIPSPACE1(s);
6864             if (isIDFIRST_lazy_if(s,UTF)) {
6865 #ifdef PERL_MAD
6866                 char* start = s;
6867 #endif
6868                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6869                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6870                     goto really_sub;
6871                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6872                 if (!PL_in_my_stash) {
6873                     char tmpbuf[1024];
6874                     PL_bufptr = s;
6875                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6876                     yyerror(tmpbuf);
6877                 }
6878 #ifdef PERL_MAD
6879                 if (PL_madskills) {     /* just add type to declarator token */
6880                     sv_catsv(PL_thistoken, PL_nextwhite);
6881                     PL_nextwhite = 0;
6882                     sv_catpvn(PL_thistoken, start, s - start);
6883                 }
6884 #endif
6885             }
6886             pl_yylval.ival = 1;
6887             OPERATOR(MY);
6888
6889         case KEY_next:
6890             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6891             LOOPX(OP_NEXT);
6892
6893         case KEY_ne:
6894             Eop(OP_SNE);
6895
6896         case KEY_no:
6897             s = tokenize_use(0, s);
6898             OPERATOR(USE);
6899
6900         case KEY_not:
6901             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6902                 FUN1(OP_NOT);
6903             else
6904                 OPERATOR(NOTOP);
6905
6906         case KEY_open:
6907             s = SKIPSPACE1(s);
6908             if (isIDFIRST_lazy_if(s,UTF)) {
6909                 const char *t;
6910                 for (d = s; isALNUM_lazy_if(d,UTF);)
6911                     d++;
6912                 for (t=d; isSPACE(*t);)
6913                     t++;
6914                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6915                     /* [perl #16184] */
6916                     && !(t[0] == '=' && t[1] == '>')
6917                 ) {
6918                     int parms_len = (int)(d-s);
6919                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6920                            "Precedence problem: open %.*s should be open(%.*s)",
6921                             parms_len, s, parms_len, s);
6922                 }
6923             }
6924             LOP(OP_OPEN,XTERM);
6925
6926         case KEY_or:
6927             pl_yylval.ival = OP_OR;
6928             OPERATOR(OROP);
6929
6930         case KEY_ord:
6931             UNI(OP_ORD);
6932
6933         case KEY_oct:
6934             UNI(OP_OCT);
6935
6936         case KEY_opendir:
6937             LOP(OP_OPEN_DIR,XTERM);
6938
6939         case KEY_print:
6940             checkcomma(s,PL_tokenbuf,"filehandle");
6941             LOP(OP_PRINT,XREF);
6942
6943         case KEY_printf:
6944             checkcomma(s,PL_tokenbuf,"filehandle");
6945             LOP(OP_PRTF,XREF);
6946
6947         case KEY_prototype:
6948             UNI(OP_PROTOTYPE);
6949
6950         case KEY_push:
6951             LOP(OP_PUSH,XTERM);
6952
6953         case KEY_pop:
6954             UNIDOR(OP_POP);
6955
6956         case KEY_pos:
6957             UNIDOR(OP_POS);
6958
6959         case KEY_pack:
6960             LOP(OP_PACK,XTERM);
6961
6962         case KEY_package:
6963             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6964             s = force_version(s, FALSE);
6965             OPERATOR(PACKAGE);
6966
6967         case KEY_pipe:
6968             LOP(OP_PIPE_OP,XTERM);
6969
6970         case KEY_q:
6971             s = scan_str(s,!!PL_madskills,FALSE);
6972             if (!s)
6973                 missingterm(NULL);
6974             pl_yylval.ival = OP_CONST;
6975             TERM(sublex_start());
6976
6977         case KEY_quotemeta:
6978             UNI(OP_QUOTEMETA);
6979
6980         case KEY_qw:
6981             s = scan_str(s,!!PL_madskills,FALSE);
6982             if (!s)
6983                 missingterm(NULL);
6984             PL_expect = XOPERATOR;
6985             force_next(')');
6986             if (SvCUR(PL_lex_stuff)) {
6987                 OP *words = NULL;
6988                 int warned = 0;
6989                 d = SvPV_force(PL_lex_stuff, len);
6990                 while (len) {
6991                     for (; isSPACE(*d) && len; --len, ++d)
6992                         /**/;
6993                     if (len) {
6994                         SV *sv;
6995                         const char *b = d;
6996                         if (!warned && ckWARN(WARN_QW)) {
6997                             for (; !isSPACE(*d) && len; --len, ++d) {
6998                                 if (*d == ',') {
6999                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7000                                         "Possible attempt to separate words with commas");
7001                                     ++warned;
7002                                 }
7003                                 else if (*d == '#') {
7004                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7005                                         "Possible attempt to put comments in qw() list");
7006                                     ++warned;
7007                                 }
7008                             }
7009                         }
7010                         else {
7011                             for (; !isSPACE(*d) && len; --len, ++d)
7012                                 /**/;
7013                         }
7014                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7015                         words = append_elem(OP_LIST, words,
7016                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7017                     }
7018                 }
7019                 if (words) {
7020                     start_force(PL_curforce);
7021                     NEXTVAL_NEXTTOKE.opval = words;
7022                     force_next(THING);
7023                 }
7024             }
7025             if (PL_lex_stuff) {
7026                 SvREFCNT_dec(PL_lex_stuff);
7027                 PL_lex_stuff = NULL;
7028             }
7029             PL_expect = XTERM;
7030             TOKEN('(');
7031
7032         case KEY_qq:
7033             s = scan_str(s,!!PL_madskills,FALSE);
7034             if (!s)
7035                 missingterm(NULL);
7036             pl_yylval.ival = OP_STRINGIFY;
7037             if (SvIVX(PL_lex_stuff) == '\'')
7038                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7039             TERM(sublex_start());
7040
7041         case KEY_qr:
7042             s = scan_pat(s,OP_QR);
7043             TERM(sublex_start());
7044
7045         case KEY_qx:
7046             s = scan_str(s,!!PL_madskills,FALSE);
7047             if (!s)
7048                 missingterm(NULL);
7049             readpipe_override();
7050             TERM(sublex_start());
7051
7052         case KEY_return:
7053             OLDLOP(OP_RETURN);
7054
7055         case KEY_require:
7056             s = SKIPSPACE1(s);
7057             if (isDIGIT(*s)) {
7058                 s = force_version(s, FALSE);
7059             }
7060             else if (*s != 'v' || !isDIGIT(s[1])
7061                     || (s = force_version(s, TRUE), *s == 'v'))
7062             {
7063                 *PL_tokenbuf = '\0';
7064                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7065                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7066                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7067                 else if (*s == '<')
7068                     yyerror("<> should be quotes");
7069             }
7070             if (orig_keyword == KEY_require) {
7071                 orig_keyword = 0;
7072                 pl_yylval.ival = 1;
7073             }
7074             else
7075                 pl_yylval.ival = 0;
7076             PL_expect = XTERM;
7077             PL_bufptr = s;
7078             PL_last_uni = PL_oldbufptr;
7079             PL_last_lop_op = OP_REQUIRE;
7080             s = skipspace(s);
7081             return REPORT( (int)REQUIRE );
7082
7083         case KEY_reset:
7084             UNI(OP_RESET);
7085
7086         case KEY_redo:
7087             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7088             LOOPX(OP_REDO);
7089
7090         case KEY_rename:
7091             LOP(OP_RENAME,XTERM);
7092
7093         case KEY_rand:
7094             UNI(OP_RAND);
7095
7096         case KEY_rmdir:
7097             UNI(OP_RMDIR);
7098
7099         case KEY_rindex:
7100             LOP(OP_RINDEX,XTERM);
7101
7102         case KEY_read:
7103             LOP(OP_READ,XTERM);
7104
7105         case KEY_readdir:
7106             UNI(OP_READDIR);
7107
7108         case KEY_readline:
7109             UNIDOR(OP_READLINE);
7110
7111         case KEY_readpipe:
7112             UNIDOR(OP_BACKTICK);
7113
7114         case KEY_rewinddir:
7115             UNI(OP_REWINDDIR);
7116
7117         case KEY_recv:
7118             LOP(OP_RECV,XTERM);
7119
7120         case KEY_reverse:
7121             LOP(OP_REVERSE,XTERM);
7122
7123         case KEY_readlink:
7124             UNIDOR(OP_READLINK);
7125
7126         case KEY_ref:
7127             UNI(OP_REF);
7128
7129         case KEY_s:
7130             s = scan_subst(s);
7131             if (pl_yylval.opval)
7132                 TERM(sublex_start());
7133             else
7134                 TOKEN(1);       /* force error */
7135
7136         case KEY_say:
7137             checkcomma(s,PL_tokenbuf,"filehandle");
7138             LOP(OP_SAY,XREF);
7139
7140         case KEY_chomp:
7141             UNI(OP_CHOMP);
7142
7143         case KEY_scalar:
7144             UNI(OP_SCALAR);
7145
7146         case KEY_select:
7147             LOP(OP_SELECT,XTERM);
7148
7149         case KEY_seek:
7150             LOP(OP_SEEK,XTERM);
7151
7152         case KEY_semctl:
7153             LOP(OP_SEMCTL,XTERM);
7154
7155         case KEY_semget:
7156             LOP(OP_SEMGET,XTERM);
7157
7158         case KEY_semop:
7159             LOP(OP_SEMOP,XTERM);
7160
7161         case KEY_send:
7162             LOP(OP_SEND,XTERM);
7163
7164         case KEY_setpgrp:
7165             LOP(OP_SETPGRP,XTERM);
7166
7167         case KEY_setpriority:
7168             LOP(OP_SETPRIORITY,XTERM);
7169
7170         case KEY_sethostent:
7171             UNI(OP_SHOSTENT);
7172
7173         case KEY_setnetent:
7174             UNI(OP_SNETENT);
7175
7176         case KEY_setservent:
7177             UNI(OP_SSERVENT);
7178
7179         case KEY_setprotoent:
7180             UNI(OP_SPROTOENT);
7181
7182         case KEY_setpwent:
7183             FUN0(OP_SPWENT);
7184
7185         case KEY_setgrent:
7186             FUN0(OP_SGRENT);
7187
7188         case KEY_seekdir:
7189             LOP(OP_SEEKDIR,XTERM);
7190
7191         case KEY_setsockopt:
7192             LOP(OP_SSOCKOPT,XTERM);
7193
7194         case KEY_shift:
7195             UNIDOR(OP_SHIFT);
7196
7197         case KEY_shmctl:
7198             LOP(OP_SHMCTL,XTERM);
7199
7200         case KEY_shmget:
7201             LOP(OP_SHMGET,XTERM);
7202
7203         case KEY_shmread:
7204             LOP(OP_SHMREAD,XTERM);
7205
7206         case KEY_shmwrite:
7207             LOP(OP_SHMWRITE,XTERM);
7208
7209         case KEY_shutdown:
7210             LOP(OP_SHUTDOWN,XTERM);
7211
7212         case KEY_sin:
7213             UNI(OP_SIN);
7214
7215         case KEY_sleep:
7216             UNI(OP_SLEEP);
7217
7218         case KEY_socket:
7219             LOP(OP_SOCKET,XTERM);
7220
7221         case KEY_socketpair:
7222             LOP(OP_SOCKPAIR,XTERM);
7223
7224         case KEY_sort:
7225             checkcomma(s,PL_tokenbuf,"subroutine name");
7226             s = SKIPSPACE1(s);
7227             if (*s == ';' || *s == ')')         /* probably a close */
7228                 Perl_croak(aTHX_ "sort is now a reserved word");
7229             PL_expect = XTERM;
7230             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7231             LOP(OP_SORT,XREF);
7232
7233         case KEY_split:
7234             LOP(OP_SPLIT,XTERM);
7235
7236         case KEY_sprintf:
7237             LOP(OP_SPRINTF,XTERM);
7238
7239         case KEY_splice:
7240             LOP(OP_SPLICE,XTERM);
7241
7242         case KEY_sqrt:
7243             UNI(OP_SQRT);
7244
7245         case KEY_srand:
7246             UNI(OP_SRAND);
7247
7248         case KEY_stat:
7249             UNI(OP_STAT);
7250
7251         case KEY_study:
7252             UNI(OP_STUDY);
7253
7254         case KEY_substr:
7255             LOP(OP_SUBSTR,XTERM);
7256
7257         case KEY_format:
7258         case KEY_sub:
7259           really_sub:
7260             {
7261                 char tmpbuf[sizeof PL_tokenbuf];
7262                 SSize_t tboffset = 0;
7263                 expectation attrful;
7264                 bool have_name, have_proto;
7265                 const int key = tmp;
7266
7267 #ifdef PERL_MAD
7268                 SV *tmpwhite = 0;
7269
7270                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7271                 SV *subtoken = newSVpvn(tstart, s - tstart);
7272                 PL_thistoken = 0;
7273
7274                 d = s;
7275                 s = SKIPSPACE2(s,tmpwhite);
7276 #else
7277                 s = skipspace(s);
7278 #endif
7279
7280                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7281                     (*s == ':' && s[1] == ':'))
7282                 {
7283 #ifdef PERL_MAD
7284                     SV *nametoke = NULL;
7285 #endif
7286
7287                     PL_expect = XBLOCK;
7288                     attrful = XATTRBLOCK;
7289                     /* remember buffer pos'n for later force_word */
7290                     tboffset = s - PL_oldbufptr;
7291                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7292 #ifdef PERL_MAD
7293                     if (PL_madskills)
7294                         nametoke = newSVpvn(s, d - s);
7295 #endif
7296                     if (memchr(tmpbuf, ':', len))
7297                         sv_setpvn(PL_subname, tmpbuf, len);
7298                     else {
7299                         sv_setsv(PL_subname,PL_curstname);
7300                         sv_catpvs(PL_subname,"::");
7301                         sv_catpvn(PL_subname,tmpbuf,len);
7302                     }
7303                     have_name = TRUE;
7304
7305 #ifdef PERL_MAD
7306
7307                     start_force(0);
7308                     CURMAD('X', nametoke);
7309                     CURMAD('_', tmpwhite);
7310                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7311                                       FALSE, TRUE, TRUE);
7312
7313                     s = SKIPSPACE2(d,tmpwhite);
7314 #else
7315                     s = skipspace(d);
7316 #endif
7317                 }
7318                 else {
7319                     if (key == KEY_my)
7320                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7321                     PL_expect = XTERMBLOCK;
7322                     attrful = XATTRTERM;
7323                     sv_setpvs(PL_subname,"?");
7324                     have_name = FALSE;
7325                 }
7326
7327                 if (key == KEY_format) {
7328                     if (*s == '=')
7329                         PL_lex_formbrack = PL_lex_brackets + 1;
7330 #ifdef PERL_MAD
7331                     PL_thistoken = subtoken;
7332                     s = d;
7333 #else
7334                     if (have_name)
7335                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7336                                           FALSE, TRUE, TRUE);
7337 #endif
7338                     OPERATOR(FORMAT);
7339                 }
7340
7341                 /* Look for a prototype */
7342                 if (*s == '(') {
7343                     char *p;
7344                     bool bad_proto = FALSE;
7345                     bool in_brackets = FALSE;
7346                     char greedy_proto = ' ';
7347                     bool proto_after_greedy_proto = FALSE;
7348                     bool must_be_last = FALSE;
7349                     bool underscore = FALSE;
7350                     bool seen_underscore = FALSE;
7351                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
7352
7353                     s = scan_str(s,!!PL_madskills,FALSE);
7354                     if (!s)
7355                         Perl_croak(aTHX_ "Prototype not terminated");
7356                     /* strip spaces and check for bad characters */
7357                     d = SvPVX(PL_lex_stuff);
7358                     tmp = 0;
7359                     for (p = d; *p; ++p) {
7360                         if (!isSPACE(*p)) {
7361                             d[tmp++] = *p;
7362
7363                             if (warnsyntax) {
7364                                 if (must_be_last)
7365                                     proto_after_greedy_proto = TRUE;
7366                                 if (!strchr("$@%*;[]&\\_", *p)) {
7367                                     bad_proto = TRUE;
7368                                 }
7369                                 else {
7370                                     if ( underscore ) {
7371                                         if ( *p != ';' )
7372                                             bad_proto = TRUE;
7373                                         underscore = FALSE;
7374                                     }
7375                                     if ( *p == '[' ) {
7376                                         in_brackets = TRUE;
7377                                     }
7378                                     else if ( *p == ']' ) {
7379                                         in_brackets = FALSE;
7380                                     }
7381                                     else if ( (*p == '@' || *p == '%') &&
7382                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7383                                          !in_brackets ) {
7384                                         must_be_last = TRUE;
7385                                         greedy_proto = *p;
7386                                     }
7387                                     else if ( *p == '_' ) {
7388                                         underscore = seen_underscore = TRUE;
7389                                     }
7390                                 }
7391                             }
7392                         }
7393                     }
7394                     d[tmp] = '\0';
7395                     if (proto_after_greedy_proto)
7396                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7397                                     "Prototype after '%c' for %"SVf" : %s",
7398                                     greedy_proto, SVfARG(PL_subname), d);
7399                     if (bad_proto)
7400                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7401                                     "Illegal character %sin prototype for %"SVf" : %s",
7402                                     seen_underscore ? "after '_' " : "",
7403                                     SVfARG(PL_subname), d);
7404                     SvCUR_set(PL_lex_stuff, tmp);
7405                     have_proto = TRUE;
7406
7407 #ifdef PERL_MAD
7408                     start_force(0);
7409                     CURMAD('q', PL_thisopen);
7410                     CURMAD('_', tmpwhite);
7411                     CURMAD('=', PL_thisstuff);
7412                     CURMAD('Q', PL_thisclose);
7413                     NEXTVAL_NEXTTOKE.opval =
7414                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7415                     PL_lex_stuff = NULL;
7416                     force_next(THING);
7417
7418                     s = SKIPSPACE2(s,tmpwhite);
7419 #else
7420                     s = skipspace(s);
7421 #endif
7422                 }
7423                 else
7424                     have_proto = FALSE;
7425
7426                 if (*s == ':' && s[1] != ':')
7427                     PL_expect = attrful;
7428                 else if (*s != '{' && key == KEY_sub) {
7429                     if (!have_name)
7430                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7431                     else if (*s != ';')
7432                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7433                 }
7434
7435 #ifdef PERL_MAD
7436                 start_force(0);
7437                 if (tmpwhite) {
7438                     if (PL_madskills)
7439                         curmad('^', newSVpvs(""));
7440                     CURMAD('_', tmpwhite);
7441                 }
7442                 force_next(0);
7443
7444                 PL_thistoken = subtoken;
7445 #else
7446                 if (have_proto) {
7447                     NEXTVAL_NEXTTOKE.opval =
7448                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7449                     PL_lex_stuff = NULL;
7450                     force_next(THING);
7451                 }
7452 #endif
7453                 if (!have_name) {
7454                     if (PL_curstash)
7455                         sv_setpvs(PL_subname, "__ANON__");
7456                     else
7457                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7458                     TOKEN(ANONSUB);
7459                 }
7460 #ifndef PERL_MAD
7461                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7462                                   FALSE, TRUE, TRUE);
7463 #endif
7464                 if (key == KEY_my)
7465                     TOKEN(MYSUB);
7466                 TOKEN(SUB);
7467             }
7468
7469         case KEY_system:
7470             LOP(OP_SYSTEM,XREF);
7471
7472         case KEY_symlink:
7473             LOP(OP_SYMLINK,XTERM);
7474
7475         case KEY_syscall:
7476             LOP(OP_SYSCALL,XTERM);
7477
7478         case KEY_sysopen:
7479             LOP(OP_SYSOPEN,XTERM);
7480
7481         case KEY_sysseek:
7482             LOP(OP_SYSSEEK,XTERM);
7483
7484         case KEY_sysread:
7485             LOP(OP_SYSREAD,XTERM);
7486
7487         case KEY_syswrite:
7488             LOP(OP_SYSWRITE,XTERM);
7489
7490         case KEY_tr:
7491             s = scan_trans(s);
7492             TERM(sublex_start());
7493
7494         case KEY_tell:
7495             UNI(OP_TELL);
7496
7497         case KEY_telldir:
7498             UNI(OP_TELLDIR);
7499
7500         case KEY_tie:
7501             LOP(OP_TIE,XTERM);
7502
7503         case KEY_tied:
7504             UNI(OP_TIED);
7505
7506         case KEY_time:
7507             FUN0(OP_TIME);
7508
7509         case KEY_times:
7510             FUN0(OP_TMS);
7511
7512         case KEY_truncate:
7513             LOP(OP_TRUNCATE,XTERM);
7514
7515         case KEY_uc:
7516             UNI(OP_UC);
7517
7518         case KEY_ucfirst:
7519             UNI(OP_UCFIRST);
7520
7521         case KEY_untie:
7522             UNI(OP_UNTIE);
7523
7524         case KEY_until:
7525             pl_yylval.ival = CopLINE(PL_curcop);
7526             OPERATOR(UNTIL);
7527
7528         case KEY_unless:
7529             pl_yylval.ival = CopLINE(PL_curcop);
7530             OPERATOR(UNLESS);
7531
7532         case KEY_unlink:
7533             LOP(OP_UNLINK,XTERM);
7534
7535         case KEY_undef:
7536             UNIDOR(OP_UNDEF);
7537
7538         case KEY_unpack:
7539             LOP(OP_UNPACK,XTERM);
7540
7541         case KEY_utime:
7542             LOP(OP_UTIME,XTERM);
7543
7544         case KEY_umask:
7545             UNIDOR(OP_UMASK);
7546
7547         case KEY_unshift:
7548             LOP(OP_UNSHIFT,XTERM);
7549
7550         case KEY_use:
7551             s = tokenize_use(1, s);
7552             OPERATOR(USE);
7553
7554         case KEY_values:
7555             UNI(OP_VALUES);
7556
7557         case KEY_vec:
7558             LOP(OP_VEC,XTERM);
7559
7560         case KEY_when:
7561             pl_yylval.ival = CopLINE(PL_curcop);
7562             OPERATOR(WHEN);
7563
7564         case KEY_while:
7565             pl_yylval.ival = CopLINE(PL_curcop);
7566             OPERATOR(WHILE);
7567
7568         case KEY_warn:
7569             PL_hints |= HINT_BLOCK_SCOPE;
7570             LOP(OP_WARN,XTERM);
7571
7572         case KEY_wait:
7573             FUN0(OP_WAIT);
7574
7575         case KEY_waitpid:
7576             LOP(OP_WAITPID,XTERM);
7577
7578         case KEY_wantarray:
7579             FUN0(OP_WANTARRAY);
7580
7581         case KEY_write:
7582 #ifdef EBCDIC
7583         {
7584             char ctl_l[2];
7585             ctl_l[0] = toCTRL('L');
7586             ctl_l[1] = '\0';
7587             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7588         }
7589 #else
7590             /* Make sure $^L is defined */
7591             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7592 #endif
7593             UNI(OP_ENTERWRITE);
7594
7595         case KEY_x:
7596             if (PL_expect == XOPERATOR)
7597                 Mop(OP_REPEAT);
7598             check_uni();
7599             goto just_a_word;
7600
7601         case KEY_xor:
7602             pl_yylval.ival = OP_XOR;
7603             OPERATOR(OROP);
7604
7605         case KEY_y:
7606             s = scan_trans(s);
7607             TERM(sublex_start());
7608         }
7609     }}
7610 }
7611 #ifdef __SC__
7612 #pragma segment Main
7613 #endif
7614
7615 static int
7616 S_pending_ident(pTHX)
7617 {
7618     dVAR;
7619     register char *d;
7620     PADOFFSET tmp = 0;
7621     /* pit holds the identifier we read and pending_ident is reset */
7622     char pit = PL_pending_ident;
7623     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7624     /* All routes through this function want to know if there is a colon.  */
7625     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7626     PL_pending_ident = 0;
7627
7628     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7629     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7630           "### Pending identifier '%s'\n", PL_tokenbuf); });
7631
7632     /* if we're in a my(), we can't allow dynamics here.
7633        $foo'bar has already been turned into $foo::bar, so
7634        just check for colons.
7635
7636        if it's a legal name, the OP is a PADANY.
7637     */
7638     if (PL_in_my) {
7639         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7640             if (has_colon)
7641                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7642                                   "variable %s in \"our\"",
7643                                   PL_tokenbuf));
7644             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7645         }
7646         else {
7647             if (has_colon)
7648                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7649                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7650
7651             pl_yylval.opval = newOP(OP_PADANY, 0);
7652             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7653             return PRIVATEREF;
7654         }
7655     }
7656
7657     /*
7658        build the ops for accesses to a my() variable.
7659
7660        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7661        then used in a comparison.  This catches most, but not
7662        all cases.  For instance, it catches
7663            sort { my($a); $a <=> $b }
7664        but not
7665            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7666        (although why you'd do that is anyone's guess).
7667     */
7668
7669     if (!has_colon) {
7670         if (!PL_in_my)
7671             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7672         if (tmp != NOT_IN_PAD) {
7673             /* might be an "our" variable" */
7674             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7675                 /* build ops for a bareword */
7676                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7677                 HEK * const stashname = HvNAME_HEK(stash);
7678                 SV *  const sym = newSVhek(stashname);
7679                 sv_catpvs(sym, "::");
7680                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7681                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7682                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7683                 gv_fetchsv(sym,
7684                     (PL_in_eval
7685                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7686                         : GV_ADDMULTI
7687                     ),
7688                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7689                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7690                      : SVt_PVHV));
7691                 return WORD;
7692             }
7693
7694             /* if it's a sort block and they're naming $a or $b */
7695             if (PL_last_lop_op == OP_SORT &&
7696                 PL_tokenbuf[0] == '$' &&
7697                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7698                 && !PL_tokenbuf[2])
7699             {
7700                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7701                      d < PL_bufend && *d != '\n';
7702                      d++)
7703                 {
7704                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7705                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7706                               PL_tokenbuf);
7707                     }
7708                 }
7709             }
7710
7711             pl_yylval.opval = newOP(OP_PADANY, 0);
7712             pl_yylval.opval->op_targ = tmp;
7713             return PRIVATEREF;
7714         }
7715     }
7716
7717     /*
7718        Whine if they've said @foo in a doublequoted string,
7719        and @foo isn't a variable we can find in the symbol
7720        table.
7721     */
7722     if (ckWARN(WARN_AMBIGUOUS) &&
7723         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7724         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7725                                          SVt_PVAV);
7726         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7727                 /* DO NOT warn for @- and @+ */
7728                 && !( PL_tokenbuf[2] == '\0' &&
7729                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7730            )
7731         {
7732             /* Downgraded from fatal to warning 20000522 mjd */
7733             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7734                         "Possible unintended interpolation of %s in string",
7735                         PL_tokenbuf);
7736         }
7737     }
7738
7739     /* build ops for a bareword */
7740     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7741                                                       tokenbuf_len - 1));
7742     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7743     gv_fetchpvn_flags(
7744             PL_tokenbuf + 1, tokenbuf_len - 1,
7745             /* If the identifier refers to a stash, don't autovivify it.
7746              * Change 24660 had the side effect of causing symbol table
7747              * hashes to always be defined, even if they were freshly
7748              * created and the only reference in the entire program was
7749              * the single statement with the defined %foo::bar:: test.
7750              * It appears that all code in the wild doing this actually
7751              * wants to know whether sub-packages have been loaded, so
7752              * by avoiding auto-vivifying symbol tables, we ensure that
7753              * defined %foo::bar:: continues to be false, and the existing
7754              * tests still give the expected answers, even though what
7755              * they're actually testing has now changed subtly.
7756              */
7757             (*PL_tokenbuf == '%'
7758              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7759              && d[-1] == ':'
7760              ? 0
7761              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7762             ((PL_tokenbuf[0] == '$') ? SVt_PV
7763              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7764              : SVt_PVHV));
7765     return WORD;
7766 }
7767
7768 /*
7769  *  The following code was generated by perl_keyword.pl.
7770  */
7771
7772 I32
7773 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7774 {
7775     dVAR;
7776
7777     PERL_ARGS_ASSERT_KEYWORD;
7778
7779   switch (len)
7780   {
7781     case 1: /* 5 tokens of length 1 */
7782       switch (name[0])
7783       {
7784         case 'm':
7785           {                                       /* m          */
7786             return KEY_m;
7787           }
7788
7789         case 'q':
7790           {                                       /* q          */
7791             return KEY_q;
7792           }
7793
7794         case 's':
7795           {                                       /* s          */
7796             return KEY_s;
7797           }
7798
7799         case 'x':
7800           {                                       /* x          */
7801             return -KEY_x;
7802           }
7803
7804         case 'y':
7805           {                                       /* y          */
7806             return KEY_y;
7807           }
7808
7809         default:
7810           goto unknown;
7811       }
7812
7813     case 2: /* 18 tokens of length 2 */
7814       switch (name[0])
7815       {
7816         case 'd':
7817           if (name[1] == 'o')
7818           {                                       /* do         */
7819             return KEY_do;
7820           }
7821
7822           goto unknown;
7823
7824         case 'e':
7825           if (name[1] == 'q')
7826           {                                       /* eq         */
7827             return -KEY_eq;
7828           }
7829
7830           goto unknown;
7831
7832         case 'g':
7833           switch (name[1])
7834           {
7835             case 'e':
7836               {                                   /* ge         */
7837                 return -KEY_ge;
7838               }
7839
7840             case 't':
7841               {                                   /* gt         */
7842                 return -KEY_gt;
7843               }
7844
7845             default:
7846               goto unknown;
7847           }
7848
7849         case 'i':
7850           if (name[1] == 'f')
7851           {                                       /* if         */
7852             return KEY_if;
7853           }
7854
7855           goto unknown;
7856
7857         case 'l':
7858           switch (name[1])
7859           {
7860             case 'c':
7861               {                                   /* lc         */
7862                 return -KEY_lc;
7863               }
7864
7865             case 'e':
7866               {                                   /* le         */
7867                 return -KEY_le;
7868               }
7869
7870             case 't':
7871               {                                   /* lt         */
7872                 return -KEY_lt;
7873               }
7874
7875             default:
7876               goto unknown;
7877           }
7878
7879         case 'm':
7880           if (name[1] == 'y')
7881           {                                       /* my         */
7882             return KEY_my;
7883           }
7884
7885           goto unknown;
7886
7887         case 'n':
7888           switch (name[1])
7889           {
7890             case 'e':
7891               {                                   /* ne         */
7892                 return -KEY_ne;
7893               }
7894
7895             case 'o':
7896               {                                   /* no         */
7897                 return KEY_no;
7898               }
7899
7900             default:
7901               goto unknown;
7902           }
7903
7904         case 'o':
7905           if (name[1] == 'r')
7906           {                                       /* or         */
7907             return -KEY_or;
7908           }
7909
7910           goto unknown;
7911
7912         case 'q':
7913           switch (name[1])
7914           {
7915             case 'q':
7916               {                                   /* qq         */
7917                 return KEY_qq;
7918               }
7919
7920             case 'r':
7921               {                                   /* qr         */
7922                 return KEY_qr;
7923               }
7924
7925             case 'w':
7926               {                                   /* qw         */
7927                 return KEY_qw;
7928               }
7929
7930             case 'x':
7931               {                                   /* qx         */
7932                 return KEY_qx;
7933               }
7934
7935             default:
7936               goto unknown;
7937           }
7938
7939         case 't':
7940           if (name[1] == 'r')
7941           {                                       /* tr         */
7942             return KEY_tr;
7943           }
7944
7945           goto unknown;
7946
7947         case 'u':
7948           if (name[1] == 'c')
7949           {                                       /* uc         */
7950             return -KEY_uc;
7951           }
7952
7953           goto unknown;
7954
7955         default:
7956           goto unknown;
7957       }
7958
7959     case 3: /* 29 tokens of length 3 */
7960       switch (name[0])
7961       {
7962         case 'E':
7963           if (name[1] == 'N' &&
7964               name[2] == 'D')
7965           {                                       /* END        */
7966             return KEY_END;
7967           }
7968
7969           goto unknown;
7970
7971         case 'a':
7972           switch (name[1])
7973           {
7974             case 'b':
7975               if (name[2] == 's')
7976               {                                   /* abs        */
7977                 return -KEY_abs;
7978               }
7979
7980               goto unknown;
7981
7982             case 'n':
7983               if (name[2] == 'd')
7984               {                                   /* and        */
7985                 return -KEY_and;
7986               }
7987
7988               goto unknown;
7989
7990             default:
7991               goto unknown;
7992           }
7993
7994         case 'c':
7995           switch (name[1])
7996           {
7997             case 'h':
7998               if (name[2] == 'r')
7999               {                                   /* chr        */
8000                 return -KEY_chr;
8001               }
8002
8003               goto unknown;
8004
8005             case 'm':
8006               if (name[2] == 'p')
8007               {                                   /* cmp        */
8008                 return -KEY_cmp;
8009               }
8010
8011               goto unknown;
8012
8013             case 'o':
8014               if (name[2] == 's')
8015               {                                   /* cos        */
8016                 return -KEY_cos;
8017               }
8018
8019               goto unknown;
8020
8021             default:
8022               goto unknown;
8023           }
8024
8025         case 'd':
8026           if (name[1] == 'i' &&
8027               name[2] == 'e')
8028           {                                       /* die        */
8029             return -KEY_die;
8030           }
8031
8032           goto unknown;
8033
8034         case 'e':
8035           switch (name[1])
8036           {
8037             case 'o':
8038               if (name[2] == 'f')
8039               {                                   /* eof        */
8040                 return -KEY_eof;
8041               }
8042
8043               goto unknown;
8044
8045             case 'x':
8046               if (name[2] == 'p')
8047               {                                   /* exp        */
8048                 return -KEY_exp;
8049               }
8050
8051               goto unknown;
8052
8053             default:
8054               goto unknown;
8055           }
8056
8057         case 'f':
8058           if (name[1] == 'o' &&
8059               name[2] == 'r')
8060           {                                       /* for        */
8061             return KEY_for;
8062           }
8063
8064           goto unknown;
8065
8066         case 'h':
8067           if (name[1] == 'e' &&
8068               name[2] == 'x')
8069           {                                       /* hex        */
8070             return -KEY_hex;
8071           }
8072
8073           goto unknown;
8074
8075         case 'i':
8076           if (name[1] == 'n' &&
8077               name[2] == 't')
8078           {                                       /* int        */
8079             return -KEY_int;
8080           }
8081
8082           goto unknown;
8083
8084         case 'l':
8085           if (name[1] == 'o' &&
8086               name[2] == 'g')
8087           {                                       /* log        */
8088             return -KEY_log;
8089           }
8090
8091           goto unknown;
8092
8093         case 'm':
8094           if (name[1] == 'a' &&
8095               name[2] == 'p')
8096           {                                       /* map        */
8097             return KEY_map;
8098           }
8099
8100           goto unknown;
8101
8102         case 'n':
8103           if (name[1] == 'o' &&
8104               name[2] == 't')
8105           {                                       /* not        */
8106             return -KEY_not;
8107           }
8108
8109           goto unknown;
8110
8111         case 'o':
8112           switch (name[1])
8113           {
8114             case 'c':
8115               if (name[2] == 't')
8116               {                                   /* oct        */
8117                 return -KEY_oct;
8118               }
8119
8120               goto unknown;
8121
8122             case 'r':
8123               if (name[2] == 'd')
8124               {                                   /* ord        */
8125                 return -KEY_ord;
8126               }
8127
8128               goto unknown;
8129
8130             case 'u':
8131               if (name[2] == 'r')
8132               {                                   /* our        */
8133                 return KEY_our;
8134               }
8135
8136               goto unknown;
8137
8138             default:
8139               goto unknown;
8140           }
8141
8142         case 'p':
8143           if (name[1] == 'o')
8144           {
8145             switch (name[2])
8146             {
8147               case 'p':
8148                 {                                 /* pop        */
8149                   return -KEY_pop;
8150                 }
8151
8152               case 's':
8153                 {                                 /* pos        */
8154                   return KEY_pos;
8155                 }
8156
8157               default:
8158                 goto unknown;
8159             }
8160           }
8161
8162           goto unknown;
8163
8164         case 'r':
8165           if (name[1] == 'e' &&
8166               name[2] == 'f')
8167           {                                       /* ref        */
8168             return -KEY_ref;
8169           }
8170
8171           goto unknown;
8172
8173         case 's':
8174           switch (name[1])
8175           {
8176             case 'a':
8177               if (name[2] == 'y')
8178               {                                   /* say        */
8179                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8180               }
8181
8182               goto unknown;
8183
8184             case 'i':
8185               if (name[2] == 'n')
8186               {                                   /* sin        */
8187                 return -KEY_sin;
8188               }
8189
8190               goto unknown;
8191
8192             case 'u':
8193               if (name[2] == 'b')
8194               {                                   /* sub        */
8195                 return KEY_sub;
8196               }
8197
8198               goto unknown;
8199
8200             default:
8201               goto unknown;
8202           }
8203
8204         case 't':
8205           if (name[1] == 'i' &&
8206               name[2] == 'e')
8207           {                                       /* tie        */
8208             return KEY_tie;
8209           }
8210
8211           goto unknown;
8212
8213         case 'u':
8214           if (name[1] == 's' &&
8215               name[2] == 'e')
8216           {                                       /* use        */
8217             return KEY_use;
8218           }
8219
8220           goto unknown;
8221
8222         case 'v':
8223           if (name[1] == 'e' &&
8224               name[2] == 'c')
8225           {                                       /* vec        */
8226             return -KEY_vec;
8227           }
8228
8229           goto unknown;
8230
8231         case 'x':
8232           if (name[1] == 'o' &&
8233               name[2] == 'r')
8234           {                                       /* xor        */
8235             return -KEY_xor;
8236           }
8237
8238           goto unknown;
8239
8240         default:
8241           goto unknown;
8242       }
8243
8244     case 4: /* 41 tokens of length 4 */
8245       switch (name[0])
8246       {
8247         case 'C':
8248           if (name[1] == 'O' &&
8249               name[2] == 'R' &&
8250               name[3] == 'E')
8251           {                                       /* CORE       */
8252             return -KEY_CORE;
8253           }
8254
8255           goto unknown;
8256
8257         case 'I':
8258           if (name[1] == 'N' &&
8259               name[2] == 'I' &&
8260               name[3] == 'T')
8261           {                                       /* INIT       */
8262             return KEY_INIT;
8263           }
8264
8265           goto unknown;
8266
8267         case 'b':
8268           if (name[1] == 'i' &&
8269               name[2] == 'n' &&
8270               name[3] == 'd')
8271           {                                       /* bind       */
8272             return -KEY_bind;
8273           }
8274
8275           goto unknown;
8276
8277         case 'c':
8278           if (name[1] == 'h' &&
8279               name[2] == 'o' &&
8280               name[3] == 'p')
8281           {                                       /* chop       */
8282             return -KEY_chop;
8283           }
8284
8285           goto unknown;
8286
8287         case 'd':
8288           if (name[1] == 'u' &&
8289               name[2] == 'm' &&
8290               name[3] == 'p')
8291           {                                       /* dump       */
8292             return -KEY_dump;
8293           }
8294
8295           goto unknown;
8296
8297         case 'e':
8298           switch (name[1])
8299           {
8300             case 'a':
8301               if (name[2] == 'c' &&
8302                   name[3] == 'h')
8303               {                                   /* each       */
8304                 return -KEY_each;
8305               }
8306
8307               goto unknown;
8308
8309             case 'l':
8310               if (name[2] == 's' &&
8311                   name[3] == 'e')
8312               {                                   /* else       */
8313                 return KEY_else;
8314               }
8315
8316               goto unknown;
8317
8318             case 'v':
8319               if (name[2] == 'a' &&
8320                   name[3] == 'l')
8321               {                                   /* eval       */
8322                 return KEY_eval;
8323               }
8324
8325               goto unknown;
8326
8327             case 'x':
8328               switch (name[2])
8329               {
8330                 case 'e':
8331                   if (name[3] == 'c')
8332                   {                               /* exec       */
8333                     return -KEY_exec;
8334                   }
8335
8336                   goto unknown;
8337
8338                 case 'i':
8339                   if (name[3] == 't')
8340                   {                               /* exit       */
8341                     return -KEY_exit;
8342                   }
8343
8344                   goto unknown;
8345
8346                 default:
8347                   goto unknown;
8348               }
8349
8350             default:
8351               goto unknown;
8352           }
8353
8354         case 'f':
8355           if (name[1] == 'o' &&
8356               name[2] == 'r' &&
8357               name[3] == 'k')
8358           {                                       /* fork       */
8359             return -KEY_fork;
8360           }
8361
8362           goto unknown;
8363
8364         case 'g':
8365           switch (name[1])
8366           {
8367             case 'e':
8368               if (name[2] == 't' &&
8369                   name[3] == 'c')
8370               {                                   /* getc       */
8371                 return -KEY_getc;
8372               }
8373
8374               goto unknown;
8375
8376             case 'l':
8377               if (name[2] == 'o' &&
8378                   name[3] == 'b')
8379               {                                   /* glob       */
8380                 return KEY_glob;
8381               }
8382
8383               goto unknown;
8384
8385             case 'o':
8386               if (name[2] == 't' &&
8387                   name[3] == 'o')
8388               {                                   /* goto       */
8389                 return KEY_goto;
8390               }
8391
8392               goto unknown;
8393
8394             case 'r':
8395               if (name[2] == 'e' &&
8396                   name[3] == 'p')
8397               {                                   /* grep       */
8398                 return KEY_grep;
8399               }
8400
8401               goto unknown;
8402
8403             default:
8404               goto unknown;
8405           }
8406
8407         case 'j':
8408           if (name[1] == 'o' &&
8409               name[2] == 'i' &&
8410               name[3] == 'n')
8411           {                                       /* join       */
8412             return -KEY_join;
8413           }
8414
8415           goto unknown;
8416
8417         case 'k':
8418           switch (name[1])
8419           {
8420             case 'e':
8421               if (name[2] == 'y' &&
8422                   name[3] == 's')
8423               {                                   /* keys       */
8424                 return -KEY_keys;
8425               }
8426
8427               goto unknown;
8428
8429             case 'i':
8430               if (name[2] == 'l' &&
8431                   name[3] == 'l')
8432               {                                   /* kill       */
8433                 return -KEY_kill;
8434               }
8435
8436               goto unknown;
8437
8438             default:
8439               goto unknown;
8440           }
8441
8442         case 'l':
8443           switch (name[1])
8444           {
8445             case 'a':
8446               if (name[2] == 's' &&
8447                   name[3] == 't')
8448               {                                   /* last       */
8449                 return KEY_last;
8450               }
8451
8452               goto unknown;
8453
8454             case 'i':
8455               if (name[2] == 'n' &&
8456                   name[3] == 'k')
8457               {                                   /* link       */
8458                 return -KEY_link;
8459               }
8460
8461               goto unknown;
8462
8463             case 'o':
8464               if (name[2] == 'c' &&
8465                   name[3] == 'k')
8466               {                                   /* lock       */
8467                 return -KEY_lock;
8468               }
8469
8470               goto unknown;
8471
8472             default:
8473               goto unknown;
8474           }
8475
8476         case 'n':
8477           if (name[1] == 'e' &&
8478               name[2] == 'x' &&
8479               name[3] == 't')
8480           {                                       /* next       */
8481             return KEY_next;
8482           }
8483
8484           goto unknown;
8485
8486         case 'o':
8487           if (name[1] == 'p' &&
8488               name[2] == 'e' &&
8489               name[3] == 'n')
8490           {                                       /* open       */
8491             return -KEY_open;
8492           }
8493
8494           goto unknown;
8495
8496         case 'p':
8497           switch (name[1])
8498           {
8499             case 'a':
8500               if (name[2] == 'c' &&
8501                   name[3] == 'k')
8502               {                                   /* pack       */
8503                 return -KEY_pack;
8504               }
8505
8506               goto unknown;
8507
8508             case 'i':
8509               if (name[2] == 'p' &&
8510                   name[3] == 'e')
8511               {                                   /* pipe       */
8512                 return -KEY_pipe;
8513               }
8514
8515               goto unknown;
8516
8517             case 'u':
8518               if (name[2] == 's' &&
8519                   name[3] == 'h')
8520               {                                   /* push       */
8521                 return -KEY_push;
8522               }
8523
8524               goto unknown;
8525
8526             default:
8527               goto unknown;
8528           }
8529
8530         case 'r':
8531           switch (name[1])
8532           {
8533             case 'a':
8534               if (name[2] == 'n' &&
8535                   name[3] == 'd')
8536               {                                   /* rand       */
8537                 return -KEY_rand;
8538               }
8539
8540               goto unknown;
8541
8542             case 'e':
8543               switch (name[2])
8544               {
8545                 case 'a':
8546                   if (name[3] == 'd')
8547                   {                               /* read       */
8548                     return -KEY_read;
8549                   }
8550
8551                   goto unknown;
8552
8553                 case 'c':
8554                   if (name[3] == 'v')
8555                   {                               /* recv       */
8556                     return -KEY_recv;
8557                   }
8558
8559                   goto unknown;
8560
8561                 case 'd':
8562                   if (name[3] == 'o')
8563                   {                               /* redo       */
8564                     return KEY_redo;
8565                   }
8566
8567                   goto unknown;
8568
8569                 default:
8570                   goto unknown;
8571               }
8572
8573             default:
8574               goto unknown;
8575           }
8576
8577         case 's':
8578           switch (name[1])
8579           {
8580             case 'e':
8581               switch (name[2])
8582               {
8583                 case 'e':
8584                   if (name[3] == 'k')
8585                   {                               /* seek       */
8586                     return -KEY_seek;
8587                   }
8588
8589                   goto unknown;
8590
8591                 case 'n':
8592                   if (name[3] == 'd')
8593                   {                               /* send       */
8594                     return -KEY_send;
8595                   }
8596
8597                   goto unknown;
8598
8599                 default:
8600                   goto unknown;
8601               }
8602
8603             case 'o':
8604               if (name[2] == 'r' &&
8605                   name[3] == 't')
8606               {                                   /* sort       */
8607                 return KEY_sort;
8608               }
8609
8610               goto unknown;
8611
8612             case 'q':
8613               if (name[2] == 'r' &&
8614                   name[3] == 't')
8615               {                                   /* sqrt       */
8616                 return -KEY_sqrt;
8617               }
8618
8619               goto unknown;
8620
8621             case 't':
8622               if (name[2] == 'a' &&
8623                   name[3] == 't')
8624               {                                   /* stat       */
8625                 return -KEY_stat;
8626               }
8627
8628               goto unknown;
8629
8630             default:
8631               goto unknown;
8632           }
8633
8634         case 't':
8635           switch (name[1])
8636           {
8637             case 'e':
8638               if (name[2] == 'l' &&
8639                   name[3] == 'l')
8640               {                                   /* tell       */
8641                 return -KEY_tell;
8642               }
8643
8644               goto unknown;
8645
8646             case 'i':
8647               switch (name[2])
8648               {
8649                 case 'e':
8650                   if (name[3] == 'd')
8651                   {                               /* tied       */
8652                     return KEY_tied;
8653                   }
8654
8655                   goto unknown;
8656
8657                 case 'm':
8658                   if (name[3] == 'e')
8659                   {                               /* time       */
8660                     return -KEY_time;
8661                   }
8662
8663                   goto unknown;
8664
8665                 default:
8666                   goto unknown;
8667               }
8668
8669             default:
8670               goto unknown;
8671           }
8672
8673         case 'w':
8674           switch (name[1])
8675           {
8676             case 'a':
8677               switch (name[2])
8678               {
8679                 case 'i':
8680                   if (name[3] == 't')
8681                   {                               /* wait       */
8682                     return -KEY_wait;
8683                   }
8684
8685                   goto unknown;
8686
8687                 case 'r':
8688                   if (name[3] == 'n')
8689                   {                               /* warn       */
8690                     return -KEY_warn;
8691                   }
8692
8693                   goto unknown;
8694
8695                 default:
8696                   goto unknown;
8697               }
8698
8699             case 'h':
8700               if (name[2] == 'e' &&
8701                   name[3] == 'n')
8702               {                                   /* when       */
8703                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8704               }
8705
8706               goto unknown;
8707
8708             default:
8709               goto unknown;
8710           }
8711
8712         default:
8713           goto unknown;
8714       }
8715
8716     case 5: /* 39 tokens of length 5 */
8717       switch (name[0])
8718       {
8719         case 'B':
8720           if (name[1] == 'E' &&
8721               name[2] == 'G' &&
8722               name[3] == 'I' &&
8723               name[4] == 'N')
8724           {                                       /* BEGIN      */
8725             return KEY_BEGIN;
8726           }
8727
8728           goto unknown;
8729
8730         case 'C':
8731           if (name[1] == 'H' &&
8732               name[2] == 'E' &&
8733               name[3] == 'C' &&
8734               name[4] == 'K')
8735           {                                       /* CHECK      */
8736             return KEY_CHECK;
8737           }
8738
8739           goto unknown;
8740
8741         case 'a':
8742           switch (name[1])
8743           {
8744             case 'l':
8745               if (name[2] == 'a' &&
8746                   name[3] == 'r' &&
8747                   name[4] == 'm')
8748               {                                   /* alarm      */
8749                 return -KEY_alarm;
8750               }
8751
8752               goto unknown;
8753
8754             case 't':
8755               if (name[2] == 'a' &&
8756                   name[3] == 'n' &&
8757                   name[4] == '2')
8758               {                                   /* atan2      */
8759                 return -KEY_atan2;
8760               }
8761
8762               goto unknown;
8763
8764             default:
8765               goto unknown;
8766           }
8767
8768         case 'b':
8769           switch (name[1])
8770           {
8771             case 'l':
8772               if (name[2] == 'e' &&
8773                   name[3] == 's' &&
8774                   name[4] == 's')
8775               {                                   /* bless      */
8776                 return -KEY_bless;
8777               }
8778
8779               goto unknown;
8780
8781             case 'r':
8782               if (name[2] == 'e' &&
8783                   name[3] == 'a' &&
8784                   name[4] == 'k')
8785               {                                   /* break      */
8786                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8787               }
8788
8789               goto unknown;
8790
8791             default:
8792               goto unknown;
8793           }
8794
8795         case 'c':
8796           switch (name[1])
8797           {
8798             case 'h':
8799               switch (name[2])
8800               {
8801                 case 'd':
8802                   if (name[3] == 'i' &&
8803                       name[4] == 'r')
8804                   {                               /* chdir      */
8805                     return -KEY_chdir;
8806                   }
8807
8808                   goto unknown;
8809
8810                 case 'm':
8811                   if (name[3] == 'o' &&
8812                       name[4] == 'd')
8813                   {                               /* chmod      */
8814                     return -KEY_chmod;
8815                   }
8816
8817                   goto unknown;
8818
8819                 case 'o':
8820                   switch (name[3])
8821                   {
8822                     case 'm':
8823                       if (name[4] == 'p')
8824                       {                           /* chomp      */
8825                         return -KEY_chomp;
8826                       }
8827
8828                       goto unknown;
8829
8830                     case 'w':
8831                       if (name[4] == 'n')
8832                       {                           /* chown      */
8833                         return -KEY_chown;
8834                       }
8835
8836                       goto unknown;
8837
8838                     default:
8839                       goto unknown;
8840                   }
8841
8842                 default:
8843                   goto unknown;
8844               }
8845
8846             case 'l':
8847               if (name[2] == 'o' &&
8848                   name[3] == 's' &&
8849                   name[4] == 'e')
8850               {                                   /* close      */
8851                 return -KEY_close;
8852               }
8853
8854               goto unknown;
8855
8856             case 'r':
8857               if (name[2] == 'y' &&
8858                   name[3] == 'p' &&
8859                   name[4] == 't')
8860               {                                   /* crypt      */
8861                 return -KEY_crypt;
8862               }
8863
8864               goto unknown;
8865
8866             default:
8867               goto unknown;
8868           }
8869
8870         case 'e':
8871           if (name[1] == 'l' &&
8872               name[2] == 's' &&
8873               name[3] == 'i' &&
8874               name[4] == 'f')
8875           {                                       /* elsif      */
8876             return KEY_elsif;
8877           }
8878
8879           goto unknown;
8880
8881         case 'f':
8882           switch (name[1])
8883           {
8884             case 'c':
8885               if (name[2] == 'n' &&
8886                   name[3] == 't' &&
8887                   name[4] == 'l')
8888               {                                   /* fcntl      */
8889                 return -KEY_fcntl;
8890               }
8891
8892               goto unknown;
8893
8894             case 'l':
8895               if (name[2] == 'o' &&
8896                   name[3] == 'c' &&
8897                   name[4] == 'k')
8898               {                                   /* flock      */
8899                 return -KEY_flock;
8900               }
8901
8902               goto unknown;
8903
8904             default:
8905               goto unknown;
8906           }
8907
8908         case 'g':
8909           if (name[1] == 'i' &&
8910               name[2] == 'v' &&
8911               name[3] == 'e' &&
8912               name[4] == 'n')
8913           {                                       /* given      */
8914             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8915           }
8916
8917           goto unknown;
8918
8919         case 'i':
8920           switch (name[1])
8921           {
8922             case 'n':
8923               if (name[2] == 'd' &&
8924                   name[3] == 'e' &&
8925                   name[4] == 'x')
8926               {                                   /* index      */
8927                 return -KEY_index;
8928               }
8929
8930               goto unknown;
8931
8932             case 'o':
8933               if (name[2] == 'c' &&
8934                   name[3] == 't' &&
8935                   name[4] == 'l')
8936               {                                   /* ioctl      */
8937                 return -KEY_ioctl;
8938               }
8939
8940               goto unknown;
8941
8942             default:
8943               goto unknown;
8944           }
8945
8946         case 'l':
8947           switch (name[1])
8948           {
8949             case 'o':
8950               if (name[2] == 'c' &&
8951                   name[3] == 'a' &&
8952                   name[4] == 'l')
8953               {                                   /* local      */
8954                 return KEY_local;
8955               }
8956
8957               goto unknown;
8958
8959             case 's':
8960               if (name[2] == 't' &&
8961                   name[3] == 'a' &&
8962                   name[4] == 't')
8963               {                                   /* lstat      */
8964                 return -KEY_lstat;
8965               }
8966
8967               goto unknown;
8968
8969             default:
8970               goto unknown;
8971           }
8972
8973         case 'm':
8974           if (name[1] == 'k' &&
8975               name[2] == 'd' &&
8976               name[3] == 'i' &&
8977               name[4] == 'r')
8978           {                                       /* mkdir      */
8979             return -KEY_mkdir;
8980           }
8981
8982           goto unknown;
8983
8984         case 'p':
8985           if (name[1] == 'r' &&
8986               name[2] == 'i' &&
8987               name[3] == 'n' &&
8988               name[4] == 't')
8989           {                                       /* print      */
8990             return KEY_print;
8991           }
8992
8993           goto unknown;
8994
8995         case 'r':
8996           switch (name[1])
8997           {
8998             case 'e':
8999               if (name[2] == 's' &&
9000                   name[3] == 'e' &&
9001                   name[4] == 't')
9002               {                                   /* reset      */
9003                 return -KEY_reset;
9004               }
9005
9006               goto unknown;
9007
9008             case 'm':
9009               if (name[2] == 'd' &&
9010                   name[3] == 'i' &&
9011                   name[4] == 'r')
9012               {                                   /* rmdir      */
9013                 return -KEY_rmdir;
9014               }
9015
9016               goto unknown;
9017
9018             default:
9019               goto unknown;
9020           }
9021
9022         case 's':
9023           switch (name[1])
9024           {
9025             case 'e':
9026               if (name[2] == 'm' &&
9027                   name[3] == 'o' &&
9028                   name[4] == 'p')
9029               {                                   /* semop      */
9030                 return -KEY_semop;
9031               }
9032
9033               goto unknown;
9034
9035             case 'h':
9036               if (name[2] == 'i' &&
9037                   name[3] == 'f' &&
9038                   name[4] == 't')
9039               {                                   /* shift      */
9040                 return -KEY_shift;
9041               }
9042
9043               goto unknown;
9044
9045             case 'l':
9046               if (name[2] == 'e' &&
9047                   name[3] == 'e' &&
9048                   name[4] == 'p')
9049               {                                   /* sleep      */
9050                 return -KEY_sleep;
9051               }
9052
9053               goto unknown;
9054
9055             case 'p':
9056               if (name[2] == 'l' &&
9057                   name[3] == 'i' &&
9058                   name[4] == 't')
9059               {                                   /* split      */
9060                 return KEY_split;
9061               }
9062
9063               goto unknown;
9064
9065             case 'r':
9066               if (name[2] == 'a' &&
9067                   name[3] == 'n' &&
9068                   name[4] == 'd')
9069               {                                   /* srand      */
9070                 return -KEY_srand;
9071               }
9072
9073               goto unknown;
9074
9075             case 't':
9076               switch (name[2])
9077               {
9078                 case 'a':
9079                   if (name[3] == 't' &&
9080                       name[4] == 'e')
9081                   {                               /* state      */
9082                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9083                   }
9084
9085                   goto unknown;
9086
9087                 case 'u':
9088                   if (name[3] == 'd' &&
9089                       name[4] == 'y')
9090                   {                               /* study      */
9091                     return KEY_study;
9092                   }
9093
9094                   goto unknown;
9095
9096                 default:
9097                   goto unknown;
9098               }
9099
9100             default:
9101               goto unknown;
9102           }
9103
9104         case 't':
9105           if (name[1] == 'i' &&
9106               name[2] == 'm' &&
9107               name[3] == 'e' &&
9108               name[4] == 's')
9109           {                                       /* times      */
9110             return -KEY_times;
9111           }
9112
9113           goto unknown;
9114
9115         case 'u':
9116           switch (name[1])
9117           {
9118             case 'm':
9119               if (name[2] == 'a' &&
9120                   name[3] == 's' &&
9121                   name[4] == 'k')
9122               {                                   /* umask      */
9123                 return -KEY_umask;
9124               }
9125
9126               goto unknown;
9127
9128             case 'n':
9129               switch (name[2])
9130               {
9131                 case 'd':
9132                   if (name[3] == 'e' &&
9133                       name[4] == 'f')
9134                   {                               /* undef      */
9135                     return KEY_undef;
9136                   }
9137
9138                   goto unknown;
9139
9140                 case 't':
9141                   if (name[3] == 'i')
9142                   {
9143                     switch (name[4])
9144                     {
9145                       case 'e':
9146                         {                         /* untie      */
9147                           return KEY_untie;
9148                         }
9149
9150                       case 'l':
9151                         {                         /* until      */
9152                           return KEY_until;
9153                         }
9154
9155                       default:
9156                         goto unknown;
9157                     }
9158                   }
9159
9160                   goto unknown;
9161
9162                 default:
9163                   goto unknown;
9164               }
9165
9166             case 't':
9167               if (name[2] == 'i' &&
9168                   name[3] == 'm' &&
9169                   name[4] == 'e')
9170               {                                   /* utime      */
9171                 return -KEY_utime;
9172               }
9173
9174               goto unknown;
9175
9176             default:
9177               goto unknown;
9178           }
9179
9180         case 'w':
9181           switch (name[1])
9182           {
9183             case 'h':
9184               if (name[2] == 'i' &&
9185                   name[3] == 'l' &&
9186                   name[4] == 'e')
9187               {                                   /* while      */
9188                 return KEY_while;
9189               }
9190
9191               goto unknown;
9192
9193             case 'r':
9194               if (name[2] == 'i' &&
9195                   name[3] == 't' &&
9196                   name[4] == 'e')
9197               {                                   /* write      */
9198                 return -KEY_write;
9199               }
9200
9201               goto unknown;
9202
9203             default:
9204               goto unknown;
9205           }
9206
9207         default:
9208           goto unknown;
9209       }
9210
9211     case 6: /* 33 tokens of length 6 */
9212       switch (name[0])
9213       {
9214         case 'a':
9215           if (name[1] == 'c' &&
9216               name[2] == 'c' &&
9217               name[3] == 'e' &&
9218               name[4] == 'p' &&
9219               name[5] == 't')
9220           {                                       /* accept     */
9221             return -KEY_accept;
9222           }
9223
9224           goto unknown;
9225
9226         case 'c':
9227           switch (name[1])
9228           {
9229             case 'a':
9230               if (name[2] == 'l' &&
9231                   name[3] == 'l' &&
9232                   name[4] == 'e' &&
9233                   name[5] == 'r')
9234               {                                   /* caller     */
9235                 return -KEY_caller;
9236               }
9237
9238               goto unknown;
9239
9240             case 'h':
9241               if (name[2] == 'r' &&
9242                   name[3] == 'o' &&
9243                   name[4] == 'o' &&
9244                   name[5] == 't')
9245               {                                   /* chroot     */
9246                 return -KEY_chroot;
9247               }
9248
9249               goto unknown;
9250
9251             default:
9252               goto unknown;
9253           }
9254
9255         case 'd':
9256           if (name[1] == 'e' &&
9257               name[2] == 'l' &&
9258               name[3] == 'e' &&
9259               name[4] == 't' &&
9260               name[5] == 'e')
9261           {                                       /* delete     */
9262             return KEY_delete;
9263           }
9264
9265           goto unknown;
9266
9267         case 'e':
9268           switch (name[1])
9269           {
9270             case 'l':
9271               if (name[2] == 's' &&
9272                   name[3] == 'e' &&
9273                   name[4] == 'i' &&
9274                   name[5] == 'f')
9275               {                                   /* elseif     */
9276                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9277               }
9278
9279               goto unknown;
9280
9281             case 'x':
9282               if (name[2] == 'i' &&
9283                   name[3] == 's' &&
9284                   name[4] == 't' &&
9285                   name[5] == 's')
9286               {                                   /* exists     */
9287                 return KEY_exists;
9288               }
9289
9290               goto unknown;
9291
9292             default:
9293               goto unknown;
9294           }
9295
9296         case 'f':
9297           switch (name[1])
9298           {
9299             case 'i':
9300               if (name[2] == 'l' &&
9301                   name[3] == 'e' &&
9302                   name[4] == 'n' &&
9303                   name[5] == 'o')
9304               {                                   /* fileno     */
9305                 return -KEY_fileno;
9306               }
9307
9308               goto unknown;
9309
9310             case 'o':
9311               if (name[2] == 'r' &&
9312                   name[3] == 'm' &&
9313                   name[4] == 'a' &&
9314                   name[5] == 't')
9315               {                                   /* format     */
9316                 return KEY_format;
9317               }
9318
9319               goto unknown;
9320
9321             default:
9322               goto unknown;
9323           }
9324
9325         case 'g':
9326           if (name[1] == 'm' &&
9327               name[2] == 't' &&
9328               name[3] == 'i' &&
9329               name[4] == 'm' &&
9330               name[5] == 'e')
9331           {                                       /* gmtime     */
9332             return -KEY_gmtime;
9333           }
9334
9335           goto unknown;
9336
9337         case 'l':
9338           switch (name[1])
9339           {
9340             case 'e':
9341               if (name[2] == 'n' &&
9342                   name[3] == 'g' &&
9343                   name[4] == 't' &&
9344                   name[5] == 'h')
9345               {                                   /* length     */
9346                 return -KEY_length;
9347               }
9348
9349               goto unknown;
9350
9351             case 'i':
9352               if (name[2] == 's' &&
9353                   name[3] == 't' &&
9354                   name[4] == 'e' &&
9355                   name[5] == 'n')
9356               {                                   /* listen     */
9357                 return -KEY_listen;
9358               }
9359
9360               goto unknown;
9361
9362             default:
9363               goto unknown;
9364           }
9365
9366         case 'm':
9367           if (name[1] == 's' &&
9368               name[2] == 'g')
9369           {
9370             switch (name[3])
9371             {
9372               case 'c':
9373                 if (name[4] == 't' &&
9374                     name[5] == 'l')
9375                 {                                 /* msgctl     */
9376                   return -KEY_msgctl;
9377                 }
9378
9379                 goto unknown;
9380
9381               case 'g':
9382                 if (name[4] == 'e' &&
9383                     name[5] == 't')
9384                 {                                 /* msgget     */
9385                   return -KEY_msgget;
9386                 }
9387
9388                 goto unknown;
9389
9390               case 'r':
9391                 if (name[4] == 'c' &&
9392                     name[5] == 'v')
9393                 {                                 /* msgrcv     */
9394                   return -KEY_msgrcv;
9395                 }
9396
9397                 goto unknown;
9398
9399               case 's':
9400                 if (name[4] == 'n' &&
9401                     name[5] == 'd')
9402                 {                                 /* msgsnd     */
9403                   return -KEY_msgsnd;
9404                 }
9405
9406                 goto unknown;
9407
9408               default:
9409                 goto unknown;
9410             }
9411           }
9412
9413           goto unknown;
9414
9415         case 'p':
9416           if (name[1] == 'r' &&
9417               name[2] == 'i' &&
9418               name[3] == 'n' &&
9419               name[4] == 't' &&
9420               name[5] == 'f')
9421           {                                       /* printf     */
9422             return KEY_printf;
9423           }
9424
9425           goto unknown;
9426
9427         case 'r':
9428           switch (name[1])
9429           {
9430             case 'e':
9431               switch (name[2])
9432               {
9433                 case 'n':
9434                   if (name[3] == 'a' &&
9435                       name[4] == 'm' &&
9436                       name[5] == 'e')
9437                   {                               /* rename     */
9438                     return -KEY_rename;
9439                   }
9440
9441                   goto unknown;
9442
9443                 case 't':
9444                   if (name[3] == 'u' &&
9445                       name[4] == 'r' &&
9446                       name[5] == 'n')
9447                   {                               /* return     */
9448                     return KEY_return;
9449                   }
9450
9451                   goto unknown;
9452
9453                 default:
9454                   goto unknown;
9455               }
9456
9457             case 'i':
9458               if (name[2] == 'n' &&
9459                   name[3] == 'd' &&
9460                   name[4] == 'e' &&
9461                   name[5] == 'x')
9462               {                                   /* rindex     */
9463                 return -KEY_rindex;
9464               }
9465
9466               goto unknown;
9467
9468             default:
9469               goto unknown;
9470           }
9471
9472         case 's':
9473           switch (name[1])
9474           {
9475             case 'c':
9476               if (name[2] == 'a' &&
9477                   name[3] == 'l' &&
9478                   name[4] == 'a' &&
9479                   name[5] == 'r')
9480               {                                   /* scalar     */
9481                 return KEY_scalar;
9482               }
9483
9484               goto unknown;
9485
9486             case 'e':
9487               switch (name[2])
9488               {
9489                 case 'l':
9490                   if (name[3] == 'e' &&
9491                       name[4] == 'c' &&
9492                       name[5] == 't')
9493                   {                               /* select     */
9494                     return -KEY_select;
9495                   }
9496
9497                   goto unknown;
9498
9499                 case 'm':
9500                   switch (name[3])
9501                   {
9502                     case 'c':
9503                       if (name[4] == 't' &&
9504                           name[5] == 'l')
9505                       {                           /* semctl     */
9506                         return -KEY_semctl;
9507                       }
9508
9509                       goto unknown;
9510
9511                     case 'g':
9512                       if (name[4] == 'e' &&
9513                           name[5] == 't')
9514                       {                           /* semget     */
9515                         return -KEY_semget;
9516                       }
9517
9518                       goto unknown;
9519
9520                     default:
9521                       goto unknown;
9522                   }
9523
9524                 default:
9525                   goto unknown;
9526               }
9527
9528             case 'h':
9529               if (name[2] == 'm')
9530               {
9531                 switch (name[3])
9532                 {
9533                   case 'c':
9534                     if (name[4] == 't' &&
9535                         name[5] == 'l')
9536                     {                             /* shmctl     */
9537                       return -KEY_shmctl;
9538                     }
9539
9540                     goto unknown;
9541
9542                   case 'g':
9543                     if (name[4] == 'e' &&
9544                         name[5] == 't')
9545                     {                             /* shmget     */
9546                       return -KEY_shmget;
9547                     }
9548
9549                     goto unknown;
9550
9551                   default:
9552                     goto unknown;
9553                 }
9554               }
9555
9556               goto unknown;
9557
9558             case 'o':
9559               if (name[2] == 'c' &&
9560                   name[3] == 'k' &&
9561                   name[4] == 'e' &&
9562                   name[5] == 't')
9563               {                                   /* socket     */
9564                 return -KEY_socket;
9565               }
9566
9567               goto unknown;
9568
9569             case 'p':
9570               if (name[2] == 'l' &&
9571                   name[3] == 'i' &&
9572                   name[4] == 'c' &&
9573                   name[5] == 'e')
9574               {                                   /* splice     */
9575                 return -KEY_splice;
9576               }
9577
9578               goto unknown;
9579
9580             case 'u':
9581               if (name[2] == 'b' &&
9582                   name[3] == 's' &&
9583                   name[4] == 't' &&
9584                   name[5] == 'r')
9585               {                                   /* substr     */
9586                 return -KEY_substr;
9587               }
9588
9589               goto unknown;
9590
9591             case 'y':
9592               if (name[2] == 's' &&
9593                   name[3] == 't' &&
9594                   name[4] == 'e' &&
9595                   name[5] == 'm')
9596               {                                   /* system     */
9597                 return -KEY_system;
9598               }
9599
9600               goto unknown;
9601
9602             default:
9603               goto unknown;
9604           }
9605
9606         case 'u':
9607           if (name[1] == 'n')
9608           {
9609             switch (name[2])
9610             {
9611               case 'l':
9612                 switch (name[3])
9613                 {
9614                   case 'e':
9615                     if (name[4] == 's' &&
9616                         name[5] == 's')
9617                     {                             /* unless     */
9618                       return KEY_unless;
9619                     }
9620
9621                     goto unknown;
9622
9623                   case 'i':
9624                     if (name[4] == 'n' &&
9625                         name[5] == 'k')
9626                     {                             /* unlink     */
9627                       return -KEY_unlink;
9628                     }
9629
9630                     goto unknown;
9631
9632                   default:
9633                     goto unknown;
9634                 }
9635
9636               case 'p':
9637                 if (name[3] == 'a' &&
9638                     name[4] == 'c' &&
9639                     name[5] == 'k')
9640                 {                                 /* unpack     */
9641                   return -KEY_unpack;
9642                 }
9643
9644                 goto unknown;
9645
9646               default:
9647                 goto unknown;
9648             }
9649           }
9650
9651           goto unknown;
9652
9653         case 'v':
9654           if (name[1] == 'a' &&
9655               name[2] == 'l' &&
9656               name[3] == 'u' &&
9657               name[4] == 'e' &&
9658               name[5] == 's')
9659           {                                       /* values     */
9660             return -KEY_values;
9661           }
9662
9663           goto unknown;
9664
9665         default:
9666           goto unknown;
9667       }
9668
9669     case 7: /* 29 tokens of length 7 */
9670       switch (name[0])
9671       {
9672         case 'D':
9673           if (name[1] == 'E' &&
9674               name[2] == 'S' &&
9675               name[3] == 'T' &&
9676               name[4] == 'R' &&
9677               name[5] == 'O' &&
9678               name[6] == 'Y')
9679           {                                       /* DESTROY    */
9680             return KEY_DESTROY;
9681           }
9682
9683           goto unknown;
9684
9685         case '_':
9686           if (name[1] == '_' &&
9687               name[2] == 'E' &&
9688               name[3] == 'N' &&
9689               name[4] == 'D' &&
9690               name[5] == '_' &&
9691               name[6] == '_')
9692           {                                       /* __END__    */
9693             return KEY___END__;
9694           }
9695
9696           goto unknown;
9697
9698         case 'b':
9699           if (name[1] == 'i' &&
9700               name[2] == 'n' &&
9701               name[3] == 'm' &&
9702               name[4] == 'o' &&
9703               name[5] == 'd' &&
9704               name[6] == 'e')
9705           {                                       /* binmode    */
9706             return -KEY_binmode;
9707           }
9708
9709           goto unknown;
9710
9711         case 'c':
9712           if (name[1] == 'o' &&
9713               name[2] == 'n' &&
9714               name[3] == 'n' &&
9715               name[4] == 'e' &&
9716               name[5] == 'c' &&
9717               name[6] == 't')
9718           {                                       /* connect    */
9719             return -KEY_connect;
9720           }
9721
9722           goto unknown;
9723
9724         case 'd':
9725           switch (name[1])
9726           {
9727             case 'b':
9728               if (name[2] == 'm' &&
9729                   name[3] == 'o' &&
9730                   name[4] == 'p' &&
9731                   name[5] == 'e' &&
9732                   name[6] == 'n')
9733               {                                   /* dbmopen    */
9734                 return -KEY_dbmopen;
9735               }
9736
9737               goto unknown;
9738
9739             case 'e':
9740               if (name[2] == 'f')
9741               {
9742                 switch (name[3])
9743                 {
9744                   case 'a':
9745                     if (name[4] == 'u' &&
9746                         name[5] == 'l' &&
9747                         name[6] == 't')
9748                     {                             /* default    */
9749                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9750                     }
9751
9752                     goto unknown;
9753
9754                   case 'i':
9755                     if (name[4] == 'n' &&
9756                         name[5] == 'e' &&
9757                         name[6] == 'd')
9758                     {                             /* defined    */
9759                       return KEY_defined;
9760                     }
9761
9762                     goto unknown;
9763
9764                   default:
9765                     goto unknown;
9766                 }
9767               }
9768
9769               goto unknown;
9770
9771             default:
9772               goto unknown;
9773           }
9774
9775         case 'f':
9776           if (name[1] == 'o' &&
9777               name[2] == 'r' &&
9778               name[3] == 'e' &&
9779               name[4] == 'a' &&
9780               name[5] == 'c' &&
9781               name[6] == 'h')
9782           {                                       /* foreach    */
9783             return KEY_foreach;
9784           }
9785
9786           goto unknown;
9787
9788         case 'g':
9789           if (name[1] == 'e' &&
9790               name[2] == 't' &&
9791               name[3] == 'p')
9792           {
9793             switch (name[4])
9794             {
9795               case 'g':
9796                 if (name[5] == 'r' &&
9797                     name[6] == 'p')
9798                 {                                 /* getpgrp    */
9799                   return -KEY_getpgrp;
9800                 }
9801
9802                 goto unknown;
9803
9804               case 'p':
9805                 if (name[5] == 'i' &&
9806                     name[6] == 'd')
9807                 {                                 /* getppid    */
9808                   return -KEY_getppid;
9809                 }
9810
9811                 goto unknown;
9812
9813               default:
9814                 goto unknown;
9815             }
9816           }
9817
9818           goto unknown;
9819
9820         case 'l':
9821           if (name[1] == 'c' &&
9822               name[2] == 'f' &&
9823               name[3] == 'i' &&
9824               name[4] == 'r' &&
9825               name[5] == 's' &&
9826               name[6] == 't')
9827           {                                       /* lcfirst    */
9828             return -KEY_lcfirst;
9829           }
9830
9831           goto unknown;
9832
9833         case 'o':
9834           if (name[1] == 'p' &&
9835               name[2] == 'e' &&
9836               name[3] == 'n' &&
9837               name[4] == 'd' &&
9838               name[5] == 'i' &&
9839               name[6] == 'r')
9840           {                                       /* opendir    */
9841             return -KEY_opendir;
9842           }
9843
9844           goto unknown;
9845
9846         case 'p':
9847           if (name[1] == 'a' &&
9848               name[2] == 'c' &&
9849               name[3] == 'k' &&
9850               name[4] == 'a' &&
9851               name[5] == 'g' &&
9852               name[6] == 'e')
9853           {                                       /* package    */
9854             return KEY_package;
9855           }
9856
9857           goto unknown;
9858
9859         case 'r':
9860           if (name[1] == 'e')
9861           {
9862             switch (name[2])
9863             {
9864               case 'a':
9865                 if (name[3] == 'd' &&
9866                     name[4] == 'd' &&
9867                     name[5] == 'i' &&
9868                     name[6] == 'r')
9869                 {                                 /* readdir    */
9870                   return -KEY_readdir;
9871                 }
9872
9873                 goto unknown;
9874
9875               case 'q':
9876                 if (name[3] == 'u' &&
9877                     name[4] == 'i' &&
9878                     name[5] == 'r' &&
9879                     name[6] == 'e')
9880                 {                                 /* require    */
9881                   return KEY_require;
9882                 }
9883
9884                 goto unknown;
9885
9886               case 'v':
9887                 if (name[3] == 'e' &&
9888                     name[4] == 'r' &&
9889                     name[5] == 's' &&
9890                     name[6] == 'e')
9891                 {                                 /* reverse    */
9892                   return -KEY_reverse;
9893                 }
9894
9895                 goto unknown;
9896
9897               default:
9898                 goto unknown;
9899             }
9900           }
9901
9902           goto unknown;
9903
9904         case 's':
9905           switch (name[1])
9906           {
9907             case 'e':
9908               switch (name[2])
9909               {
9910                 case 'e':
9911                   if (name[3] == 'k' &&
9912                       name[4] == 'd' &&
9913                       name[5] == 'i' &&
9914                       name[6] == 'r')
9915                   {                               /* seekdir    */
9916                     return -KEY_seekdir;
9917                   }
9918
9919                   goto unknown;
9920
9921                 case 't':
9922                   if (name[3] == 'p' &&
9923                       name[4] == 'g' &&
9924                       name[5] == 'r' &&
9925                       name[6] == 'p')
9926                   {                               /* setpgrp    */
9927                     return -KEY_setpgrp;
9928                   }
9929
9930                   goto unknown;
9931
9932                 default:
9933                   goto unknown;
9934               }
9935
9936             case 'h':
9937               if (name[2] == 'm' &&
9938                   name[3] == 'r' &&
9939                   name[4] == 'e' &&
9940                   name[5] == 'a' &&
9941                   name[6] == 'd')
9942               {                                   /* shmread    */
9943                 return -KEY_shmread;
9944               }
9945
9946               goto unknown;
9947
9948             case 'p':
9949               if (name[2] == 'r' &&
9950                   name[3] == 'i' &&
9951                   name[4] == 'n' &&
9952                   name[5] == 't' &&
9953                   name[6] == 'f')
9954               {                                   /* sprintf    */
9955                 return -KEY_sprintf;
9956               }
9957
9958               goto unknown;
9959
9960             case 'y':
9961               switch (name[2])
9962               {
9963                 case 'm':
9964                   if (name[3] == 'l' &&
9965                       name[4] == 'i' &&
9966                       name[5] == 'n' &&
9967                       name[6] == 'k')
9968                   {                               /* symlink    */
9969                     return -KEY_symlink;
9970                   }
9971
9972                   goto unknown;
9973
9974                 case 's':
9975                   switch (name[3])
9976                   {
9977                     case 'c':
9978                       if (name[4] == 'a' &&
9979                           name[5] == 'l' &&
9980                           name[6] == 'l')
9981                       {                           /* syscall    */
9982                         return -KEY_syscall;
9983                       }
9984
9985                       goto unknown;
9986
9987                     case 'o':
9988                       if (name[4] == 'p' &&
9989                           name[5] == 'e' &&
9990                           name[6] == 'n')
9991                       {                           /* sysopen    */
9992                         return -KEY_sysopen;
9993                       }
9994
9995                       goto unknown;
9996
9997                     case 'r':
9998                       if (name[4] == 'e' &&
9999                           name[5] == 'a' &&
10000                           name[6] == 'd')
10001                       {                           /* sysread    */
10002                         return -KEY_sysread;
10003                       }
10004
10005                       goto unknown;
10006
10007                     case 's':
10008                       if (name[4] == 'e' &&
10009                           name[5] == 'e' &&
10010                           name[6] == 'k')
10011                       {                           /* sysseek    */
10012                         return -KEY_sysseek;
10013                       }
10014
10015                       goto unknown;
10016
10017                     default:
10018                       goto unknown;
10019                   }
10020
10021                 default:
10022                   goto unknown;
10023               }
10024
10025             default:
10026               goto unknown;
10027           }
10028
10029         case 't':
10030           if (name[1] == 'e' &&
10031               name[2] == 'l' &&
10032               name[3] == 'l' &&
10033               name[4] == 'd' &&
10034               name[5] == 'i' &&
10035               name[6] == 'r')
10036           {                                       /* telldir    */
10037             return -KEY_telldir;
10038           }
10039
10040           goto unknown;
10041
10042         case 'u':
10043           switch (name[1])
10044           {
10045             case 'c':
10046               if (name[2] == 'f' &&
10047                   name[3] == 'i' &&
10048                   name[4] == 'r' &&
10049                   name[5] == 's' &&
10050                   name[6] == 't')
10051               {                                   /* ucfirst    */
10052                 return -KEY_ucfirst;
10053               }
10054
10055               goto unknown;
10056
10057             case 'n':
10058               if (name[2] == 's' &&
10059                   name[3] == 'h' &&
10060                   name[4] == 'i' &&
10061                   name[5] == 'f' &&
10062                   name[6] == 't')
10063               {                                   /* unshift    */
10064                 return -KEY_unshift;
10065               }
10066
10067               goto unknown;
10068
10069             default:
10070               goto unknown;
10071           }
10072
10073         case 'w':
10074           if (name[1] == 'a' &&
10075               name[2] == 'i' &&
10076               name[3] == 't' &&
10077               name[4] == 'p' &&
10078               name[5] == 'i' &&
10079               name[6] == 'd')
10080           {                                       /* waitpid    */
10081             return -KEY_waitpid;
10082           }
10083
10084           goto unknown;
10085
10086         default:
10087           goto unknown;
10088       }
10089
10090     case 8: /* 26 tokens of length 8 */
10091       switch (name[0])
10092       {
10093         case 'A':
10094           if (name[1] == 'U' &&
10095               name[2] == 'T' &&
10096               name[3] == 'O' &&
10097               name[4] == 'L' &&
10098               name[5] == 'O' &&
10099               name[6] == 'A' &&
10100               name[7] == 'D')
10101           {                                       /* AUTOLOAD   */
10102             return KEY_AUTOLOAD;
10103           }
10104
10105           goto unknown;
10106
10107         case '_':
10108           if (name[1] == '_')
10109           {
10110             switch (name[2])
10111             {
10112               case 'D':
10113                 if (name[3] == 'A' &&
10114                     name[4] == 'T' &&
10115                     name[5] == 'A' &&
10116                     name[6] == '_' &&
10117                     name[7] == '_')
10118                 {                                 /* __DATA__   */
10119                   return KEY___DATA__;
10120                 }
10121
10122                 goto unknown;
10123
10124               case 'F':
10125                 if (name[3] == 'I' &&
10126                     name[4] == 'L' &&
10127                     name[5] == 'E' &&
10128                     name[6] == '_' &&
10129                     name[7] == '_')
10130                 {                                 /* __FILE__   */
10131                   return -KEY___FILE__;
10132                 }
10133
10134                 goto unknown;
10135
10136               case 'L':
10137                 if (name[3] == 'I' &&
10138                     name[4] == 'N' &&
10139                     name[5] == 'E' &&
10140                     name[6] == '_' &&
10141                     name[7] == '_')
10142                 {                                 /* __LINE__   */
10143                   return -KEY___LINE__;
10144                 }
10145
10146                 goto unknown;
10147
10148               default:
10149                 goto unknown;
10150             }
10151           }
10152
10153           goto unknown;
10154
10155         case 'c':
10156           switch (name[1])
10157           {
10158             case 'l':
10159               if (name[2] == 'o' &&
10160                   name[3] == 's' &&
10161                   name[4] == 'e' &&
10162                   name[5] == 'd' &&
10163                   name[6] == 'i' &&
10164                   name[7] == 'r')
10165               {                                   /* closedir   */
10166                 return -KEY_closedir;
10167               }
10168
10169               goto unknown;
10170
10171             case 'o':
10172               if (name[2] == 'n' &&
10173                   name[3] == 't' &&
10174                   name[4] == 'i' &&
10175                   name[5] == 'n' &&
10176                   name[6] == 'u' &&
10177                   name[7] == 'e')
10178               {                                   /* continue   */
10179                 return -KEY_continue;
10180               }
10181
10182               goto unknown;
10183
10184             default:
10185               goto unknown;
10186           }
10187
10188         case 'd':
10189           if (name[1] == 'b' &&
10190               name[2] == 'm' &&
10191               name[3] == 'c' &&
10192               name[4] == 'l' &&
10193               name[5] == 'o' &&
10194               name[6] == 's' &&
10195               name[7] == 'e')
10196           {                                       /* dbmclose   */
10197             return -KEY_dbmclose;
10198           }
10199
10200           goto unknown;
10201
10202         case 'e':
10203           if (name[1] == 'n' &&
10204               name[2] == 'd')
10205           {
10206             switch (name[3])
10207             {
10208               case 'g':
10209                 if (name[4] == 'r' &&
10210                     name[5] == 'e' &&
10211                     name[6] == 'n' &&
10212                     name[7] == 't')
10213                 {                                 /* endgrent   */
10214                   return -KEY_endgrent;
10215                 }
10216
10217                 goto unknown;
10218
10219               case 'p':
10220                 if (name[4] == 'w' &&
10221                     name[5] == 'e' &&
10222                     name[6] == 'n' &&
10223                     name[7] == 't')
10224                 {                                 /* endpwent   */
10225                   return -KEY_endpwent;
10226                 }
10227
10228                 goto unknown;
10229
10230               default:
10231                 goto unknown;
10232             }
10233           }
10234
10235           goto unknown;
10236
10237         case 'f':
10238           if (name[1] == 'o' &&
10239               name[2] == 'r' &&
10240               name[3] == 'm' &&
10241               name[4] == 'l' &&
10242               name[5] == 'i' &&
10243               name[6] == 'n' &&
10244               name[7] == 'e')
10245           {                                       /* formline   */
10246             return -KEY_formline;
10247           }
10248
10249           goto unknown;
10250
10251         case 'g':
10252           if (name[1] == 'e' &&
10253               name[2] == 't')
10254           {
10255             switch (name[3])
10256             {
10257               case 'g':
10258                 if (name[4] == 'r')
10259                 {
10260                   switch (name[5])
10261                   {
10262                     case 'e':
10263                       if (name[6] == 'n' &&
10264                           name[7] == 't')
10265                       {                           /* getgrent   */
10266                         return -KEY_getgrent;
10267                       }
10268
10269                       goto unknown;
10270
10271                     case 'g':
10272                       if (name[6] == 'i' &&
10273                           name[7] == 'd')
10274                       {                           /* getgrgid   */
10275                         return -KEY_getgrgid;
10276                       }
10277
10278                       goto unknown;
10279
10280                     case 'n':
10281                       if (name[6] == 'a' &&
10282                           name[7] == 'm')
10283                       {                           /* getgrnam   */
10284                         return -KEY_getgrnam;
10285                       }
10286
10287                       goto unknown;
10288
10289                     default:
10290                       goto unknown;
10291                   }
10292                 }
10293
10294                 goto unknown;
10295
10296               case 'l':
10297                 if (name[4] == 'o' &&
10298                     name[5] == 'g' &&
10299                     name[6] == 'i' &&
10300                     name[7] == 'n')
10301                 {                                 /* getlogin   */
10302                   return -KEY_getlogin;
10303                 }
10304
10305                 goto unknown;
10306
10307               case 'p':
10308                 if (name[4] == 'w')
10309                 {
10310                   switch (name[5])
10311                   {
10312                     case 'e':
10313                       if (name[6] == 'n' &&
10314                           name[7] == 't')
10315                       {                           /* getpwent   */
10316                         return -KEY_getpwent;
10317                       }
10318
10319                       goto unknown;
10320
10321                     case 'n':
10322                       if (name[6] == 'a' &&
10323                           name[7] == 'm')
10324                       {                           /* getpwnam   */
10325                         return -KEY_getpwnam;
10326                       }
10327
10328                       goto unknown;
10329
10330                     case 'u':
10331                       if (name[6] == 'i' &&
10332                           name[7] == 'd')
10333                       {                           /* getpwuid   */
10334                         return -KEY_getpwuid;
10335                       }
10336
10337                       goto unknown;
10338
10339                     default:
10340                       goto unknown;
10341                   }
10342                 }
10343
10344                 goto unknown;
10345
10346               default:
10347                 goto unknown;
10348             }
10349           }
10350
10351           goto unknown;
10352
10353         case 'r':
10354           if (name[1] == 'e' &&
10355               name[2] == 'a' &&
10356               name[3] == 'd')
10357           {
10358             switch (name[4])
10359             {
10360               case 'l':
10361                 if (name[5] == 'i' &&
10362                     name[6] == 'n')
10363                 {
10364                   switch (name[7])
10365                   {
10366                     case 'e':
10367                       {                           /* readline   */
10368                         return -KEY_readline;
10369                       }
10370
10371                     case 'k':
10372                       {                           /* readlink   */
10373                         return -KEY_readlink;
10374                       }
10375
10376                     default:
10377                       goto unknown;
10378                   }
10379                 }
10380
10381                 goto unknown;
10382
10383               case 'p':
10384                 if (name[5] == 'i' &&
10385                     name[6] == 'p' &&
10386                     name[7] == 'e')
10387                 {                                 /* readpipe   */
10388                   return -KEY_readpipe;
10389                 }
10390
10391                 goto unknown;
10392
10393               default:
10394                 goto unknown;
10395             }
10396           }
10397
10398           goto unknown;
10399
10400         case 's':
10401           switch (name[1])
10402           {
10403             case 'e':
10404               if (name[2] == 't')
10405               {
10406                 switch (name[3])
10407                 {
10408                   case 'g':
10409                     if (name[4] == 'r' &&
10410                         name[5] == 'e' &&
10411                         name[6] == 'n' &&
10412                         name[7] == 't')
10413                     {                             /* setgrent   */
10414                       return -KEY_setgrent;
10415                     }
10416
10417                     goto unknown;
10418
10419                   case 'p':
10420                     if (name[4] == 'w' &&
10421                         name[5] == 'e' &&
10422                         name[6] == 'n' &&
10423                         name[7] == 't')
10424                     {                             /* setpwent   */
10425                       return -KEY_setpwent;
10426                     }
10427
10428                     goto unknown;
10429
10430                   default:
10431                     goto unknown;
10432                 }
10433               }
10434
10435               goto unknown;
10436
10437             case 'h':
10438               switch (name[2])
10439               {
10440                 case 'm':
10441                   if (name[3] == 'w' &&
10442                       name[4] == 'r' &&
10443                       name[5] == 'i' &&
10444                       name[6] == 't' &&
10445                       name[7] == 'e')
10446                   {                               /* shmwrite   */
10447                     return -KEY_shmwrite;
10448                   }
10449
10450                   goto unknown;
10451
10452                 case 'u':
10453                   if (name[3] == 't' &&
10454                       name[4] == 'd' &&
10455                       name[5] == 'o' &&
10456                       name[6] == 'w' &&
10457                       name[7] == 'n')
10458                   {                               /* shutdown   */
10459                     return -KEY_shutdown;
10460                   }
10461
10462                   goto unknown;
10463
10464                 default:
10465                   goto unknown;
10466               }
10467
10468             case 'y':
10469               if (name[2] == 's' &&
10470                   name[3] == 'w' &&
10471                   name[4] == 'r' &&
10472                   name[5] == 'i' &&
10473                   name[6] == 't' &&
10474                   name[7] == 'e')
10475               {                                   /* syswrite   */
10476                 return -KEY_syswrite;
10477               }
10478
10479               goto unknown;
10480
10481             default:
10482               goto unknown;
10483           }
10484
10485         case 't':
10486           if (name[1] == 'r' &&
10487               name[2] == 'u' &&
10488               name[3] == 'n' &&
10489               name[4] == 'c' &&
10490               name[5] == 'a' &&
10491               name[6] == 't' &&
10492               name[7] == 'e')
10493           {                                       /* truncate   */
10494             return -KEY_truncate;
10495           }
10496
10497           goto unknown;
10498
10499         default:
10500           goto unknown;
10501       }
10502
10503     case 9: /* 9 tokens of length 9 */
10504       switch (name[0])
10505       {
10506         case 'U':
10507           if (name[1] == 'N' &&
10508               name[2] == 'I' &&
10509               name[3] == 'T' &&
10510               name[4] == 'C' &&
10511               name[5] == 'H' &&
10512               name[6] == 'E' &&
10513               name[7] == 'C' &&
10514               name[8] == 'K')
10515           {                                       /* UNITCHECK  */
10516             return KEY_UNITCHECK;
10517           }
10518
10519           goto unknown;
10520
10521         case 'e':
10522           if (name[1] == 'n' &&
10523               name[2] == 'd' &&
10524               name[3] == 'n' &&
10525               name[4] == 'e' &&
10526               name[5] == 't' &&
10527               name[6] == 'e' &&
10528               name[7] == 'n' &&
10529               name[8] == 't')
10530           {                                       /* endnetent  */
10531             return -KEY_endnetent;
10532           }
10533
10534           goto unknown;
10535
10536         case 'g':
10537           if (name[1] == 'e' &&
10538               name[2] == 't' &&
10539               name[3] == 'n' &&
10540               name[4] == 'e' &&
10541               name[5] == 't' &&
10542               name[6] == 'e' &&
10543               name[7] == 'n' &&
10544               name[8] == 't')
10545           {                                       /* getnetent  */
10546             return -KEY_getnetent;
10547           }
10548
10549           goto unknown;
10550
10551         case 'l':
10552           if (name[1] == 'o' &&
10553               name[2] == 'c' &&
10554               name[3] == 'a' &&
10555               name[4] == 'l' &&
10556               name[5] == 't' &&
10557               name[6] == 'i' &&
10558               name[7] == 'm' &&
10559               name[8] == 'e')
10560           {                                       /* localtime  */
10561             return -KEY_localtime;
10562           }
10563
10564           goto unknown;
10565
10566         case 'p':
10567           if (name[1] == 'r' &&
10568               name[2] == 'o' &&
10569               name[3] == 't' &&
10570               name[4] == 'o' &&
10571               name[5] == 't' &&
10572               name[6] == 'y' &&
10573               name[7] == 'p' &&
10574               name[8] == 'e')
10575           {                                       /* prototype  */
10576             return KEY_prototype;
10577           }
10578
10579           goto unknown;
10580
10581         case 'q':
10582           if (name[1] == 'u' &&
10583               name[2] == 'o' &&
10584               name[3] == 't' &&
10585               name[4] == 'e' &&
10586               name[5] == 'm' &&
10587               name[6] == 'e' &&
10588               name[7] == 't' &&
10589               name[8] == 'a')
10590           {                                       /* quotemeta  */
10591             return -KEY_quotemeta;
10592           }
10593
10594           goto unknown;
10595
10596         case 'r':
10597           if (name[1] == 'e' &&
10598               name[2] == 'w' &&
10599               name[3] == 'i' &&
10600               name[4] == 'n' &&
10601               name[5] == 'd' &&
10602               name[6] == 'd' &&
10603               name[7] == 'i' &&
10604               name[8] == 'r')
10605           {                                       /* rewinddir  */
10606             return -KEY_rewinddir;
10607           }
10608
10609           goto unknown;
10610
10611         case 's':
10612           if (name[1] == 'e' &&
10613               name[2] == 't' &&
10614               name[3] == 'n' &&
10615               name[4] == 'e' &&
10616               name[5] == 't' &&
10617               name[6] == 'e' &&
10618               name[7] == 'n' &&
10619               name[8] == 't')
10620           {                                       /* setnetent  */
10621             return -KEY_setnetent;
10622           }
10623
10624           goto unknown;
10625
10626         case 'w':
10627           if (name[1] == 'a' &&
10628               name[2] == 'n' &&
10629               name[3] == 't' &&
10630               name[4] == 'a' &&
10631               name[5] == 'r' &&
10632               name[6] == 'r' &&
10633               name[7] == 'a' &&
10634               name[8] == 'y')
10635           {                                       /* wantarray  */
10636             return -KEY_wantarray;
10637           }
10638
10639           goto unknown;
10640
10641         default:
10642           goto unknown;
10643       }
10644
10645     case 10: /* 9 tokens of length 10 */
10646       switch (name[0])
10647       {
10648         case 'e':
10649           if (name[1] == 'n' &&
10650               name[2] == 'd')
10651           {
10652             switch (name[3])
10653             {
10654               case 'h':
10655                 if (name[4] == 'o' &&
10656                     name[5] == 's' &&
10657                     name[6] == 't' &&
10658                     name[7] == 'e' &&
10659                     name[8] == 'n' &&
10660                     name[9] == 't')
10661                 {                                 /* endhostent */
10662                   return -KEY_endhostent;
10663                 }
10664
10665                 goto unknown;
10666
10667               case 's':
10668                 if (name[4] == 'e' &&
10669                     name[5] == 'r' &&
10670                     name[6] == 'v' &&
10671                     name[7] == 'e' &&
10672                     name[8] == 'n' &&
10673                     name[9] == 't')
10674                 {                                 /* endservent */
10675                   return -KEY_endservent;
10676                 }
10677
10678                 goto unknown;
10679
10680               default:
10681                 goto unknown;
10682             }
10683           }
10684
10685           goto unknown;
10686
10687         case 'g':
10688           if (name[1] == 'e' &&
10689               name[2] == 't')
10690           {
10691             switch (name[3])
10692             {
10693               case 'h':
10694                 if (name[4] == 'o' &&
10695                     name[5] == 's' &&
10696                     name[6] == 't' &&
10697                     name[7] == 'e' &&
10698                     name[8] == 'n' &&
10699                     name[9] == 't')
10700                 {                                 /* gethostent */
10701                   return -KEY_gethostent;
10702                 }
10703
10704                 goto unknown;
10705
10706               case 's':
10707                 switch (name[4])
10708                 {
10709                   case 'e':
10710                     if (name[5] == 'r' &&
10711                         name[6] == 'v' &&
10712                         name[7] == 'e' &&
10713                         name[8] == 'n' &&
10714                         name[9] == 't')
10715                     {                             /* getservent */
10716                       return -KEY_getservent;
10717                     }
10718
10719                     goto unknown;
10720
10721                   case 'o':
10722                     if (name[5] == 'c' &&
10723                         name[6] == 'k' &&
10724                         name[7] == 'o' &&
10725                         name[8] == 'p' &&
10726                         name[9] == 't')
10727                     {                             /* getsockopt */
10728                       return -KEY_getsockopt;
10729                     }
10730
10731                     goto unknown;
10732
10733                   default:
10734                     goto unknown;
10735                 }
10736
10737               default:
10738                 goto unknown;
10739             }
10740           }
10741
10742           goto unknown;
10743
10744         case 's':
10745           switch (name[1])
10746           {
10747             case 'e':
10748               if (name[2] == 't')
10749               {
10750                 switch (name[3])
10751                 {
10752                   case 'h':
10753                     if (name[4] == 'o' &&
10754                         name[5] == 's' &&
10755                         name[6] == 't' &&
10756                         name[7] == 'e' &&
10757                         name[8] == 'n' &&
10758                         name[9] == 't')
10759                     {                             /* sethostent */
10760                       return -KEY_sethostent;
10761                     }
10762
10763                     goto unknown;
10764
10765                   case 's':
10766                     switch (name[4])
10767                     {
10768                       case 'e':
10769                         if (name[5] == 'r' &&
10770                             name[6] == 'v' &&
10771                             name[7] == 'e' &&
10772                             name[8] == 'n' &&
10773                             name[9] == 't')
10774                         {                         /* setservent */
10775                           return -KEY_setservent;
10776                         }
10777
10778                         goto unknown;
10779
10780                       case 'o':
10781                         if (name[5] == 'c' &&
10782                             name[6] == 'k' &&
10783                             name[7] == 'o' &&
10784                             name[8] == 'p' &&
10785                             name[9] == 't')
10786                         {                         /* setsockopt */
10787                           return -KEY_setsockopt;
10788                         }
10789
10790                         goto unknown;
10791
10792                       default:
10793                         goto unknown;
10794                     }
10795
10796                   default:
10797                     goto unknown;
10798                 }
10799               }
10800
10801               goto unknown;
10802
10803             case 'o':
10804               if (name[2] == 'c' &&
10805                   name[3] == 'k' &&
10806                   name[4] == 'e' &&
10807                   name[5] == 't' &&
10808                   name[6] == 'p' &&
10809                   name[7] == 'a' &&
10810                   name[8] == 'i' &&
10811                   name[9] == 'r')
10812               {                                   /* socketpair */
10813                 return -KEY_socketpair;
10814               }
10815
10816               goto unknown;
10817
10818             default:
10819               goto unknown;
10820           }
10821
10822         default:
10823           goto unknown;
10824       }
10825
10826     case 11: /* 8 tokens of length 11 */
10827       switch (name[0])
10828       {
10829         case '_':
10830           if (name[1] == '_' &&
10831               name[2] == 'P' &&
10832               name[3] == 'A' &&
10833               name[4] == 'C' &&
10834               name[5] == 'K' &&
10835               name[6] == 'A' &&
10836               name[7] == 'G' &&
10837               name[8] == 'E' &&
10838               name[9] == '_' &&
10839               name[10] == '_')
10840           {                                       /* __PACKAGE__ */
10841             return -KEY___PACKAGE__;
10842           }
10843
10844           goto unknown;
10845
10846         case 'e':
10847           if (name[1] == 'n' &&
10848               name[2] == 'd' &&
10849               name[3] == 'p' &&
10850               name[4] == 'r' &&
10851               name[5] == 'o' &&
10852               name[6] == 't' &&
10853               name[7] == 'o' &&
10854               name[8] == 'e' &&
10855               name[9] == 'n' &&
10856               name[10] == 't')
10857           {                                       /* endprotoent */
10858             return -KEY_endprotoent;
10859           }
10860
10861           goto unknown;
10862
10863         case 'g':
10864           if (name[1] == 'e' &&
10865               name[2] == 't')
10866           {
10867             switch (name[3])
10868             {
10869               case 'p':
10870                 switch (name[4])
10871                 {
10872                   case 'e':
10873                     if (name[5] == 'e' &&
10874                         name[6] == 'r' &&
10875                         name[7] == 'n' &&
10876                         name[8] == 'a' &&
10877                         name[9] == 'm' &&
10878                         name[10] == 'e')
10879                     {                             /* getpeername */
10880                       return -KEY_getpeername;
10881                     }
10882
10883                     goto unknown;
10884
10885                   case 'r':
10886                     switch (name[5])
10887                     {
10888                       case 'i':
10889                         if (name[6] == 'o' &&
10890                             name[7] == 'r' &&
10891                             name[8] == 'i' &&
10892                             name[9] == 't' &&
10893                             name[10] == 'y')
10894                         {                         /* getpriority */
10895                           return -KEY_getpriority;
10896                         }
10897
10898                         goto unknown;
10899
10900                       case 'o':
10901                         if (name[6] == 't' &&
10902                             name[7] == 'o' &&
10903                             name[8] == 'e' &&
10904                             name[9] == 'n' &&
10905                             name[10] == 't')
10906                         {                         /* getprotoent */
10907                           return -KEY_getprotoent;
10908                         }
10909
10910                         goto unknown;
10911
10912                       default:
10913                         goto unknown;
10914                     }
10915
10916                   default:
10917                     goto unknown;
10918                 }
10919
10920               case 's':
10921                 if (name[4] == 'o' &&
10922                     name[5] == 'c' &&
10923                     name[6] == 'k' &&
10924                     name[7] == 'n' &&
10925                     name[8] == 'a' &&
10926                     name[9] == 'm' &&
10927                     name[10] == 'e')
10928                 {                                 /* getsockname */
10929                   return -KEY_getsockname;
10930                 }
10931
10932                 goto unknown;
10933
10934               default:
10935                 goto unknown;
10936             }
10937           }
10938
10939           goto unknown;
10940
10941         case 's':
10942           if (name[1] == 'e' &&
10943               name[2] == 't' &&
10944               name[3] == 'p' &&
10945               name[4] == 'r')
10946           {
10947             switch (name[5])
10948             {
10949               case 'i':
10950                 if (name[6] == 'o' &&
10951                     name[7] == 'r' &&
10952                     name[8] == 'i' &&
10953                     name[9] == 't' &&
10954                     name[10] == 'y')
10955                 {                                 /* setpriority */
10956                   return -KEY_setpriority;
10957                 }
10958
10959                 goto unknown;
10960
10961               case 'o':
10962                 if (name[6] == 't' &&
10963                     name[7] == 'o' &&
10964                     name[8] == 'e' &&
10965                     name[9] == 'n' &&
10966                     name[10] == 't')
10967                 {                                 /* setprotoent */
10968                   return -KEY_setprotoent;
10969                 }
10970
10971                 goto unknown;
10972
10973               default:
10974                 goto unknown;
10975             }
10976           }
10977
10978           goto unknown;
10979
10980         default:
10981           goto unknown;
10982       }
10983
10984     case 12: /* 2 tokens of length 12 */
10985       if (name[0] == 'g' &&
10986           name[1] == 'e' &&
10987           name[2] == 't' &&
10988           name[3] == 'n' &&
10989           name[4] == 'e' &&
10990           name[5] == 't' &&
10991           name[6] == 'b' &&
10992           name[7] == 'y')
10993       {
10994         switch (name[8])
10995         {
10996           case 'a':
10997             if (name[9] == 'd' &&
10998                 name[10] == 'd' &&
10999                 name[11] == 'r')
11000             {                                     /* getnetbyaddr */
11001               return -KEY_getnetbyaddr;
11002             }
11003
11004             goto unknown;
11005
11006           case 'n':
11007             if (name[9] == 'a' &&
11008                 name[10] == 'm' &&
11009                 name[11] == 'e')
11010             {                                     /* getnetbyname */
11011               return -KEY_getnetbyname;
11012             }
11013
11014             goto unknown;
11015
11016           default:
11017             goto unknown;
11018         }
11019       }
11020
11021       goto unknown;
11022
11023     case 13: /* 4 tokens of length 13 */
11024       if (name[0] == 'g' &&
11025           name[1] == 'e' &&
11026           name[2] == 't')
11027       {
11028         switch (name[3])
11029         {
11030           case 'h':
11031             if (name[4] == 'o' &&
11032                 name[5] == 's' &&
11033                 name[6] == 't' &&
11034                 name[7] == 'b' &&
11035                 name[8] == 'y')
11036             {
11037               switch (name[9])
11038               {
11039                 case 'a':
11040                   if (name[10] == 'd' &&
11041                       name[11] == 'd' &&
11042                       name[12] == 'r')
11043                   {                               /* gethostbyaddr */
11044                     return -KEY_gethostbyaddr;
11045                   }
11046
11047                   goto unknown;
11048
11049                 case 'n':
11050                   if (name[10] == 'a' &&
11051                       name[11] == 'm' &&
11052                       name[12] == 'e')
11053                   {                               /* gethostbyname */
11054                     return -KEY_gethostbyname;
11055                   }
11056
11057                   goto unknown;
11058
11059                 default:
11060                   goto unknown;
11061               }
11062             }
11063
11064             goto unknown;
11065
11066           case 's':
11067             if (name[4] == 'e' &&
11068                 name[5] == 'r' &&
11069                 name[6] == 'v' &&
11070                 name[7] == 'b' &&
11071                 name[8] == 'y')
11072             {
11073               switch (name[9])
11074               {
11075                 case 'n':
11076                   if (name[10] == 'a' &&
11077                       name[11] == 'm' &&
11078                       name[12] == 'e')
11079                   {                               /* getservbyname */
11080                     return -KEY_getservbyname;
11081                   }
11082
11083                   goto unknown;
11084
11085                 case 'p':
11086                   if (name[10] == 'o' &&
11087                       name[11] == 'r' &&
11088                       name[12] == 't')
11089                   {                               /* getservbyport */
11090                     return -KEY_getservbyport;
11091                   }
11092
11093                   goto unknown;
11094
11095                 default:
11096                   goto unknown;
11097               }
11098             }
11099
11100             goto unknown;
11101
11102           default:
11103             goto unknown;
11104         }
11105       }
11106
11107       goto unknown;
11108
11109     case 14: /* 1 tokens of length 14 */
11110       if (name[0] == 'g' &&
11111           name[1] == 'e' &&
11112           name[2] == 't' &&
11113           name[3] == 'p' &&
11114           name[4] == 'r' &&
11115           name[5] == 'o' &&
11116           name[6] == 't' &&
11117           name[7] == 'o' &&
11118           name[8] == 'b' &&
11119           name[9] == 'y' &&
11120           name[10] == 'n' &&
11121           name[11] == 'a' &&
11122           name[12] == 'm' &&
11123           name[13] == 'e')
11124       {                                           /* getprotobyname */
11125         return -KEY_getprotobyname;
11126       }
11127
11128       goto unknown;
11129
11130     case 16: /* 1 tokens of length 16 */
11131       if (name[0] == 'g' &&
11132           name[1] == 'e' &&
11133           name[2] == 't' &&
11134           name[3] == 'p' &&
11135           name[4] == 'r' &&
11136           name[5] == 'o' &&
11137           name[6] == 't' &&
11138           name[7] == 'o' &&
11139           name[8] == 'b' &&
11140           name[9] == 'y' &&
11141           name[10] == 'n' &&
11142           name[11] == 'u' &&
11143           name[12] == 'm' &&
11144           name[13] == 'b' &&
11145           name[14] == 'e' &&
11146           name[15] == 'r')
11147       {                                           /* getprotobynumber */
11148         return -KEY_getprotobynumber;
11149       }
11150
11151       goto unknown;
11152
11153     default:
11154       goto unknown;
11155   }
11156
11157 unknown:
11158   return 0;
11159 }
11160
11161 STATIC void
11162 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11163 {
11164     dVAR;
11165
11166     PERL_ARGS_ASSERT_CHECKCOMMA;
11167
11168     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11169         if (ckWARN(WARN_SYNTAX)) {
11170             int level = 1;
11171             const char *w;
11172             for (w = s+2; *w && level; w++) {
11173                 if (*w == '(')
11174                     ++level;
11175                 else if (*w == ')')
11176                     --level;
11177             }
11178             while (isSPACE(*w))
11179                 ++w;
11180             /* the list of chars below is for end of statements or
11181              * block / parens, boolean operators (&&, ||, //) and branch
11182              * constructs (or, and, if, until, unless, while, err, for).
11183              * Not a very solid hack... */
11184             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11185                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11186                             "%s (...) interpreted as function",name);
11187         }
11188     }
11189     while (s < PL_bufend && isSPACE(*s))
11190         s++;
11191     if (*s == '(')
11192         s++;
11193     while (s < PL_bufend && isSPACE(*s))
11194         s++;
11195     if (isIDFIRST_lazy_if(s,UTF)) {
11196         const char * const w = s++;
11197         while (isALNUM_lazy_if(s,UTF))
11198             s++;
11199         while (s < PL_bufend && isSPACE(*s))
11200             s++;
11201         if (*s == ',') {
11202             GV* gv;
11203             if (keyword(w, s - w, 0))
11204                 return;
11205
11206             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11207             if (gv && GvCVu(gv))
11208                 return;
11209             Perl_croak(aTHX_ "No comma allowed after %s", what);
11210         }
11211     }
11212 }
11213
11214 /* Either returns sv, or mortalizes sv and returns a new SV*.
11215    Best used as sv=new_constant(..., sv, ...).
11216    If s, pv are NULL, calls subroutine with one argument,
11217    and type is used with error messages only. */
11218
11219 STATIC SV *
11220 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11221                SV *sv, SV *pv, const char *type, STRLEN typelen)
11222 {
11223     dVAR; dSP;
11224     HV * const table = GvHV(PL_hintgv);          /* ^H */
11225     SV *res;
11226     SV **cvp;
11227     SV *cv, *typesv;
11228     const char *why1 = "", *why2 = "", *why3 = "";
11229
11230     PERL_ARGS_ASSERT_NEW_CONSTANT;
11231
11232     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11233         SV *msg;
11234
11235         why2 = (const char *)
11236             (strEQ(key,"charnames")
11237              ? "(possibly a missing \"use charnames ...\")"
11238              : "");
11239         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11240                             (type ? type: "undef"), why2);
11241
11242         /* This is convoluted and evil ("goto considered harmful")
11243          * but I do not understand the intricacies of all the different
11244          * failure modes of %^H in here.  The goal here is to make
11245          * the most probable error message user-friendly. --jhi */
11246
11247         goto msgdone;
11248
11249     report:
11250         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11251                             (type ? type: "undef"), why1, why2, why3);
11252     msgdone:
11253         yyerror(SvPVX_const(msg));
11254         SvREFCNT_dec(msg);
11255         return sv;
11256     }
11257     cvp = hv_fetch(table, key, keylen, FALSE);
11258     if (!cvp || !SvOK(*cvp)) {
11259         why1 = "$^H{";
11260         why2 = key;
11261         why3 = "} is not defined";
11262         goto report;
11263     }
11264     sv_2mortal(sv);                     /* Parent created it permanently */
11265     cv = *cvp;
11266     if (!pv && s)
11267         pv = newSVpvn_flags(s, len, SVs_TEMP);
11268     if (type && pv)
11269         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11270     else
11271         typesv = &PL_sv_undef;
11272
11273     PUSHSTACKi(PERLSI_OVERLOAD);
11274     ENTER ;
11275     SAVETMPS;
11276
11277     PUSHMARK(SP) ;
11278     EXTEND(sp, 3);
11279     if (pv)
11280         PUSHs(pv);
11281     PUSHs(sv);
11282     if (pv)
11283         PUSHs(typesv);
11284     PUTBACK;
11285     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11286
11287     SPAGAIN ;
11288
11289     /* Check the eval first */
11290     if (!PL_in_eval && SvTRUE(ERRSV)) {
11291         sv_catpvs(ERRSV, "Propagated");
11292         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11293         (void)POPs;
11294         res = SvREFCNT_inc_simple(sv);
11295     }
11296     else {
11297         res = POPs;
11298         SvREFCNT_inc_simple_void(res);
11299     }
11300
11301     PUTBACK ;
11302     FREETMPS ;
11303     LEAVE ;
11304     POPSTACK;
11305
11306     if (!SvOK(res)) {
11307         why1 = "Call to &{$^H{";
11308         why2 = key;
11309         why3 = "}} did not return a defined value";
11310         sv = res;
11311         goto report;
11312     }
11313
11314     return res;
11315 }
11316
11317 /* Returns a NUL terminated string, with the length of the string written to
11318    *slp
11319    */
11320 STATIC char *
11321 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11322 {
11323     dVAR;
11324     register char *d = dest;
11325     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11326
11327     PERL_ARGS_ASSERT_SCAN_WORD;
11328
11329     for (;;) {
11330         if (d >= e)
11331             Perl_croak(aTHX_ ident_too_long);
11332         if (isALNUM(*s))        /* UTF handled below */
11333             *d++ = *s++;
11334         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11335             *d++ = ':';
11336             *d++ = ':';
11337             s++;
11338         }
11339         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11340             *d++ = *s++;
11341             *d++ = *s++;
11342         }
11343         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11344             char *t = s + UTF8SKIP(s);
11345             size_t len;
11346             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11347                 t += UTF8SKIP(t);
11348             len = t - s;
11349             if (d + len > e)
11350                 Perl_croak(aTHX_ ident_too_long);
11351             Copy(s, d, len, char);
11352             d += len;
11353             s = t;
11354         }
11355         else {
11356             *d = '\0';
11357             *slp = d - dest;
11358             return s;
11359         }
11360     }
11361 }
11362
11363 STATIC char *
11364 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11365 {
11366     dVAR;
11367     char *bracket = NULL;
11368     char funny = *s++;
11369     register char *d = dest;
11370     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11371
11372     PERL_ARGS_ASSERT_SCAN_IDENT;
11373
11374     if (isSPACE(*s))
11375         s = PEEKSPACE(s);
11376     if (isDIGIT(*s)) {
11377         while (isDIGIT(*s)) {
11378             if (d >= e)
11379                 Perl_croak(aTHX_ ident_too_long);
11380             *d++ = *s++;
11381         }
11382     }
11383     else {
11384         for (;;) {
11385             if (d >= e)
11386                 Perl_croak(aTHX_ ident_too_long);
11387             if (isALNUM(*s))    /* UTF handled below */
11388                 *d++ = *s++;
11389             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11390                 *d++ = ':';
11391                 *d++ = ':';
11392                 s++;
11393             }
11394             else if (*s == ':' && s[1] == ':') {
11395                 *d++ = *s++;
11396                 *d++ = *s++;
11397             }
11398             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11399                 char *t = s + UTF8SKIP(s);
11400                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11401                     t += UTF8SKIP(t);
11402                 if (d + (t - s) > e)
11403                     Perl_croak(aTHX_ ident_too_long);
11404                 Copy(s, d, t - s, char);
11405                 d += t - s;
11406                 s = t;
11407             }
11408             else
11409                 break;
11410         }
11411     }
11412     *d = '\0';
11413     d = dest;
11414     if (*d) {
11415         if (PL_lex_state != LEX_NORMAL)
11416             PL_lex_state = LEX_INTERPENDMAYBE;
11417         return s;
11418     }
11419     if (*s == '$' && s[1] &&
11420         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11421     {
11422         return s;
11423     }
11424     if (*s == '{') {
11425         bracket = s;
11426         s++;
11427     }
11428     else if (ck_uni)
11429         check_uni();
11430     if (s < send)
11431         *d = *s++;
11432     d[1] = '\0';
11433     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11434         *d = toCTRL(*s);
11435         s++;
11436     }
11437     if (bracket) {
11438         if (isSPACE(s[-1])) {
11439             while (s < send) {
11440                 const char ch = *s++;
11441                 if (!SPACE_OR_TAB(ch)) {
11442                     *d = ch;
11443                     break;
11444                 }
11445             }
11446         }
11447         if (isIDFIRST_lazy_if(d,UTF)) {
11448             d++;
11449             if (UTF) {
11450                 char *end = s;
11451                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11452                     end += UTF8SKIP(end);
11453                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11454                         end += UTF8SKIP(end);
11455                 }
11456                 Copy(s, d, end - s, char);
11457                 d += end - s;
11458                 s = end;
11459             }
11460             else {
11461                 while ((isALNUM(*s) || *s == ':') && d < e)
11462                     *d++ = *s++;
11463                 if (d >= e)
11464                     Perl_croak(aTHX_ ident_too_long);
11465             }
11466             *d = '\0';
11467             while (s < send && SPACE_OR_TAB(*s))
11468                 s++;
11469             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11470                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11471                     const char * const brack =
11472                         (const char *)
11473                         ((*s == '[') ? "[...]" : "{...}");
11474                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11475                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11476                         funny, dest, brack, funny, dest, brack);
11477                 }
11478                 bracket++;
11479                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11480                 return s;
11481             }
11482         }
11483         /* Handle extended ${^Foo} variables
11484          * 1999-02-27 mjd-perl-patch@plover.com */
11485         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11486                  && isALNUM(*s))
11487         {
11488             d++;
11489             while (isALNUM(*s) && d < e) {
11490                 *d++ = *s++;
11491             }
11492             if (d >= e)
11493                 Perl_croak(aTHX_ ident_too_long);
11494             *d = '\0';
11495         }
11496         if (*s == '}') {
11497             s++;
11498             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11499                 PL_lex_state = LEX_INTERPEND;
11500                 PL_expect = XREF;
11501             }
11502             if (PL_lex_state == LEX_NORMAL) {
11503                 if (ckWARN(WARN_AMBIGUOUS) &&
11504                     (keyword(dest, d - dest, 0)
11505                      || get_cvn_flags(dest, d - dest, 0)))
11506                 {
11507                     if (funny == '#')
11508                         funny = '@';
11509                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11510                         "Ambiguous use of %c{%s} resolved to %c%s",
11511                         funny, dest, funny, dest);
11512                 }
11513             }
11514         }
11515         else {
11516             s = bracket;                /* let the parser handle it */
11517             *dest = '\0';
11518         }
11519     }
11520     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11521         PL_lex_state = LEX_INTERPEND;
11522     return s;
11523 }
11524
11525 static U32
11526 S_pmflag(U32 pmfl, const char ch) {
11527     switch (ch) {
11528         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11529     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11530     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11531     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11532     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11533     }
11534     return pmfl;
11535 }
11536
11537 void
11538 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11539 {
11540     PERL_ARGS_ASSERT_PMFLAG;
11541
11542     Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11543                      "Perl_pmflag() is deprecated, and will be removed from the XS API");
11544
11545     if (ch<256) {
11546         *pmfl = S_pmflag(*pmfl, (char)ch);
11547     }
11548 }
11549
11550 STATIC char *
11551 S_scan_pat(pTHX_ char *start, I32 type)
11552 {
11553     dVAR;
11554     PMOP *pm;
11555     char *s = scan_str(start,!!PL_madskills,FALSE);
11556     const char * const valid_flags =
11557         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11558 #ifdef PERL_MAD
11559     char *modstart;
11560 #endif
11561
11562     PERL_ARGS_ASSERT_SCAN_PAT;
11563
11564     if (!s) {
11565         const char * const delimiter = skipspace(start);
11566         Perl_croak(aTHX_
11567                    (const char *)
11568                    (*delimiter == '?'
11569                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11570                     : "Search pattern not terminated" ));
11571     }
11572
11573     pm = (PMOP*)newPMOP(type, 0);
11574     if (PL_multi_open == '?') {
11575         /* This is the only point in the code that sets PMf_ONCE:  */
11576         pm->op_pmflags |= PMf_ONCE;
11577
11578         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11579            allows us to restrict the list needed by reset to just the ??
11580            matches.  */
11581         assert(type != OP_TRANS);
11582         if (PL_curstash) {
11583             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11584             U32 elements;
11585             if (!mg) {
11586                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11587                                  0);
11588             }
11589             elements = mg->mg_len / sizeof(PMOP**);
11590             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11591             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11592             mg->mg_len = elements * sizeof(PMOP**);
11593             PmopSTASH_set(pm,PL_curstash);
11594         }
11595     }
11596 #ifdef PERL_MAD
11597     modstart = s;
11598 #endif
11599     while (*s && strchr(valid_flags, *s))
11600         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11601 #ifdef PERL_MAD
11602     if (PL_madskills && modstart != s) {
11603         SV* tmptoken = newSVpvn(modstart, s - modstart);
11604         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11605     }
11606 #endif
11607     /* issue a warning if /c is specified,but /g is not */
11608     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11609     {
11610         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11611                        "Use of /c modifier is meaningless without /g" );
11612     }
11613
11614     PL_lex_op = (OP*)pm;
11615     pl_yylval.ival = OP_MATCH;
11616     return s;
11617 }
11618
11619 STATIC char *
11620 S_scan_subst(pTHX_ char *start)
11621 {
11622     dVAR;
11623     register char *s;
11624     register PMOP *pm;
11625     I32 first_start;
11626     I32 es = 0;
11627 #ifdef PERL_MAD
11628     char *modstart;
11629 #endif
11630
11631     PERL_ARGS_ASSERT_SCAN_SUBST;
11632
11633     pl_yylval.ival = OP_NULL;
11634
11635     s = scan_str(start,!!PL_madskills,FALSE);
11636
11637     if (!s)
11638         Perl_croak(aTHX_ "Substitution pattern not terminated");
11639
11640     if (s[-1] == PL_multi_open)
11641         s--;
11642 #ifdef PERL_MAD
11643     if (PL_madskills) {
11644         CURMAD('q', PL_thisopen);
11645         CURMAD('_', PL_thiswhite);
11646         CURMAD('E', PL_thisstuff);
11647         CURMAD('Q', PL_thisclose);
11648         PL_realtokenstart = s - SvPVX(PL_linestr);
11649     }
11650 #endif
11651
11652     first_start = PL_multi_start;
11653     s = scan_str(s,!!PL_madskills,FALSE);
11654     if (!s) {
11655         if (PL_lex_stuff) {
11656             SvREFCNT_dec(PL_lex_stuff);
11657             PL_lex_stuff = NULL;
11658         }
11659         Perl_croak(aTHX_ "Substitution replacement not terminated");
11660     }
11661     PL_multi_start = first_start;       /* so whole substitution is taken together */
11662
11663     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11664
11665 #ifdef PERL_MAD
11666     if (PL_madskills) {
11667         CURMAD('z', PL_thisopen);
11668         CURMAD('R', PL_thisstuff);
11669         CURMAD('Z', PL_thisclose);
11670     }
11671     modstart = s;
11672 #endif
11673
11674     while (*s) {
11675         if (*s == EXEC_PAT_MOD) {
11676             s++;
11677             es++;
11678         }
11679         else if (strchr(S_PAT_MODS, *s))
11680             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11681         else
11682             break;
11683     }
11684
11685 #ifdef PERL_MAD
11686     if (PL_madskills) {
11687         if (modstart != s)
11688             curmad('m', newSVpvn(modstart, s - modstart));
11689         append_madprops(PL_thismad, (OP*)pm, 0);
11690         PL_thismad = 0;
11691     }
11692 #endif
11693     if ((pm->op_pmflags & PMf_CONTINUE)) {
11694         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11695     }
11696
11697     if (es) {
11698         SV * const repl = newSVpvs("");
11699
11700         PL_sublex_info.super_bufptr = s;
11701         PL_sublex_info.super_bufend = PL_bufend;
11702         PL_multi_end = 0;
11703         pm->op_pmflags |= PMf_EVAL;
11704         while (es-- > 0) {
11705             if (es)
11706                 sv_catpvs(repl, "eval ");
11707             else
11708                 sv_catpvs(repl, "do ");
11709         }
11710         sv_catpvs(repl, "{");
11711         sv_catsv(repl, PL_lex_repl);
11712         if (strchr(SvPVX(PL_lex_repl), '#'))
11713             sv_catpvs(repl, "\n");
11714         sv_catpvs(repl, "}");
11715         SvEVALED_on(repl);
11716         SvREFCNT_dec(PL_lex_repl);
11717         PL_lex_repl = repl;
11718     }
11719
11720     PL_lex_op = (OP*)pm;
11721     pl_yylval.ival = OP_SUBST;
11722     return s;
11723 }
11724
11725 STATIC char *
11726 S_scan_trans(pTHX_ char *start)
11727 {
11728     dVAR;
11729     register char* s;
11730     OP *o;
11731     short *tbl;
11732     U8 squash;
11733     U8 del;
11734     U8 complement;
11735 #ifdef PERL_MAD
11736     char *modstart;
11737 #endif
11738
11739     PERL_ARGS_ASSERT_SCAN_TRANS;
11740
11741     pl_yylval.ival = OP_NULL;
11742
11743     s = scan_str(start,!!PL_madskills,FALSE);
11744     if (!s)
11745         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11746
11747     if (s[-1] == PL_multi_open)
11748         s--;
11749 #ifdef PERL_MAD
11750     if (PL_madskills) {
11751         CURMAD('q', PL_thisopen);
11752         CURMAD('_', PL_thiswhite);
11753         CURMAD('E', PL_thisstuff);
11754         CURMAD('Q', PL_thisclose);
11755         PL_realtokenstart = s - SvPVX(PL_linestr);
11756     }
11757 #endif
11758
11759     s = scan_str(s,!!PL_madskills,FALSE);
11760     if (!s) {
11761         if (PL_lex_stuff) {
11762             SvREFCNT_dec(PL_lex_stuff);
11763             PL_lex_stuff = NULL;
11764         }
11765         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11766     }
11767     if (PL_madskills) {
11768         CURMAD('z', PL_thisopen);
11769         CURMAD('R', PL_thisstuff);
11770         CURMAD('Z', PL_thisclose);
11771     }
11772
11773     complement = del = squash = 0;
11774 #ifdef PERL_MAD
11775     modstart = s;
11776 #endif
11777     while (1) {
11778         switch (*s) {
11779         case 'c':
11780             complement = OPpTRANS_COMPLEMENT;
11781             break;
11782         case 'd':
11783             del = OPpTRANS_DELETE;
11784             break;
11785         case 's':
11786             squash = OPpTRANS_SQUASH;
11787             break;
11788         default:
11789             goto no_more;
11790         }
11791         s++;
11792     }
11793   no_more:
11794
11795     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11796     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11797     o->op_private &= ~OPpTRANS_ALL;
11798     o->op_private |= del|squash|complement|
11799       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11800       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11801
11802     PL_lex_op = o;
11803     pl_yylval.ival = OP_TRANS;
11804
11805 #ifdef PERL_MAD
11806     if (PL_madskills) {
11807         if (modstart != s)
11808             curmad('m', newSVpvn(modstart, s - modstart));
11809         append_madprops(PL_thismad, o, 0);
11810         PL_thismad = 0;
11811     }
11812 #endif
11813
11814     return s;
11815 }
11816
11817 STATIC char *
11818 S_scan_heredoc(pTHX_ register char *s)
11819 {
11820     dVAR;
11821     SV *herewas;
11822     I32 op_type = OP_SCALAR;
11823     I32 len;
11824     SV *tmpstr;
11825     char term;
11826     const char *found_newline;
11827     register char *d;
11828     register char *e;
11829     char *peek;
11830     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11831 #ifdef PERL_MAD
11832     I32 stuffstart = s - SvPVX(PL_linestr);
11833     char *tstart;
11834
11835     PL_realtokenstart = -1;
11836 #endif
11837
11838     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11839
11840     s += 2;
11841     d = PL_tokenbuf;
11842     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11843     if (!outer)
11844         *d++ = '\n';
11845     peek = s;
11846     while (SPACE_OR_TAB(*peek))
11847         peek++;
11848     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11849         s = peek;
11850         term = *s++;
11851         s = delimcpy(d, e, s, PL_bufend, term, &len);
11852         d += len;
11853         if (s < PL_bufend)
11854             s++;
11855     }
11856     else {
11857         if (*s == '\\')
11858             s++, term = '\'';
11859         else
11860             term = '"';
11861         if (!isALNUM_lazy_if(s,UTF))
11862             deprecate("bare << to mean <<\"\"");
11863         for (; isALNUM_lazy_if(s,UTF); s++) {
11864             if (d < e)
11865                 *d++ = *s;
11866         }
11867     }
11868     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11869         Perl_croak(aTHX_ "Delimiter for here document is too long");
11870     *d++ = '\n';
11871     *d = '\0';
11872     len = d - PL_tokenbuf;
11873
11874 #ifdef PERL_MAD
11875     if (PL_madskills) {
11876         tstart = PL_tokenbuf + !outer;
11877         PL_thisclose = newSVpvn(tstart, len - !outer);
11878         tstart = SvPVX(PL_linestr) + stuffstart;
11879         PL_thisopen = newSVpvn(tstart, s - tstart);
11880         stuffstart = s - SvPVX(PL_linestr);
11881     }
11882 #endif
11883 #ifndef PERL_STRICT_CR
11884     d = strchr(s, '\r');
11885     if (d) {
11886         char * const olds = s;
11887         s = d;
11888         while (s < PL_bufend) {
11889             if (*s == '\r') {
11890                 *d++ = '\n';
11891                 if (*++s == '\n')
11892                     s++;
11893             }
11894             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11895                 *d++ = *s++;
11896                 s++;
11897             }
11898             else
11899                 *d++ = *s++;
11900         }
11901         *d = '\0';
11902         PL_bufend = d;
11903         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11904         s = olds;
11905     }
11906 #endif
11907 #ifdef PERL_MAD
11908     found_newline = 0;
11909 #endif
11910     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11911         herewas = newSVpvn(s,PL_bufend-s);
11912     }
11913     else {
11914 #ifdef PERL_MAD
11915         herewas = newSVpvn(s-1,found_newline-s+1);
11916 #else
11917         s--;
11918         herewas = newSVpvn(s,found_newline-s);
11919 #endif
11920     }
11921 #ifdef PERL_MAD
11922     if (PL_madskills) {
11923         tstart = SvPVX(PL_linestr) + stuffstart;
11924         if (PL_thisstuff)
11925             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11926         else
11927             PL_thisstuff = newSVpvn(tstart, s - tstart);
11928     }
11929 #endif
11930     s += SvCUR(herewas);
11931
11932 #ifdef PERL_MAD
11933     stuffstart = s - SvPVX(PL_linestr);
11934
11935     if (found_newline)
11936         s--;
11937 #endif
11938
11939     tmpstr = newSV_type(SVt_PVIV);
11940     SvGROW(tmpstr, 80);
11941     if (term == '\'') {
11942         op_type = OP_CONST;
11943         SvIV_set(tmpstr, -1);
11944     }
11945     else if (term == '`') {
11946         op_type = OP_BACKTICK;
11947         SvIV_set(tmpstr, '\\');
11948     }
11949
11950     CLINE;
11951     PL_multi_start = CopLINE(PL_curcop);
11952     PL_multi_open = PL_multi_close = '<';
11953     term = *PL_tokenbuf;
11954     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11955         char * const bufptr = PL_sublex_info.super_bufptr;
11956         char * const bufend = PL_sublex_info.super_bufend;
11957         char * const olds = s - SvCUR(herewas);
11958         s = strchr(bufptr, '\n');
11959         if (!s)
11960             s = bufend;
11961         d = s;
11962         while (s < bufend &&
11963           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11964             if (*s++ == '\n')
11965                 CopLINE_inc(PL_curcop);
11966         }
11967         if (s >= bufend) {
11968             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11969             missingterm(PL_tokenbuf);
11970         }
11971         sv_setpvn(herewas,bufptr,d-bufptr+1);
11972         sv_setpvn(tmpstr,d+1,s-d);
11973         s += len - 1;
11974         sv_catpvn(herewas,s,bufend-s);
11975         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11976
11977         s = olds;
11978         goto retval;
11979     }
11980     else if (!outer) {
11981         d = s;
11982         while (s < PL_bufend &&
11983           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11984             if (*s++ == '\n')
11985                 CopLINE_inc(PL_curcop);
11986         }
11987         if (s >= PL_bufend) {
11988             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11989             missingterm(PL_tokenbuf);
11990         }
11991         sv_setpvn(tmpstr,d+1,s-d);
11992 #ifdef PERL_MAD
11993         if (PL_madskills) {
11994             if (PL_thisstuff)
11995                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11996             else
11997                 PL_thisstuff = newSVpvn(d + 1, s - d);
11998             stuffstart = s - SvPVX(PL_linestr);
11999         }
12000 #endif
12001         s += len - 1;
12002         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12003
12004         sv_catpvn(herewas,s,PL_bufend-s);
12005         sv_setsv(PL_linestr,herewas);
12006         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12007         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12008         PL_last_lop = PL_last_uni = NULL;
12009     }
12010     else
12011         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12012     while (s >= PL_bufend) {    /* multiple line string? */
12013 #ifdef PERL_MAD
12014         if (PL_madskills) {
12015             tstart = SvPVX(PL_linestr) + stuffstart;
12016             if (PL_thisstuff)
12017                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12018             else
12019                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12020         }
12021 #endif
12022         PL_bufptr = s;
12023         CopLINE_inc(PL_curcop);
12024         if (!outer || !lex_next_chunk(0)) {
12025             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12026             missingterm(PL_tokenbuf);
12027         }
12028         CopLINE_dec(PL_curcop);
12029         s = PL_bufptr;
12030 #ifdef PERL_MAD
12031         stuffstart = s - SvPVX(PL_linestr);
12032 #endif
12033         CopLINE_inc(PL_curcop);
12034         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12035         PL_last_lop = PL_last_uni = NULL;
12036 #ifndef PERL_STRICT_CR
12037         if (PL_bufend - PL_linestart >= 2) {
12038             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12039                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12040             {
12041                 PL_bufend[-2] = '\n';
12042                 PL_bufend--;
12043                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12044             }
12045             else if (PL_bufend[-1] == '\r')
12046                 PL_bufend[-1] = '\n';
12047         }
12048         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12049             PL_bufend[-1] = '\n';
12050 #endif
12051         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12052             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12053             *(SvPVX(PL_linestr) + off ) = ' ';
12054             sv_catsv(PL_linestr,herewas);
12055             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12056             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12057         }
12058         else {
12059             s = PL_bufend;
12060             sv_catsv(tmpstr,PL_linestr);
12061         }
12062     }
12063     s++;
12064 retval:
12065     PL_multi_end = CopLINE(PL_curcop);
12066     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12067         SvPV_shrink_to_cur(tmpstr);
12068     }
12069     SvREFCNT_dec(herewas);
12070     if (!IN_BYTES) {
12071         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12072             SvUTF8_on(tmpstr);
12073         else if (PL_encoding)
12074             sv_recode_to_utf8(tmpstr, PL_encoding);
12075     }
12076     PL_lex_stuff = tmpstr;
12077     pl_yylval.ival = op_type;
12078     return s;
12079 }
12080
12081 /* scan_inputsymbol
12082    takes: current position in input buffer
12083    returns: new position in input buffer
12084    side-effects: pl_yylval and lex_op are set.
12085
12086    This code handles:
12087
12088    <>           read from ARGV
12089    <FH>         read from filehandle
12090    <pkg::FH>    read from package qualified filehandle
12091    <pkg'FH>     read from package qualified filehandle
12092    <$fh>        read from filehandle in $fh
12093    <*.h>        filename glob
12094
12095 */
12096
12097 STATIC char *
12098 S_scan_inputsymbol(pTHX_ char *start)
12099 {
12100     dVAR;
12101     register char *s = start;           /* current position in buffer */
12102     char *end;
12103     I32 len;
12104     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12105     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12106
12107     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12108
12109     end = strchr(s, '\n');
12110     if (!end)
12111         end = PL_bufend;
12112     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12113
12114     /* die if we didn't have space for the contents of the <>,
12115        or if it didn't end, or if we see a newline
12116     */
12117
12118     if (len >= (I32)sizeof PL_tokenbuf)
12119         Perl_croak(aTHX_ "Excessively long <> operator");
12120     if (s >= end)
12121         Perl_croak(aTHX_ "Unterminated <> operator");
12122
12123     s++;
12124
12125     /* check for <$fh>
12126        Remember, only scalar variables are interpreted as filehandles by
12127        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12128        treated as a glob() call.
12129        This code makes use of the fact that except for the $ at the front,
12130        a scalar variable and a filehandle look the same.
12131     */
12132     if (*d == '$' && d[1]) d++;
12133
12134     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12135     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12136         d++;
12137
12138     /* If we've tried to read what we allow filehandles to look like, and
12139        there's still text left, then it must be a glob() and not a getline.
12140        Use scan_str to pull out the stuff between the <> and treat it
12141        as nothing more than a string.
12142     */
12143
12144     if (d - PL_tokenbuf != len) {
12145         pl_yylval.ival = OP_GLOB;
12146         s = scan_str(start,!!PL_madskills,FALSE);
12147         if (!s)
12148            Perl_croak(aTHX_ "Glob not terminated");
12149         return s;
12150     }
12151     else {
12152         bool readline_overriden = FALSE;
12153         GV *gv_readline;
12154         GV **gvp;
12155         /* we're in a filehandle read situation */
12156         d = PL_tokenbuf;
12157
12158         /* turn <> into <ARGV> */
12159         if (!len)
12160             Copy("ARGV",d,5,char);
12161
12162         /* Check whether readline() is overriden */
12163         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12164         if ((gv_readline
12165                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12166                 ||
12167                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12168                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12169                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12170             readline_overriden = TRUE;
12171
12172         /* if <$fh>, create the ops to turn the variable into a
12173            filehandle
12174         */
12175         if (*d == '$') {
12176             /* try to find it in the pad for this block, otherwise find
12177                add symbol table ops
12178             */
12179             const PADOFFSET tmp = pad_findmy(d, len, 0);
12180             if (tmp != NOT_IN_PAD) {
12181                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12182                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12183                     HEK * const stashname = HvNAME_HEK(stash);
12184                     SV * const sym = sv_2mortal(newSVhek(stashname));
12185                     sv_catpvs(sym, "::");
12186                     sv_catpv(sym, d+1);
12187                     d = SvPVX(sym);
12188                     goto intro_sym;
12189                 }
12190                 else {
12191                     OP * const o = newOP(OP_PADSV, 0);
12192                     o->op_targ = tmp;
12193                     PL_lex_op = readline_overriden
12194                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12195                                 append_elem(OP_LIST, o,
12196                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12197                         : (OP*)newUNOP(OP_READLINE, 0, o);
12198                 }
12199             }
12200             else {
12201                 GV *gv;
12202                 ++d;
12203 intro_sym:
12204                 gv = gv_fetchpv(d,
12205                                 (PL_in_eval
12206                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12207                                  : GV_ADDMULTI),
12208                                 SVt_PV);
12209                 PL_lex_op = readline_overriden
12210                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12211                             append_elem(OP_LIST,
12212                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12213                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12214                     : (OP*)newUNOP(OP_READLINE, 0,
12215                             newUNOP(OP_RV2SV, 0,
12216                                 newGVOP(OP_GV, 0, gv)));
12217             }
12218             if (!readline_overriden)
12219                 PL_lex_op->op_flags |= OPf_SPECIAL;
12220             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12221             pl_yylval.ival = OP_NULL;
12222         }
12223
12224         /* If it's none of the above, it must be a literal filehandle
12225            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12226         else {
12227             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12228             PL_lex_op = readline_overriden
12229                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12230                         append_elem(OP_LIST,
12231                             newGVOP(OP_GV, 0, gv),
12232                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12233                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12234             pl_yylval.ival = OP_NULL;
12235         }
12236     }
12237
12238     return s;
12239 }
12240
12241
12242 /* scan_str
12243    takes: start position in buffer
12244           keep_quoted preserve \ on the embedded delimiter(s)
12245           keep_delims preserve the delimiters around the string
12246    returns: position to continue reading from buffer
12247    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12248         updates the read buffer.
12249
12250    This subroutine pulls a string out of the input.  It is called for:
12251         q               single quotes           q(literal text)
12252         '               single quotes           'literal text'
12253         qq              double quotes           qq(interpolate $here please)
12254         "               double quotes           "interpolate $here please"
12255         qx              backticks               qx(/bin/ls -l)
12256         `               backticks               `/bin/ls -l`
12257         qw              quote words             @EXPORT_OK = qw( func() $spam )
12258         m//             regexp match            m/this/
12259         s///            regexp substitute       s/this/that/
12260         tr///           string transliterate    tr/this/that/
12261         y///            string transliterate    y/this/that/
12262         ($*@)           sub prototypes          sub foo ($)
12263         (stuff)         sub attr parameters     sub foo : attr(stuff)
12264         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12265
12266    In most of these cases (all but <>, patterns and transliterate)
12267    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12268    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12269    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12270    calls scan_str().
12271
12272    It skips whitespace before the string starts, and treats the first
12273    character as the delimiter.  If the delimiter is one of ([{< then
12274    the corresponding "close" character )]}> is used as the closing
12275    delimiter.  It allows quoting of delimiters, and if the string has
12276    balanced delimiters ([{<>}]) it allows nesting.
12277
12278    On success, the SV with the resulting string is put into lex_stuff or,
12279    if that is already non-NULL, into lex_repl. The second case occurs only
12280    when parsing the RHS of the special constructs s/// and tr/// (y///).
12281    For convenience, the terminating delimiter character is stuffed into
12282    SvIVX of the SV.
12283 */
12284
12285 STATIC char *
12286 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12287 {
12288     dVAR;
12289     SV *sv;                             /* scalar value: string */
12290     const char *tmps;                   /* temp string, used for delimiter matching */
12291     register char *s = start;           /* current position in the buffer */
12292     register char term;                 /* terminating character */
12293     register char *to;                  /* current position in the sv's data */
12294     I32 brackets = 1;                   /* bracket nesting level */
12295     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12296     I32 termcode;                       /* terminating char. code */
12297     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12298     STRLEN termlen;                     /* length of terminating string */
12299     int last_off = 0;                   /* last position for nesting bracket */
12300 #ifdef PERL_MAD
12301     int stuffstart;
12302     char *tstart;
12303 #endif
12304
12305     PERL_ARGS_ASSERT_SCAN_STR;
12306
12307     /* skip space before the delimiter */
12308     if (isSPACE(*s)) {
12309         s = PEEKSPACE(s);
12310     }
12311
12312 #ifdef PERL_MAD
12313     if (PL_realtokenstart >= 0) {
12314         stuffstart = PL_realtokenstart;
12315         PL_realtokenstart = -1;
12316     }
12317     else
12318         stuffstart = start - SvPVX(PL_linestr);
12319 #endif
12320     /* mark where we are, in case we need to report errors */
12321     CLINE;
12322
12323     /* after skipping whitespace, the next character is the terminator */
12324     term = *s;
12325     if (!UTF) {
12326         termcode = termstr[0] = term;
12327         termlen = 1;
12328     }
12329     else {
12330         termcode = utf8_to_uvchr((U8*)s, &termlen);
12331         Copy(s, termstr, termlen, U8);
12332         if (!UTF8_IS_INVARIANT(term))
12333             has_utf8 = TRUE;
12334     }
12335
12336     /* mark where we are */
12337     PL_multi_start = CopLINE(PL_curcop);
12338     PL_multi_open = term;
12339
12340     /* find corresponding closing delimiter */
12341     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12342         termcode = termstr[0] = term = tmps[5];
12343
12344     PL_multi_close = term;
12345
12346     /* create a new SV to hold the contents.  79 is the SV's initial length.
12347        What a random number. */
12348     sv = newSV_type(SVt_PVIV);
12349     SvGROW(sv, 80);
12350     SvIV_set(sv, termcode);
12351     (void)SvPOK_only(sv);               /* validate pointer */
12352
12353     /* move past delimiter and try to read a complete string */
12354     if (keep_delims)
12355         sv_catpvn(sv, s, termlen);
12356     s += termlen;
12357 #ifdef PERL_MAD
12358     tstart = SvPVX(PL_linestr) + stuffstart;
12359     if (!PL_thisopen && !keep_delims) {
12360         PL_thisopen = newSVpvn(tstart, s - tstart);
12361         stuffstart = s - SvPVX(PL_linestr);
12362     }
12363 #endif
12364     for (;;) {
12365         if (PL_encoding && !UTF) {
12366             bool cont = TRUE;
12367
12368             while (cont) {
12369                 int offset = s - SvPVX_const(PL_linestr);
12370                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12371                                            &offset, (char*)termstr, termlen);
12372                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12373                 char * const svlast = SvEND(sv) - 1;
12374
12375                 for (; s < ns; s++) {
12376                     if (*s == '\n' && !PL_rsfp)
12377                         CopLINE_inc(PL_curcop);
12378                 }
12379                 if (!found)
12380                     goto read_more_line;
12381                 else {
12382                     /* handle quoted delimiters */
12383                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12384                         const char *t;
12385                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12386                             t--;
12387                         if ((svlast-1 - t) % 2) {
12388                             if (!keep_quoted) {
12389                                 *(svlast-1) = term;
12390                                 *svlast = '\0';
12391                                 SvCUR_set(sv, SvCUR(sv) - 1);
12392                             }
12393                             continue;
12394                         }
12395                     }
12396                     if (PL_multi_open == PL_multi_close) {
12397                         cont = FALSE;
12398                     }
12399                     else {
12400                         const char *t;
12401                         char *w;
12402                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12403                             /* At here, all closes are "was quoted" one,
12404                                so we don't check PL_multi_close. */
12405                             if (*t == '\\') {
12406                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12407                                     t++;
12408                                 else
12409                                     *w++ = *t++;
12410                             }
12411                             else if (*t == PL_multi_open)
12412                                 brackets++;
12413
12414                             *w = *t;
12415                         }
12416                         if (w < t) {
12417                             *w++ = term;
12418                             *w = '\0';
12419                             SvCUR_set(sv, w - SvPVX_const(sv));
12420                         }
12421                         last_off = w - SvPVX(sv);
12422                         if (--brackets <= 0)
12423                             cont = FALSE;
12424                     }
12425                 }
12426             }
12427             if (!keep_delims) {
12428                 SvCUR_set(sv, SvCUR(sv) - 1);
12429                 *SvEND(sv) = '\0';
12430             }
12431             break;
12432         }
12433
12434         /* extend sv if need be */
12435         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12436         /* set 'to' to the next character in the sv's string */
12437         to = SvPVX(sv)+SvCUR(sv);
12438
12439         /* if open delimiter is the close delimiter read unbridle */
12440         if (PL_multi_open == PL_multi_close) {
12441             for (; s < PL_bufend; s++,to++) {
12442                 /* embedded newlines increment the current line number */
12443                 if (*s == '\n' && !PL_rsfp)
12444                     CopLINE_inc(PL_curcop);
12445                 /* handle quoted delimiters */
12446                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12447                     if (!keep_quoted && s[1] == term)
12448                         s++;
12449                 /* any other quotes are simply copied straight through */
12450                     else
12451                         *to++ = *s++;
12452                 }
12453                 /* terminate when run out of buffer (the for() condition), or
12454                    have found the terminator */
12455                 else if (*s == term) {
12456                     if (termlen == 1)
12457                         break;
12458                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12459                         break;
12460                 }
12461                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12462                     has_utf8 = TRUE;
12463                 *to = *s;
12464             }
12465         }
12466
12467         /* if the terminator isn't the same as the start character (e.g.,
12468            matched brackets), we have to allow more in the quoting, and
12469            be prepared for nested brackets.
12470         */
12471         else {
12472             /* read until we run out of string, or we find the terminator */
12473             for (; s < PL_bufend; s++,to++) {
12474                 /* embedded newlines increment the line count */
12475                 if (*s == '\n' && !PL_rsfp)
12476                     CopLINE_inc(PL_curcop);
12477                 /* backslashes can escape the open or closing characters */
12478                 if (*s == '\\' && s+1 < PL_bufend) {
12479                     if (!keep_quoted &&
12480                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12481                         s++;
12482                     else
12483                         *to++ = *s++;
12484                 }
12485                 /* allow nested opens and closes */
12486                 else if (*s == PL_multi_close && --brackets <= 0)
12487                     break;
12488                 else if (*s == PL_multi_open)
12489                     brackets++;
12490                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12491                     has_utf8 = TRUE;
12492                 *to = *s;
12493             }
12494         }
12495         /* terminate the copied string and update the sv's end-of-string */
12496         *to = '\0';
12497         SvCUR_set(sv, to - SvPVX_const(sv));
12498
12499         /*
12500          * this next chunk reads more into the buffer if we're not done yet
12501          */
12502
12503         if (s < PL_bufend)
12504             break;              /* handle case where we are done yet :-) */
12505
12506 #ifndef PERL_STRICT_CR
12507         if (to - SvPVX_const(sv) >= 2) {
12508             if ((to[-2] == '\r' && to[-1] == '\n') ||
12509                 (to[-2] == '\n' && to[-1] == '\r'))
12510             {
12511                 to[-2] = '\n';
12512                 to--;
12513                 SvCUR_set(sv, to - SvPVX_const(sv));
12514             }
12515             else if (to[-1] == '\r')
12516                 to[-1] = '\n';
12517         }
12518         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12519             to[-1] = '\n';
12520 #endif
12521
12522      read_more_line:
12523         /* if we're out of file, or a read fails, bail and reset the current
12524            line marker so we can report where the unterminated string began
12525         */
12526 #ifdef PERL_MAD
12527         if (PL_madskills) {
12528             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12529             if (PL_thisstuff)
12530                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12531             else
12532                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12533         }
12534 #endif
12535         CopLINE_inc(PL_curcop);
12536         PL_bufptr = PL_bufend;
12537         if (!lex_next_chunk(0)) {
12538             sv_free(sv);
12539             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12540             return NULL;
12541         }
12542         s = PL_bufptr;
12543 #ifdef PERL_MAD
12544         stuffstart = 0;
12545 #endif
12546     }
12547
12548     /* at this point, we have successfully read the delimited string */
12549
12550     if (!PL_encoding || UTF) {
12551 #ifdef PERL_MAD
12552         if (PL_madskills) {
12553             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12554             const int len = s - tstart;
12555             if (PL_thisstuff)
12556                 sv_catpvn(PL_thisstuff, tstart, len);
12557             else
12558                 PL_thisstuff = newSVpvn(tstart, len);
12559             if (!PL_thisclose && !keep_delims)
12560                 PL_thisclose = newSVpvn(s,termlen);
12561         }
12562 #endif
12563
12564         if (keep_delims)
12565             sv_catpvn(sv, s, termlen);
12566         s += termlen;
12567     }
12568 #ifdef PERL_MAD
12569     else {
12570         if (PL_madskills) {
12571             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12572             const int len = s - tstart - termlen;
12573             if (PL_thisstuff)
12574                 sv_catpvn(PL_thisstuff, tstart, len);
12575             else
12576                 PL_thisstuff = newSVpvn(tstart, len);
12577             if (!PL_thisclose && !keep_delims)
12578                 PL_thisclose = newSVpvn(s - termlen,termlen);
12579         }
12580     }
12581 #endif
12582     if (has_utf8 || PL_encoding)
12583         SvUTF8_on(sv);
12584
12585     PL_multi_end = CopLINE(PL_curcop);
12586
12587     /* if we allocated too much space, give some back */
12588     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12589         SvLEN_set(sv, SvCUR(sv) + 1);
12590         SvPV_renew(sv, SvLEN(sv));
12591     }
12592
12593     /* decide whether this is the first or second quoted string we've read
12594        for this op
12595     */
12596
12597     if (PL_lex_stuff)
12598         PL_lex_repl = sv;
12599     else
12600         PL_lex_stuff = sv;
12601     return s;
12602 }
12603
12604 /*
12605   scan_num
12606   takes: pointer to position in buffer
12607   returns: pointer to new position in buffer
12608   side-effects: builds ops for the constant in pl_yylval.op
12609
12610   Read a number in any of the formats that Perl accepts:
12611
12612   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12613   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12614   0b[01](_?[01])*
12615   0[0-7](_?[0-7])*
12616   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12617
12618   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12619   thing it reads.
12620
12621   If it reads a number without a decimal point or an exponent, it will
12622   try converting the number to an integer and see if it can do so
12623   without loss of precision.
12624 */
12625
12626 char *
12627 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12628 {
12629     dVAR;
12630     register const char *s = start;     /* current position in buffer */
12631     register char *d;                   /* destination in temp buffer */
12632     register char *e;                   /* end of temp buffer */
12633     NV nv;                              /* number read, as a double */
12634     SV *sv = NULL;                      /* place to put the converted number */
12635     bool floatit;                       /* boolean: int or float? */
12636     const char *lastub = NULL;          /* position of last underbar */
12637     static char const number_too_long[] = "Number too long";
12638
12639     PERL_ARGS_ASSERT_SCAN_NUM;
12640
12641     /* We use the first character to decide what type of number this is */
12642
12643     switch (*s) {
12644     default:
12645       Perl_croak(aTHX_ "panic: scan_num");
12646
12647     /* if it starts with a 0, it could be an octal number, a decimal in
12648        0.13 disguise, or a hexadecimal number, or a binary number. */
12649     case '0':
12650         {
12651           /* variables:
12652              u          holds the "number so far"
12653              shift      the power of 2 of the base
12654                         (hex == 4, octal == 3, binary == 1)
12655              overflowed was the number more than we can hold?
12656
12657              Shift is used when we add a digit.  It also serves as an "are
12658              we in octal/hex/binary?" indicator to disallow hex characters
12659              when in octal mode.
12660            */
12661             NV n = 0.0;
12662             UV u = 0;
12663             I32 shift;
12664             bool overflowed = FALSE;
12665             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12666             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12667             static const char* const bases[5] =
12668               { "", "binary", "", "octal", "hexadecimal" };
12669             static const char* const Bases[5] =
12670               { "", "Binary", "", "Octal", "Hexadecimal" };
12671             static const char* const maxima[5] =
12672               { "",
12673                 "0b11111111111111111111111111111111",
12674                 "",
12675                 "037777777777",
12676                 "0xffffffff" };
12677             const char *base, *Base, *max;
12678
12679             /* check for hex */
12680             if (s[1] == 'x') {
12681                 shift = 4;
12682                 s += 2;
12683                 just_zero = FALSE;
12684             } else if (s[1] == 'b') {
12685                 shift = 1;
12686                 s += 2;
12687                 just_zero = FALSE;
12688             }
12689             /* check for a decimal in disguise */
12690             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12691                 goto decimal;
12692             /* so it must be octal */
12693             else {
12694                 shift = 3;
12695                 s++;
12696             }
12697
12698             if (*s == '_') {
12699                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12700                                "Misplaced _ in number");
12701                lastub = s++;
12702             }
12703
12704             base = bases[shift];
12705             Base = Bases[shift];
12706             max  = maxima[shift];
12707
12708             /* read the rest of the number */
12709             for (;;) {
12710                 /* x is used in the overflow test,
12711                    b is the digit we're adding on. */
12712                 UV x, b;
12713
12714                 switch (*s) {
12715
12716                 /* if we don't mention it, we're done */
12717                 default:
12718                     goto out;
12719
12720                 /* _ are ignored -- but warned about if consecutive */
12721                 case '_':
12722                     if (lastub && s == lastub + 1)
12723                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12724                                        "Misplaced _ in number");
12725                     lastub = s++;
12726                     break;
12727
12728                 /* 8 and 9 are not octal */
12729                 case '8': case '9':
12730                     if (shift == 3)
12731                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12732                     /* FALL THROUGH */
12733
12734                 /* octal digits */
12735                 case '2': case '3': case '4':
12736                 case '5': case '6': case '7':
12737                     if (shift == 1)
12738                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12739                     /* FALL THROUGH */
12740
12741                 case '0': case '1':
12742                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12743                     goto digit;
12744
12745                 /* hex digits */
12746                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12747                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12748                     /* make sure they said 0x */
12749                     if (shift != 4)
12750                         goto out;
12751                     b = (*s++ & 7) + 9;
12752
12753                     /* Prepare to put the digit we have onto the end
12754                        of the number so far.  We check for overflows.
12755                     */
12756
12757                   digit:
12758                     just_zero = FALSE;
12759                     if (!overflowed) {
12760                         x = u << shift; /* make room for the digit */
12761
12762                         if ((x >> shift) != u
12763                             && !(PL_hints & HINT_NEW_BINARY)) {
12764                             overflowed = TRUE;
12765                             n = (NV) u;
12766                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12767                                              "Integer overflow in %s number",
12768                                              base);
12769                         } else
12770                             u = x | b;          /* add the digit to the end */
12771                     }
12772                     if (overflowed) {
12773                         n *= nvshift[shift];
12774                         /* If an NV has not enough bits in its
12775                          * mantissa to represent an UV this summing of
12776                          * small low-order numbers is a waste of time
12777                          * (because the NV cannot preserve the
12778                          * low-order bits anyway): we could just
12779                          * remember when did we overflow and in the
12780                          * end just multiply n by the right
12781                          * amount. */
12782                         n += (NV) b;
12783                     }
12784                     break;
12785                 }
12786             }
12787
12788           /* if we get here, we had success: make a scalar value from
12789              the number.
12790           */
12791           out:
12792
12793             /* final misplaced underbar check */
12794             if (s[-1] == '_') {
12795                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12796             }
12797
12798             sv = newSV(0);
12799             if (overflowed) {
12800                 if (n > 4294967295.0)
12801                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12802                                    "%s number > %s non-portable",
12803                                    Base, max);
12804                 sv_setnv(sv, n);
12805             }
12806             else {
12807 #if UVSIZE > 4
12808                 if (u > 0xffffffff)
12809                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12810                                    "%s number > %s non-portable",
12811                                    Base, max);
12812 #endif
12813                 sv_setuv(sv, u);
12814             }
12815             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12816                 sv = new_constant(start, s - start, "integer",
12817                                   sv, NULL, NULL, 0);
12818             else if (PL_hints & HINT_NEW_BINARY)
12819                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12820         }
12821         break;
12822
12823     /*
12824       handle decimal numbers.
12825       we're also sent here when we read a 0 as the first digit
12826     */
12827     case '1': case '2': case '3': case '4': case '5':
12828     case '6': case '7': case '8': case '9': case '.':
12829       decimal:
12830         d = PL_tokenbuf;
12831         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12832         floatit = FALSE;
12833
12834         /* read next group of digits and _ and copy into d */
12835         while (isDIGIT(*s) || *s == '_') {
12836             /* skip underscores, checking for misplaced ones
12837                if -w is on
12838             */
12839             if (*s == '_') {
12840                 if (lastub && s == lastub + 1)
12841                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12842                                    "Misplaced _ in number");
12843                 lastub = s++;
12844             }
12845             else {
12846                 /* check for end of fixed-length buffer */
12847                 if (d >= e)
12848                     Perl_croak(aTHX_ number_too_long);
12849                 /* if we're ok, copy the character */
12850                 *d++ = *s++;
12851             }
12852         }
12853
12854         /* final misplaced underbar check */
12855         if (lastub && s == lastub + 1) {
12856             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12857         }
12858
12859         /* read a decimal portion if there is one.  avoid
12860            3..5 being interpreted as the number 3. followed
12861            by .5
12862         */
12863         if (*s == '.' && s[1] != '.') {
12864             floatit = TRUE;
12865             *d++ = *s++;
12866
12867             if (*s == '_') {
12868                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12869                                "Misplaced _ in number");
12870                 lastub = s;
12871             }
12872
12873             /* copy, ignoring underbars, until we run out of digits.
12874             */
12875             for (; isDIGIT(*s) || *s == '_'; s++) {
12876                 /* fixed length buffer check */
12877                 if (d >= e)
12878                     Perl_croak(aTHX_ number_too_long);
12879                 if (*s == '_') {
12880                    if (lastub && s == lastub + 1)
12881                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12882                                       "Misplaced _ in number");
12883                    lastub = s;
12884                 }
12885                 else
12886                     *d++ = *s;
12887             }
12888             /* fractional part ending in underbar? */
12889             if (s[-1] == '_') {
12890                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12891                                "Misplaced _ in number");
12892             }
12893             if (*s == '.' && isDIGIT(s[1])) {
12894                 /* oops, it's really a v-string, but without the "v" */
12895                 s = start;
12896                 goto vstring;
12897             }
12898         }
12899
12900         /* read exponent part, if present */
12901         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12902             floatit = TRUE;
12903             s++;
12904
12905             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12906             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12907
12908             /* stray preinitial _ */
12909             if (*s == '_') {
12910                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12911                                "Misplaced _ in number");
12912                 lastub = s++;
12913             }
12914
12915             /* allow positive or negative exponent */
12916             if (*s == '+' || *s == '-')
12917                 *d++ = *s++;
12918
12919             /* stray initial _ */
12920             if (*s == '_') {
12921                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12922                                "Misplaced _ in number");
12923                 lastub = s++;
12924             }
12925
12926             /* read digits of exponent */
12927             while (isDIGIT(*s) || *s == '_') {
12928                 if (isDIGIT(*s)) {
12929                     if (d >= e)
12930                         Perl_croak(aTHX_ number_too_long);
12931                     *d++ = *s++;
12932                 }
12933                 else {
12934                    if (((lastub && s == lastub + 1) ||
12935                         (!isDIGIT(s[1]) && s[1] != '_')))
12936                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12937                                       "Misplaced _ in number");
12938                    lastub = s++;
12939                 }
12940             }
12941         }
12942
12943
12944         /* make an sv from the string */
12945         sv = newSV(0);
12946
12947         /*
12948            We try to do an integer conversion first if no characters
12949            indicating "float" have been found.
12950          */
12951
12952         if (!floatit) {
12953             UV uv;
12954             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12955
12956             if (flags == IS_NUMBER_IN_UV) {
12957               if (uv <= IV_MAX)
12958                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12959               else
12960                 sv_setuv(sv, uv);
12961             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12962               if (uv <= (UV) IV_MIN)
12963                 sv_setiv(sv, -(IV)uv);
12964               else
12965                 floatit = TRUE;
12966             } else
12967               floatit = TRUE;
12968         }
12969         if (floatit) {
12970             /* terminate the string */
12971             *d = '\0';
12972             nv = Atof(PL_tokenbuf);
12973             sv_setnv(sv, nv);
12974         }
12975
12976         if ( floatit
12977              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12978             const char *const key = floatit ? "float" : "integer";
12979             const STRLEN keylen = floatit ? 5 : 7;
12980             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12981                                 key, keylen, sv, NULL, NULL, 0);
12982         }
12983         break;
12984
12985     /* if it starts with a v, it could be a v-string */
12986     case 'v':
12987 vstring:
12988                 sv = newSV(5); /* preallocate storage space */
12989                 s = scan_vstring(s, PL_bufend, sv);
12990         break;
12991     }
12992
12993     /* make the op for the constant and return */
12994
12995     if (sv)
12996         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12997     else
12998         lvalp->opval = NULL;
12999
13000     return (char *)s;
13001 }
13002
13003 STATIC char *
13004 S_scan_formline(pTHX_ register char *s)
13005 {
13006     dVAR;
13007     register char *eol;
13008     register char *t;
13009     SV * const stuff = newSVpvs("");
13010     bool needargs = FALSE;
13011     bool eofmt = FALSE;
13012 #ifdef PERL_MAD
13013     char *tokenstart = s;
13014     SV* savewhite = NULL;
13015
13016     if (PL_madskills) {
13017         savewhite = PL_thiswhite;
13018         PL_thiswhite = 0;
13019     }
13020 #endif
13021
13022     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13023
13024     while (!needargs) {
13025         if (*s == '.') {
13026             t = s+1;
13027 #ifdef PERL_STRICT_CR
13028             while (SPACE_OR_TAB(*t))
13029                 t++;
13030 #else
13031             while (SPACE_OR_TAB(*t) || *t == '\r')
13032                 t++;
13033 #endif
13034             if (*t == '\n' || t == PL_bufend) {
13035                 eofmt = TRUE;
13036                 break;
13037             }
13038         }
13039         if (PL_in_eval && !PL_rsfp) {
13040             eol = (char *) memchr(s,'\n',PL_bufend-s);
13041             if (!eol++)
13042                 eol = PL_bufend;
13043         }
13044         else
13045             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13046         if (*s != '#') {
13047             for (t = s; t < eol; t++) {
13048                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13049                     needargs = FALSE;
13050                     goto enough;        /* ~~ must be first line in formline */
13051                 }
13052                 if (*t == '@' || *t == '^')
13053                     needargs = TRUE;
13054             }
13055             if (eol > s) {
13056                 sv_catpvn(stuff, s, eol-s);
13057 #ifndef PERL_STRICT_CR
13058                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13059                     char *end = SvPVX(stuff) + SvCUR(stuff);
13060                     end[-2] = '\n';
13061                     end[-1] = '\0';
13062                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13063                 }
13064 #endif
13065             }
13066             else
13067               break;
13068         }
13069         s = (char*)eol;
13070         if (PL_rsfp) {
13071             bool got_some;
13072 #ifdef PERL_MAD
13073             if (PL_madskills) {
13074                 if (PL_thistoken)
13075                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13076                 else
13077                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13078             }
13079 #endif
13080             PL_bufptr = PL_bufend;
13081             CopLINE_inc(PL_curcop);
13082             got_some = lex_next_chunk(0);
13083             CopLINE_dec(PL_curcop);
13084             s = PL_bufptr;
13085 #ifdef PERL_MAD
13086             tokenstart = PL_bufptr;
13087 #endif
13088             if (!got_some)
13089                 break;
13090         }
13091         incline(s);
13092     }
13093   enough:
13094     if (SvCUR(stuff)) {
13095         PL_expect = XTERM;
13096         if (needargs) {
13097             PL_lex_state = LEX_NORMAL;
13098             start_force(PL_curforce);
13099             NEXTVAL_NEXTTOKE.ival = 0;
13100             force_next(',');
13101         }
13102         else
13103             PL_lex_state = LEX_FORMLINE;
13104         if (!IN_BYTES) {
13105             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13106                 SvUTF8_on(stuff);
13107             else if (PL_encoding)
13108                 sv_recode_to_utf8(stuff, PL_encoding);
13109         }
13110         start_force(PL_curforce);
13111         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13112         force_next(THING);
13113         start_force(PL_curforce);
13114         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13115         force_next(LSTOP);
13116     }
13117     else {
13118         SvREFCNT_dec(stuff);
13119         if (eofmt)
13120             PL_lex_formbrack = 0;
13121         PL_bufptr = s;
13122     }
13123 #ifdef PERL_MAD
13124     if (PL_madskills) {
13125         if (PL_thistoken)
13126             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13127         else
13128             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13129         PL_thiswhite = savewhite;
13130     }
13131 #endif
13132     return s;
13133 }
13134
13135 I32
13136 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13137 {
13138     dVAR;
13139     const I32 oldsavestack_ix = PL_savestack_ix;
13140     CV* const outsidecv = PL_compcv;
13141
13142     if (PL_compcv) {
13143         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13144     }
13145     SAVEI32(PL_subline);
13146     save_item(PL_subname);
13147     SAVESPTR(PL_compcv);
13148
13149     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13150     CvFLAGS(PL_compcv) |= flags;
13151
13152     PL_subline = CopLINE(PL_curcop);
13153     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13154     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13155     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13156
13157     return oldsavestack_ix;
13158 }
13159
13160 #ifdef __SC__
13161 #pragma segment Perl_yylex
13162 #endif
13163 static int
13164 S_yywarn(pTHX_ const char *const s)
13165 {
13166     dVAR;
13167
13168     PERL_ARGS_ASSERT_YYWARN;
13169
13170     PL_in_eval |= EVAL_WARNONLY;
13171     yyerror(s);
13172     PL_in_eval &= ~EVAL_WARNONLY;
13173     return 0;
13174 }
13175
13176 int
13177 Perl_yyerror(pTHX_ const char *const s)
13178 {
13179     dVAR;
13180     const char *where = NULL;
13181     const char *context = NULL;
13182     int contlen = -1;
13183     SV *msg;
13184     int yychar  = PL_parser->yychar;
13185
13186     PERL_ARGS_ASSERT_YYERROR;
13187
13188     if (!yychar || (yychar == ';' && !PL_rsfp))
13189         where = "at EOF";
13190     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13191       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13192       PL_oldbufptr != PL_bufptr) {
13193         /*
13194                 Only for NetWare:
13195                 The code below is removed for NetWare because it abends/crashes on NetWare
13196                 when the script has error such as not having the closing quotes like:
13197                     if ($var eq "value)
13198                 Checking of white spaces is anyway done in NetWare code.
13199         */
13200 #ifndef NETWARE
13201         while (isSPACE(*PL_oldoldbufptr))
13202             PL_oldoldbufptr++;
13203 #endif
13204         context = PL_oldoldbufptr;
13205         contlen = PL_bufptr - PL_oldoldbufptr;
13206     }
13207     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13208       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13209         /*
13210                 Only for NetWare:
13211                 The code below is removed for NetWare because it abends/crashes on NetWare
13212                 when the script has error such as not having the closing quotes like:
13213                     if ($var eq "value)
13214                 Checking of white spaces is anyway done in NetWare code.
13215         */
13216 #ifndef NETWARE
13217         while (isSPACE(*PL_oldbufptr))
13218             PL_oldbufptr++;
13219 #endif
13220         context = PL_oldbufptr;
13221         contlen = PL_bufptr - PL_oldbufptr;
13222     }
13223     else if (yychar > 255)
13224         where = "next token ???";
13225     else if (yychar == -2) { /* YYEMPTY */
13226         if (PL_lex_state == LEX_NORMAL ||
13227            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13228             where = "at end of line";
13229         else if (PL_lex_inpat)
13230             where = "within pattern";
13231         else
13232             where = "within string";
13233     }
13234     else {
13235         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13236         if (yychar < 32)
13237             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13238         else if (isPRINT_LC(yychar)) {
13239             const char string = yychar;
13240             sv_catpvn(where_sv, &string, 1);
13241         }
13242         else
13243             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13244         where = SvPVX_const(where_sv);
13245     }
13246     msg = sv_2mortal(newSVpv(s, 0));
13247     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13248         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13249     if (context)
13250         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13251     else
13252         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13253     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13254         Perl_sv_catpvf(aTHX_ msg,
13255         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13256                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13257         PL_multi_end = 0;
13258     }
13259     if (PL_in_eval & EVAL_WARNONLY) {
13260         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13261     }
13262     else
13263         qerror(msg);
13264     if (PL_error_count >= 10) {
13265         if (PL_in_eval && SvCUR(ERRSV))
13266             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13267                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13268         else
13269             Perl_croak(aTHX_ "%s has too many errors.\n",
13270             OutCopFILE(PL_curcop));
13271     }
13272     PL_in_my = 0;
13273     PL_in_my_stash = NULL;
13274     return 0;
13275 }
13276 #ifdef __SC__
13277 #pragma segment Main
13278 #endif
13279
13280 STATIC char*
13281 S_swallow_bom(pTHX_ U8 *s)
13282 {
13283     dVAR;
13284     const STRLEN slen = SvCUR(PL_linestr);
13285
13286     PERL_ARGS_ASSERT_SWALLOW_BOM;
13287
13288     switch (s[0]) {
13289     case 0xFF:
13290         if (s[1] == 0xFE) {
13291             /* UTF-16 little-endian? (or UTF-32LE?) */
13292             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13293                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13294 #ifndef PERL_NO_UTF16_FILTER
13295             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13296             s += 2;
13297             if (PL_bufend > (char*)s) {
13298                 s = add_utf16_textfilter(s, TRUE);
13299             }
13300 #else
13301             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13302 #endif
13303         }
13304         break;
13305     case 0xFE:
13306         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13307 #ifndef PERL_NO_UTF16_FILTER
13308             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13309             s += 2;
13310             if (PL_bufend > (char *)s) {
13311                 s = add_utf16_textfilter(s, FALSE);
13312             }
13313 #else
13314             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13315 #endif
13316         }
13317         break;
13318     case 0xEF:
13319         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13320             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13321             s += 3;                      /* UTF-8 */
13322         }
13323         break;
13324     case 0:
13325         if (slen > 3) {
13326              if (s[1] == 0) {
13327                   if (s[2] == 0xFE && s[3] == 0xFF) {
13328                        /* UTF-32 big-endian */
13329                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13330                   }
13331              }
13332              else if (s[2] == 0 && s[3] != 0) {
13333                   /* Leading bytes
13334                    * 00 xx 00 xx
13335                    * are a good indicator of UTF-16BE. */
13336 #ifndef PERL_NO_UTF16_FILTER
13337                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13338                   s = add_utf16_textfilter(s, FALSE);
13339 #else
13340                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13341 #endif
13342              }
13343         }
13344 #ifdef EBCDIC
13345     case 0xDD:
13346         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13347             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13348             s += 4;                      /* UTF-8 */
13349         }
13350         break;
13351 #endif
13352
13353     default:
13354          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13355                   /* Leading bytes
13356                    * xx 00 xx 00
13357                    * are a good indicator of UTF-16LE. */
13358 #ifndef PERL_NO_UTF16_FILTER
13359               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13360               s = add_utf16_textfilter(s, TRUE);
13361 #else
13362               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13363 #endif
13364          }
13365     }
13366     return (char*)s;
13367 }
13368
13369
13370 #ifndef PERL_NO_UTF16_FILTER
13371 static I32
13372 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13373 {
13374     dVAR;
13375     SV *const filter = FILTER_DATA(idx);
13376     /* We re-use this each time round, throwing the contents away before we
13377        return.  */
13378     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13379     SV *const utf8_buffer = filter;
13380     IV status = IoPAGE(filter);
13381     const bool reverse = (bool) IoLINES(filter);
13382     I32 retval;
13383
13384     /* As we're automatically added, at the lowest level, and hence only called
13385        from this file, we can be sure that we're not called in block mode. Hence
13386        don't bother writing code to deal with block mode.  */
13387     if (maxlen) {
13388         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13389     }
13390     if (status < 0) {
13391         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13392     }
13393     DEBUG_P(PerlIO_printf(Perl_debug_log,
13394                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13395                           FPTR2DPTR(void *, S_utf16_textfilter),
13396                           reverse ? 'l' : 'b', idx, maxlen, status,
13397                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13398
13399     while (1) {
13400         STRLEN chars;
13401         STRLEN have;
13402         I32 newlen;
13403         U8 *end;
13404         /* First, look in our buffer of existing UTF-8 data:  */
13405         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13406
13407         if (nl) {
13408             ++nl;
13409         } else if (status == 0) {
13410             /* EOF */
13411             IoPAGE(filter) = 0;
13412             nl = SvEND(utf8_buffer);
13413         }
13414         if (nl) {
13415             STRLEN got = nl - SvPVX(utf8_buffer);
13416             /* Did we have anything to append?  */
13417             retval = got != 0;
13418             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13419             /* Everything else in this code works just fine if SVp_POK isn't
13420                set.  This, however, needs it, and we need it to work, else
13421                we loop infinitely because the buffer is never consumed.  */
13422             sv_chop(utf8_buffer, nl);
13423             break;
13424         }
13425
13426         /* OK, not a complete line there, so need to read some more UTF-16.
13427            Read an extra octect if the buffer currently has an odd number. */
13428         while (1) {
13429             if (status <= 0)
13430                 break;
13431             if (SvCUR(utf16_buffer) >= 2) {
13432                 /* Location of the high octet of the last complete code point.
13433                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13434                    *coupled* with all the benefits of partial reads and
13435                    endianness.  */
13436                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13437                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13438
13439                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13440                     break;
13441                 }
13442
13443                 /* We have the first half of a surrogate. Read more.  */
13444                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13445             }
13446
13447             status = FILTER_READ(idx + 1, utf16_buffer,
13448                                  160 + (SvCUR(utf16_buffer) & 1));
13449             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13450             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13451             if (status < 0) {
13452                 /* Error */
13453                 IoPAGE(filter) = status;
13454                 return status;
13455             }
13456         }
13457
13458         chars = SvCUR(utf16_buffer) >> 1;
13459         have = SvCUR(utf8_buffer);
13460         SvGROW(utf8_buffer, have + chars * 3 + 1);
13461
13462         if (reverse) {
13463             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13464                                          (U8*)SvPVX_const(utf8_buffer) + have,
13465                                          chars * 2, &newlen);
13466         } else {
13467             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13468                                 (U8*)SvPVX_const(utf8_buffer) + have,
13469                                 chars * 2, &newlen);
13470         }
13471         SvCUR_set(utf8_buffer, have + newlen);
13472         *end = '\0';
13473
13474         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13475            it's private to us, and utf16_to_utf8{,reversed} take a
13476            (pointer,length) pair, rather than a NUL-terminated string.  */
13477         if(SvCUR(utf16_buffer) & 1) {
13478             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13479             SvCUR_set(utf16_buffer, 1);
13480         } else {
13481             SvCUR_set(utf16_buffer, 0);
13482         }
13483     }
13484     DEBUG_P(PerlIO_printf(Perl_debug_log,
13485                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13486                           status,
13487                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13488     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13489     return retval;
13490 }
13491
13492 static U8 *
13493 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13494 {
13495     SV *filter = filter_add(S_utf16_textfilter, NULL);
13496
13497     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13498     sv_setpvs(filter, "");
13499     IoLINES(filter) = reversed;
13500     IoPAGE(filter) = 1; /* Not EOF */
13501
13502     /* Sadly, we have to return a valid pointer, come what may, so we have to
13503        ignore any error return from this.  */
13504     SvCUR_set(PL_linestr, 0);
13505     if (FILTER_READ(0, PL_linestr, 0)) {
13506         SvUTF8_on(PL_linestr);
13507     } else {
13508         SvUTF8_on(PL_linestr);
13509     }
13510     PL_bufend = SvEND(PL_linestr);
13511     return (U8*)SvPVX(PL_linestr);
13512 }
13513 #endif
13514
13515 /*
13516 Returns a pointer to the next character after the parsed
13517 vstring, as well as updating the passed in sv.
13518
13519 Function must be called like
13520
13521         sv = newSV(5);
13522         s = scan_vstring(s,e,sv);
13523
13524 where s and e are the start and end of the string.
13525 The sv should already be large enough to store the vstring
13526 passed in, for performance reasons.
13527
13528 */
13529
13530 char *
13531 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13532 {
13533     dVAR;
13534     const char *pos = s;
13535     const char *start = s;
13536
13537     PERL_ARGS_ASSERT_SCAN_VSTRING;
13538
13539     if (*pos == 'v') pos++;  /* get past 'v' */
13540     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13541         pos++;
13542     if ( *pos != '.') {
13543         /* this may not be a v-string if followed by => */
13544         const char *next = pos;
13545         while (next < e && isSPACE(*next))
13546             ++next;
13547         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13548             /* return string not v-string */
13549             sv_setpvn(sv,(char *)s,pos-s);
13550             return (char *)pos;
13551         }
13552     }
13553
13554     if (!isALPHA(*pos)) {
13555         U8 tmpbuf[UTF8_MAXBYTES+1];
13556
13557         if (*s == 'v')
13558             s++;  /* get past 'v' */
13559
13560         sv_setpvs(sv, "");
13561
13562         for (;;) {
13563             /* this is atoi() that tolerates underscores */
13564             U8 *tmpend;
13565             UV rev = 0;
13566             const char *end = pos;
13567             UV mult = 1;
13568             while (--end >= s) {
13569                 if (*end != '_') {
13570                     const UV orev = rev;
13571                     rev += (*end - '0') * mult;
13572                     mult *= 10;
13573                     if (orev > rev)
13574                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13575                                          "Integer overflow in decimal number");
13576                 }
13577             }
13578 #ifdef EBCDIC
13579             if (rev > 0x7FFFFFFF)
13580                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13581 #endif
13582             /* Append native character for the rev point */
13583             tmpend = uvchr_to_utf8(tmpbuf, rev);
13584             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13585             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13586                  SvUTF8_on(sv);
13587             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13588                  s = ++pos;
13589             else {
13590                  s = pos;
13591                  break;
13592             }
13593             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13594                  pos++;
13595         }
13596         SvPOK_on(sv);
13597         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13598         SvRMAGICAL_on(sv);
13599     }
13600     return (char *)s;
13601 }
13602
13603 int
13604 Perl_keyword_plugin_standard(pTHX_
13605         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13606 {
13607     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13608     PERL_UNUSED_CONTEXT;
13609     PERL_UNUSED_ARG(keyword_ptr);
13610     PERL_UNUSED_ARG(keyword_len);
13611     PERL_UNUSED_ARG(op_ptr);
13612     return KEYWORD_PLUGIN_DECLINE;
13613 }
13614
13615 /*
13616  * Local variables:
13617  * c-indentation-style: bsd
13618  * c-basic-offset: 4
13619  * indent-tabs-mode: t
13620  * End:
13621  *
13622  * ex: set ts=8 sts=4 sw=4 noet:
13623  */