toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("switch")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
 940 {
 941     char *bufptr;
 942     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 943     if (flags & ~(LEX_STUFF_UTF8))
 944         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 945     if (UTF) {
 946         if (flags & LEX_STUFF_UTF8) {
 947             goto plain_copy;
 948         } else {
 949             STRLEN highhalf = 0;
 950             char *p, *e = pv+len;
 951             for (p = pv; p != e; p++)
 952                 highhalf += !!(((U8)*p) & 0x80);
 953             if (!highhalf)
 954                 goto plain_copy;
 955             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 956             bufptr = PL_parser->bufptr;
 957             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 958             PL_parser->bufend += len+highhalf;
 959             for (p = pv; p != e; p++) {
 960                 U8 c = (U8)*p;
 961                 if (c & 0x80) {
 962                     *bufptr++ = (char)(0xc0 | (c >> 6));
 963                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 964                 } else {
 965                     *bufptr++ = (char)c;
 966                 }
 967             }
 968         }
 969     } else {
 970         if (flags & LEX_STUFF_UTF8) {
 971             STRLEN highhalf = 0;
 972             char *p, *e = pv+len;
 973             for (p = pv; p != e; p++) {
 974                 U8 c = (U8)*p;
 975                 if (c >= 0xc4) {
 976                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 977                                 "non-Latin-1 character into Latin-1 input");
 978                 } else if (c >= 0xc2 && p+1 != e &&
 979                             (((U8)p[1]) & 0xc0) == 0x80) {
 980                     p++;
 981                     highhalf++;
 982                 } else if (c >= 0x80) {
 983                     /* malformed UTF-8 */
 984                     ENTER;
 985                     SAVESPTR(PL_warnhook);
 986                     PL_warnhook = PERL_WARNHOOK_FATAL;
 987                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 988                     LEAVE;
 989                 }
 990             }
 991             if (!highhalf)
 992                 goto plain_copy;
 993             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 994             bufptr = PL_parser->bufptr;
 995             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 996             PL_parser->bufend += len-highhalf;
 997             for (p = pv; p != e; p++) {
 998                 U8 c = (U8)*p;
 999                 if (c & 0x80) {
1000                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001                     p++;
1002                 } else {
1003                     *bufptr++ = (char)c;
1004                 }
1005             }
1006         } else {
1007             plain_copy:
1008             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009             bufptr = PL_parser->bufptr;
1010             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011             PL_parser->bufend += len;
1012             Copy(pv, bufptr, len, char);
1013         }
1014     }
1015 }
1016
1017 /*
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary.  This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1027
1028 The string to be inserted is the string value of I<sv>.  The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1033
1034 =cut
1035 */
1036
1037 void
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039 {
1040     char *pv;
1041     STRLEN len;
1042     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043     if (flags)
1044         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045     pv = SvPV(sv, len);
1046     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047 }
1048
1049 /*
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1056
1057 This is not the normal way to consume lexed text.  For that, use
1058 L</lex_read_to>.
1059
1060 =cut
1061 */
1062
1063 void
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1065 {
1066     char *buf, *bufend;
1067     STRLEN unstuff_len;
1068     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069     buf = PL_parser->bufptr;
1070     if (ptr < buf)
1071         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072     if (ptr == buf)
1073         return;
1074     bufend = PL_parser->bufend;
1075     if (ptr > bufend)
1076         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077     unstuff_len = ptr - buf;
1078     Move(ptr, buf, bufend+1-ptr, char);
1079     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080     PL_parser->bufend = bufend - unstuff_len;
1081 }
1082
1083 /*
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1085
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1090
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1094
1095 =cut
1096 */
1097
1098 void
1099 Perl_lex_read_to(pTHX_ char *ptr)
1100 {
1101     char *s;
1102     PERL_ARGS_ASSERT_LEX_READ_TO;
1103     s = PL_parser->bufptr;
1104     if (ptr < s || ptr > PL_parser->bufend)
1105         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106     for (; s != ptr; s++)
1107         if (*s == '\n') {
1108             CopLINE_inc(PL_curcop);
1109             PL_parser->linestart = s+1;
1110         }
1111     PL_parser->bufptr = ptr;
1112 }
1113
1114 /*
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>.  The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately.  I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1122
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it.  However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer fof
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1130
1131 =cut
1132 */
1133
1134 void
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1136 {
1137     char *buf;
1138     STRLEN discard_len;
1139     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140     buf = SvPVX(PL_parser->linestr);
1141     if (ptr < buf)
1142         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143     if (ptr == buf)
1144         return;
1145     if (ptr > PL_parser->bufptr)
1146         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147     discard_len = ptr - buf;
1148     if (PL_parser->oldbufptr < ptr)
1149         PL_parser->oldbufptr = ptr;
1150     if (PL_parser->oldoldbufptr < ptr)
1151         PL_parser->oldoldbufptr = ptr;
1152     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153         PL_parser->last_uni = NULL;
1154     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155         PL_parser->last_lop = NULL;
1156     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158     PL_parser->bufend -= discard_len;
1159     PL_parser->bufptr -= discard_len;
1160     PL_parser->oldbufptr -= discard_len;
1161     PL_parser->oldoldbufptr -= discard_len;
1162     if (PL_parser->last_uni)
1163         PL_parser->last_uni -= discard_len;
1164     if (PL_parser->last_lop)
1165         PL_parser->last_lop -= discard_len;
1166 }
1167
1168 /*
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more.  It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1176
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded.  If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1183
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1186
1187 =cut
1188 */
1189
1190 #define LEX_FAKE_EOF 0x80000000
1191
1192 bool
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1194 {
1195     SV *linestr;
1196     char *buf;
1197     STRLEN old_bufend_pos, new_bufend_pos;
1198     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200     bool got_some_for_debugger = 0;
1201     bool got_some;
1202     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1204 #ifdef PERL_MAD
1205     flags |= LEX_KEEP_PREVIOUS;
1206 #endif /* PERL_MAD */
1207     linestr = PL_parser->linestr;
1208     buf = SvPVX(linestr);
1209     if (!(flags & LEX_KEEP_PREVIOUS) &&
1210             PL_parser->bufptr == PL_parser->bufend) {
1211         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1212         linestart_pos = 0;
1213         if (PL_parser->last_uni != PL_parser->bufend)
1214             PL_parser->last_uni = NULL;
1215         if (PL_parser->last_lop != PL_parser->bufend)
1216             PL_parser->last_lop = NULL;
1217         last_uni_pos = last_lop_pos = 0;
1218         *buf = 0;
1219         SvCUR(linestr) = 0;
1220     } else {
1221         old_bufend_pos = PL_parser->bufend - buf;
1222         bufptr_pos = PL_parser->bufptr - buf;
1223         oldbufptr_pos = PL_parser->oldbufptr - buf;
1224         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1225         linestart_pos = PL_parser->linestart - buf;
1226         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1227         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1228     }
1229     if (flags & LEX_FAKE_EOF) {
1230         goto eof;
1231     } else if (!PL_parser->rsfp) {
1232         got_some = 0;
1233     } else if (filter_gets(linestr, old_bufend_pos)) {
1234         got_some = 1;
1235         got_some_for_debugger = 1;
1236     } else {
1237         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1238             sv_setpvs(linestr, "");
1239         eof:
1240         /* End of real input.  Close filehandle (unless it was STDIN),
1241          * then add implicit termination.
1242          */
1243         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1244             PerlIO_clearerr(PL_parser->rsfp);
1245         else if (PL_parser->rsfp)
1246             (void)PerlIO_close(PL_parser->rsfp);
1247         PL_parser->rsfp = NULL;
1248         PL_doextract = FALSE;
1249 #ifdef PERL_MAD
1250         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1251             PL_faketokens = 1;
1252 #endif
1253         if (!PL_in_eval && PL_minus_p) {
1254             sv_catpvs(linestr,
1255                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1256             PL_minus_n = PL_minus_p = 0;
1257         } else if (!PL_in_eval && PL_minus_n) {
1258             sv_catpvs(linestr, /*{*/";}");
1259             PL_minus_n = 0;
1260         } else
1261             sv_catpvs(linestr, ";");
1262         got_some = 1;
1263     }
1264     buf = SvPVX(linestr);
1265     new_bufend_pos = SvCUR(linestr);
1266     PL_parser->bufend = buf + new_bufend_pos;
1267     PL_parser->bufptr = buf + bufptr_pos;
1268     PL_parser->oldbufptr = buf + oldbufptr_pos;
1269     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1270     PL_parser->linestart = buf + linestart_pos;
1271     if (PL_parser->last_uni)
1272         PL_parser->last_uni = buf + last_uni_pos;
1273     if (PL_parser->last_lop)
1274         PL_parser->last_lop = buf + last_lop_pos;
1275     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1276             PL_curstash != PL_debstash) {
1277         /* debugger active and we're not compiling the debugger code,
1278          * so store the line into the debugger's array of lines
1279          */
1280         update_debugger_info(NULL, buf+old_bufend_pos,
1281             new_bufend_pos-old_bufend_pos);
1282     }
1283     return got_some;
1284 }
1285
1286 /*
1287 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1288
1289 Looks ahead one (Unicode) character in the text currently being lexed.
1290 Returns the codepoint (unsigned integer value) of the next character,
1291 or -1 if lexing has reached the end of the input text.  To consume the
1292 peeked character, use L</lex_read_unichar>.
1293
1294 If the next character is in (or extends into) the next chunk of input
1295 text, the next chunk will be read in.  Normally the current chunk will be
1296 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1297 then the current chunk will not be discarded.
1298
1299 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1300 is encountered, an exception is generated.
1301
1302 =cut
1303 */
1304
1305 I32
1306 Perl_lex_peek_unichar(pTHX_ U32 flags)
1307 {
1308     char *s, *bufend;
1309     if (flags & ~(LEX_KEEP_PREVIOUS))
1310         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1311     s = PL_parser->bufptr;
1312     bufend = PL_parser->bufend;
1313     if (UTF) {
1314         U8 head;
1315         I32 unichar;
1316         STRLEN len, retlen;
1317         if (s == bufend) {
1318             if (!lex_next_chunk(flags))
1319                 return -1;
1320             s = PL_parser->bufptr;
1321             bufend = PL_parser->bufend;
1322         }
1323         head = (U8)*s;
1324         if (!(head & 0x80))
1325             return head;
1326         if (head & 0x40) {
1327             len = PL_utf8skip[head];
1328             while ((STRLEN)(bufend-s) < len) {
1329                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1330                     break;
1331                 s = PL_parser->bufptr;
1332                 bufend = PL_parser->bufend;
1333             }
1334         }
1335         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1336         if (retlen == (STRLEN)-1) {
1337             /* malformed UTF-8 */
1338             ENTER;
1339             SAVESPTR(PL_warnhook);
1340             PL_warnhook = PERL_WARNHOOK_FATAL;
1341             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1342             LEAVE;
1343         }
1344         return unichar;
1345     } else {
1346         if (s == bufend) {
1347             if (!lex_next_chunk(flags))
1348                 return -1;
1349             s = PL_parser->bufptr;
1350         }
1351         return (U8)*s;
1352     }
1353 }
1354
1355 /*
1356 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1357
1358 Reads the next (Unicode) character in the text currently being lexed.
1359 Returns the codepoint (unsigned integer value) of the character read,
1360 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1361 if lexing has reached the end of the input text.  To non-destructively
1362 examine the next character, use L</lex_peek_unichar> instead.
1363
1364 If the next character is in (or extends into) the next chunk of input
1365 text, the next chunk will be read in.  Normally the current chunk will be
1366 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1367 then the current chunk will not be discarded.
1368
1369 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1370 is encountered, an exception is generated.
1371
1372 =cut
1373 */
1374
1375 I32
1376 Perl_lex_read_unichar(pTHX_ U32 flags)
1377 {
1378     I32 c;
1379     if (flags & ~(LEX_KEEP_PREVIOUS))
1380         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1381     c = lex_peek_unichar(flags);
1382     if (c != -1) {
1383         if (c == '\n')
1384             CopLINE_inc(PL_curcop);
1385         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1386     }
1387     return c;
1388 }
1389
1390 /*
1391 =for apidoc Amx|void|lex_read_space|U32 flags
1392
1393 Reads optional spaces, in Perl style, in the text currently being
1394 lexed.  The spaces may include ordinary whitespace characters and
1395 Perl-style comments.  C<#line> directives are processed if encountered.
1396 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1397 at a non-space character (or the end of the input text).
1398
1399 If spaces extend into the next chunk of input text, the next chunk will
1400 be read in.  Normally the current chunk will be discarded at the same
1401 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1402 chunk will not be discarded.
1403
1404 =cut
1405 */
1406
1407 void
1408 Perl_lex_read_space(pTHX_ U32 flags)
1409 {
1410     char *s, *bufend;
1411     bool need_incline = 0;
1412     if (flags & ~(LEX_KEEP_PREVIOUS))
1413         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1414 #ifdef PERL_MAD
1415     if (PL_skipwhite) {
1416         sv_free(PL_skipwhite);
1417         PL_skipwhite = NULL;
1418     }
1419     if (PL_madskills)
1420         PL_skipwhite = newSVpvs("");
1421 #endif /* PERL_MAD */
1422     s = PL_parser->bufptr;
1423     bufend = PL_parser->bufend;
1424     while (1) {
1425         char c = *s;
1426         if (c == '#') {
1427             do {
1428                 c = *++s;
1429             } while (!(c == '\n' || (c == 0 && s == bufend)));
1430         } else if (c == '\n') {
1431             s++;
1432             PL_parser->linestart = s;
1433             if (s == bufend)
1434                 need_incline = 1;
1435             else
1436                 incline(s);
1437         } else if (isSPACE(c)) {
1438             s++;
1439         } else if (c == 0 && s == bufend) {
1440             bool got_more;
1441 #ifdef PERL_MAD
1442             if (PL_madskills)
1443                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1444 #endif /* PERL_MAD */
1445             PL_parser->bufptr = s;
1446             CopLINE_inc(PL_curcop);
1447             got_more = lex_next_chunk(flags);
1448             CopLINE_dec(PL_curcop);
1449             s = PL_parser->bufptr;
1450             bufend = PL_parser->bufend;
1451             if (!got_more)
1452                 break;
1453             if (need_incline && PL_parser->rsfp) {
1454                 incline(s);
1455                 need_incline = 0;
1456             }
1457         } else {
1458             break;
1459         }
1460     }
1461 #ifdef PERL_MAD
1462     if (PL_madskills)
1463         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1464 #endif /* PERL_MAD */
1465     PL_parser->bufptr = s;
1466 }
1467
1468 /*
1469  * S_incline
1470  * This subroutine has nothing to do with tilting, whether at windmills
1471  * or pinball tables.  Its name is short for "increment line".  It
1472  * increments the current line number in CopLINE(PL_curcop) and checks
1473  * to see whether the line starts with a comment of the form
1474  *    # line 500 "foo.pm"
1475  * If so, it sets the current line number and file to the values in the comment.
1476  */
1477
1478 STATIC void
1479 S_incline(pTHX_ const char *s)
1480 {
1481     dVAR;
1482     const char *t;
1483     const char *n;
1484     const char *e;
1485
1486     PERL_ARGS_ASSERT_INCLINE;
1487
1488     CopLINE_inc(PL_curcop);
1489     if (*s++ != '#')
1490         return;
1491     while (SPACE_OR_TAB(*s))
1492         s++;
1493     if (strnEQ(s, "line", 4))
1494         s += 4;
1495     else
1496         return;
1497     if (SPACE_OR_TAB(*s))
1498         s++;
1499     else
1500         return;
1501     while (SPACE_OR_TAB(*s))
1502         s++;
1503     if (!isDIGIT(*s))
1504         return;
1505
1506     n = s;
1507     while (isDIGIT(*s))
1508         s++;
1509     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1510         return;
1511     while (SPACE_OR_TAB(*s))
1512         s++;
1513     if (*s == '"' && (t = strchr(s+1, '"'))) {
1514         s++;
1515         e = t + 1;
1516     }
1517     else {
1518         t = s;
1519         while (!isSPACE(*t))
1520             t++;
1521         e = t;
1522     }
1523     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1524         e++;
1525     if (*e != '\n' && *e != '\0')
1526         return;         /* false alarm */
1527
1528     if (t - s > 0) {
1529         const STRLEN len = t - s;
1530 #ifndef USE_ITHREADS
1531         SV *const temp_sv = CopFILESV(PL_curcop);
1532         const char *cf;
1533         STRLEN tmplen;
1534
1535         if (temp_sv) {
1536             cf = SvPVX(temp_sv);
1537             tmplen = SvCUR(temp_sv);
1538         } else {
1539             cf = NULL;
1540             tmplen = 0;
1541         }
1542
1543         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1544             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1545              * to *{"::_<newfilename"} */
1546             /* However, the long form of evals is only turned on by the
1547                debugger - usually they're "(eval %lu)" */
1548             char smallbuf[128];
1549             char *tmpbuf;
1550             GV **gvp;
1551             STRLEN tmplen2 = len;
1552             if (tmplen + 2 <= sizeof smallbuf)
1553                 tmpbuf = smallbuf;
1554             else
1555                 Newx(tmpbuf, tmplen + 2, char);
1556             tmpbuf[0] = '_';
1557             tmpbuf[1] = '<';
1558             memcpy(tmpbuf + 2, cf, tmplen);
1559             tmplen += 2;
1560             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1561             if (gvp) {
1562                 char *tmpbuf2;
1563                 GV *gv2;
1564
1565                 if (tmplen2 + 2 <= sizeof smallbuf)
1566                     tmpbuf2 = smallbuf;
1567                 else
1568                     Newx(tmpbuf2, tmplen2 + 2, char);
1569
1570                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1571                     /* Either they malloc'd it, or we malloc'd it,
1572                        so no prefix is present in ours.  */
1573                     tmpbuf2[0] = '_';
1574                     tmpbuf2[1] = '<';
1575                 }
1576
1577                 memcpy(tmpbuf2 + 2, s, tmplen2);
1578                 tmplen2 += 2;
1579
1580                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1581                 if (!isGV(gv2)) {
1582                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1583                     /* adjust ${"::_<newfilename"} to store the new file name */
1584                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1585                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1586                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1587                 }
1588
1589                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1590             }
1591             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1592         }
1593 #endif
1594         CopFILE_free(PL_curcop);
1595         CopFILE_setn(PL_curcop, s, len);
1596     }
1597     CopLINE_set(PL_curcop, atoi(n)-1);
1598 }
1599
1600 #ifdef PERL_MAD
1601 /* skip space before PL_thistoken */
1602
1603 STATIC char *
1604 S_skipspace0(pTHX_ register char *s)
1605 {
1606     PERL_ARGS_ASSERT_SKIPSPACE0;
1607
1608     s = skipspace(s);
1609     if (!PL_madskills)
1610         return s;
1611     if (PL_skipwhite) {
1612         if (!PL_thiswhite)
1613             PL_thiswhite = newSVpvs("");
1614         sv_catsv(PL_thiswhite, PL_skipwhite);
1615         sv_free(PL_skipwhite);
1616         PL_skipwhite = 0;
1617     }
1618     PL_realtokenstart = s - SvPVX(PL_linestr);
1619     return s;
1620 }
1621
1622 /* skip space after PL_thistoken */
1623
1624 STATIC char *
1625 S_skipspace1(pTHX_ register char *s)
1626 {
1627     const char *start = s;
1628     I32 startoff = start - SvPVX(PL_linestr);
1629
1630     PERL_ARGS_ASSERT_SKIPSPACE1;
1631
1632     s = skipspace(s);
1633     if (!PL_madskills)
1634         return s;
1635     start = SvPVX(PL_linestr) + startoff;
1636     if (!PL_thistoken && PL_realtokenstart >= 0) {
1637         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1638         PL_thistoken = newSVpvn(tstart, start - tstart);
1639     }
1640     PL_realtokenstart = -1;
1641     if (PL_skipwhite) {
1642         if (!PL_nextwhite)
1643             PL_nextwhite = newSVpvs("");
1644         sv_catsv(PL_nextwhite, PL_skipwhite);
1645         sv_free(PL_skipwhite);
1646         PL_skipwhite = 0;
1647     }
1648     return s;
1649 }
1650
1651 STATIC char *
1652 S_skipspace2(pTHX_ register char *s, SV **svp)
1653 {
1654     char *start;
1655     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1656     const I32 startoff = s - SvPVX(PL_linestr);
1657
1658     PERL_ARGS_ASSERT_SKIPSPACE2;
1659
1660     s = skipspace(s);
1661     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1662     if (!PL_madskills || !svp)
1663         return s;
1664     start = SvPVX(PL_linestr) + startoff;
1665     if (!PL_thistoken && PL_realtokenstart >= 0) {
1666         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1667         PL_thistoken = newSVpvn(tstart, start - tstart);
1668         PL_realtokenstart = -1;
1669     }
1670     if (PL_skipwhite) {
1671         if (!*svp)
1672             *svp = newSVpvs("");
1673         sv_setsv(*svp, PL_skipwhite);
1674         sv_free(PL_skipwhite);
1675         PL_skipwhite = 0;
1676     }
1677
1678     return s;
1679 }
1680 #endif
1681
1682 STATIC void
1683 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1684 {
1685     AV *av = CopFILEAVx(PL_curcop);
1686     if (av) {
1687         SV * const sv = newSV_type(SVt_PVMG);
1688         if (orig_sv)
1689             sv_setsv(sv, orig_sv);
1690         else
1691             sv_setpvn(sv, buf, len);
1692         (void)SvIOK_on(sv);
1693         SvIV_set(sv, 0);
1694         av_store(av, (I32)CopLINE(PL_curcop), sv);
1695     }
1696 }
1697
1698 /*
1699  * S_skipspace
1700  * Called to gobble the appropriate amount and type of whitespace.
1701  * Skips comments as well.
1702  */
1703
1704 STATIC char *
1705 S_skipspace(pTHX_ register char *s)
1706 {
1707 #ifdef PERL_MAD
1708     char *start = s;
1709 #endif /* PERL_MAD */
1710     PERL_ARGS_ASSERT_SKIPSPACE;
1711 #ifdef PERL_MAD
1712     if (PL_skipwhite) {
1713         sv_free(PL_skipwhite);
1714         PL_skipwhite = NULL;
1715     }
1716 #endif /* PERL_MAD */
1717     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1718         while (s < PL_bufend && SPACE_OR_TAB(*s))
1719             s++;
1720     } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1721         while (isSPACE(*s) && *s != '\n')
1722             s++;
1723         if (*s == '#') {
1724             do {
1725                 s++;
1726             } while (s != PL_bufend && *s != '\n');
1727         }
1728         if (*s == '\n')
1729             s++;
1730     } else {
1731         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1732         PL_bufptr = s;
1733         lex_read_space(LEX_KEEP_PREVIOUS);
1734         s = PL_bufptr;
1735         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1736         if (PL_linestart > PL_bufptr)
1737             PL_bufptr = PL_linestart;
1738         return s;
1739     }
1740 #ifdef PERL_MAD
1741     if (PL_madskills)
1742         PL_skipwhite = newSVpvn(start, s-start);
1743 #endif /* PERL_MAD */
1744     return s;
1745 }
1746
1747 /*
1748  * S_check_uni
1749  * Check the unary operators to ensure there's no ambiguity in how they're
1750  * used.  An ambiguous piece of code would be:
1751  *     rand + 5
1752  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1753  * the +5 is its argument.
1754  */
1755
1756 STATIC void
1757 S_check_uni(pTHX)
1758 {
1759     dVAR;
1760     const char *s;
1761     const char *t;
1762
1763     if (PL_oldoldbufptr != PL_last_uni)
1764         return;
1765     while (isSPACE(*PL_last_uni))
1766         PL_last_uni++;
1767     s = PL_last_uni;
1768     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1769         s++;
1770     if ((t = strchr(s, '(')) && t < PL_bufptr)
1771         return;
1772
1773     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1774                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1775                      (int)(s - PL_last_uni), PL_last_uni);
1776 }
1777
1778 /*
1779  * LOP : macro to build a list operator.  Its behaviour has been replaced
1780  * with a subroutine, S_lop() for which LOP is just another name.
1781  */
1782
1783 #define LOP(f,x) return lop(f,x,s)
1784
1785 /*
1786  * S_lop
1787  * Build a list operator (or something that might be one).  The rules:
1788  *  - if we have a next token, then it's a list operator [why?]
1789  *  - if the next thing is an opening paren, then it's a function
1790  *  - else it's a list operator
1791  */
1792
1793 STATIC I32
1794 S_lop(pTHX_ I32 f, int x, char *s)
1795 {
1796     dVAR;
1797
1798     PERL_ARGS_ASSERT_LOP;
1799
1800     pl_yylval.ival = f;
1801     CLINE;
1802     PL_expect = x;
1803     PL_bufptr = s;
1804     PL_last_lop = PL_oldbufptr;
1805     PL_last_lop_op = (OPCODE)f;
1806 #ifdef PERL_MAD
1807     if (PL_lasttoke)
1808         return REPORT(LSTOP);
1809 #else
1810     if (PL_nexttoke)
1811         return REPORT(LSTOP);
1812 #endif
1813     if (*s == '(')
1814         return REPORT(FUNC);
1815     s = PEEKSPACE(s);
1816     if (*s == '(')
1817         return REPORT(FUNC);
1818     else
1819         return REPORT(LSTOP);
1820 }
1821
1822 #ifdef PERL_MAD
1823  /*
1824  * S_start_force
1825  * Sets up for an eventual force_next().  start_force(0) basically does
1826  * an unshift, while start_force(-1) does a push.  yylex removes items
1827  * on the "pop" end.
1828  */
1829
1830 STATIC void
1831 S_start_force(pTHX_ int where)
1832 {
1833     int i;
1834
1835     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1836         where = PL_lasttoke;
1837     assert(PL_curforce < 0 || PL_curforce == where);
1838     if (PL_curforce != where) {
1839         for (i = PL_lasttoke; i > where; --i) {
1840             PL_nexttoke[i] = PL_nexttoke[i-1];
1841         }
1842         PL_lasttoke++;
1843     }
1844     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1845         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1846     PL_curforce = where;
1847     if (PL_nextwhite) {
1848         if (PL_madskills)
1849             curmad('^', newSVpvs(""));
1850         CURMAD('_', PL_nextwhite);
1851     }
1852 }
1853
1854 STATIC void
1855 S_curmad(pTHX_ char slot, SV *sv)
1856 {
1857     MADPROP **where;
1858
1859     if (!sv)
1860         return;
1861     if (PL_curforce < 0)
1862         where = &PL_thismad;
1863     else
1864         where = &PL_nexttoke[PL_curforce].next_mad;
1865
1866     if (PL_faketokens)
1867         sv_setpvs(sv, "");
1868     else {
1869         if (!IN_BYTES) {
1870             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1871                 SvUTF8_on(sv);
1872             else if (PL_encoding) {
1873                 sv_recode_to_utf8(sv, PL_encoding);
1874             }
1875         }
1876     }
1877
1878     /* keep a slot open for the head of the list? */
1879     if (slot != '_' && *where && (*where)->mad_key == '^') {
1880         (*where)->mad_key = slot;
1881         sv_free(MUTABLE_SV(((*where)->mad_val)));
1882         (*where)->mad_val = (void*)sv;
1883     }
1884     else
1885         addmad(newMADsv(slot, sv), where, 0);
1886 }
1887 #else
1888 #  define start_force(where)    NOOP
1889 #  define curmad(slot, sv)      NOOP
1890 #endif
1891
1892 /*
1893  * S_force_next
1894  * When the lexer realizes it knows the next token (for instance,
1895  * it is reordering tokens for the parser) then it can call S_force_next
1896  * to know what token to return the next time the lexer is called.  Caller
1897  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1898  * and possibly PL_expect to ensure the lexer handles the token correctly.
1899  */
1900
1901 STATIC void
1902 S_force_next(pTHX_ I32 type)
1903 {
1904     dVAR;
1905 #ifdef DEBUGGING
1906     if (DEBUG_T_TEST) {
1907         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1908         tokereport(type, &NEXTVAL_NEXTTOKE);
1909     }
1910 #endif
1911 #ifdef PERL_MAD
1912     if (PL_curforce < 0)
1913         start_force(PL_lasttoke);
1914     PL_nexttoke[PL_curforce].next_type = type;
1915     if (PL_lex_state != LEX_KNOWNEXT)
1916         PL_lex_defer = PL_lex_state;
1917     PL_lex_state = LEX_KNOWNEXT;
1918     PL_lex_expect = PL_expect;
1919     PL_curforce = -1;
1920 #else
1921     PL_nexttype[PL_nexttoke] = type;
1922     PL_nexttoke++;
1923     if (PL_lex_state != LEX_KNOWNEXT) {
1924         PL_lex_defer = PL_lex_state;
1925         PL_lex_expect = PL_expect;
1926         PL_lex_state = LEX_KNOWNEXT;
1927     }
1928 #endif
1929 }
1930
1931 STATIC SV *
1932 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1933 {
1934     dVAR;
1935     SV * const sv = newSVpvn_utf8(start, len,
1936                                   !IN_BYTES
1937                                   && UTF
1938                                   && !is_ascii_string((const U8*)start, len)
1939                                   && is_utf8_string((const U8*)start, len));
1940     return sv;
1941 }
1942
1943 /*
1944  * S_force_word
1945  * When the lexer knows the next thing is a word (for instance, it has
1946  * just seen -> and it knows that the next char is a word char, then
1947  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1948  * lookahead.
1949  *
1950  * Arguments:
1951  *   char *start : buffer position (must be within PL_linestr)
1952  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1953  *   int check_keyword : if true, Perl checks to make sure the word isn't
1954  *       a keyword (do this if the word is a label, e.g. goto FOO)
1955  *   int allow_pack : if true, : characters will also be allowed (require,
1956  *       use, etc. do this)
1957  *   int allow_initial_tick : used by the "sub" lexer only.
1958  */
1959
1960 STATIC char *
1961 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1962 {
1963     dVAR;
1964     register char *s;
1965     STRLEN len;
1966
1967     PERL_ARGS_ASSERT_FORCE_WORD;
1968
1969     start = SKIPSPACE1(start);
1970     s = start;
1971     if (isIDFIRST_lazy_if(s,UTF) ||
1972         (allow_pack && *s == ':') ||
1973         (allow_initial_tick && *s == '\'') )
1974     {
1975         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1976         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1977             return start;
1978         start_force(PL_curforce);
1979         if (PL_madskills)
1980             curmad('X', newSVpvn(start,s-start));
1981         if (token == METHOD) {
1982             s = SKIPSPACE1(s);
1983             if (*s == '(')
1984                 PL_expect = XTERM;
1985             else {
1986                 PL_expect = XOPERATOR;
1987             }
1988         }
1989         if (PL_madskills)
1990             curmad('g', newSVpvs( "forced" ));
1991         NEXTVAL_NEXTTOKE.opval
1992             = (OP*)newSVOP(OP_CONST,0,
1993                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1994         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1995         force_next(token);
1996     }
1997     return s;
1998 }
1999
2000 /*
2001  * S_force_ident
2002  * Called when the lexer wants $foo *foo &foo etc, but the program
2003  * text only contains the "foo" portion.  The first argument is a pointer
2004  * to the "foo", and the second argument is the type symbol to prefix.
2005  * Forces the next token to be a "WORD".
2006  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2007  */
2008
2009 STATIC void
2010 S_force_ident(pTHX_ register const char *s, int kind)
2011 {
2012     dVAR;
2013
2014     PERL_ARGS_ASSERT_FORCE_IDENT;
2015
2016     if (*s) {
2017         const STRLEN len = strlen(s);
2018         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2019         start_force(PL_curforce);
2020         NEXTVAL_NEXTTOKE.opval = o;
2021         force_next(WORD);
2022         if (kind) {
2023             o->op_private = OPpCONST_ENTERED;
2024             /* XXX see note in pp_entereval() for why we forgo typo
2025                warnings if the symbol must be introduced in an eval.
2026                GSAR 96-10-12 */
2027             gv_fetchpvn_flags(s, len,
2028                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2029                               : GV_ADD,
2030                               kind == '$' ? SVt_PV :
2031                               kind == '@' ? SVt_PVAV :
2032                               kind == '%' ? SVt_PVHV :
2033                               SVt_PVGV
2034                               );
2035         }
2036     }
2037 }
2038
2039 NV
2040 Perl_str_to_version(pTHX_ SV *sv)
2041 {
2042     NV retval = 0.0;
2043     NV nshift = 1.0;
2044     STRLEN len;
2045     const char *start = SvPV_const(sv,len);
2046     const char * const end = start + len;
2047     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2048
2049     PERL_ARGS_ASSERT_STR_TO_VERSION;
2050
2051     while (start < end) {
2052         STRLEN skip;
2053         UV n;
2054         if (utf)
2055             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2056         else {
2057             n = *(U8*)start;
2058             skip = 1;
2059         }
2060         retval += ((NV)n)/nshift;
2061         start += skip;
2062         nshift *= 1000;
2063     }
2064     return retval;
2065 }
2066
2067 /*
2068  * S_force_version
2069  * Forces the next token to be a version number.
2070  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071  * and if "guessing" is TRUE, then no new token is created (and the caller
2072  * must use an alternative parsing method).
2073  */
2074
2075 STATIC char *
2076 S_force_version(pTHX_ char *s, int guessing)
2077 {
2078     dVAR;
2079     OP *version = NULL;
2080     char *d;
2081 #ifdef PERL_MAD
2082     I32 startoff = s - SvPVX(PL_linestr);
2083 #endif
2084
2085     PERL_ARGS_ASSERT_FORCE_VERSION;
2086
2087     s = SKIPSPACE1(s);
2088
2089     d = s;
2090     if (*d == 'v')
2091         d++;
2092     if (isDIGIT(*d)) {
2093         while (isDIGIT(*d) || *d == '_' || *d == '.')
2094             d++;
2095 #ifdef PERL_MAD
2096         if (PL_madskills) {
2097             start_force(PL_curforce);
2098             curmad('X', newSVpvn(s,d-s));
2099         }
2100 #endif
2101         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2102             SV *ver;
2103             s = scan_num(s, &pl_yylval);
2104             version = pl_yylval.opval;
2105             ver = cSVOPx(version)->op_sv;
2106             if (SvPOK(ver) && !SvNIOK(ver)) {
2107                 SvUPGRADE(ver, SVt_PVNV);
2108                 SvNV_set(ver, str_to_version(ver));
2109                 SvNOK_on(ver);          /* hint that it is a version */
2110             }
2111         }
2112         else if (guessing) {
2113 #ifdef PERL_MAD
2114             if (PL_madskills) {
2115                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2116                 PL_nextwhite = 0;
2117                 s = SvPVX(PL_linestr) + startoff;
2118             }
2119 #endif
2120             return s;
2121         }
2122     }
2123
2124 #ifdef PERL_MAD
2125     if (PL_madskills && !version) {
2126         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2127         PL_nextwhite = 0;
2128         s = SvPVX(PL_linestr) + startoff;
2129     }
2130 #endif
2131     /* NOTE: The parser sees the package name and the VERSION swapped */
2132     start_force(PL_curforce);
2133     NEXTVAL_NEXTTOKE.opval = version;
2134     force_next(WORD);
2135
2136     return s;
2137 }
2138
2139 /*
2140  * S_tokeq
2141  * Tokenize a quoted string passed in as an SV.  It finds the next
2142  * chunk, up to end of string or a backslash.  It may make a new
2143  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2144  * turns \\ into \.
2145  */
2146
2147 STATIC SV *
2148 S_tokeq(pTHX_ SV *sv)
2149 {
2150     dVAR;
2151     register char *s;
2152     register char *send;
2153     register char *d;
2154     STRLEN len = 0;
2155     SV *pv = sv;
2156
2157     PERL_ARGS_ASSERT_TOKEQ;
2158
2159     if (!SvLEN(sv))
2160         goto finish;
2161
2162     s = SvPV_force(sv, len);
2163     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2164         goto finish;
2165     send = s + len;
2166     while (s < send && *s != '\\')
2167         s++;
2168     if (s == send)
2169         goto finish;
2170     d = s;
2171     if ( PL_hints & HINT_NEW_STRING ) {
2172         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2173     }
2174     while (s < send) {
2175         if (*s == '\\') {
2176             if (s + 1 < send && (s[1] == '\\'))
2177                 s++;            /* all that, just for this */
2178         }
2179         *d++ = *s++;
2180     }
2181     *d = '\0';
2182     SvCUR_set(sv, d - SvPVX_const(sv));
2183   finish:
2184     if ( PL_hints & HINT_NEW_STRING )
2185        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2186     return sv;
2187 }
2188
2189 /*
2190  * Now come three functions related to double-quote context,
2191  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2192  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2193  * interact with PL_lex_state, and create fake ( ... ) argument lists
2194  * to handle functions and concatenation.
2195  * They assume that whoever calls them will be setting up a fake
2196  * join call, because each subthing puts a ',' after it.  This lets
2197  *   "lower \luPpEr"
2198  * become
2199  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2200  *
2201  * (I'm not sure whether the spurious commas at the end of lcfirst's
2202  * arguments and join's arguments are created or not).
2203  */
2204
2205 /*
2206  * S_sublex_start
2207  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2208  *
2209  * Pattern matching will set PL_lex_op to the pattern-matching op to
2210  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2211  *
2212  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2213  *
2214  * Everything else becomes a FUNC.
2215  *
2216  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2217  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2218  * call to S_sublex_push().
2219  */
2220
2221 STATIC I32
2222 S_sublex_start(pTHX)
2223 {
2224     dVAR;
2225     register const I32 op_type = pl_yylval.ival;
2226
2227     if (op_type == OP_NULL) {
2228         pl_yylval.opval = PL_lex_op;
2229         PL_lex_op = NULL;
2230         return THING;
2231     }
2232     if (op_type == OP_CONST || op_type == OP_READLINE) {
2233         SV *sv = tokeq(PL_lex_stuff);
2234
2235         if (SvTYPE(sv) == SVt_PVIV) {
2236             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2237             STRLEN len;
2238             const char * const p = SvPV_const(sv, len);
2239             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2240             SvREFCNT_dec(sv);
2241             sv = nsv;
2242         }
2243         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2244         PL_lex_stuff = NULL;
2245         /* Allow <FH> // "foo" */
2246         if (op_type == OP_READLINE)
2247             PL_expect = XTERMORDORDOR;
2248         return THING;
2249     }
2250     else if (op_type == OP_BACKTICK && PL_lex_op) {
2251         /* readpipe() vas overriden */
2252         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2253         pl_yylval.opval = PL_lex_op;
2254         PL_lex_op = NULL;
2255         PL_lex_stuff = NULL;
2256         return THING;
2257     }
2258
2259     PL_sublex_info.super_state = PL_lex_state;
2260     PL_sublex_info.sub_inwhat = (U16)op_type;
2261     PL_sublex_info.sub_op = PL_lex_op;
2262     PL_lex_state = LEX_INTERPPUSH;
2263
2264     PL_expect = XTERM;
2265     if (PL_lex_op) {
2266         pl_yylval.opval = PL_lex_op;
2267         PL_lex_op = NULL;
2268         return PMFUNC;
2269     }
2270     else
2271         return FUNC;
2272 }
2273
2274 /*
2275  * S_sublex_push
2276  * Create a new scope to save the lexing state.  The scope will be
2277  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2278  * to the uc, lc, etc. found before.
2279  * Sets PL_lex_state to LEX_INTERPCONCAT.
2280  */
2281
2282 STATIC I32
2283 S_sublex_push(pTHX)
2284 {
2285     dVAR;
2286     ENTER;
2287
2288     PL_lex_state = PL_sublex_info.super_state;
2289     SAVEBOOL(PL_lex_dojoin);
2290     SAVEI32(PL_lex_brackets);
2291     SAVEI32(PL_lex_casemods);
2292     SAVEI32(PL_lex_starts);
2293     SAVEI8(PL_lex_state);
2294     SAVEVPTR(PL_lex_inpat);
2295     SAVEI16(PL_lex_inwhat);
2296     SAVECOPLINE(PL_curcop);
2297     SAVEPPTR(PL_bufptr);
2298     SAVEPPTR(PL_bufend);
2299     SAVEPPTR(PL_oldbufptr);
2300     SAVEPPTR(PL_oldoldbufptr);
2301     SAVEPPTR(PL_last_lop);
2302     SAVEPPTR(PL_last_uni);
2303     SAVEPPTR(PL_linestart);
2304     SAVESPTR(PL_linestr);
2305     SAVEGENERICPV(PL_lex_brackstack);
2306     SAVEGENERICPV(PL_lex_casestack);
2307
2308     PL_linestr = PL_lex_stuff;
2309     PL_lex_stuff = NULL;
2310
2311     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2312         = SvPVX(PL_linestr);
2313     PL_bufend += SvCUR(PL_linestr);
2314     PL_last_lop = PL_last_uni = NULL;
2315     SAVEFREESV(PL_linestr);
2316
2317     PL_lex_dojoin = FALSE;
2318     PL_lex_brackets = 0;
2319     Newx(PL_lex_brackstack, 120, char);
2320     Newx(PL_lex_casestack, 12, char);
2321     PL_lex_casemods = 0;
2322     *PL_lex_casestack = '\0';
2323     PL_lex_starts = 0;
2324     PL_lex_state = LEX_INTERPCONCAT;
2325     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2326
2327     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2328     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2329         PL_lex_inpat = PL_sublex_info.sub_op;
2330     else
2331         PL_lex_inpat = NULL;
2332
2333     return '(';
2334 }
2335
2336 /*
2337  * S_sublex_done
2338  * Restores lexer state after a S_sublex_push.
2339  */
2340
2341 STATIC I32
2342 S_sublex_done(pTHX)
2343 {
2344     dVAR;
2345     if (!PL_lex_starts++) {
2346         SV * const sv = newSVpvs("");
2347         if (SvUTF8(PL_linestr))
2348             SvUTF8_on(sv);
2349         PL_expect = XOPERATOR;
2350         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2351         return THING;
2352     }
2353
2354     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2355         PL_lex_state = LEX_INTERPCASEMOD;
2356         return yylex();
2357     }
2358
2359     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2360     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2361         PL_linestr = PL_lex_repl;
2362         PL_lex_inpat = 0;
2363         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2364         PL_bufend += SvCUR(PL_linestr);
2365         PL_last_lop = PL_last_uni = NULL;
2366         SAVEFREESV(PL_linestr);
2367         PL_lex_dojoin = FALSE;
2368         PL_lex_brackets = 0;
2369         PL_lex_casemods = 0;
2370         *PL_lex_casestack = '\0';
2371         PL_lex_starts = 0;
2372         if (SvEVALED(PL_lex_repl)) {
2373             PL_lex_state = LEX_INTERPNORMAL;
2374             PL_lex_starts++;
2375             /*  we don't clear PL_lex_repl here, so that we can check later
2376                 whether this is an evalled subst; that means we rely on the
2377                 logic to ensure sublex_done() is called again only via the
2378                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2379         }
2380         else {
2381             PL_lex_state = LEX_INTERPCONCAT;
2382             PL_lex_repl = NULL;
2383         }
2384         return ',';
2385     }
2386     else {
2387 #ifdef PERL_MAD
2388         if (PL_madskills) {
2389             if (PL_thiswhite) {
2390                 if (!PL_endwhite)
2391                     PL_endwhite = newSVpvs("");
2392                 sv_catsv(PL_endwhite, PL_thiswhite);
2393                 PL_thiswhite = 0;
2394             }
2395             if (PL_thistoken)
2396                 sv_setpvs(PL_thistoken,"");
2397             else
2398                 PL_realtokenstart = -1;
2399         }
2400 #endif
2401         LEAVE;
2402         PL_bufend = SvPVX(PL_linestr);
2403         PL_bufend += SvCUR(PL_linestr);
2404         PL_expect = XOPERATOR;
2405         PL_sublex_info.sub_inwhat = 0;
2406         return ')';
2407     }
2408 }
2409
2410 /*
2411   scan_const
2412
2413   Extracts a pattern, double-quoted string, or transliteration.  This
2414   is terrifying code.
2415
2416   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2417   processing a pattern (PL_lex_inpat is true), a transliteration
2418   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2419
2420   Returns a pointer to the character scanned up to. If this is
2421   advanced from the start pointer supplied (i.e. if anything was
2422   successfully parsed), will leave an OP for the substring scanned
2423   in pl_yylval. Caller must intuit reason for not parsing further
2424   by looking at the next characters herself.
2425
2426   In patterns:
2427     backslashes:
2428       double-quoted style: \r and \n
2429       regexp special ones: \D \s
2430       constants: \x31
2431       backrefs: \1
2432       case and quoting: \U \Q \E
2433     stops on @ and $, but not for $ as tail anchor
2434
2435   In transliterations:
2436     characters are VERY literal, except for - not at the start or end
2437     of the string, which indicates a range. If the range is in bytes,
2438     scan_const expands the range to the full set of intermediate
2439     characters. If the range is in utf8, the hyphen is replaced with
2440     a certain range mark which will be handled by pmtrans() in op.c.
2441
2442   In double-quoted strings:
2443     backslashes:
2444       double-quoted style: \r and \n
2445       constants: \x31
2446       deprecated backrefs: \1 (in substitution replacements)
2447       case and quoting: \U \Q \E
2448     stops on @ and $
2449
2450   scan_const does *not* construct ops to handle interpolated strings.
2451   It stops processing as soon as it finds an embedded $ or @ variable
2452   and leaves it to the caller to work out what's going on.
2453
2454   embedded arrays (whether in pattern or not) could be:
2455       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2456
2457   $ in double-quoted strings must be the symbol of an embedded scalar.
2458
2459   $ in pattern could be $foo or could be tail anchor.  Assumption:
2460   it's a tail anchor if $ is the last thing in the string, or if it's
2461   followed by one of "()| \r\n\t"
2462
2463   \1 (backreferences) are turned into $1
2464
2465   The structure of the code is
2466       while (there's a character to process) {
2467           handle transliteration ranges
2468           skip regexp comments /(?#comment)/ and codes /(?{code})/
2469           skip #-initiated comments in //x patterns
2470           check for embedded arrays
2471           check for embedded scalars
2472           if (backslash) {
2473               leave intact backslashes from leaveit (below)
2474               deprecate \1 in substitution replacements
2475               handle string-changing backslashes \l \U \Q \E, etc.
2476               switch (what was escaped) {
2477                   handle \- in a transliteration (becomes a literal -)
2478                   handle \132 (octal characters)
2479                   handle \x15 and \x{1234} (hex characters)
2480                   handle \N{name} (named characters)
2481                   handle \cV (control characters)
2482                   handle printf-style backslashes (\f, \r, \n, etc)
2483               } (end switch)
2484               continue
2485           } (end if backslash)
2486           handle regular character
2487     } (end while character to read)
2488
2489 */
2490
2491 STATIC char *
2492 S_scan_const(pTHX_ char *start)
2493 {
2494     dVAR;
2495     register char *send = PL_bufend;            /* end of the constant */
2496     SV *sv = newSV(send - start);               /* sv for the constant.  See
2497                                                    note below on sizing. */
2498     register char *s = start;                   /* start of the constant */
2499     register char *d = SvPVX(sv);               /* destination for copies */
2500     bool dorange = FALSE;                       /* are we in a translit range? */
2501     bool didrange = FALSE;                      /* did we just finish a range? */
2502     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2503     I32  this_utf8 = UTF;                       /* Is the source string assumed
2504                                                    to be UTF8?  But, this can
2505                                                    show as true when the source
2506                                                    isn't utf8, as for example
2507                                                    when it is entirely composed
2508                                                    of hex constants */
2509
2510     /* Note on sizing:  The scanned constant is placed into sv, which is
2511      * initialized by newSV() assuming one byte of output for every byte of
2512      * input.  This routine expects newSV() to allocate an extra byte for a
2513      * trailing NUL, which this routine will append if it gets to the end of
2514      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2515      * CAPITAL LETTER A}), or more output than input if the constant ends up
2516      * recoded to utf8, but each time a construct is found that might increase
2517      * the needed size, SvGROW() is called.  Its size parameter each time is
2518      * based on the best guess estimate at the time, namely the length used so
2519      * far, plus the length the current construct will occupy, plus room for
2520      * the trailing NUL, plus one byte for every input byte still unscanned */
2521
2522     UV uv;
2523 #ifdef EBCDIC
2524     UV literal_endpoint = 0;
2525     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2526 #endif
2527
2528     PERL_ARGS_ASSERT_SCAN_CONST;
2529
2530     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2531         /* If we are doing a trans and we know we want UTF8 set expectation */
2532         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2533         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2534     }
2535
2536
2537     while (s < send || dorange) {
2538         /* get transliterations out of the way (they're most literal) */
2539         if (PL_lex_inwhat == OP_TRANS) {
2540             /* expand a range A-Z to the full set of characters.  AIE! */
2541             if (dorange) {
2542                 I32 i;                          /* current expanded character */
2543                 I32 min;                        /* first character in range */
2544                 I32 max;                        /* last character in range */
2545
2546 #ifdef EBCDIC
2547                 UV uvmax = 0;
2548 #endif
2549
2550                 if (has_utf8
2551 #ifdef EBCDIC
2552                     && !native_range
2553 #endif
2554                     ) {
2555                     char * const c = (char*)utf8_hop((U8*)d, -1);
2556                     char *e = d++;
2557                     while (e-- > c)
2558                         *(e + 1) = *e;
2559                     *c = (char)UTF_TO_NATIVE(0xff);
2560                     /* mark the range as done, and continue */
2561                     dorange = FALSE;
2562                     didrange = TRUE;
2563                     continue;
2564                 }
2565
2566                 i = d - SvPVX_const(sv);                /* remember current offset */
2567 #ifdef EBCDIC
2568                 SvGROW(sv,
2569                        SvLEN(sv) + (has_utf8 ?
2570                                     (512 - UTF_CONTINUATION_MARK +
2571                                      UNISKIP(0x100))
2572                                     : 256));
2573                 /* How many two-byte within 0..255: 128 in UTF-8,
2574                  * 96 in UTF-8-mod. */
2575 #else
2576                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2577 #endif
2578                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2579 #ifdef EBCDIC
2580                 if (has_utf8) {
2581                     int j;
2582                     for (j = 0; j <= 1; j++) {
2583                         char * const c = (char*)utf8_hop((U8*)d, -1);
2584                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2585                         if (j)
2586                             min = (U8)uv;
2587                         else if (uv < 256)
2588                             max = (U8)uv;
2589                         else {
2590                             max = (U8)0xff; /* only to \xff */
2591                             uvmax = uv; /* \x{100} to uvmax */
2592                         }
2593                         d = c; /* eat endpoint chars */
2594                      }
2595                 }
2596                else {
2597 #endif
2598                    d -= 2;              /* eat the first char and the - */
2599                    min = (U8)*d;        /* first char in range */
2600                    max = (U8)d[1];      /* last char in range  */
2601 #ifdef EBCDIC
2602                }
2603 #endif
2604
2605                 if (min > max) {
2606                     Perl_croak(aTHX_
2607                                "Invalid range \"%c-%c\" in transliteration operator",
2608                                (char)min, (char)max);
2609                 }
2610
2611 #ifdef EBCDIC
2612                 if (literal_endpoint == 2 &&
2613                     ((isLOWER(min) && isLOWER(max)) ||
2614                      (isUPPER(min) && isUPPER(max)))) {
2615                     if (isLOWER(min)) {
2616                         for (i = min; i <= max; i++)
2617                             if (isLOWER(i))
2618                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2619                     } else {
2620                         for (i = min; i <= max; i++)
2621                             if (isUPPER(i))
2622                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2623                     }
2624                 }
2625                 else
2626 #endif
2627                     for (i = min; i <= max; i++)
2628 #ifdef EBCDIC
2629                         if (has_utf8) {
2630                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2631                             if (UNI_IS_INVARIANT(ch))
2632                                 *d++ = (U8)i;
2633                             else {
2634                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2635                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2636                             }
2637                         }
2638                         else
2639 #endif
2640                             *d++ = (char)i;
2641
2642 #ifdef EBCDIC
2643                 if (uvmax) {
2644                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2645                     if (uvmax > 0x101)
2646                         *d++ = (char)UTF_TO_NATIVE(0xff);
2647                     if (uvmax > 0x100)
2648                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2649                 }
2650 #endif
2651
2652                 /* mark the range as done, and continue */
2653                 dorange = FALSE;
2654                 didrange = TRUE;
2655 #ifdef EBCDIC
2656                 literal_endpoint = 0;
2657 #endif
2658                 continue;
2659             }
2660
2661             /* range begins (ignore - as first or last char) */
2662             else if (*s == '-' && s+1 < send  && s != start) {
2663                 if (didrange) {
2664                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2665                 }
2666                 if (has_utf8
2667 #ifdef EBCDIC
2668                     && !native_range
2669 #endif
2670                     ) {
2671                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2672                     s++;
2673                     continue;
2674                 }
2675                 dorange = TRUE;
2676                 s++;
2677             }
2678             else {
2679                 didrange = FALSE;
2680 #ifdef EBCDIC
2681                 literal_endpoint = 0;
2682                 native_range = TRUE;
2683 #endif
2684             }
2685         }
2686
2687         /* if we get here, we're not doing a transliteration */
2688
2689         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2690            except for the last char, which will be done separately. */
2691         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2692             if (s[2] == '#') {
2693                 while (s+1 < send && *s != ')')
2694                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2695             }
2696             else if (s[2] == '{' /* This should match regcomp.c */
2697                     || (s[2] == '?' && s[3] == '{'))
2698             {
2699                 I32 count = 1;
2700                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2701                 char c;
2702
2703                 while (count && (c = *regparse)) {
2704                     if (c == '\\' && regparse[1])
2705                         regparse++;
2706                     else if (c == '{')
2707                         count++;
2708                     else if (c == '}')
2709                         count--;
2710                     regparse++;
2711                 }
2712                 if (*regparse != ')')
2713                     regparse--;         /* Leave one char for continuation. */
2714                 while (s < regparse)
2715                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2716             }
2717         }
2718
2719         /* likewise skip #-initiated comments in //x patterns */
2720         else if (*s == '#' && PL_lex_inpat &&
2721           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2722             while (s+1 < send && *s != '\n')
2723                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2724         }
2725
2726         /* check for embedded arrays
2727            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2728            */
2729         else if (*s == '@' && s[1]) {
2730             if (isALNUM_lazy_if(s+1,UTF))
2731                 break;
2732             if (strchr(":'{$", s[1]))
2733                 break;
2734             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2735                 break; /* in regexp, neither @+ nor @- are interpolated */
2736         }
2737
2738         /* check for embedded scalars.  only stop if we're sure it's a
2739            variable.
2740         */
2741         else if (*s == '$') {
2742             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2743                 break;
2744             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2745                 if (s[1] == '\\') {
2746                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2747                                    "Possible unintended interpolation of $\\ in regex");
2748                 }
2749                 break;          /* in regexp, $ might be tail anchor */
2750             }
2751         }
2752
2753         /* End of else if chain - OP_TRANS rejoin rest */
2754
2755         /* backslashes */
2756         if (*s == '\\' && s+1 < send) {
2757             s++;
2758
2759             /* deprecate \1 in strings and substitution replacements */
2760             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2761                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2762             {
2763                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2764                 *--s = '$';
2765                 break;
2766             }
2767
2768             /* string-change backslash escapes */
2769             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2770                 --s;
2771                 break;
2772             }
2773             /* skip any other backslash escapes in a pattern */
2774             else if (PL_lex_inpat) {
2775                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2776                 goto default_action;
2777             }
2778
2779             /* if we get here, it's either a quoted -, or a digit */
2780             switch (*s) {
2781
2782             /* quoted - in transliterations */
2783             case '-':
2784                 if (PL_lex_inwhat == OP_TRANS) {
2785                     *d++ = *s++;
2786                     continue;
2787                 }
2788                 /* FALL THROUGH */
2789             default:
2790                 {
2791                     if ((isALPHA(*s) || isDIGIT(*s)))
2792                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2793                                        "Unrecognized escape \\%c passed through",
2794                                        *s);
2795                     /* default action is to copy the quoted character */
2796                     goto default_action;
2797                 }
2798
2799             /* eg. \132 indicates the octal constant 0x132 */
2800             case '0': case '1': case '2': case '3':
2801             case '4': case '5': case '6': case '7':
2802                 {
2803                     I32 flags = 0;
2804                     STRLEN len = 3;
2805                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2806                     s += len;
2807                 }
2808                 goto NUM_ESCAPE_INSERT;
2809
2810             /* eg. \x24 indicates the hex constant 0x24 */
2811             case 'x':
2812                 ++s;
2813                 if (*s == '{') {
2814                     char* const e = strchr(s, '}');
2815                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2816                       PERL_SCAN_DISALLOW_PREFIX;
2817                     STRLEN len;
2818
2819                     ++s;
2820                     if (!e) {
2821                         yyerror("Missing right brace on \\x{}");
2822                         continue;
2823                     }
2824                     len = e - s;
2825                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2826                     s = e + 1;
2827                 }
2828                 else {
2829                     {
2830                         STRLEN len = 2;
2831                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2832                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2833                         s += len;
2834                     }
2835                 }
2836
2837               NUM_ESCAPE_INSERT:
2838                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2839                  * always be enough room in sv since such escapes will be
2840                  * longer than any UTF-8 sequence they can end up as, except if
2841                  * they force us to recode the rest of the string into utf8 */
2842
2843                 /* Here uv is the ordinal of the next character being added in
2844                  * unicode (converted from native).  (It has to be done before
2845                  * here because \N is interpreted as unicode, and oct and hex
2846                  * as native.) */
2847                 if (!UNI_IS_INVARIANT(uv)) {
2848                     if (!has_utf8 && uv > 255) {
2849                         /* Might need to recode whatever we have accumulated so
2850                          * far if it contains any chars variant in utf8 or
2851                          * utf-ebcdic. */
2852
2853                         SvCUR_set(sv, d - SvPVX_const(sv));
2854                         SvPOK_on(sv);
2855                         *d = '\0';
2856                         /* See Note on sizing above.  */
2857                         sv_utf8_upgrade_flags_grow(sv,
2858                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2859                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2860                         d = SvPVX(sv) + SvCUR(sv);
2861                         has_utf8 = TRUE;
2862                     }
2863
2864                     if (has_utf8) {
2865                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2866                         if (PL_lex_inwhat == OP_TRANS &&
2867                             PL_sublex_info.sub_op) {
2868                             PL_sublex_info.sub_op->op_private |=
2869                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2870                                              : OPpTRANS_TO_UTF);
2871                         }
2872 #ifdef EBCDIC
2873                         if (uv > 255 && !dorange)
2874                             native_range = FALSE;
2875 #endif
2876                     }
2877                     else {
2878                         *d++ = (char)uv;
2879                     }
2880                 }
2881                 else {
2882                     *d++ = (char) uv;
2883                 }
2884                 continue;
2885
2886             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2887              * \N{U+0041} */
2888             case 'N':
2889                 ++s;
2890                 if (*s == '{') {
2891                     char* e = strchr(s, '}');
2892                     SV *res;
2893                     STRLEN len;
2894                     const char *str;
2895
2896                     if (!e) {
2897                         yyerror("Missing right brace on \\N{}");
2898                         e = s - 1;
2899                         goto cont_scan;
2900                     }
2901                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2902                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2903                          * machines */
2904                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2905                           PERL_SCAN_DISALLOW_PREFIX;
2906                         s += 3;
2907                         len = e - s;
2908                         uv = grok_hex(s, &len, &flags, NULL);
2909                         if ( e > s && len != (STRLEN)(e - s) ) {
2910                             uv = 0xFFFD;
2911                         }
2912                         s = e + 1;
2913                         goto NUM_ESCAPE_INSERT;
2914                     }
2915                     res = newSVpvn(s + 1, e - s - 1);
2916                     res = new_constant( NULL, 0, "charnames",
2917                                         res, NULL, s - 2, e - s + 3 );
2918                     if (has_utf8)
2919                         sv_utf8_upgrade(res);
2920                     str = SvPV_const(res,len);
2921 #ifdef EBCDIC_NEVER_MIND
2922                     /* charnames uses pack U and that has been
2923                      * recently changed to do the below uni->native
2924                      * mapping, so this would be redundant (and wrong,
2925                      * the code point would be doubly converted).
2926                      * But leave this in just in case the pack U change
2927                      * gets revoked, but the semantics is still
2928                      * desireable for charnames. --jhi */
2929                     {
2930                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2931
2932                          if (uv < 0x100) {
2933                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2934
2935                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2936                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2937                               str = SvPV_const(res, len);
2938                          }
2939                     }
2940 #endif
2941                     /* If destination is not in utf8 but this new character is,
2942                      * recode the dest to utf8 */
2943                     if (!has_utf8 && SvUTF8(res)) {
2944                         SvCUR_set(sv, d - SvPVX_const(sv));
2945                         SvPOK_on(sv);
2946                         *d = '\0';
2947                         /* See Note on sizing above.  */
2948                         sv_utf8_upgrade_flags_grow(sv,
2949                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2950                                             len + (STRLEN)(send - s) + 1);
2951                         d = SvPVX(sv) + SvCUR(sv);
2952                         has_utf8 = TRUE;
2953                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2954
2955                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2956                          * correctly here). */
2957                         const STRLEN off = d - SvPVX_const(sv);
2958                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2959                     }
2960 #ifdef EBCDIC
2961                     if (!dorange)
2962                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2963 #endif
2964                     Copy(str, d, len, char);
2965                     d += len;
2966                     SvREFCNT_dec(res);
2967                   cont_scan:
2968                     s = e + 1;
2969                 }
2970                 else
2971                     yyerror("Missing braces on \\N{}");
2972                 continue;
2973
2974             /* \c is a control character */
2975             case 'c':
2976                 s++;
2977                 if (s < send) {
2978                     U8 c = *s++;
2979 #ifdef EBCDIC
2980                     if (isLOWER(c))
2981                         c = toUPPER(c);
2982 #endif
2983                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2984                 }
2985                 else {
2986                     yyerror("Missing control char name in \\c");
2987                 }
2988                 continue;
2989
2990             /* printf-style backslashes, formfeeds, newlines, etc */
2991             case 'b':
2992                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2993                 break;
2994             case 'n':
2995                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2996                 break;
2997             case 'r':
2998                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2999                 break;
3000             case 'f':
3001                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3002                 break;
3003             case 't':
3004                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3005                 break;
3006             case 'e':
3007                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3008                 break;
3009             case 'a':
3010                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3011                 break;
3012             } /* end switch */
3013
3014             s++;
3015             continue;
3016         } /* end if (backslash) */
3017 #ifdef EBCDIC
3018         else
3019             literal_endpoint++;
3020 #endif
3021
3022     default_action:
3023         /* If we started with encoded form, or already know we want it,
3024            then encode the next character */
3025         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3026             STRLEN len  = 1;
3027
3028
3029             /* One might think that it is wasted effort in the case of the
3030              * source being utf8 (this_utf8 == TRUE) to take the next character
3031              * in the source, convert it to an unsigned value, and then convert
3032              * it back again.  But the source has not been validated here.  The
3033              * routine that does the conversion checks for errors like
3034              * malformed utf8 */
3035
3036             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3037             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3038             if (!has_utf8) {
3039                 SvCUR_set(sv, d - SvPVX_const(sv));
3040                 SvPOK_on(sv);
3041                 *d = '\0';
3042                 /* See Note on sizing above.  */
3043                 sv_utf8_upgrade_flags_grow(sv,
3044                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3045                                         need + (STRLEN)(send - s) + 1);
3046                 d = SvPVX(sv) + SvCUR(sv);
3047                 has_utf8 = TRUE;
3048             } else if (need > len) {
3049                 /* encoded value larger than old, may need extra space (NOTE:
3050                  * SvCUR() is not set correctly here).   See Note on sizing
3051                  * above.  */
3052                 const STRLEN off = d - SvPVX_const(sv);
3053                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3054             }
3055             s += len;
3056
3057             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3058 #ifdef EBCDIC
3059             if (uv > 255 && !dorange)
3060                 native_range = FALSE;
3061 #endif
3062         }
3063         else {
3064             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3065         }
3066     } /* while loop to process each character */
3067
3068     /* terminate the string and set up the sv */
3069     *d = '\0';
3070     SvCUR_set(sv, d - SvPVX_const(sv));
3071     if (SvCUR(sv) >= SvLEN(sv))
3072         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3073
3074     SvPOK_on(sv);
3075     if (PL_encoding && !has_utf8) {
3076         sv_recode_to_utf8(sv, PL_encoding);
3077         if (SvUTF8(sv))
3078             has_utf8 = TRUE;
3079     }
3080     if (has_utf8) {
3081         SvUTF8_on(sv);
3082         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3083             PL_sublex_info.sub_op->op_private |=
3084                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3085         }
3086     }
3087
3088     /* shrink the sv if we allocated more than we used */
3089     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3090         SvPV_shrink_to_cur(sv);
3091     }
3092
3093     /* return the substring (via pl_yylval) only if we parsed anything */
3094     if (s > PL_bufptr) {
3095         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3096             const char *const key = PL_lex_inpat ? "qr" : "q";
3097             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3098             const char *type;
3099             STRLEN typelen;
3100
3101             if (PL_lex_inwhat == OP_TRANS) {
3102                 type = "tr";
3103                 typelen = 2;
3104             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3105                 type = "s";
3106                 typelen = 1;
3107             } else  {
3108                 type = "qq";
3109                 typelen = 2;
3110             }
3111
3112             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3113                                 type, typelen);
3114         }
3115         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3116     } else
3117         SvREFCNT_dec(sv);
3118     return s;
3119 }
3120
3121 /* S_intuit_more
3122  * Returns TRUE if there's more to the expression (e.g., a subscript),
3123  * FALSE otherwise.
3124  *
3125  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3126  *
3127  * ->[ and ->{ return TRUE
3128  * { and [ outside a pattern are always subscripts, so return TRUE
3129  * if we're outside a pattern and it's not { or [, then return FALSE
3130  * if we're in a pattern and the first char is a {
3131  *   {4,5} (any digits around the comma) returns FALSE
3132  * if we're in a pattern and the first char is a [
3133  *   [] returns FALSE
3134  *   [SOMETHING] has a funky algorithm to decide whether it's a
3135  *      character class or not.  It has to deal with things like
3136  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3137  * anything else returns TRUE
3138  */
3139
3140 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3141
3142 STATIC int
3143 S_intuit_more(pTHX_ register char *s)
3144 {
3145     dVAR;
3146
3147     PERL_ARGS_ASSERT_INTUIT_MORE;
3148
3149     if (PL_lex_brackets)
3150         return TRUE;
3151     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3152         return TRUE;
3153     if (*s != '{' && *s != '[')
3154         return FALSE;
3155     if (!PL_lex_inpat)
3156         return TRUE;
3157
3158     /* In a pattern, so maybe we have {n,m}. */
3159     if (*s == '{') {
3160         s++;
3161         if (!isDIGIT(*s))
3162             return TRUE;
3163         while (isDIGIT(*s))
3164             s++;
3165         if (*s == ',')
3166             s++;
3167         while (isDIGIT(*s))
3168             s++;
3169         if (*s == '}')
3170             return FALSE;
3171         return TRUE;
3172
3173     }
3174
3175     /* On the other hand, maybe we have a character class */
3176
3177     s++;
3178     if (*s == ']' || *s == '^')
3179         return FALSE;
3180     else {
3181         /* this is terrifying, and it works */
3182         int weight = 2;         /* let's weigh the evidence */
3183         char seen[256];
3184         unsigned char un_char = 255, last_un_char;
3185         const char * const send = strchr(s,']');
3186         char tmpbuf[sizeof PL_tokenbuf * 4];
3187
3188         if (!send)              /* has to be an expression */
3189             return TRUE;
3190
3191         Zero(seen,256,char);
3192         if (*s == '$')
3193             weight -= 3;
3194         else if (isDIGIT(*s)) {
3195             if (s[1] != ']') {
3196                 if (isDIGIT(s[1]) && s[2] == ']')
3197                     weight -= 10;
3198             }
3199             else
3200                 weight -= 100;
3201         }
3202         for (; s < send; s++) {
3203             last_un_char = un_char;
3204             un_char = (unsigned char)*s;
3205             switch (*s) {
3206             case '@':
3207             case '&':
3208             case '$':
3209                 weight -= seen[un_char] * 10;
3210                 if (isALNUM_lazy_if(s+1,UTF)) {
3211                     int len;
3212                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3213                     len = (int)strlen(tmpbuf);
3214                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3215                         weight -= 100;
3216                     else
3217                         weight -= 10;
3218                 }
3219                 else if (*s == '$' && s[1] &&
3220                   strchr("[#!%*<>()-=",s[1])) {
3221                     if (/*{*/ strchr("])} =",s[2]))
3222                         weight -= 10;
3223                     else
3224                         weight -= 1;
3225                 }
3226                 break;
3227             case '\\':
3228                 un_char = 254;
3229                 if (s[1]) {
3230                     if (strchr("wds]",s[1]))
3231                         weight += 100;
3232                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3233                         weight += 1;
3234                     else if (strchr("rnftbxcav",s[1]))
3235                         weight += 40;
3236                     else if (isDIGIT(s[1])) {
3237                         weight += 40;
3238                         while (s[1] && isDIGIT(s[1]))
3239                             s++;
3240                     }
3241                 }
3242                 else
3243                     weight += 100;
3244                 break;
3245             case '-':
3246                 if (s[1] == '\\')
3247                     weight += 50;
3248                 if (strchr("aA01! ",last_un_char))
3249                     weight += 30;
3250                 if (strchr("zZ79~",s[1]))
3251                     weight += 30;
3252                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3253                     weight -= 5;        /* cope with negative subscript */
3254                 break;
3255             default:
3256                 if (!isALNUM(last_un_char)
3257                     && !(last_un_char == '$' || last_un_char == '@'
3258                          || last_un_char == '&')
3259                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3260                     char *d = tmpbuf;
3261                     while (isALPHA(*s))
3262                         *d++ = *s++;
3263                     *d = '\0';
3264                     if (keyword(tmpbuf, d - tmpbuf, 0))
3265                         weight -= 150;
3266                 }
3267                 if (un_char == last_un_char + 1)
3268                     weight += 5;
3269                 weight -= seen[un_char];
3270                 break;
3271             }
3272             seen[un_char]++;
3273         }
3274         if (weight >= 0)        /* probably a character class */
3275             return FALSE;
3276     }
3277
3278     return TRUE;
3279 }
3280
3281 /*
3282  * S_intuit_method
3283  *
3284  * Does all the checking to disambiguate
3285  *   foo bar
3286  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3287  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3288  *
3289  * First argument is the stuff after the first token, e.g. "bar".
3290  *
3291  * Not a method if bar is a filehandle.
3292  * Not a method if foo is a subroutine prototyped to take a filehandle.
3293  * Not a method if it's really "Foo $bar"
3294  * Method if it's "foo $bar"
3295  * Not a method if it's really "print foo $bar"
3296  * Method if it's really "foo package::" (interpreted as package->foo)
3297  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3298  * Not a method if bar is a filehandle or package, but is quoted with
3299  *   =>
3300  */
3301
3302 STATIC int
3303 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3304 {
3305     dVAR;
3306     char *s = start + (*start == '$');
3307     char tmpbuf[sizeof PL_tokenbuf];
3308     STRLEN len;
3309     GV* indirgv;
3310 #ifdef PERL_MAD
3311     int soff;
3312 #endif
3313
3314     PERL_ARGS_ASSERT_INTUIT_METHOD;
3315
3316     if (gv) {
3317         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3318             return 0;
3319         if (cv) {
3320             if (SvPOK(cv)) {
3321                 const char *proto = SvPVX_const(cv);
3322                 if (proto) {
3323                     if (*proto == ';')
3324                         proto++;
3325                     if (*proto == '*')
3326                         return 0;
3327                 }
3328             }
3329         } else
3330             gv = NULL;
3331     }
3332     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3333     /* start is the beginning of the possible filehandle/object,
3334      * and s is the end of it
3335      * tmpbuf is a copy of it
3336      */
3337
3338     if (*start == '$') {
3339         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3340                 isUPPER(*PL_tokenbuf))
3341             return 0;
3342 #ifdef PERL_MAD
3343         len = start - SvPVX(PL_linestr);
3344 #endif
3345         s = PEEKSPACE(s);
3346 #ifdef PERL_MAD
3347         start = SvPVX(PL_linestr) + len;
3348 #endif
3349         PL_bufptr = start;
3350         PL_expect = XREF;
3351         return *s == '(' ? FUNCMETH : METHOD;
3352     }
3353     if (!keyword(tmpbuf, len, 0)) {
3354         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3355             len -= 2;
3356             tmpbuf[len] = '\0';
3357 #ifdef PERL_MAD
3358             soff = s - SvPVX(PL_linestr);
3359 #endif
3360             goto bare_package;
3361         }
3362         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3363         if (indirgv && GvCVu(indirgv))
3364             return 0;
3365         /* filehandle or package name makes it a method */
3366         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3367 #ifdef PERL_MAD
3368             soff = s - SvPVX(PL_linestr);
3369 #endif
3370             s = PEEKSPACE(s);
3371             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3372                 return 0;       /* no assumptions -- "=>" quotes bearword */
3373       bare_package:
3374             start_force(PL_curforce);
3375             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3376                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3377             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3378             if (PL_madskills)
3379                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3380             PL_expect = XTERM;
3381             force_next(WORD);
3382             PL_bufptr = s;
3383 #ifdef PERL_MAD
3384             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3385 #endif
3386             return *s == '(' ? FUNCMETH : METHOD;
3387         }
3388     }
3389     return 0;
3390 }
3391
3392 /* Encoded script support. filter_add() effectively inserts a
3393  * 'pre-processing' function into the current source input stream.
3394  * Note that the filter function only applies to the current source file
3395  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3396  *
3397  * The datasv parameter (which may be NULL) can be used to pass
3398  * private data to this instance of the filter. The filter function
3399  * can recover the SV using the FILTER_DATA macro and use it to
3400  * store private buffers and state information.
3401  *
3402  * The supplied datasv parameter is upgraded to a PVIO type
3403  * and the IoDIRP/IoANY field is used to store the function pointer,
3404  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3405  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3406  * private use must be set using malloc'd pointers.
3407  */
3408
3409 SV *
3410 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3411 {
3412     dVAR;
3413     if (!funcp)
3414         return NULL;
3415
3416     if (!PL_parser)
3417         return NULL;
3418
3419     if (!PL_rsfp_filters)
3420         PL_rsfp_filters = newAV();
3421     if (!datasv)
3422         datasv = newSV(0);
3423     SvUPGRADE(datasv, SVt_PVIO);
3424     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3425     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3426     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3427                           FPTR2DPTR(void *, IoANY(datasv)),
3428                           SvPV_nolen(datasv)));
3429     av_unshift(PL_rsfp_filters, 1);
3430     av_store(PL_rsfp_filters, 0, datasv) ;
3431     return(datasv);
3432 }
3433
3434
3435 /* Delete most recently added instance of this filter function. */
3436 void
3437 Perl_filter_del(pTHX_ filter_t funcp)
3438 {
3439     dVAR;
3440     SV *datasv;
3441
3442     PERL_ARGS_ASSERT_FILTER_DEL;
3443
3444 #ifdef DEBUGGING
3445     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3446                           FPTR2DPTR(void*, funcp)));
3447 #endif
3448     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3449         return;
3450     /* if filter is on top of stack (usual case) just pop it off */
3451     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3452     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3453         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3454         IoANY(datasv) = (void *)NULL;
3455         sv_free(av_pop(PL_rsfp_filters));
3456
3457         return;
3458     }
3459     /* we need to search for the correct entry and clear it     */
3460     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3461 }
3462
3463
3464 /* Invoke the idxth filter function for the current rsfp.        */
3465 /* maxlen 0 = read one text line */
3466 I32
3467 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3468 {
3469     dVAR;
3470     filter_t funcp;
3471     SV *datasv = NULL;
3472     /* This API is bad. It should have been using unsigned int for maxlen.
3473        Not sure if we want to change the API, but if not we should sanity
3474        check the value here.  */
3475     const unsigned int correct_length
3476         = maxlen < 0 ?
3477 #ifdef PERL_MICRO
3478         0x7FFFFFFF
3479 #else
3480         INT_MAX
3481 #endif
3482         : maxlen;
3483
3484     PERL_ARGS_ASSERT_FILTER_READ;
3485
3486     if (!PL_parser || !PL_rsfp_filters)
3487         return -1;
3488     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3489         /* Provide a default input filter to make life easy.    */
3490         /* Note that we append to the line. This is handy.      */
3491         DEBUG_P(PerlIO_printf(Perl_debug_log,
3492                               "filter_read %d: from rsfp\n", idx));
3493         if (correct_length) {
3494             /* Want a block */
3495             int len ;
3496             const int old_len = SvCUR(buf_sv);
3497
3498             /* ensure buf_sv is large enough */
3499             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3500             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3501                                    correct_length)) <= 0) {
3502                 if (PerlIO_error(PL_rsfp))
3503                     return -1;          /* error */
3504                 else
3505                     return 0 ;          /* end of file */
3506             }
3507             SvCUR_set(buf_sv, old_len + len) ;
3508             SvPVX(buf_sv)[old_len + len] = '\0';
3509         } else {
3510             /* Want a line */
3511             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3512                 if (PerlIO_error(PL_rsfp))
3513                     return -1;          /* error */
3514                 else
3515                     return 0 ;          /* end of file */
3516             }
3517         }
3518         return SvCUR(buf_sv);
3519     }
3520     /* Skip this filter slot if filter has been deleted */
3521     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3522         DEBUG_P(PerlIO_printf(Perl_debug_log,
3523                               "filter_read %d: skipped (filter deleted)\n",
3524                               idx));
3525         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3526     }
3527     /* Get function pointer hidden within datasv        */
3528     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3529     DEBUG_P(PerlIO_printf(Perl_debug_log,
3530                           "filter_read %d: via function %p (%s)\n",
3531                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3532     /* Call function. The function is expected to       */
3533     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3534     /* Return: <0:error, =0:eof, >0:not eof             */
3535     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3536 }
3537
3538 STATIC char *
3539 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3540 {
3541     dVAR;
3542
3543     PERL_ARGS_ASSERT_FILTER_GETS;
3544
3545 #ifdef PERL_CR_FILTER
3546     if (!PL_rsfp_filters) {
3547         filter_add(S_cr_textfilter,NULL);
3548     }
3549 #endif
3550     if (PL_rsfp_filters) {
3551         if (!append)
3552             SvCUR_set(sv, 0);   /* start with empty line        */
3553         if (FILTER_READ(0, sv, 0) > 0)
3554             return ( SvPVX(sv) ) ;
3555         else
3556             return NULL ;
3557     }
3558     else
3559         return (sv_gets(sv, PL_rsfp, append));
3560 }
3561
3562 STATIC HV *
3563 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3564 {
3565     dVAR;
3566     GV *gv;
3567
3568     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3569
3570     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3571         return PL_curstash;
3572
3573     if (len > 2 &&
3574         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3575         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3576     {
3577         return GvHV(gv);                        /* Foo:: */
3578     }
3579
3580     /* use constant CLASS => 'MyClass' */
3581     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3582     if (gv && GvCV(gv)) {
3583         SV * const sv = cv_const_sv(GvCV(gv));
3584         if (sv)
3585             pkgname = SvPV_const(sv, len);
3586     }
3587
3588     return gv_stashpvn(pkgname, len, 0);
3589 }
3590
3591 /*
3592  * S_readpipe_override
3593  * Check whether readpipe() is overriden, and generates the appropriate
3594  * optree, provided sublex_start() is called afterwards.
3595  */
3596 STATIC void
3597 S_readpipe_override(pTHX)
3598 {
3599     GV **gvp;
3600     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3601     pl_yylval.ival = OP_BACKTICK;
3602     if ((gv_readpipe
3603                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3604             ||
3605             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3606              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3607              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3608     {
3609         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3610             append_elem(OP_LIST,
3611                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3612                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3613     }
3614 }
3615
3616 #ifdef PERL_MAD
3617  /*
3618  * Perl_madlex
3619  * The intent of this yylex wrapper is to minimize the changes to the
3620  * tokener when we aren't interested in collecting madprops.  It remains
3621  * to be seen how successful this strategy will be...
3622  */
3623
3624 int
3625 Perl_madlex(pTHX)
3626 {
3627     int optype;
3628     char *s = PL_bufptr;
3629
3630     /* make sure PL_thiswhite is initialized */
3631     PL_thiswhite = 0;
3632     PL_thismad = 0;
3633
3634     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3635     if (PL_pending_ident)
3636         return S_pending_ident(aTHX);
3637
3638     /* previous token ate up our whitespace? */
3639     if (!PL_lasttoke && PL_nextwhite) {
3640         PL_thiswhite = PL_nextwhite;
3641         PL_nextwhite = 0;
3642     }
3643
3644     /* isolate the token, and figure out where it is without whitespace */
3645     PL_realtokenstart = -1;
3646     PL_thistoken = 0;
3647     optype = yylex();
3648     s = PL_bufptr;
3649     assert(PL_curforce < 0);
3650
3651     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3652         if (!PL_thistoken) {
3653             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3654                 PL_thistoken = newSVpvs("");
3655             else {
3656                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3657                 PL_thistoken = newSVpvn(tstart, s - tstart);
3658             }
3659         }
3660         if (PL_thismad) /* install head */
3661             CURMAD('X', PL_thistoken);
3662     }
3663
3664     /* last whitespace of a sublex? */
3665     if (optype == ')' && PL_endwhite) {
3666         CURMAD('X', PL_endwhite);
3667     }
3668
3669     if (!PL_thismad) {
3670
3671         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3672         if (!PL_thiswhite && !PL_endwhite && !optype) {
3673             sv_free(PL_thistoken);
3674             PL_thistoken = 0;
3675             return 0;
3676         }
3677
3678         /* put off final whitespace till peg */
3679         if (optype == ';' && !PL_rsfp) {
3680             PL_nextwhite = PL_thiswhite;
3681             PL_thiswhite = 0;
3682         }
3683         else if (PL_thisopen) {
3684             CURMAD('q', PL_thisopen);
3685             if (PL_thistoken)
3686                 sv_free(PL_thistoken);
3687             PL_thistoken = 0;
3688         }
3689         else {
3690             /* Store actual token text as madprop X */
3691             CURMAD('X', PL_thistoken);
3692         }
3693
3694         if (PL_thiswhite) {
3695             /* add preceding whitespace as madprop _ */
3696             CURMAD('_', PL_thiswhite);
3697         }
3698
3699         if (PL_thisstuff) {
3700             /* add quoted material as madprop = */
3701             CURMAD('=', PL_thisstuff);
3702         }
3703
3704         if (PL_thisclose) {
3705             /* add terminating quote as madprop Q */
3706             CURMAD('Q', PL_thisclose);
3707         }
3708     }
3709
3710     /* special processing based on optype */
3711
3712     switch (optype) {
3713
3714     /* opval doesn't need a TOKEN since it can already store mp */
3715     case WORD:
3716     case METHOD:
3717     case FUNCMETH:
3718     case THING:
3719     case PMFUNC:
3720     case PRIVATEREF:
3721     case FUNC0SUB:
3722     case UNIOPSUB:
3723     case LSTOPSUB:
3724         if (pl_yylval.opval)
3725             append_madprops(PL_thismad, pl_yylval.opval, 0);
3726         PL_thismad = 0;
3727         return optype;
3728
3729     /* fake EOF */
3730     case 0:
3731         optype = PEG;
3732         if (PL_endwhite) {
3733             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3734             PL_endwhite = 0;
3735         }
3736         break;
3737
3738     case ']':
3739     case '}':
3740         if (PL_faketokens)
3741             break;
3742         /* remember any fake bracket that lexer is about to discard */
3743         if (PL_lex_brackets == 1 &&
3744             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3745         {
3746             s = PL_bufptr;
3747             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3748                 s++;
3749             if (*s == '}') {
3750                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3751                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3752                 PL_thiswhite = 0;
3753                 PL_bufptr = s - 1;
3754                 break;  /* don't bother looking for trailing comment */
3755             }
3756             else
3757                 s = PL_bufptr;
3758         }
3759         if (optype == ']')
3760             break;
3761         /* FALLTHROUGH */
3762
3763     /* attach a trailing comment to its statement instead of next token */
3764     case ';':
3765         if (PL_faketokens)
3766             break;
3767         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3768             s = PL_bufptr;
3769             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3770                 s++;
3771             if (*s == '\n' || *s == '#') {
3772                 while (s < PL_bufend && *s != '\n')
3773                     s++;
3774                 if (s < PL_bufend)
3775                     s++;
3776                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3777                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3778                 PL_thiswhite = 0;
3779                 PL_bufptr = s;
3780             }
3781         }
3782         break;
3783
3784     /* pval */
3785     case LABEL:
3786         break;
3787
3788     /* ival */
3789     default:
3790         break;
3791
3792     }
3793
3794     /* Create new token struct.  Note: opvals return early above. */
3795     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3796     PL_thismad = 0;
3797     return optype;
3798 }
3799 #endif
3800
3801 STATIC char *
3802 S_tokenize_use(pTHX_ int is_use, char *s) {
3803     dVAR;
3804
3805     PERL_ARGS_ASSERT_TOKENIZE_USE;
3806
3807     if (PL_expect != XSTATE)
3808         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3809                     is_use ? "use" : "no"));
3810     s = SKIPSPACE1(s);
3811     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3812         s = force_version(s, TRUE);
3813         if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3814             start_force(PL_curforce);
3815             NEXTVAL_NEXTTOKE.opval = NULL;
3816             force_next(WORD);
3817         }
3818         else if (*s == 'v') {
3819             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3820             s = force_version(s, FALSE);
3821         }
3822     }
3823     else {
3824         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3825         s = force_version(s, FALSE);
3826     }
3827     pl_yylval.ival = is_use;
3828     return s;
3829 }
3830 #ifdef DEBUGGING
3831     static const char* const exp_name[] =
3832         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3833           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3834         };
3835 #endif
3836
3837 /*
3838   yylex
3839
3840   Works out what to call the token just pulled out of the input
3841   stream.  The yacc parser takes care of taking the ops we return and
3842   stitching them into a tree.
3843
3844   Returns:
3845     PRIVATEREF
3846
3847   Structure:
3848       if read an identifier
3849           if we're in a my declaration
3850               croak if they tried to say my($foo::bar)
3851               build the ops for a my() declaration
3852           if it's an access to a my() variable
3853               are we in a sort block?
3854                   croak if my($a); $a <=> $b
3855               build ops for access to a my() variable
3856           if in a dq string, and they've said @foo and we can't find @foo
3857               croak
3858           build ops for a bareword
3859       if we already built the token before, use it.
3860 */
3861
3862
3863 #ifdef __SC__
3864 #pragma segment Perl_yylex
3865 #endif
3866 int
3867 Perl_yylex(pTHX)
3868 {
3869     dVAR;
3870     register char *s = PL_bufptr;
3871     register char *d;
3872     STRLEN len;
3873     bool bof = FALSE;
3874     U32 fake_eof = 0;
3875
3876     /* orig_keyword, gvp, and gv are initialized here because
3877      * jump to the label just_a_word_zero can bypass their
3878      * initialization later. */
3879     I32 orig_keyword = 0;
3880     GV *gv = NULL;
3881     GV **gvp = NULL;
3882
3883     DEBUG_T( {
3884         SV* tmp = newSVpvs("");
3885         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3886             (IV)CopLINE(PL_curcop),
3887             lex_state_names[PL_lex_state],
3888             exp_name[PL_expect],
3889             pv_display(tmp, s, strlen(s), 0, 60));
3890         SvREFCNT_dec(tmp);
3891     } );
3892     /* check if there's an identifier for us to look at */
3893     if (PL_pending_ident)
3894         return REPORT(S_pending_ident(aTHX));
3895
3896     /* no identifier pending identification */
3897
3898     switch (PL_lex_state) {
3899 #ifdef COMMENTARY
3900     case LEX_NORMAL:            /* Some compilers will produce faster */
3901     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3902         break;
3903 #endif
3904
3905     /* when we've already built the next token, just pull it out of the queue */
3906     case LEX_KNOWNEXT:
3907 #ifdef PERL_MAD
3908         PL_lasttoke--;
3909         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3910         if (PL_madskills) {
3911             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3912             PL_nexttoke[PL_lasttoke].next_mad = 0;
3913             if (PL_thismad && PL_thismad->mad_key == '_') {
3914                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3915                 PL_thismad->mad_val = 0;
3916                 mad_free(PL_thismad);
3917                 PL_thismad = 0;
3918             }
3919         }
3920         if (!PL_lasttoke) {
3921             PL_lex_state = PL_lex_defer;
3922             PL_expect = PL_lex_expect;
3923             PL_lex_defer = LEX_NORMAL;
3924             if (!PL_nexttoke[PL_lasttoke].next_type)
3925                 return yylex();
3926         }
3927 #else
3928         PL_nexttoke--;
3929         pl_yylval = PL_nextval[PL_nexttoke];
3930         if (!PL_nexttoke) {
3931             PL_lex_state = PL_lex_defer;
3932             PL_expect = PL_lex_expect;
3933             PL_lex_defer = LEX_NORMAL;
3934         }
3935 #endif
3936 #ifdef PERL_MAD
3937         /* FIXME - can these be merged?  */
3938         return(PL_nexttoke[PL_lasttoke].next_type);
3939 #else
3940         return REPORT(PL_nexttype[PL_nexttoke]);
3941 #endif
3942
3943     /* interpolated case modifiers like \L \U, including \Q and \E.
3944        when we get here, PL_bufptr is at the \
3945     */
3946     case LEX_INTERPCASEMOD:
3947 #ifdef DEBUGGING
3948         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3949             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3950 #endif
3951         /* handle \E or end of string */
3952         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3953             /* if at a \E */
3954             if (PL_lex_casemods) {
3955                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3956                 PL_lex_casestack[PL_lex_casemods] = '\0';
3957
3958                 if (PL_bufptr != PL_bufend
3959                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3960                     PL_bufptr += 2;
3961                     PL_lex_state = LEX_INTERPCONCAT;
3962 #ifdef PERL_MAD
3963                     if (PL_madskills)
3964                         PL_thistoken = newSVpvs("\\E");
3965 #endif
3966                 }
3967                 return REPORT(')');
3968             }
3969 #ifdef PERL_MAD
3970             while (PL_bufptr != PL_bufend &&
3971               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3972                 if (!PL_thiswhite)
3973                     PL_thiswhite = newSVpvs("");
3974                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3975                 PL_bufptr += 2;
3976             }
3977 #else
3978             if (PL_bufptr != PL_bufend)
3979                 PL_bufptr += 2;
3980 #endif
3981             PL_lex_state = LEX_INTERPCONCAT;
3982             return yylex();
3983         }
3984         else {
3985             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3986               "### Saw case modifier\n"); });
3987             s = PL_bufptr + 1;
3988             if (s[1] == '\\' && s[2] == 'E') {
3989 #ifdef PERL_MAD
3990                 if (!PL_thiswhite)
3991                     PL_thiswhite = newSVpvs("");
3992                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3993 #endif
3994                 PL_bufptr = s + 3;
3995                 PL_lex_state = LEX_INTERPCONCAT;
3996                 return yylex();
3997             }
3998             else {
3999                 I32 tmp;
4000                 if (!PL_madskills) /* when just compiling don't need correct */
4001                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4002                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4003                 if ((*s == 'L' || *s == 'U') &&
4004                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4005                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4006                     return REPORT(')');
4007                 }
4008                 if (PL_lex_casemods > 10)
4009                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4010                 PL_lex_casestack[PL_lex_casemods++] = *s;
4011                 PL_lex_casestack[PL_lex_casemods] = '\0';
4012                 PL_lex_state = LEX_INTERPCONCAT;
4013                 start_force(PL_curforce);
4014                 NEXTVAL_NEXTTOKE.ival = 0;
4015                 force_next('(');
4016                 start_force(PL_curforce);
4017                 if (*s == 'l')
4018                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4019                 else if (*s == 'u')
4020                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4021                 else if (*s == 'L')
4022                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4023                 else if (*s == 'U')
4024                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4025                 else if (*s == 'Q')
4026                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4027                 else
4028                     Perl_croak(aTHX_ "panic: yylex");
4029                 if (PL_madskills) {
4030                     SV* const tmpsv = newSVpvs("\\ ");
4031                     /* replace the space with the character we want to escape
4032                      */
4033                     SvPVX(tmpsv)[1] = *s;
4034                     curmad('_', tmpsv);
4035                 }
4036                 PL_bufptr = s + 1;
4037             }
4038             force_next(FUNC);
4039             if (PL_lex_starts) {
4040                 s = PL_bufptr;
4041                 PL_lex_starts = 0;
4042 #ifdef PERL_MAD
4043                 if (PL_madskills) {
4044                     if (PL_thistoken)
4045                         sv_free(PL_thistoken);
4046                     PL_thistoken = newSVpvs("");
4047                 }
4048 #endif
4049                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4050                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4051                     OPERATOR(',');
4052                 else
4053                     Aop(OP_CONCAT);
4054             }
4055             else
4056                 return yylex();
4057         }
4058
4059     case LEX_INTERPPUSH:
4060         return REPORT(sublex_push());
4061
4062     case LEX_INTERPSTART:
4063         if (PL_bufptr == PL_bufend)
4064             return REPORT(sublex_done());
4065         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4066               "### Interpolated variable\n"); });
4067         PL_expect = XTERM;
4068         PL_lex_dojoin = (*PL_bufptr == '@');
4069         PL_lex_state = LEX_INTERPNORMAL;
4070         if (PL_lex_dojoin) {
4071             start_force(PL_curforce);
4072             NEXTVAL_NEXTTOKE.ival = 0;
4073             force_next(',');
4074             start_force(PL_curforce);
4075             force_ident("\"", '$');
4076             start_force(PL_curforce);
4077             NEXTVAL_NEXTTOKE.ival = 0;
4078             force_next('$');
4079             start_force(PL_curforce);
4080             NEXTVAL_NEXTTOKE.ival = 0;
4081             force_next('(');
4082             start_force(PL_curforce);
4083             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4084             force_next(FUNC);
4085         }
4086         if (PL_lex_starts++) {
4087             s = PL_bufptr;
4088 #ifdef PERL_MAD
4089             if (PL_madskills) {
4090                 if (PL_thistoken)
4091                     sv_free(PL_thistoken);
4092                 PL_thistoken = newSVpvs("");
4093             }
4094 #endif
4095             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4096             if (!PL_lex_casemods && PL_lex_inpat)
4097                 OPERATOR(',');
4098             else
4099                 Aop(OP_CONCAT);
4100         }
4101         return yylex();
4102
4103     case LEX_INTERPENDMAYBE:
4104         if (intuit_more(PL_bufptr)) {
4105             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4106             break;
4107         }
4108         /* FALL THROUGH */
4109
4110     case LEX_INTERPEND:
4111         if (PL_lex_dojoin) {
4112             PL_lex_dojoin = FALSE;
4113             PL_lex_state = LEX_INTERPCONCAT;
4114 #ifdef PERL_MAD
4115             if (PL_madskills) {
4116                 if (PL_thistoken)
4117                     sv_free(PL_thistoken);
4118                 PL_thistoken = newSVpvs("");
4119             }
4120 #endif
4121             return REPORT(')');
4122         }
4123         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4124             && SvEVALED(PL_lex_repl))
4125         {
4126             if (PL_bufptr != PL_bufend)
4127                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4128             PL_lex_repl = NULL;
4129         }
4130         /* FALLTHROUGH */
4131     case LEX_INTERPCONCAT:
4132 #ifdef DEBUGGING
4133         if (PL_lex_brackets)
4134             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4135 #endif
4136         if (PL_bufptr == PL_bufend)
4137             return REPORT(sublex_done());
4138
4139         if (SvIVX(PL_linestr) == '\'') {
4140             SV *sv = newSVsv(PL_linestr);
4141             if (!PL_lex_inpat)
4142                 sv = tokeq(sv);
4143             else if ( PL_hints & HINT_NEW_RE )
4144                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4145             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4146             s = PL_bufend;
4147         }
4148         else {
4149             s = scan_const(PL_bufptr);
4150             if (*s == '\\')
4151                 PL_lex_state = LEX_INTERPCASEMOD;
4152             else
4153                 PL_lex_state = LEX_INTERPSTART;
4154         }
4155
4156         if (s != PL_bufptr) {
4157             start_force(PL_curforce);
4158             if (PL_madskills) {
4159                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4160             }
4161             NEXTVAL_NEXTTOKE = pl_yylval;
4162             PL_expect = XTERM;
4163             force_next(THING);
4164             if (PL_lex_starts++) {
4165 #ifdef PERL_MAD
4166                 if (PL_madskills) {
4167                     if (PL_thistoken)
4168                         sv_free(PL_thistoken);
4169                     PL_thistoken = newSVpvs("");
4170                 }
4171 #endif
4172                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4173                 if (!PL_lex_casemods && PL_lex_inpat)
4174                     OPERATOR(',');
4175                 else
4176                     Aop(OP_CONCAT);
4177             }
4178             else {
4179                 PL_bufptr = s;
4180                 return yylex();
4181             }
4182         }
4183
4184         return yylex();
4185     case LEX_FORMLINE:
4186         PL_lex_state = LEX_NORMAL;
4187         s = scan_formline(PL_bufptr);
4188         if (!PL_lex_formbrack)
4189             goto rightbracket;
4190         OPERATOR(';');
4191     }
4192
4193     s = PL_bufptr;
4194     PL_oldoldbufptr = PL_oldbufptr;
4195     PL_oldbufptr = s;
4196
4197   retry:
4198 #ifdef PERL_MAD
4199     if (PL_thistoken) {
4200         sv_free(PL_thistoken);
4201         PL_thistoken = 0;
4202     }
4203     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4204 #endif
4205     switch (*s) {
4206     default:
4207         if (isIDFIRST_lazy_if(s,UTF))
4208             goto keylookup;
4209         {
4210         unsigned char c = *s;
4211         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4212         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4213             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4214         } else {
4215             d = PL_linestart;
4216         }
4217         *s = '\0';
4218         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4219     }
4220     case 4:
4221     case 26:
4222         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4223     case 0:
4224 #ifdef PERL_MAD
4225         if (PL_madskills)
4226             PL_faketokens = 0;
4227 #endif
4228         if (!PL_rsfp) {
4229             PL_last_uni = 0;
4230             PL_last_lop = 0;
4231             if (PL_lex_brackets) {
4232                 yyerror((const char *)
4233                         (PL_lex_formbrack
4234                          ? "Format not terminated"
4235                          : "Missing right curly or square bracket"));
4236             }
4237             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4238                         "### Tokener got EOF\n");
4239             } );
4240             TOKEN(0);
4241         }
4242         if (s++ < PL_bufend)
4243             goto retry;                 /* ignore stray nulls */
4244         PL_last_uni = 0;
4245         PL_last_lop = 0;
4246         if (!PL_in_eval && !PL_preambled) {
4247             PL_preambled = TRUE;
4248 #ifdef PERL_MAD
4249             if (PL_madskills)
4250                 PL_faketokens = 1;
4251 #endif
4252             if (PL_perldb) {
4253                 /* Generate a string of Perl code to load the debugger.
4254                  * If PERL5DB is set, it will return the contents of that,
4255                  * otherwise a compile-time require of perl5db.pl.  */
4256
4257                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4258
4259                 if (pdb) {
4260                     sv_setpv(PL_linestr, pdb);
4261                     sv_catpvs(PL_linestr,";");
4262                 } else {
4263                     SETERRNO(0,SS_NORMAL);
4264                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4265                 }
4266             } else
4267                 sv_setpvs(PL_linestr,"");
4268             if (PL_preambleav) {
4269                 SV **svp = AvARRAY(PL_preambleav);
4270                 SV **const end = svp + AvFILLp(PL_preambleav);
4271                 while(svp <= end) {
4272                     sv_catsv(PL_linestr, *svp);
4273                     ++svp;
4274                     sv_catpvs(PL_linestr, ";");
4275                 }
4276                 sv_free(MUTABLE_SV(PL_preambleav));
4277                 PL_preambleav = NULL;
4278             }
4279             if (PL_minus_E)
4280                 sv_catpvs(PL_linestr,
4281                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4282             if (PL_minus_n || PL_minus_p) {
4283                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4284                 if (PL_minus_l)
4285                     sv_catpvs(PL_linestr,"chomp;");
4286                 if (PL_minus_a) {
4287                     if (PL_minus_F) {
4288                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4289                              || *PL_splitstr == '"')
4290                               && strchr(PL_splitstr + 1, *PL_splitstr))
4291                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4292                         else {
4293                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4294                                bytes can be used as quoting characters.  :-) */
4295                             const char *splits = PL_splitstr;
4296                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4297                             do {
4298                                 /* Need to \ \s  */
4299                                 if (*splits == '\\')
4300                                     sv_catpvn(PL_linestr, splits, 1);
4301                                 sv_catpvn(PL_linestr, splits, 1);
4302                             } while (*splits++);
4303                             /* This loop will embed the trailing NUL of
4304                                PL_linestr as the last thing it does before
4305                                terminating.  */
4306                             sv_catpvs(PL_linestr, ");");
4307                         }
4308                     }
4309                     else
4310                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4311                 }
4312             }
4313             sv_catpvs(PL_linestr, "\n");
4314             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4315             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4316             PL_last_lop = PL_last_uni = NULL;
4317             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4318                 update_debugger_info(PL_linestr, NULL, 0);
4319             goto retry;
4320         }
4321         do {
4322             fake_eof = 0;
4323             bof = PL_rsfp ? TRUE : FALSE;
4324             if (0) {
4325               fake_eof:
4326                 fake_eof = LEX_FAKE_EOF;
4327             }
4328             PL_bufptr = PL_bufend;
4329             CopLINE_inc(PL_curcop);
4330             if (!lex_next_chunk(fake_eof)) {
4331                 CopLINE_dec(PL_curcop);
4332                 s = PL_bufptr;
4333                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4334             }
4335             CopLINE_dec(PL_curcop);
4336 #ifdef PERL_MAD
4337             if (!PL_rsfp)
4338                 PL_realtokenstart = -1;
4339 #endif
4340             s = PL_bufptr;
4341             /* If it looks like the start of a BOM or raw UTF-16,
4342              * check if it in fact is. */
4343             if (bof && PL_rsfp &&
4344                      (*s == 0 ||
4345                       *(U8*)s == 0xEF ||
4346                       *(U8*)s >= 0xFE ||
4347                       s[1] == 0)) {
4348                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4349                 if (bof) {
4350                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4351                     s = swallow_bom((U8*)s);
4352                 }
4353             }
4354             if (PL_doextract) {
4355                 /* Incest with pod. */
4356 #ifdef PERL_MAD
4357                 if (PL_madskills)
4358                     sv_catsv(PL_thiswhite, PL_linestr);
4359 #endif
4360                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4361                     sv_setpvs(PL_linestr, "");
4362                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4363                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4364                     PL_last_lop = PL_last_uni = NULL;
4365                     PL_doextract = FALSE;
4366                 }
4367             }
4368             incline(s);
4369         } while (PL_doextract);
4370         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4371         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4372         PL_last_lop = PL_last_uni = NULL;
4373         if (CopLINE(PL_curcop) == 1) {
4374             while (s < PL_bufend && isSPACE(*s))
4375                 s++;
4376             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4377                 s++;
4378 #ifdef PERL_MAD
4379             if (PL_madskills)
4380                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4381 #endif
4382             d = NULL;
4383             if (!PL_in_eval) {
4384                 if (*s == '#' && *(s+1) == '!')
4385                     d = s + 2;
4386 #ifdef ALTERNATE_SHEBANG
4387                 else {
4388                     static char const as[] = ALTERNATE_SHEBANG;
4389                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4390                         d = s + (sizeof(as) - 1);
4391                 }
4392 #endif /* ALTERNATE_SHEBANG */
4393             }
4394             if (d) {
4395                 char *ipath;
4396                 char *ipathend;
4397
4398                 while (isSPACE(*d))
4399                     d++;
4400                 ipath = d;
4401                 while (*d && !isSPACE(*d))
4402                     d++;
4403                 ipathend = d;
4404
4405 #ifdef ARG_ZERO_IS_SCRIPT
4406                 if (ipathend > ipath) {
4407                     /*
4408                      * HP-UX (at least) sets argv[0] to the script name,
4409                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4410                      * at least, set argv[0] to the basename of the Perl
4411                      * interpreter. So, having found "#!", we'll set it right.
4412                      */
4413                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4414                                                     SVt_PV)); /* $^X */
4415                     assert(SvPOK(x) || SvGMAGICAL(x));
4416                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4417                         sv_setpvn(x, ipath, ipathend - ipath);
4418                         SvSETMAGIC(x);
4419                     }
4420                     else {
4421                         STRLEN blen;
4422                         STRLEN llen;
4423                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4424                         const char * const lstart = SvPV_const(x,llen);
4425                         if (llen < blen) {
4426                             bstart += blen - llen;
4427                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4428                                 sv_setpvn(x, ipath, ipathend - ipath);
4429                                 SvSETMAGIC(x);
4430                             }
4431                         }
4432                     }
4433                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4434                 }
4435 #endif /* ARG_ZERO_IS_SCRIPT */
4436
4437                 /*
4438                  * Look for options.
4439                  */
4440                 d = instr(s,"perl -");
4441                 if (!d) {
4442                     d = instr(s,"perl");
4443 #if defined(DOSISH)
4444                     /* avoid getting into infinite loops when shebang
4445                      * line contains "Perl" rather than "perl" */
4446                     if (!d) {
4447                         for (d = ipathend-4; d >= ipath; --d) {
4448                             if ((*d == 'p' || *d == 'P')
4449                                 && !ibcmp(d, "perl", 4))
4450                             {
4451                                 break;
4452                             }
4453                         }
4454                         if (d < ipath)
4455                             d = NULL;
4456                     }
4457 #endif
4458                 }
4459 #ifdef ALTERNATE_SHEBANG
4460                 /*
4461                  * If the ALTERNATE_SHEBANG on this system starts with a
4462                  * character that can be part of a Perl expression, then if
4463                  * we see it but not "perl", we're probably looking at the
4464                  * start of Perl code, not a request to hand off to some
4465                  * other interpreter.  Similarly, if "perl" is there, but
4466                  * not in the first 'word' of the line, we assume the line
4467                  * contains the start of the Perl program.
4468                  */
4469                 if (d && *s != '#') {
4470                     const char *c = ipath;
4471                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4472                         c++;
4473                     if (c < d)
4474                         d = NULL;       /* "perl" not in first word; ignore */
4475                     else
4476                         *s = '#';       /* Don't try to parse shebang line */
4477                 }
4478 #endif /* ALTERNATE_SHEBANG */
4479                 if (!d &&
4480                     *s == '#' &&
4481                     ipathend > ipath &&
4482                     !PL_minus_c &&
4483                     !instr(s,"indir") &&
4484                     instr(PL_origargv[0],"perl"))
4485                 {
4486                     dVAR;
4487                     char **newargv;
4488
4489                     *ipathend = '\0';
4490                     s = ipathend + 1;
4491                     while (s < PL_bufend && isSPACE(*s))
4492                         s++;
4493                     if (s < PL_bufend) {
4494                         Newx(newargv,PL_origargc+3,char*);
4495                         newargv[1] = s;
4496                         while (s < PL_bufend && !isSPACE(*s))
4497                             s++;
4498                         *s = '\0';
4499                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4500                     }
4501                     else
4502                         newargv = PL_origargv;
4503                     newargv[0] = ipath;
4504                     PERL_FPU_PRE_EXEC
4505                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4506                     PERL_FPU_POST_EXEC
4507                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4508                 }
4509                 if (d) {
4510                     while (*d && !isSPACE(*d))
4511                         d++;
4512                     while (SPACE_OR_TAB(*d))
4513                         d++;
4514
4515                     if (*d++ == '-') {
4516                         const bool switches_done = PL_doswitches;
4517                         const U32 oldpdb = PL_perldb;
4518                         const bool oldn = PL_minus_n;
4519                         const bool oldp = PL_minus_p;
4520                         const char *d1 = d;
4521
4522                         do {
4523                             bool baduni = FALSE;
4524                             if (*d1 == 'C') {
4525                                 const char *d2 = d1 + 1;
4526                                 if (parse_unicode_opts((const char **)&d2)
4527                                     != PL_unicode)
4528                                     baduni = TRUE;
4529                             }
4530                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4531                                 const char * const m = d1;
4532                                 while (*d1 && !isSPACE(*d1))
4533                                     d1++;
4534                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4535                                       (int)(d1 - m), m);
4536                             }
4537                             d1 = moreswitches(d1);
4538                         } while (d1);
4539                         if (PL_doswitches && !switches_done) {
4540                             int argc = PL_origargc;
4541                             char **argv = PL_origargv;
4542                             do {
4543                                 argc--,argv++;
4544                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4545                             init_argv_symbols(argc,argv);
4546                         }
4547                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4548                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4549                               /* if we have already added "LINE: while (<>) {",
4550                                  we must not do it again */
4551                         {
4552                             sv_setpvs(PL_linestr, "");
4553                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4554                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4555                             PL_last_lop = PL_last_uni = NULL;
4556                             PL_preambled = FALSE;
4557                             if (PERLDB_LINE || PERLDB_SAVESRC)
4558                                 (void)gv_fetchfile(PL_origfilename);
4559                             goto retry;
4560                         }
4561                     }
4562                 }
4563             }
4564         }
4565         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4566             PL_bufptr = s;
4567             PL_lex_state = LEX_FORMLINE;
4568             return yylex();
4569         }
4570         goto retry;
4571     case '\r':
4572 #ifdef PERL_STRICT_CR
4573         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4574         Perl_croak(aTHX_
4575       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4576 #endif
4577     case ' ': case '\t': case '\f': case 013:
4578 #ifdef PERL_MAD
4579         PL_realtokenstart = -1;
4580         if (!PL_thiswhite)
4581             PL_thiswhite = newSVpvs("");
4582         sv_catpvn(PL_thiswhite, s, 1);
4583 #endif
4584         s++;
4585         goto retry;
4586     case '#':
4587     case '\n':
4588 #ifdef PERL_MAD
4589         PL_realtokenstart = -1;
4590         if (PL_madskills)
4591             PL_faketokens = 0;
4592 #endif
4593         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4594             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4595                 /* handle eval qq[#line 1 "foo"\n ...] */
4596                 CopLINE_dec(PL_curcop);
4597                 incline(s);
4598             }
4599             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4600                 s = SKIPSPACE0(s);
4601                 if (!PL_in_eval || PL_rsfp)
4602                     incline(s);
4603             }
4604             else {
4605                 d = s;
4606                 while (d < PL_bufend && *d != '\n')
4607                     d++;
4608                 if (d < PL_bufend)
4609                     d++;
4610                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4611                   Perl_croak(aTHX_ "panic: input overflow");
4612 #ifdef PERL_MAD
4613                 if (PL_madskills)
4614                     PL_thiswhite = newSVpvn(s, d - s);
4615 #endif
4616                 s = d;
4617                 incline(s);
4618             }
4619             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4620                 PL_bufptr = s;
4621                 PL_lex_state = LEX_FORMLINE;
4622                 return yylex();
4623             }
4624         }
4625         else {
4626 #ifdef PERL_MAD
4627             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4628                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4629                     PL_faketokens = 0;
4630                     s = SKIPSPACE0(s);
4631                     TOKEN(PEG); /* make sure any #! line is accessible */
4632                 }
4633                 s = SKIPSPACE0(s);
4634             }
4635             else {
4636 /*              if (PL_madskills && PL_lex_formbrack) { */
4637                     d = s;
4638                     while (d < PL_bufend && *d != '\n')
4639                         d++;
4640                     if (d < PL_bufend)
4641                         d++;
4642                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4643                       Perl_croak(aTHX_ "panic: input overflow");
4644                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4645                         if (!PL_thiswhite)
4646                             PL_thiswhite = newSVpvs("");
4647                         if (CopLINE(PL_curcop) == 1) {
4648                             sv_setpvs(PL_thiswhite, "");
4649                             PL_faketokens = 0;
4650                         }
4651                         sv_catpvn(PL_thiswhite, s, d - s);
4652                     }
4653                     s = d;
4654 /*              }
4655                 *s = '\0';
4656                 PL_bufend = s; */
4657             }
4658 #else
4659             *s = '\0';
4660             PL_bufend = s;
4661 #endif
4662         }
4663         goto retry;
4664     case '-':
4665         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4666             I32 ftst = 0;
4667             char tmp;
4668
4669             s++;
4670             PL_bufptr = s;
4671             tmp = *s++;
4672
4673             while (s < PL_bufend && SPACE_OR_TAB(*s))
4674                 s++;
4675
4676             if (strnEQ(s,"=>",2)) {
4677                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4678                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4679                 OPERATOR('-');          /* unary minus */
4680             }
4681             PL_last_uni = PL_oldbufptr;
4682             switch (tmp) {
4683             case 'r': ftst = OP_FTEREAD;        break;
4684             case 'w': ftst = OP_FTEWRITE;       break;
4685             case 'x': ftst = OP_FTEEXEC;        break;
4686             case 'o': ftst = OP_FTEOWNED;       break;
4687             case 'R': ftst = OP_FTRREAD;        break;
4688             case 'W': ftst = OP_FTRWRITE;       break;
4689             case 'X': ftst = OP_FTREXEC;        break;
4690             case 'O': ftst = OP_FTROWNED;       break;
4691             case 'e': ftst = OP_FTIS;           break;
4692             case 'z': ftst = OP_FTZERO;         break;
4693             case 's': ftst = OP_FTSIZE;         break;
4694             case 'f': ftst = OP_FTFILE;         break;
4695             case 'd': ftst = OP_FTDIR;          break;
4696             case 'l': ftst = OP_FTLINK;         break;
4697             case 'p': ftst = OP_FTPIPE;         break;
4698             case 'S': ftst = OP_FTSOCK;         break;
4699             case 'u': ftst = OP_FTSUID;         break;
4700             case 'g': ftst = OP_FTSGID;         break;
4701             case 'k': ftst = OP_FTSVTX;         break;
4702             case 'b': ftst = OP_FTBLK;          break;
4703             case 'c': ftst = OP_FTCHR;          break;
4704             case 't': ftst = OP_FTTTY;          break;
4705             case 'T': ftst = OP_FTTEXT;         break;
4706             case 'B': ftst = OP_FTBINARY;       break;
4707             case 'M': case 'A': case 'C':
4708                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4709                 switch (tmp) {
4710                 case 'M': ftst = OP_FTMTIME;    break;
4711                 case 'A': ftst = OP_FTATIME;    break;
4712                 case 'C': ftst = OP_FTCTIME;    break;
4713                 default:                        break;
4714                 }
4715                 break;
4716             default:
4717                 break;
4718             }
4719             if (ftst) {
4720                 PL_last_lop_op = (OPCODE)ftst;
4721                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4722                         "### Saw file test %c\n", (int)tmp);
4723                 } );
4724                 FTST(ftst);
4725             }
4726             else {
4727                 /* Assume it was a minus followed by a one-letter named
4728                  * subroutine call (or a -bareword), then. */
4729                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4730                         "### '-%c' looked like a file test but was not\n",
4731                         (int) tmp);
4732                 } );
4733                 s = --PL_bufptr;
4734             }
4735         }
4736         {
4737             const char tmp = *s++;
4738             if (*s == tmp) {
4739                 s++;
4740                 if (PL_expect == XOPERATOR)
4741                     TERM(POSTDEC);
4742                 else
4743                     OPERATOR(PREDEC);
4744             }
4745             else if (*s == '>') {
4746                 s++;
4747                 s = SKIPSPACE1(s);
4748                 if (isIDFIRST_lazy_if(s,UTF)) {
4749                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4750                     TOKEN(ARROW);
4751                 }
4752                 else if (*s == '$')
4753                     OPERATOR(ARROW);
4754                 else
4755                     TERM(ARROW);
4756             }
4757             if (PL_expect == XOPERATOR)
4758                 Aop(OP_SUBTRACT);
4759             else {
4760                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4761                     check_uni();
4762                 OPERATOR('-');          /* unary minus */
4763             }
4764         }
4765
4766     case '+':
4767         {
4768             const char tmp = *s++;
4769             if (*s == tmp) {
4770                 s++;
4771                 if (PL_expect == XOPERATOR)
4772                     TERM(POSTINC);
4773                 else
4774                     OPERATOR(PREINC);
4775             }
4776             if (PL_expect == XOPERATOR)
4777                 Aop(OP_ADD);
4778             else {
4779                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4780                     check_uni();
4781                 OPERATOR('+');
4782             }
4783         }
4784
4785     case '*':
4786         if (PL_expect != XOPERATOR) {
4787             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4788             PL_expect = XOPERATOR;
4789             force_ident(PL_tokenbuf, '*');
4790             if (!*PL_tokenbuf)
4791                 PREREF('*');
4792             TERM('*');
4793         }
4794         s++;
4795         if (*s == '*') {
4796             s++;
4797             PWop(OP_POW);
4798         }
4799         Mop(OP_MULTIPLY);
4800
4801     case '%':
4802         if (PL_expect == XOPERATOR) {
4803             ++s;
4804             Mop(OP_MODULO);
4805         }
4806         PL_tokenbuf[0] = '%';
4807         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4808                 sizeof PL_tokenbuf - 1, FALSE);
4809         if (!PL_tokenbuf[1]) {
4810             PREREF('%');
4811         }
4812         PL_pending_ident = '%';
4813         TERM('%');
4814
4815     case '^':
4816         s++;
4817         BOop(OP_BIT_XOR);
4818     case '[':
4819         PL_lex_brackets++;
4820         {
4821             const char tmp = *s++;
4822             OPERATOR(tmp);
4823         }
4824     case '~':
4825         if (s[1] == '~'
4826             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4827         {
4828             s += 2;
4829             Eop(OP_SMARTMATCH);
4830         }
4831     case ',':
4832         {
4833             const char tmp = *s++;
4834             OPERATOR(tmp);
4835         }
4836     case ':':
4837         if (s[1] == ':') {
4838             len = 0;
4839             goto just_a_word_zero_gv;
4840         }
4841         s++;
4842         switch (PL_expect) {
4843             OP *attrs;
4844 #ifdef PERL_MAD
4845             I32 stuffstart;
4846 #endif
4847         case XOPERATOR:
4848             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4849                 break;
4850             PL_bufptr = s;      /* update in case we back off */
4851             if (*s == '=') {
4852                 deprecate(":= for an empty attribute list");
4853             }
4854             goto grabattrs;
4855         case XATTRBLOCK:
4856             PL_expect = XBLOCK;
4857             goto grabattrs;
4858         case XATTRTERM:
4859             PL_expect = XTERMBLOCK;
4860          grabattrs:
4861 #ifdef PERL_MAD
4862             stuffstart = s - SvPVX(PL_linestr) - 1;
4863 #endif
4864             s = PEEKSPACE(s);
4865             attrs = NULL;
4866             while (isIDFIRST_lazy_if(s,UTF)) {
4867                 I32 tmp;
4868                 SV *sv;
4869                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4870                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4871                     if (tmp < 0) tmp = -tmp;
4872                     switch (tmp) {
4873                     case KEY_or:
4874                     case KEY_and:
4875                     case KEY_for:
4876                     case KEY_foreach:
4877                     case KEY_unless:
4878                     case KEY_if:
4879                     case KEY_while:
4880                     case KEY_until:
4881                         goto got_attrs;
4882                     default:
4883                         break;
4884                     }
4885                 }
4886                 sv = newSVpvn(s, len);
4887                 if (*d == '(') {
4888                     d = scan_str(d,TRUE,TRUE);
4889                     if (!d) {
4890                         /* MUST advance bufptr here to avoid bogus
4891                            "at end of line" context messages from yyerror().
4892                          */
4893                         PL_bufptr = s + len;
4894                         yyerror("Unterminated attribute parameter in attribute list");
4895                         if (attrs)
4896                             op_free(attrs);
4897                         sv_free(sv);
4898                         return REPORT(0);       /* EOF indicator */
4899                     }
4900                 }
4901                 if (PL_lex_stuff) {
4902                     sv_catsv(sv, PL_lex_stuff);
4903                     attrs = append_elem(OP_LIST, attrs,
4904                                         newSVOP(OP_CONST, 0, sv));
4905                     SvREFCNT_dec(PL_lex_stuff);
4906                     PL_lex_stuff = NULL;
4907                 }
4908                 else {
4909                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4910                         sv_free(sv);
4911                         if (PL_in_my == KEY_our) {
4912                             deprecate(":unique");
4913                         }
4914                         else
4915                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4916                     }
4917
4918                     /* NOTE: any CV attrs applied here need to be part of
4919                        the CVf_BUILTIN_ATTRS define in cv.h! */
4920                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4921                         sv_free(sv);
4922                         CvLVALUE_on(PL_compcv);
4923                     }
4924                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4925                         sv_free(sv);
4926                         deprecate(":locked");
4927                     }
4928                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4929                         sv_free(sv);
4930                         CvMETHOD_on(PL_compcv);
4931                     }
4932                     /* After we've set the flags, it could be argued that
4933                        we don't need to do the attributes.pm-based setting
4934                        process, and shouldn't bother appending recognized
4935                        flags.  To experiment with that, uncomment the
4936                        following "else".  (Note that's already been
4937                        uncommented.  That keeps the above-applied built-in
4938                        attributes from being intercepted (and possibly
4939                        rejected) by a package's attribute routines, but is
4940                        justified by the performance win for the common case
4941                        of applying only built-in attributes.) */
4942                     else
4943                         attrs = append_elem(OP_LIST, attrs,
4944                                             newSVOP(OP_CONST, 0,
4945                                                     sv));
4946                 }
4947                 s = PEEKSPACE(d);
4948                 if (*s == ':' && s[1] != ':')
4949                     s = PEEKSPACE(s+1);
4950                 else if (s == d)
4951                     break;      /* require real whitespace or :'s */
4952                 /* XXX losing whitespace on sequential attributes here */
4953             }
4954             {
4955                 const char tmp
4956                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4957                 if (*s != ';' && *s != '}' && *s != tmp
4958                     && (tmp != '=' || *s != ')')) {
4959                     const char q = ((*s == '\'') ? '"' : '\'');
4960                     /* If here for an expression, and parsed no attrs, back
4961                        off. */
4962                     if (tmp == '=' && !attrs) {
4963                         s = PL_bufptr;
4964                         break;
4965                     }
4966                     /* MUST advance bufptr here to avoid bogus "at end of line"
4967                        context messages from yyerror().
4968                     */
4969                     PL_bufptr = s;
4970                     yyerror( (const char *)
4971                              (*s
4972                               ? Perl_form(aTHX_ "Invalid separator character "
4973                                           "%c%c%c in attribute list", q, *s, q)
4974                               : "Unterminated attribute list" ) );
4975                     if (attrs)
4976                         op_free(attrs);
4977                     OPERATOR(':');
4978                 }
4979             }
4980         got_attrs:
4981             if (attrs) {
4982                 start_force(PL_curforce);
4983                 NEXTVAL_NEXTTOKE.opval = attrs;
4984                 CURMAD('_', PL_nextwhite);
4985                 force_next(THING);
4986             }
4987 #ifdef PERL_MAD
4988             if (PL_madskills) {
4989                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4990                                      (s - SvPVX(PL_linestr)) - stuffstart);
4991             }
4992 #endif
4993             TOKEN(COLONATTR);
4994         }
4995         OPERATOR(':');
4996     case '(':
4997         s++;
4998         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4999             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5000         else
5001             PL_expect = XTERM;
5002         s = SKIPSPACE1(s);
5003         TOKEN('(');
5004     case ';':
5005         CLINE;
5006         {
5007             const char tmp = *s++;
5008             OPERATOR(tmp);
5009         }
5010     case ')':
5011         {
5012             const char tmp = *s++;
5013             s = SKIPSPACE1(s);
5014             if (*s == '{')
5015                 PREBLOCK(tmp);
5016             TERM(tmp);
5017         }
5018     case ']':
5019         s++;
5020         if (PL_lex_brackets <= 0)
5021             yyerror("Unmatched right square bracket");
5022         else
5023             --PL_lex_brackets;
5024         if (PL_lex_state == LEX_INTERPNORMAL) {
5025             if (PL_lex_brackets == 0) {
5026                 if (*s == '-' && s[1] == '>')
5027                     PL_lex_state = LEX_INTERPENDMAYBE;
5028                 else if (*s != '[' && *s != '{')
5029                     PL_lex_state = LEX_INTERPEND;
5030             }
5031         }
5032         TERM(']');
5033     case '{':
5034       leftbracket:
5035         s++;
5036         if (PL_lex_brackets > 100) {
5037             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5038         }
5039         switch (PL_expect) {
5040         case XTERM:
5041             if (PL_lex_formbrack) {
5042                 s--;
5043                 PRETERMBLOCK(DO);
5044             }
5045             if (PL_oldoldbufptr == PL_last_lop)
5046                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5047             else
5048                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5049             OPERATOR(HASHBRACK);
5050         case XOPERATOR:
5051             while (s < PL_bufend && SPACE_OR_TAB(*s))
5052                 s++;
5053             d = s;
5054             PL_tokenbuf[0] = '\0';
5055             if (d < PL_bufend && *d == '-') {
5056                 PL_tokenbuf[0] = '-';
5057                 d++;
5058                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5059                     d++;
5060             }
5061             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5062                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5063                               FALSE, &len);
5064                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5065                     d++;
5066                 if (*d == '}') {
5067                     const char minus = (PL_tokenbuf[0] == '-');
5068                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5069                     if (minus)
5070                         force_next('-');
5071                 }
5072             }
5073             /* FALL THROUGH */
5074         case XATTRBLOCK:
5075         case XBLOCK:
5076             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5077             PL_expect = XSTATE;
5078             break;
5079         case XATTRTERM:
5080         case XTERMBLOCK:
5081             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5082             PL_expect = XSTATE;
5083             break;
5084         default: {
5085                 const char *t;
5086                 if (PL_oldoldbufptr == PL_last_lop)
5087                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5088                 else
5089                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5090                 s = SKIPSPACE1(s);
5091                 if (*s == '}') {
5092                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5093                         PL_expect = XTERM;
5094                         /* This hack is to get the ${} in the message. */
5095                         PL_bufptr = s+1;
5096                         yyerror("syntax error");
5097                         break;
5098                     }
5099                     OPERATOR(HASHBRACK);
5100                 }
5101                 /* This hack serves to disambiguate a pair of curlies
5102                  * as being a block or an anon hash.  Normally, expectation
5103                  * determines that, but in cases where we're not in a
5104                  * position to expect anything in particular (like inside
5105                  * eval"") we have to resolve the ambiguity.  This code
5106                  * covers the case where the first term in the curlies is a
5107                  * quoted string.  Most other cases need to be explicitly
5108                  * disambiguated by prepending a "+" before the opening
5109                  * curly in order to force resolution as an anon hash.
5110                  *
5111                  * XXX should probably propagate the outer expectation
5112                  * into eval"" to rely less on this hack, but that could
5113                  * potentially break current behavior of eval"".
5114                  * GSAR 97-07-21
5115                  */
5116                 t = s;
5117                 if (*s == '\'' || *s == '"' || *s == '`') {
5118                     /* common case: get past first string, handling escapes */
5119                     for (t++; t < PL_bufend && *t != *s;)
5120                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5121                             t++;
5122                     t++;
5123                 }
5124                 else if (*s == 'q') {
5125                     if (++t < PL_bufend
5126                         && (!isALNUM(*t)
5127                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5128                                 && !isALNUM(*t))))
5129                     {
5130                         /* skip q//-like construct */
5131                         const char *tmps;
5132                         char open, close, term;
5133                         I32 brackets = 1;
5134
5135                         while (t < PL_bufend && isSPACE(*t))
5136                             t++;
5137                         /* check for q => */
5138                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5139                             OPERATOR(HASHBRACK);
5140                         }
5141                         term = *t;
5142                         open = term;
5143                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5144                             term = tmps[5];
5145                         close = term;
5146                         if (open == close)
5147                             for (t++; t < PL_bufend; t++) {
5148                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5149                                     t++;
5150                                 else if (*t == open)
5151                                     break;
5152                             }
5153                         else {
5154                             for (t++; t < PL_bufend; t++) {
5155                                 if (*t == '\\' && t+1 < PL_bufend)
5156                                     t++;
5157                                 else if (*t == close && --brackets <= 0)
5158                                     break;
5159                                 else if (*t == open)
5160                                     brackets++;
5161                             }
5162                         }
5163                         t++;
5164                     }
5165                     else
5166                         /* skip plain q word */
5167                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5168                              t += UTF8SKIP(t);
5169                 }
5170                 else if (isALNUM_lazy_if(t,UTF)) {
5171                     t += UTF8SKIP(t);
5172                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5173                          t += UTF8SKIP(t);
5174                 }
5175                 while (t < PL_bufend && isSPACE(*t))
5176                     t++;
5177                 /* if comma follows first term, call it an anon hash */
5178                 /* XXX it could be a comma expression with loop modifiers */
5179                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5180                                    || (*t == '=' && t[1] == '>')))
5181                     OPERATOR(HASHBRACK);
5182                 if (PL_expect == XREF)
5183                     PL_expect = XTERM;
5184                 else {
5185                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5186                     PL_expect = XSTATE;
5187                 }
5188             }
5189             break;
5190         }
5191         pl_yylval.ival = CopLINE(PL_curcop);
5192         if (isSPACE(*s) || *s == '#')
5193             PL_copline = NOLINE;   /* invalidate current command line number */
5194         TOKEN('{');
5195     case '}':
5196       rightbracket:
5197         s++;
5198         if (PL_lex_brackets <= 0)
5199             yyerror("Unmatched right curly bracket");
5200         else
5201             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5202         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5203             PL_lex_formbrack = 0;
5204         if (PL_lex_state == LEX_INTERPNORMAL) {
5205             if (PL_lex_brackets == 0) {
5206                 if (PL_expect & XFAKEBRACK) {
5207                     PL_expect &= XENUMMASK;
5208                     PL_lex_state = LEX_INTERPEND;
5209                     PL_bufptr = s;
5210 #if 0
5211                     if (PL_madskills) {
5212                         if (!PL_thiswhite)
5213                             PL_thiswhite = newSVpvs("");
5214                         sv_catpvs(PL_thiswhite,"}");
5215                     }
5216 #endif
5217                     return yylex();     /* ignore fake brackets */
5218                 }
5219                 if (*s == '-' && s[1] == '>')
5220                     PL_lex_state = LEX_INTERPENDMAYBE;
5221                 else if (*s != '[' && *s != '{')
5222                     PL_lex_state = LEX_INTERPEND;
5223             }
5224         }
5225         if (PL_expect & XFAKEBRACK) {
5226             PL_expect &= XENUMMASK;
5227             PL_bufptr = s;
5228             return yylex();             /* ignore fake brackets */
5229         }
5230         start_force(PL_curforce);
5231         if (PL_madskills) {
5232             curmad('X', newSVpvn(s-1,1));
5233             CURMAD('_', PL_thiswhite);
5234         }
5235         force_next('}');
5236 #ifdef PERL_MAD
5237         if (!PL_thistoken)
5238             PL_thistoken = newSVpvs("");
5239 #endif
5240         TOKEN(';');
5241     case '&':
5242         s++;
5243         if (*s++ == '&')
5244             AOPERATOR(ANDAND);
5245         s--;
5246         if (PL_expect == XOPERATOR) {
5247             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5248                 && isIDFIRST_lazy_if(s,UTF))
5249             {
5250                 CopLINE_dec(PL_curcop);
5251                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5252                 CopLINE_inc(PL_curcop);
5253             }
5254             BAop(OP_BIT_AND);
5255         }
5256
5257         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5258         if (*PL_tokenbuf) {
5259             PL_expect = XOPERATOR;
5260             force_ident(PL_tokenbuf, '&');
5261         }
5262         else
5263             PREREF('&');
5264         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5265         TERM('&');
5266
5267     case '|':
5268         s++;
5269         if (*s++ == '|')
5270             AOPERATOR(OROR);
5271         s--;
5272         BOop(OP_BIT_OR);
5273     case '=':
5274         s++;
5275         {
5276             const char tmp = *s++;
5277             if (tmp == '=')
5278                 Eop(OP_EQ);
5279             if (tmp == '>')
5280                 OPERATOR(',');
5281             if (tmp == '~')
5282                 PMop(OP_MATCH);
5283             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5284                 && strchr("+-*/%.^&|<",tmp))
5285                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5286                             "Reversed %c= operator",(int)tmp);
5287             s--;
5288             if (PL_expect == XSTATE && isALPHA(tmp) &&
5289                 (s == PL_linestart+1 || s[-2] == '\n') )
5290                 {
5291                     if (PL_in_eval && !PL_rsfp) {
5292                         d = PL_bufend;
5293                         while (s < d) {
5294                             if (*s++ == '\n') {
5295                                 incline(s);
5296                                 if (strnEQ(s,"=cut",4)) {
5297                                     s = strchr(s,'\n');
5298                                     if (s)
5299                                         s++;
5300                                     else
5301                                         s = d;
5302                                     incline(s);
5303                                     goto retry;
5304                                 }
5305                             }
5306                         }
5307                         goto retry;
5308                     }
5309 #ifdef PERL_MAD
5310                     if (PL_madskills) {
5311                         if (!PL_thiswhite)
5312                             PL_thiswhite = newSVpvs("");
5313                         sv_catpvn(PL_thiswhite, PL_linestart,
5314                                   PL_bufend - PL_linestart);
5315                     }
5316 #endif
5317                     s = PL_bufend;
5318                     PL_doextract = TRUE;
5319                     goto retry;
5320                 }
5321         }
5322         if (PL_lex_brackets < PL_lex_formbrack) {
5323             const char *t = s;
5324 #ifdef PERL_STRICT_CR
5325             while (SPACE_OR_TAB(*t))
5326 #else
5327             while (SPACE_OR_TAB(*t) || *t == '\r')
5328 #endif
5329                 t++;
5330             if (*t == '\n' || *t == '#') {
5331                 s--;
5332                 PL_expect = XBLOCK;
5333                 goto leftbracket;
5334             }
5335         }
5336         pl_yylval.ival = 0;
5337         OPERATOR(ASSIGNOP);
5338     case '!':
5339         s++;
5340         {
5341             const char tmp = *s++;
5342             if (tmp == '=') {
5343                 /* was this !=~ where !~ was meant?
5344                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5345
5346                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5347                     const char *t = s+1;
5348
5349                     while (t < PL_bufend && isSPACE(*t))
5350                         ++t;
5351
5352                     if (*t == '/' || *t == '?' ||
5353                         ((*t == 'm' || *t == 's' || *t == 'y')
5354                          && !isALNUM(t[1])) ||
5355                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5356                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5357                                     "!=~ should be !~");
5358                 }
5359                 Eop(OP_NE);
5360             }
5361             if (tmp == '~')
5362                 PMop(OP_NOT);
5363         }
5364         s--;
5365         OPERATOR('!');
5366     case '<':
5367         if (PL_expect != XOPERATOR) {
5368             if (s[1] != '<' && !strchr(s,'>'))
5369                 check_uni();
5370             if (s[1] == '<')
5371                 s = scan_heredoc(s);
5372             else
5373                 s = scan_inputsymbol(s);
5374             TERM(sublex_start());
5375         }
5376         s++;
5377         {
5378             char tmp = *s++;
5379             if (tmp == '<')
5380                 SHop(OP_LEFT_SHIFT);
5381             if (tmp == '=') {
5382                 tmp = *s++;
5383                 if (tmp == '>')
5384                     Eop(OP_NCMP);
5385                 s--;
5386                 Rop(OP_LE);
5387             }
5388         }
5389         s--;
5390         Rop(OP_LT);
5391     case '>':
5392         s++;
5393         {
5394             const char tmp = *s++;
5395             if (tmp == '>')
5396                 SHop(OP_RIGHT_SHIFT);
5397             else if (tmp == '=')
5398                 Rop(OP_GE);
5399         }
5400         s--;
5401         Rop(OP_GT);
5402
5403     case '$':
5404         CLINE;
5405
5406         if (PL_expect == XOPERATOR) {
5407             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5408                 return deprecate_commaless_var_list();
5409             }
5410         }
5411
5412         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5413             PL_tokenbuf[0] = '@';
5414             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5415                            sizeof PL_tokenbuf - 1, FALSE);
5416             if (PL_expect == XOPERATOR)
5417                 no_op("Array length", s);
5418             if (!PL_tokenbuf[1])
5419                 PREREF(DOLSHARP);
5420             PL_expect = XOPERATOR;
5421             PL_pending_ident = '#';
5422             TOKEN(DOLSHARP);
5423         }
5424
5425         PL_tokenbuf[0] = '$';
5426         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5427                        sizeof PL_tokenbuf - 1, FALSE);
5428         if (PL_expect == XOPERATOR)
5429             no_op("Scalar", s);
5430         if (!PL_tokenbuf[1]) {
5431             if (s == PL_bufend)
5432                 yyerror("Final $ should be \\$ or $name");
5433             PREREF('$');
5434         }
5435
5436         /* This kludge not intended to be bulletproof. */
5437         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5438             pl_yylval.opval = newSVOP(OP_CONST, 0,
5439                                    newSViv(CopARYBASE_get(&PL_compiling)));
5440             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5441             TERM(THING);
5442         }
5443
5444         d = s;
5445         {
5446             const char tmp = *s;
5447             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5448                 s = SKIPSPACE1(s);
5449
5450             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5451                 && intuit_more(s)) {
5452                 if (*s == '[') {
5453                     PL_tokenbuf[0] = '@';
5454                     if (ckWARN(WARN_SYNTAX)) {
5455                         char *t = s+1;
5456
5457                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5458                             t++;
5459                         if (*t++ == ',') {
5460                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5461                             while (t < PL_bufend && *t != ']')
5462                                 t++;
5463                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5464                                         "Multidimensional syntax %.*s not supported",
5465                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5466                         }
5467                     }
5468                 }
5469                 else if (*s == '{') {
5470                     char *t;
5471                     PL_tokenbuf[0] = '%';
5472                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5473                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5474                         {
5475                             char tmpbuf[sizeof PL_tokenbuf];
5476                             do {
5477                                 t++;
5478                             } while (isSPACE(*t));
5479                             if (isIDFIRST_lazy_if(t,UTF)) {
5480                                 STRLEN len;
5481                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5482                                               &len);
5483                                 while (isSPACE(*t))
5484                                     t++;
5485                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5486                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5487                                                 "You need to quote \"%s\"",
5488                                                 tmpbuf);
5489                             }
5490                         }
5491                 }
5492             }
5493
5494             PL_expect = XOPERATOR;
5495             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5496                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5497                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5498                     PL_expect = XOPERATOR;
5499                 else if (strchr("$@\"'`q", *s))
5500                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5501                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5502                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5503                 else if (isIDFIRST_lazy_if(s,UTF)) {
5504                     char tmpbuf[sizeof PL_tokenbuf];
5505                     int t2;
5506                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5507                     if ((t2 = keyword(tmpbuf, len, 0))) {
5508                         /* binary operators exclude handle interpretations */
5509                         switch (t2) {
5510                         case -KEY_x:
5511                         case -KEY_eq:
5512                         case -KEY_ne:
5513                         case -KEY_gt:
5514                         case -KEY_lt:
5515                         case -KEY_ge:
5516                         case -KEY_le:
5517                         case -KEY_cmp:
5518                             break;
5519                         default:
5520                             PL_expect = XTERM;  /* e.g. print $fh length() */
5521                             break;
5522                         }
5523                     }
5524                     else {
5525                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5526                     }
5527                 }
5528                 else if (isDIGIT(*s))
5529                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5530                 else if (*s == '.' && isDIGIT(s[1]))
5531                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5532                 else if ((*s == '?' || *s == '-' || *s == '+')
5533                          && !isSPACE(s[1]) && s[1] != '=')
5534                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5535                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5536                          && s[1] != '/')
5537                     PL_expect = XTERM;          /* e.g. print $fh /.../
5538                                                    XXX except DORDOR operator
5539                                                 */
5540                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5541                          && s[2] != '=')
5542                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5543             }
5544         }
5545         PL_pending_ident = '$';
5546         TOKEN('$');
5547
5548     case '@':
5549         if (PL_expect == XOPERATOR)
5550             no_op("Array", s);
5551         PL_tokenbuf[0] = '@';
5552         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5553         if (!PL_tokenbuf[1]) {
5554             PREREF('@');
5555         }
5556         if (PL_lex_state == LEX_NORMAL)
5557             s = SKIPSPACE1(s);
5558         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5559             if (*s == '{')
5560                 PL_tokenbuf[0] = '%';
5561
5562             /* Warn about @ where they meant $. */
5563             if (*s == '[' || *s == '{') {
5564                 if (ckWARN(WARN_SYNTAX)) {
5565                     const char *t = s + 1;
5566                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5567                         t++;
5568                     if (*t == '}' || *t == ']') {
5569                         t++;
5570                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5571                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5572                             "Scalar value %.*s better written as $%.*s",
5573                             (int)(t-PL_bufptr), PL_bufptr,
5574                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5575                     }
5576                 }
5577             }
5578         }
5579         PL_pending_ident = '@';
5580         TERM('@');
5581
5582      case '/':                  /* may be division, defined-or, or pattern */
5583         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5584             s += 2;
5585             AOPERATOR(DORDOR);
5586         }
5587      case '?':                  /* may either be conditional or pattern */
5588         if (PL_expect == XOPERATOR) {
5589              char tmp = *s++;
5590              if(tmp == '?') {
5591                 OPERATOR('?');
5592              }
5593              else {
5594                  tmp = *s++;
5595                  if(tmp == '/') {
5596                      /* A // operator. */
5597                     AOPERATOR(DORDOR);
5598                  }
5599                  else {
5600                      s--;
5601                      Mop(OP_DIVIDE);
5602                  }
5603              }
5604          }
5605          else {
5606              /* Disable warning on "study /blah/" */
5607              if (PL_oldoldbufptr == PL_last_uni
5608               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5609                   || memNE(PL_last_uni, "study", 5)
5610                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5611               ))
5612                  check_uni();
5613              s = scan_pat(s,OP_MATCH);
5614              TERM(sublex_start());
5615          }
5616
5617     case '.':
5618         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5619 #ifdef PERL_STRICT_CR
5620             && s[1] == '\n'
5621 #else
5622             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5623 #endif
5624             && (s == PL_linestart || s[-1] == '\n') )
5625         {
5626             PL_lex_formbrack = 0;
5627             PL_expect = XSTATE;
5628             goto rightbracket;
5629         }
5630         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5631             s += 3;
5632             OPERATOR(YADAYADA);
5633         }
5634         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5635             char tmp = *s++;
5636             if (*s == tmp) {
5637                 s++;
5638                 if (*s == tmp) {
5639                     s++;
5640                     pl_yylval.ival = OPf_SPECIAL;
5641                 }
5642                 else
5643                     pl_yylval.ival = 0;
5644                 OPERATOR(DOTDOT);
5645             }
5646             if (PL_expect != XOPERATOR)
5647                 check_uni();
5648             Aop(OP_CONCAT);
5649         }
5650         /* FALL THROUGH */
5651     case '0': case '1': case '2': case '3': case '4':
5652     case '5': case '6': case '7': case '8': case '9':
5653         s = scan_num(s, &pl_yylval);
5654         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5655         if (PL_expect == XOPERATOR)
5656             no_op("Number",s);
5657         TERM(THING);
5658
5659     case '\'':
5660         s = scan_str(s,!!PL_madskills,FALSE);
5661         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5662         if (PL_expect == XOPERATOR) {
5663             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5664                 return deprecate_commaless_var_list();
5665             }
5666             else
5667                 no_op("String",s);
5668         }
5669         if (!s)
5670             missingterm(NULL);
5671         pl_yylval.ival = OP_CONST;
5672         TERM(sublex_start());
5673
5674     case '"':
5675         s = scan_str(s,!!PL_madskills,FALSE);
5676         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5677         if (PL_expect == XOPERATOR) {
5678             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5679                 return deprecate_commaless_var_list();
5680             }
5681             else
5682                 no_op("String",s);
5683         }
5684         if (!s)
5685             missingterm(NULL);
5686         pl_yylval.ival = OP_CONST;
5687         /* FIXME. I think that this can be const if char *d is replaced by
5688            more localised variables.  */
5689         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5690             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5691                 pl_yylval.ival = OP_STRINGIFY;
5692                 break;
5693             }
5694         }
5695         TERM(sublex_start());
5696
5697     case '`':
5698         s = scan_str(s,!!PL_madskills,FALSE);
5699         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5700         if (PL_expect == XOPERATOR)
5701             no_op("Backticks",s);
5702         if (!s)
5703             missingterm(NULL);
5704         readpipe_override();
5705         TERM(sublex_start());
5706
5707     case '\\':
5708         s++;
5709         if (PL_lex_inwhat && isDIGIT(*s))
5710             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5711                            *s, *s);
5712         if (PL_expect == XOPERATOR)
5713             no_op("Backslash",s);
5714         OPERATOR(REFGEN);
5715
5716     case 'v':
5717         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5718             char *start = s + 2;
5719             while (isDIGIT(*start) || *start == '_')
5720                 start++;
5721             if (*start == '.' && isDIGIT(start[1])) {
5722                 s = scan_num(s, &pl_yylval);
5723                 TERM(THING);
5724             }
5725             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5726             else if (!isALPHA(*start) && (PL_expect == XTERM
5727                         || PL_expect == XREF || PL_expect == XSTATE
5728                         || PL_expect == XTERMORDORDOR)) {
5729                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5730                 if (!gv) {
5731                     s = scan_num(s, &pl_yylval);
5732                     TERM(THING);
5733                 }
5734             }
5735         }
5736         goto keylookup;
5737     case 'x':
5738         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5739             s++;
5740             Mop(OP_REPEAT);
5741         }
5742         goto keylookup;
5743
5744     case '_':
5745     case 'a': case 'A':
5746     case 'b': case 'B':
5747     case 'c': case 'C':
5748     case 'd': case 'D':
5749     case 'e': case 'E':
5750     case 'f': case 'F':
5751     case 'g': case 'G':
5752     case 'h': case 'H':
5753     case 'i': case 'I':
5754     case 'j': case 'J':
5755     case 'k': case 'K':
5756     case 'l': case 'L':
5757     case 'm': case 'M':
5758     case 'n': case 'N':
5759     case 'o': case 'O':
5760     case 'p': case 'P':
5761     case 'q': case 'Q':
5762     case 'r': case 'R':
5763     case 's': case 'S':
5764     case 't': case 'T':
5765     case 'u': case 'U':
5766               case 'V':
5767     case 'w': case 'W':
5768               case 'X':
5769     case 'y': case 'Y':
5770     case 'z': case 'Z':
5771
5772       keylookup: {
5773         bool anydelim;
5774         I32 tmp;
5775
5776         orig_keyword = 0;
5777         gv = NULL;
5778         gvp = NULL;
5779
5780         PL_bufptr = s;
5781         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5782
5783         /* Some keywords can be followed by any delimiter, including ':' */
5784         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5785                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5786                              (PL_tokenbuf[0] == 'q' &&
5787                               strchr("qwxr", PL_tokenbuf[1])))));
5788
5789         /* x::* is just a word, unless x is "CORE" */
5790         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5791             goto just_a_word;
5792
5793         d = s;
5794         while (d < PL_bufend && isSPACE(*d))
5795                 d++;    /* no comments skipped here, or s### is misparsed */
5796
5797         /* Is this a word before a => operator? */
5798         if (*d == '=' && d[1] == '>') {
5799             CLINE;
5800             pl_yylval.opval
5801                 = (OP*)newSVOP(OP_CONST, 0,
5802                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5803             pl_yylval.opval->op_private = OPpCONST_BARE;
5804             TERM(WORD);
5805         }
5806
5807         /* Check for plugged-in keyword */
5808         {
5809             OP *o;
5810             int result;
5811             char *saved_bufptr = PL_bufptr;
5812             PL_bufptr = s;
5813             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5814             s = PL_bufptr;
5815             if (result == KEYWORD_PLUGIN_DECLINE) {
5816                 /* not a plugged-in keyword */
5817                 PL_bufptr = saved_bufptr;
5818             } else if (result == KEYWORD_PLUGIN_STMT) {
5819                 pl_yylval.opval = o;
5820                 CLINE;
5821                 PL_expect = XSTATE;
5822                 return REPORT(PLUGSTMT);
5823             } else if (result == KEYWORD_PLUGIN_EXPR) {
5824                 pl_yylval.opval = o;
5825                 CLINE;
5826                 PL_expect = XOPERATOR;
5827                 return REPORT(PLUGEXPR);
5828             } else {
5829                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5830                                         PL_tokenbuf);
5831             }
5832         }
5833
5834         /* Check for built-in keyword */
5835         tmp = keyword(PL_tokenbuf, len, 0);
5836
5837         /* Is this a label? */
5838         if (!anydelim && PL_expect == XSTATE
5839               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5840             if (tmp)
5841                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5842             s = d + 1;
5843             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5844             CLINE;
5845             TOKEN(LABEL);
5846         }
5847
5848         if (tmp < 0) {                  /* second-class keyword? */
5849             GV *ogv = NULL;     /* override (winner) */
5850             GV *hgv = NULL;     /* hidden (loser) */
5851             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5852                 CV *cv;
5853                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5854                     (cv = GvCVu(gv)))
5855                 {
5856                     if (GvIMPORTED_CV(gv))
5857                         ogv = gv;
5858                     else if (! CvMETHOD(cv))
5859                         hgv = gv;
5860                 }
5861                 if (!ogv &&
5862                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5863                     (gv = *gvp) && isGV_with_GP(gv) &&
5864                     GvCVu(gv) && GvIMPORTED_CV(gv))
5865                 {
5866                     ogv = gv;
5867                 }
5868             }
5869             if (ogv) {
5870                 orig_keyword = tmp;
5871                 tmp = 0;                /* overridden by import or by GLOBAL */
5872             }
5873             else if (gv && !gvp
5874                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5875                      && GvCVu(gv))
5876             {
5877                 tmp = 0;                /* any sub overrides "weak" keyword */
5878             }
5879             else {                      /* no override */
5880                 tmp = -tmp;
5881                 if (tmp == KEY_dump) {
5882                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5883                                    "dump() better written as CORE::dump()");
5884                 }
5885                 gv = NULL;
5886                 gvp = 0;
5887                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5888                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5889                                    "Ambiguous call resolved as CORE::%s(), %s",
5890                                    GvENAME(hgv), "qualify as such or use &");
5891             }
5892         }
5893
5894       reserved_word:
5895         switch (tmp) {
5896
5897         default:                        /* not a keyword */
5898             /* Trade off - by using this evil construction we can pull the
5899                variable gv into the block labelled keylookup. If not, then
5900                we have to give it function scope so that the goto from the
5901                earlier ':' case doesn't bypass the initialisation.  */
5902             if (0) {
5903             just_a_word_zero_gv:
5904                 gv = NULL;
5905                 gvp = NULL;
5906                 orig_keyword = 0;
5907             }
5908           just_a_word: {
5909                 SV *sv;
5910                 int pkgname = 0;
5911                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5912                 OP *rv2cv_op;
5913                 CV *cv;
5914 #ifdef PERL_MAD
5915                 SV *nextPL_nextwhite = 0;
5916 #endif
5917
5918
5919                 /* Get the rest if it looks like a package qualifier */
5920
5921                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5922                     STRLEN morelen;
5923                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5924                                   TRUE, &morelen);
5925                     if (!morelen)
5926                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5927                                 *s == '\'' ? "'" : "::");
5928                     len += morelen;
5929                     pkgname = 1;
5930                 }
5931
5932                 if (PL_expect == XOPERATOR) {
5933                     if (PL_bufptr == PL_linestart) {
5934                         CopLINE_dec(PL_curcop);
5935                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5936                         CopLINE_inc(PL_curcop);
5937                     }
5938                     else
5939                         no_op("Bareword",s);
5940                 }
5941
5942                 /* Look for a subroutine with this name in current package,
5943                    unless name is "Foo::", in which case Foo is a bearword
5944                    (and a package name). */
5945
5946                 if (len > 2 && !PL_madskills &&
5947                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5948                 {
5949                     if (ckWARN(WARN_BAREWORD)
5950                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5951                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5952                             "Bareword \"%s\" refers to nonexistent package",
5953                              PL_tokenbuf);
5954                     len -= 2;
5955                     PL_tokenbuf[len] = '\0';
5956                     gv = NULL;
5957                     gvp = 0;
5958                 }
5959                 else {
5960                     if (!gv) {
5961                         /* Mustn't actually add anything to a symbol table.
5962                            But also don't want to "initialise" any placeholder
5963                            constants that might already be there into full
5964                            blown PVGVs with attached PVCV.  */
5965                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5966                                                GV_NOADD_NOINIT, SVt_PVCV);
5967                     }
5968                     len = 0;
5969                 }
5970
5971                 /* if we saw a global override before, get the right name */
5972
5973                 if (gvp) {
5974                     sv = newSVpvs("CORE::GLOBAL::");
5975                     sv_catpv(sv,PL_tokenbuf);
5976                 }
5977                 else {
5978                     /* If len is 0, newSVpv does strlen(), which is correct.
5979                        If len is non-zero, then it will be the true length,
5980                        and so the scalar will be created correctly.  */
5981                     sv = newSVpv(PL_tokenbuf,len);
5982                 }
5983 #ifdef PERL_MAD
5984                 if (PL_madskills && !PL_thistoken) {
5985                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5986                     PL_thistoken = newSVpvn(start,s - start);
5987                     PL_realtokenstart = s - SvPVX(PL_linestr);
5988                 }
5989 #endif
5990
5991                 /* Presume this is going to be a bareword of some sort. */
5992
5993                 CLINE;
5994                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5995                 pl_yylval.opval->op_private = OPpCONST_BARE;
5996                 /* UTF-8 package name? */
5997                 if (UTF && !IN_BYTES &&
5998                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5999                     SvUTF8_on(sv);
6000
6001                 /* And if "Foo::", then that's what it certainly is. */
6002
6003                 if (len)
6004                     goto safe_bareword;
6005
6006                 cv = NULL;
6007                 {
6008                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6009                     const_op->op_private = OPpCONST_BARE;
6010                     rv2cv_op = newCVREF(0, const_op);
6011                 }
6012                 if (rv2cv_op->op_type == OP_RV2CV &&
6013                         (rv2cv_op->op_flags & OPf_KIDS)) {
6014                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6015                     switch (rv_op->op_type) {
6016                         case OP_CONST: {
6017                             SV *sv = cSVOPx_sv(rv_op);
6018                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6019                                 cv = (CV*)SvRV(sv);
6020                         } break;
6021                         case OP_GV: {
6022                             GV *gv = cGVOPx_gv(rv_op);
6023                             CV *maybe_cv = GvCVu(gv);
6024                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6025                                 cv = maybe_cv;
6026                         } break;
6027                     }
6028                 }
6029
6030                 /* See if it's the indirect object for a list operator. */
6031
6032                 if (PL_oldoldbufptr &&
6033                     PL_oldoldbufptr < PL_bufptr &&
6034                     (PL_oldoldbufptr == PL_last_lop
6035                      || PL_oldoldbufptr == PL_last_uni) &&
6036                     /* NO SKIPSPACE BEFORE HERE! */
6037                     (PL_expect == XREF ||
6038                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6039                 {
6040                     bool immediate_paren = *s == '(';
6041
6042                     /* (Now we can afford to cross potential line boundary.) */
6043                     s = SKIPSPACE2(s,nextPL_nextwhite);
6044 #ifdef PERL_MAD
6045                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6046 #endif
6047
6048                     /* Two barewords in a row may indicate method call. */
6049
6050                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6051                         (tmp = intuit_method(s, gv, cv))) {
6052                         op_free(rv2cv_op);
6053                         return REPORT(tmp);
6054                     }
6055
6056                     /* If not a declared subroutine, it's an indirect object. */
6057                     /* (But it's an indir obj regardless for sort.) */
6058                     /* Also, if "_" follows a filetest operator, it's a bareword */
6059
6060                     if (
6061                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6062                          (!cv &&
6063                         (PL_last_lop_op != OP_MAPSTART &&
6064                          PL_last_lop_op != OP_GREPSTART))))
6065                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6066                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6067                        )
6068                     {
6069                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6070                         goto bareword;
6071                     }
6072                 }
6073
6074                 PL_expect = XOPERATOR;
6075 #ifdef PERL_MAD
6076                 if (isSPACE(*s))
6077                     s = SKIPSPACE2(s,nextPL_nextwhite);
6078                 PL_nextwhite = nextPL_nextwhite;
6079 #else
6080                 s = skipspace(s);
6081 #endif
6082
6083                 /* Is this a word before a => operator? */
6084                 if (*s == '=' && s[1] == '>' && !pkgname) {
6085                     op_free(rv2cv_op);
6086                     CLINE;
6087                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6088                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6089                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6090                     TERM(WORD);
6091                 }
6092
6093                 /* If followed by a paren, it's certainly a subroutine. */
6094                 if (*s == '(') {
6095                     CLINE;
6096                     if (cv) {
6097                         d = s + 1;
6098                         while (SPACE_OR_TAB(*d))
6099                             d++;
6100                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6101                             s = d + 1;
6102                             goto its_constant;
6103                         }
6104                     }
6105 #ifdef PERL_MAD
6106                     if (PL_madskills) {
6107                         PL_nextwhite = PL_thiswhite;
6108                         PL_thiswhite = 0;
6109                     }
6110                     start_force(PL_curforce);
6111 #endif
6112                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6113                     PL_expect = XOPERATOR;
6114 #ifdef PERL_MAD
6115                     if (PL_madskills) {
6116                         PL_nextwhite = nextPL_nextwhite;
6117                         curmad('X', PL_thistoken);
6118                         PL_thistoken = newSVpvs("");
6119                     }
6120 #endif
6121                     op_free(rv2cv_op);
6122                     force_next(WORD);
6123                     pl_yylval.ival = 0;
6124                     TOKEN('&');
6125                 }
6126
6127                 /* If followed by var or block, call it a method (unless sub) */
6128
6129                 if ((*s == '$' || *s == '{') && !cv) {
6130                     op_free(rv2cv_op);
6131                     PL_last_lop = PL_oldbufptr;
6132                     PL_last_lop_op = OP_METHOD;
6133                     PREBLOCK(METHOD);
6134                 }
6135
6136                 /* If followed by a bareword, see if it looks like indir obj. */
6137
6138                 if (!orig_keyword
6139                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6140                         && (tmp = intuit_method(s, gv, cv))) {
6141                     op_free(rv2cv_op);
6142                     return REPORT(tmp);
6143                 }
6144
6145                 /* Not a method, so call it a subroutine (if defined) */
6146
6147                 if (cv) {
6148                     if (lastchar == '-')
6149                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6150                                          "Ambiguous use of -%s resolved as -&%s()",
6151                                          PL_tokenbuf, PL_tokenbuf);
6152                     /* Check for a constant sub */
6153                     if ((sv = cv_const_sv(cv))) {
6154                   its_constant:
6155                         op_free(rv2cv_op);
6156                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6157                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6158                         pl_yylval.opval->op_private = 0;
6159                         TOKEN(WORD);
6160                     }
6161
6162                     op_free(pl_yylval.opval);
6163                     pl_yylval.opval = rv2cv_op;
6164                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6165                     PL_last_lop = PL_oldbufptr;
6166                     PL_last_lop_op = OP_ENTERSUB;
6167                     /* Is there a prototype? */
6168                     if (
6169 #ifdef PERL_MAD
6170                         cv &&
6171 #endif
6172                         SvPOK(cv))
6173                     {
6174                         STRLEN protolen;
6175                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6176                         if (!protolen)
6177                             TERM(FUNC0SUB);
6178                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6179                             OPERATOR(UNIOPSUB);
6180                         while (*proto == ';')
6181                             proto++;
6182                         if (*proto == '&' && *s == '{') {
6183                             if (PL_curstash)
6184                                 sv_setpvs(PL_subname, "__ANON__");
6185                             else
6186                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6187                             PREBLOCK(LSTOPSUB);
6188                         }
6189                     }
6190 #ifdef PERL_MAD
6191                     {
6192                         if (PL_madskills) {
6193                             PL_nextwhite = PL_thiswhite;
6194                             PL_thiswhite = 0;
6195                         }
6196                         start_force(PL_curforce);
6197                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6198                         PL_expect = XTERM;
6199                         if (PL_madskills) {
6200                             PL_nextwhite = nextPL_nextwhite;
6201                             curmad('X', PL_thistoken);
6202                             PL_thistoken = newSVpvs("");
6203                         }
6204                         force_next(WORD);
6205                         TOKEN(NOAMP);
6206                     }
6207                 }
6208
6209                 /* Guess harder when madskills require "best effort". */
6210                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6211                     int probable_sub = 0;
6212                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6213                         probable_sub = 1;
6214                     else if (isALPHA(*s)) {
6215                         char tmpbuf[1024];
6216                         STRLEN tmplen;
6217                         d = s;
6218                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6219                         if (!keyword(tmpbuf, tmplen, 0))
6220                             probable_sub = 1;
6221                         else {
6222                             while (d < PL_bufend && isSPACE(*d))
6223                                 d++;
6224                             if (*d == '=' && d[1] == '>')
6225                                 probable_sub = 1;
6226                         }
6227                     }
6228                     if (probable_sub) {
6229                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6230                         op_free(pl_yylval.opval);
6231                         pl_yylval.opval = rv2cv_op;
6232                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6233                         PL_last_lop = PL_oldbufptr;
6234                         PL_last_lop_op = OP_ENTERSUB;
6235                         PL_nextwhite = PL_thiswhite;
6236                         PL_thiswhite = 0;
6237                         start_force(PL_curforce);
6238                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6239                         PL_expect = XTERM;
6240                         PL_nextwhite = nextPL_nextwhite;
6241                         curmad('X', PL_thistoken);
6242                         PL_thistoken = newSVpvs("");
6243                         force_next(WORD);
6244                         TOKEN(NOAMP);
6245                     }
6246 #else
6247                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6248                     PL_expect = XTERM;
6249                     force_next(WORD);
6250                     TOKEN(NOAMP);
6251 #endif
6252                 }
6253
6254                 /* Call it a bare word */
6255
6256                 if (PL_hints & HINT_STRICT_SUBS)
6257                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6258                 else {
6259                 bareword:
6260                     /* after "print" and similar functions (corresponding to
6261                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6262                      * a filehandle should be subject to "strict subs".
6263                      * Likewise for the optional indirect-object argument to system
6264                      * or exec, which can't be a bareword */
6265                     if ((PL_last_lop_op == OP_PRINT
6266                             || PL_last_lop_op == OP_PRTF
6267                             || PL_last_lop_op == OP_SAY
6268                             || PL_last_lop_op == OP_SYSTEM
6269                             || PL_last_lop_op == OP_EXEC)
6270                             && (PL_hints & HINT_STRICT_SUBS))
6271                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6272                     if (lastchar != '-') {
6273                         if (ckWARN(WARN_RESERVED)) {
6274                             d = PL_tokenbuf;
6275                             while (isLOWER(*d))
6276                                 d++;
6277                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6278                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6279                                        PL_tokenbuf);
6280                         }
6281                     }
6282                 }
6283                 op_free(rv2cv_op);
6284
6285             safe_bareword:
6286                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6287                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6288                                      "Operator or semicolon missing before %c%s",
6289                                      lastchar, PL_tokenbuf);
6290                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6291                                      "Ambiguous use of %c resolved as operator %c",
6292                                      lastchar, lastchar);
6293                 }
6294                 TOKEN(WORD);
6295             }
6296
6297         case KEY___FILE__:
6298             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6299                                         newSVpv(CopFILE(PL_curcop),0));
6300             TERM(THING);
6301
6302         case KEY___LINE__:
6303             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6304                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6305             TERM(THING);
6306
6307         case KEY___PACKAGE__:
6308             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6309                                         (PL_curstash
6310                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6311                                          : &PL_sv_undef));
6312             TERM(THING);
6313
6314         case KEY___DATA__:
6315         case KEY___END__: {
6316             GV *gv;
6317             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6318                 const char *pname = "main";
6319                 if (PL_tokenbuf[2] == 'D')
6320                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6321                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6322                                 SVt_PVIO);
6323                 GvMULTI_on(gv);
6324                 if (!GvIO(gv))
6325                     GvIOp(gv) = newIO();
6326                 IoIFP(GvIOp(gv)) = PL_rsfp;
6327 #if defined(HAS_FCNTL) && defined(F_SETFD)
6328                 {
6329                     const int fd = PerlIO_fileno(PL_rsfp);
6330                     fcntl(fd,F_SETFD,fd >= 3);
6331                 }
6332 #endif
6333                 /* Mark this internal pseudo-handle as clean */
6334                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6335                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6336                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6337                 else
6338                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6339 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6340                 /* if the script was opened in binmode, we need to revert
6341                  * it to text mode for compatibility; but only iff it has CRs
6342                  * XXX this is a questionable hack at best. */
6343                 if (PL_bufend-PL_bufptr > 2
6344                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6345                 {
6346                     Off_t loc = 0;
6347                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6348                         loc = PerlIO_tell(PL_rsfp);
6349                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6350                     }
6351 #ifdef NETWARE
6352                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6353 #else
6354                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6355 #endif  /* NETWARE */
6356 #ifdef PERLIO_IS_STDIO /* really? */
6357 #  if defined(__BORLANDC__)
6358                         /* XXX see note in do_binmode() */
6359                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6360 #  endif
6361 #endif
6362                         if (loc > 0)
6363                             PerlIO_seek(PL_rsfp, loc, 0);
6364                     }
6365                 }
6366 #endif
6367 #ifdef PERLIO_LAYERS
6368                 if (!IN_BYTES) {
6369                     if (UTF)
6370                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6371                     else if (PL_encoding) {
6372                         SV *name;
6373                         dSP;
6374                         ENTER;
6375                         SAVETMPS;
6376                         PUSHMARK(sp);
6377                         EXTEND(SP, 1);
6378                         XPUSHs(PL_encoding);
6379                         PUTBACK;
6380                         call_method("name", G_SCALAR);
6381                         SPAGAIN;
6382                         name = POPs;
6383                         PUTBACK;
6384                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6385                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6386                                                       SVfARG(name)));
6387                         FREETMPS;
6388                         LEAVE;
6389                     }
6390                 }
6391 #endif
6392 #ifdef PERL_MAD
6393                 if (PL_madskills) {
6394                     if (PL_realtokenstart >= 0) {
6395                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6396                         if (!PL_endwhite)
6397                             PL_endwhite = newSVpvs("");
6398                         sv_catsv(PL_endwhite, PL_thiswhite);
6399                         PL_thiswhite = 0;
6400                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6401                         PL_realtokenstart = -1;
6402                     }
6403                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6404                            != NULL) ;
6405                 }
6406 #endif
6407                 PL_rsfp = NULL;
6408             }
6409             goto fake_eof;
6410         }
6411
6412         case KEY_AUTOLOAD:
6413         case KEY_DESTROY:
6414         case KEY_BEGIN:
6415         case KEY_UNITCHECK:
6416         case KEY_CHECK:
6417         case KEY_INIT:
6418         case KEY_END:
6419             if (PL_expect == XSTATE) {
6420                 s = PL_bufptr;
6421                 goto really_sub;
6422             }
6423             goto just_a_word;
6424
6425         case KEY_CORE:
6426             if (*s == ':' && s[1] == ':') {
6427                 s += 2;
6428                 d = s;
6429                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6430                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6431                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6432                 if (tmp < 0)
6433                     tmp = -tmp;
6434                 else if (tmp == KEY_require || tmp == KEY_do)
6435                     /* that's a way to remember we saw "CORE::" */
6436                     orig_keyword = tmp;
6437                 goto reserved_word;
6438             }
6439             goto just_a_word;
6440
6441         case KEY_abs:
6442             UNI(OP_ABS);
6443
6444         case KEY_alarm:
6445             UNI(OP_ALARM);
6446
6447         case KEY_accept:
6448             LOP(OP_ACCEPT,XTERM);
6449
6450         case KEY_and:
6451             OPERATOR(ANDOP);
6452
6453         case KEY_atan2:
6454             LOP(OP_ATAN2,XTERM);
6455
6456         case KEY_bind:
6457             LOP(OP_BIND,XTERM);
6458
6459         case KEY_binmode:
6460             LOP(OP_BINMODE,XTERM);
6461
6462         case KEY_bless:
6463             LOP(OP_BLESS,XTERM);
6464
6465         case KEY_break:
6466             FUN0(OP_BREAK);
6467
6468         case KEY_chop:
6469             UNI(OP_CHOP);
6470
6471         case KEY_continue:
6472             /* When 'use switch' is in effect, continue has a dual
6473                life as a control operator. */
6474             {
6475                 if (!FEATURE_IS_ENABLED("switch"))
6476                     PREBLOCK(CONTINUE);
6477                 else {
6478                     /* We have to disambiguate the two senses of
6479                       "continue". If the next token is a '{' then
6480                       treat it as the start of a continue block;
6481                       otherwise treat it as a control operator.
6482                      */
6483                     s = skipspace(s);
6484                     if (*s == '{')
6485             PREBLOCK(CONTINUE);
6486                     else
6487                         FUN0(OP_CONTINUE);
6488                 }
6489             }
6490
6491         case KEY_chdir:
6492             /* may use HOME */
6493             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6494             UNI(OP_CHDIR);
6495
6496         case KEY_close:
6497             UNI(OP_CLOSE);
6498
6499         case KEY_closedir:
6500             UNI(OP_CLOSEDIR);
6501
6502         case KEY_cmp:
6503             Eop(OP_SCMP);
6504
6505         case KEY_caller:
6506             UNI(OP_CALLER);
6507
6508         case KEY_crypt:
6509 #ifdef FCRYPT
6510             if (!PL_cryptseen) {
6511                 PL_cryptseen = TRUE;
6512                 init_des();
6513             }
6514 #endif
6515             LOP(OP_CRYPT,XTERM);
6516
6517         case KEY_chmod:
6518             LOP(OP_CHMOD,XTERM);
6519
6520         case KEY_chown:
6521             LOP(OP_CHOWN,XTERM);
6522
6523         case KEY_connect:
6524             LOP(OP_CONNECT,XTERM);
6525
6526         case KEY_chr:
6527             UNI(OP_CHR);
6528
6529         case KEY_cos:
6530             UNI(OP_COS);
6531
6532         case KEY_chroot:
6533             UNI(OP_CHROOT);
6534
6535         case KEY_default:
6536             PREBLOCK(DEFAULT);
6537
6538         case KEY_do:
6539             s = SKIPSPACE1(s);
6540             if (*s == '{')
6541                 PRETERMBLOCK(DO);
6542             if (*s != '\'')
6543                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6544             if (orig_keyword == KEY_do) {
6545                 orig_keyword = 0;
6546                 pl_yylval.ival = 1;
6547             }
6548             else
6549                 pl_yylval.ival = 0;
6550             OPERATOR(DO);
6551
6552         case KEY_die:
6553             PL_hints |= HINT_BLOCK_SCOPE;
6554             LOP(OP_DIE,XTERM);
6555
6556         case KEY_defined:
6557             UNI(OP_DEFINED);
6558
6559         case KEY_delete:
6560             UNI(OP_DELETE);
6561
6562         case KEY_dbmopen:
6563             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6564             LOP(OP_DBMOPEN,XTERM);
6565
6566         case KEY_dbmclose:
6567             UNI(OP_DBMCLOSE);
6568
6569         case KEY_dump:
6570             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6571             LOOPX(OP_DUMP);
6572
6573         case KEY_else:
6574             PREBLOCK(ELSE);
6575
6576         case KEY_elsif:
6577             pl_yylval.ival = CopLINE(PL_curcop);
6578             OPERATOR(ELSIF);
6579
6580         case KEY_eq:
6581             Eop(OP_SEQ);
6582
6583         case KEY_exists:
6584             UNI(OP_EXISTS);
6585
6586         case KEY_exit:
6587             if (PL_madskills)
6588                 UNI(OP_INT);
6589             UNI(OP_EXIT);
6590
6591         case KEY_eval:
6592             s = SKIPSPACE1(s);
6593             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6594             UNIBRACK(OP_ENTEREVAL);
6595
6596         case KEY_eof:
6597             UNI(OP_EOF);
6598
6599         case KEY_exp:
6600             UNI(OP_EXP);
6601
6602         case KEY_each:
6603             UNI(OP_EACH);
6604
6605         case KEY_exec:
6606             LOP(OP_EXEC,XREF);
6607
6608         case KEY_endhostent:
6609             FUN0(OP_EHOSTENT);
6610
6611         case KEY_endnetent:
6612             FUN0(OP_ENETENT);
6613
6614         case KEY_endservent:
6615             FUN0(OP_ESERVENT);
6616
6617         case KEY_endprotoent:
6618             FUN0(OP_EPROTOENT);
6619
6620         case KEY_endpwent:
6621             FUN0(OP_EPWENT);
6622
6623         case KEY_endgrent:
6624             FUN0(OP_EGRENT);
6625
6626         case KEY_for:
6627         case KEY_foreach:
6628             pl_yylval.ival = CopLINE(PL_curcop);
6629             s = SKIPSPACE1(s);
6630             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6631                 char *p = s;
6632 #ifdef PERL_MAD
6633                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6634 #endif
6635
6636                 if ((PL_bufend - p) >= 3 &&
6637                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6638                     p += 2;
6639                 else if ((PL_bufend - p) >= 4 &&
6640                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6641                     p += 3;
6642                 p = PEEKSPACE(p);
6643                 if (isIDFIRST_lazy_if(p,UTF)) {
6644                     p = scan_ident(p, PL_bufend,
6645                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6646                     p = PEEKSPACE(p);
6647                 }
6648                 if (*p != '$')
6649                     Perl_croak(aTHX_ "Missing $ on loop variable");
6650 #ifdef PERL_MAD
6651                 s = SvPVX(PL_linestr) + soff;
6652 #endif
6653             }
6654             OPERATOR(FOR);
6655
6656         case KEY_formline:
6657             LOP(OP_FORMLINE,XTERM);
6658
6659         case KEY_fork:
6660             FUN0(OP_FORK);
6661
6662         case KEY_fcntl:
6663             LOP(OP_FCNTL,XTERM);
6664
6665         case KEY_fileno:
6666             UNI(OP_FILENO);
6667
6668         case KEY_flock:
6669             LOP(OP_FLOCK,XTERM);
6670
6671         case KEY_gt:
6672             Rop(OP_SGT);
6673
6674         case KEY_ge:
6675             Rop(OP_SGE);
6676
6677         case KEY_grep:
6678             LOP(OP_GREPSTART, XREF);
6679
6680         case KEY_goto:
6681             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6682             LOOPX(OP_GOTO);
6683
6684         case KEY_gmtime:
6685             UNI(OP_GMTIME);
6686
6687         case KEY_getc:
6688             UNIDOR(OP_GETC);
6689
6690         case KEY_getppid:
6691             FUN0(OP_GETPPID);
6692
6693         case KEY_getpgrp:
6694             UNI(OP_GETPGRP);
6695
6696         case KEY_getpriority:
6697             LOP(OP_GETPRIORITY,XTERM);
6698
6699         case KEY_getprotobyname:
6700             UNI(OP_GPBYNAME);
6701
6702         case KEY_getprotobynumber:
6703             LOP(OP_GPBYNUMBER,XTERM);
6704
6705         case KEY_getprotoent:
6706             FUN0(OP_GPROTOENT);
6707
6708         case KEY_getpwent:
6709             FUN0(OP_GPWENT);
6710
6711         case KEY_getpwnam:
6712             UNI(OP_GPWNAM);
6713
6714         case KEY_getpwuid:
6715             UNI(OP_GPWUID);
6716
6717         case KEY_getpeername:
6718             UNI(OP_GETPEERNAME);
6719
6720         case KEY_gethostbyname:
6721             UNI(OP_GHBYNAME);
6722
6723         case KEY_gethostbyaddr:
6724             LOP(OP_GHBYADDR,XTERM);
6725
6726         case KEY_gethostent:
6727             FUN0(OP_GHOSTENT);
6728
6729         case KEY_getnetbyname:
6730             UNI(OP_GNBYNAME);
6731
6732         case KEY_getnetbyaddr:
6733             LOP(OP_GNBYADDR,XTERM);
6734
6735         case KEY_getnetent:
6736             FUN0(OP_GNETENT);
6737
6738         case KEY_getservbyname:
6739             LOP(OP_GSBYNAME,XTERM);
6740
6741         case KEY_getservbyport:
6742             LOP(OP_GSBYPORT,XTERM);
6743
6744         case KEY_getservent:
6745             FUN0(OP_GSERVENT);
6746
6747         case KEY_getsockname:
6748             UNI(OP_GETSOCKNAME);
6749
6750         case KEY_getsockopt:
6751             LOP(OP_GSOCKOPT,XTERM);
6752
6753         case KEY_getgrent:
6754             FUN0(OP_GGRENT);
6755
6756         case KEY_getgrnam:
6757             UNI(OP_GGRNAM);
6758
6759         case KEY_getgrgid:
6760             UNI(OP_GGRGID);
6761
6762         case KEY_getlogin:
6763             FUN0(OP_GETLOGIN);
6764
6765         case KEY_given:
6766             pl_yylval.ival = CopLINE(PL_curcop);
6767             OPERATOR(GIVEN);
6768
6769         case KEY_glob:
6770             LOP(OP_GLOB,XTERM);
6771
6772         case KEY_hex:
6773             UNI(OP_HEX);
6774
6775         case KEY_if:
6776             pl_yylval.ival = CopLINE(PL_curcop);
6777             OPERATOR(IF);
6778
6779         case KEY_index:
6780             LOP(OP_INDEX,XTERM);
6781
6782         case KEY_int:
6783             UNI(OP_INT);
6784
6785         case KEY_ioctl:
6786             LOP(OP_IOCTL,XTERM);
6787
6788         case KEY_join:
6789             LOP(OP_JOIN,XTERM);
6790
6791         case KEY_keys:
6792             UNI(OP_KEYS);
6793
6794         case KEY_kill:
6795             LOP(OP_KILL,XTERM);
6796
6797         case KEY_last:
6798             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6799             LOOPX(OP_LAST);
6800
6801         case KEY_lc:
6802             UNI(OP_LC);
6803
6804         case KEY_lcfirst:
6805             UNI(OP_LCFIRST);
6806
6807         case KEY_local:
6808             pl_yylval.ival = 0;
6809             OPERATOR(LOCAL);
6810
6811         case KEY_length:
6812             UNI(OP_LENGTH);
6813
6814         case KEY_lt:
6815             Rop(OP_SLT);
6816
6817         case KEY_le:
6818             Rop(OP_SLE);
6819
6820         case KEY_localtime:
6821             UNI(OP_LOCALTIME);
6822
6823         case KEY_log:
6824             UNI(OP_LOG);
6825
6826         case KEY_link:
6827             LOP(OP_LINK,XTERM);
6828
6829         case KEY_listen:
6830             LOP(OP_LISTEN,XTERM);
6831
6832         case KEY_lock:
6833             UNI(OP_LOCK);
6834
6835         case KEY_lstat:
6836             UNI(OP_LSTAT);
6837
6838         case KEY_m:
6839             s = scan_pat(s,OP_MATCH);
6840             TERM(sublex_start());
6841
6842         case KEY_map:
6843             LOP(OP_MAPSTART, XREF);
6844
6845         case KEY_mkdir:
6846             LOP(OP_MKDIR,XTERM);
6847
6848         case KEY_msgctl:
6849             LOP(OP_MSGCTL,XTERM);
6850
6851         case KEY_msgget:
6852             LOP(OP_MSGGET,XTERM);
6853
6854         case KEY_msgrcv:
6855             LOP(OP_MSGRCV,XTERM);
6856
6857         case KEY_msgsnd:
6858             LOP(OP_MSGSND,XTERM);
6859
6860         case KEY_our:
6861         case KEY_my:
6862         case KEY_state:
6863             PL_in_my = (U16)tmp;
6864             s = SKIPSPACE1(s);
6865             if (isIDFIRST_lazy_if(s,UTF)) {
6866 #ifdef PERL_MAD
6867                 char* start = s;
6868 #endif
6869                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6870                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6871                     goto really_sub;
6872                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6873                 if (!PL_in_my_stash) {
6874                     char tmpbuf[1024];
6875                     PL_bufptr = s;
6876                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6877                     yyerror(tmpbuf);
6878                 }
6879 #ifdef PERL_MAD
6880                 if (PL_madskills) {     /* just add type to declarator token */
6881                     sv_catsv(PL_thistoken, PL_nextwhite);
6882                     PL_nextwhite = 0;
6883                     sv_catpvn(PL_thistoken, start, s - start);
6884                 }
6885 #endif
6886             }
6887             pl_yylval.ival = 1;
6888             OPERATOR(MY);
6889
6890         case KEY_next:
6891             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6892             LOOPX(OP_NEXT);
6893
6894         case KEY_ne:
6895             Eop(OP_SNE);
6896
6897         case KEY_no:
6898             s = tokenize_use(0, s);
6899             OPERATOR(USE);
6900
6901         case KEY_not:
6902             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6903                 FUN1(OP_NOT);
6904             else
6905                 OPERATOR(NOTOP);
6906
6907         case KEY_open:
6908             s = SKIPSPACE1(s);
6909             if (isIDFIRST_lazy_if(s,UTF)) {
6910                 const char *t;
6911                 for (d = s; isALNUM_lazy_if(d,UTF);)
6912                     d++;
6913                 for (t=d; isSPACE(*t);)
6914                     t++;
6915                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6916                     /* [perl #16184] */
6917                     && !(t[0] == '=' && t[1] == '>')
6918                 ) {
6919                     int parms_len = (int)(d-s);
6920                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6921                            "Precedence problem: open %.*s should be open(%.*s)",
6922                             parms_len, s, parms_len, s);
6923                 }
6924             }
6925             LOP(OP_OPEN,XTERM);
6926
6927         case KEY_or:
6928             pl_yylval.ival = OP_OR;
6929             OPERATOR(OROP);
6930
6931         case KEY_ord:
6932             UNI(OP_ORD);
6933
6934         case KEY_oct:
6935             UNI(OP_OCT);
6936
6937         case KEY_opendir:
6938             LOP(OP_OPEN_DIR,XTERM);
6939
6940         case KEY_print:
6941             checkcomma(s,PL_tokenbuf,"filehandle");
6942             LOP(OP_PRINT,XREF);
6943
6944         case KEY_printf:
6945             checkcomma(s,PL_tokenbuf,"filehandle");
6946             LOP(OP_PRTF,XREF);
6947
6948         case KEY_prototype:
6949             UNI(OP_PROTOTYPE);
6950
6951         case KEY_push:
6952             LOP(OP_PUSH,XTERM);
6953
6954         case KEY_pop:
6955             UNIDOR(OP_POP);
6956
6957         case KEY_pos:
6958             UNIDOR(OP_POS);
6959
6960         case KEY_pack:
6961             LOP(OP_PACK,XTERM);
6962
6963         case KEY_package:
6964             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6965             s = force_version(s, FALSE);
6966             OPERATOR(PACKAGE);
6967
6968         case KEY_pipe:
6969             LOP(OP_PIPE_OP,XTERM);
6970
6971         case KEY_q:
6972             s = scan_str(s,!!PL_madskills,FALSE);
6973             if (!s)
6974                 missingterm(NULL);
6975             pl_yylval.ival = OP_CONST;
6976             TERM(sublex_start());
6977
6978         case KEY_quotemeta:
6979             UNI(OP_QUOTEMETA);
6980
6981         case KEY_qw:
6982             s = scan_str(s,!!PL_madskills,FALSE);
6983             if (!s)
6984                 missingterm(NULL);
6985             PL_expect = XOPERATOR;
6986             force_next(')');
6987             if (SvCUR(PL_lex_stuff)) {
6988                 OP *words = NULL;
6989                 int warned = 0;
6990                 d = SvPV_force(PL_lex_stuff, len);
6991                 while (len) {
6992                     for (; isSPACE(*d) && len; --len, ++d)
6993                         /**/;
6994                     if (len) {
6995                         SV *sv;
6996                         const char *b = d;
6997                         if (!warned && ckWARN(WARN_QW)) {
6998                             for (; !isSPACE(*d) && len; --len, ++d) {
6999                                 if (*d == ',') {
7000                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7001                                         "Possible attempt to separate words with commas");
7002                                     ++warned;
7003                                 }
7004                                 else if (*d == '#') {
7005                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7006                                         "Possible attempt to put comments in qw() list");
7007                                     ++warned;
7008                                 }
7009                             }
7010                         }
7011                         else {
7012                             for (; !isSPACE(*d) && len; --len, ++d)
7013                                 /**/;
7014                         }
7015                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7016                         words = append_elem(OP_LIST, words,
7017                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7018                     }
7019                 }
7020                 if (words) {
7021                     start_force(PL_curforce);
7022                     NEXTVAL_NEXTTOKE.opval = words;
7023                     force_next(THING);
7024                 }
7025             }
7026             if (PL_lex_stuff) {
7027                 SvREFCNT_dec(PL_lex_stuff);
7028                 PL_lex_stuff = NULL;
7029             }
7030             PL_expect = XTERM;
7031             TOKEN('(');
7032
7033         case KEY_qq:
7034             s = scan_str(s,!!PL_madskills,FALSE);
7035             if (!s)
7036                 missingterm(NULL);
7037             pl_yylval.ival = OP_STRINGIFY;
7038             if (SvIVX(PL_lex_stuff) == '\'')
7039                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7040             TERM(sublex_start());
7041
7042         case KEY_qr:
7043             s = scan_pat(s,OP_QR);
7044             TERM(sublex_start());
7045
7046         case KEY_qx:
7047             s = scan_str(s,!!PL_madskills,FALSE);
7048             if (!s)
7049                 missingterm(NULL);
7050             readpipe_override();
7051             TERM(sublex_start());
7052
7053         case KEY_return:
7054             OLDLOP(OP_RETURN);
7055
7056         case KEY_require:
7057             s = SKIPSPACE1(s);
7058             if (isDIGIT(*s)) {
7059                 s = force_version(s, FALSE);
7060             }
7061             else if (*s != 'v' || !isDIGIT(s[1])
7062                     || (s = force_version(s, TRUE), *s == 'v'))
7063             {
7064                 *PL_tokenbuf = '\0';
7065                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7066                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7067                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7068                 else if (*s == '<')
7069                     yyerror("<> should be quotes");
7070             }
7071             if (orig_keyword == KEY_require) {
7072                 orig_keyword = 0;
7073                 pl_yylval.ival = 1;
7074             }
7075             else
7076                 pl_yylval.ival = 0;
7077             PL_expect = XTERM;
7078             PL_bufptr = s;
7079             PL_last_uni = PL_oldbufptr;
7080             PL_last_lop_op = OP_REQUIRE;
7081             s = skipspace(s);
7082             return REPORT( (int)REQUIRE );
7083
7084         case KEY_reset:
7085             UNI(OP_RESET);
7086
7087         case KEY_redo:
7088             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7089             LOOPX(OP_REDO);
7090
7091         case KEY_rename:
7092             LOP(OP_RENAME,XTERM);
7093
7094         case KEY_rand:
7095             UNI(OP_RAND);
7096
7097         case KEY_rmdir:
7098             UNI(OP_RMDIR);
7099
7100         case KEY_rindex:
7101             LOP(OP_RINDEX,XTERM);
7102
7103         case KEY_read:
7104             LOP(OP_READ,XTERM);
7105
7106         case KEY_readdir:
7107             UNI(OP_READDIR);
7108
7109         case KEY_readline:
7110             UNIDOR(OP_READLINE);
7111
7112         case KEY_readpipe:
7113             UNIDOR(OP_BACKTICK);
7114
7115         case KEY_rewinddir:
7116             UNI(OP_REWINDDIR);
7117
7118         case KEY_recv:
7119             LOP(OP_RECV,XTERM);
7120
7121         case KEY_reverse:
7122             LOP(OP_REVERSE,XTERM);
7123
7124         case KEY_readlink:
7125             UNIDOR(OP_READLINK);
7126
7127         case KEY_ref:
7128             UNI(OP_REF);
7129
7130         case KEY_s:
7131             s = scan_subst(s);
7132             if (pl_yylval.opval)
7133                 TERM(sublex_start());
7134             else
7135                 TOKEN(1);       /* force error */
7136
7137         case KEY_say:
7138             checkcomma(s,PL_tokenbuf,"filehandle");
7139             LOP(OP_SAY,XREF);
7140
7141         case KEY_chomp:
7142             UNI(OP_CHOMP);
7143
7144         case KEY_scalar:
7145             UNI(OP_SCALAR);
7146
7147         case KEY_select:
7148             LOP(OP_SELECT,XTERM);
7149
7150         case KEY_seek:
7151             LOP(OP_SEEK,XTERM);
7152
7153         case KEY_semctl:
7154             LOP(OP_SEMCTL,XTERM);
7155
7156         case KEY_semget:
7157             LOP(OP_SEMGET,XTERM);
7158
7159         case KEY_semop:
7160             LOP(OP_SEMOP,XTERM);
7161
7162         case KEY_send:
7163             LOP(OP_SEND,XTERM);
7164
7165         case KEY_setpgrp:
7166             LOP(OP_SETPGRP,XTERM);
7167
7168         case KEY_setpriority:
7169             LOP(OP_SETPRIORITY,XTERM);
7170
7171         case KEY_sethostent:
7172             UNI(OP_SHOSTENT);
7173
7174         case KEY_setnetent:
7175             UNI(OP_SNETENT);
7176
7177         case KEY_setservent:
7178             UNI(OP_SSERVENT);
7179
7180         case KEY_setprotoent:
7181             UNI(OP_SPROTOENT);
7182
7183         case KEY_setpwent:
7184             FUN0(OP_SPWENT);
7185
7186         case KEY_setgrent:
7187             FUN0(OP_SGRENT);
7188
7189         case KEY_seekdir:
7190             LOP(OP_SEEKDIR,XTERM);
7191
7192         case KEY_setsockopt:
7193             LOP(OP_SSOCKOPT,XTERM);
7194
7195         case KEY_shift:
7196             UNIDOR(OP_SHIFT);
7197
7198         case KEY_shmctl:
7199             LOP(OP_SHMCTL,XTERM);
7200
7201         case KEY_shmget:
7202             LOP(OP_SHMGET,XTERM);
7203
7204         case KEY_shmread:
7205             LOP(OP_SHMREAD,XTERM);
7206
7207         case KEY_shmwrite:
7208             LOP(OP_SHMWRITE,XTERM);
7209
7210         case KEY_shutdown:
7211             LOP(OP_SHUTDOWN,XTERM);
7212
7213         case KEY_sin:
7214             UNI(OP_SIN);
7215
7216         case KEY_sleep:
7217             UNI(OP_SLEEP);
7218
7219         case KEY_socket:
7220             LOP(OP_SOCKET,XTERM);
7221
7222         case KEY_socketpair:
7223             LOP(OP_SOCKPAIR,XTERM);
7224
7225         case KEY_sort:
7226             checkcomma(s,PL_tokenbuf,"subroutine name");
7227             s = SKIPSPACE1(s);
7228             if (*s == ';' || *s == ')')         /* probably a close */
7229                 Perl_croak(aTHX_ "sort is now a reserved word");
7230             PL_expect = XTERM;
7231             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7232             LOP(OP_SORT,XREF);
7233
7234         case KEY_split:
7235             LOP(OP_SPLIT,XTERM);
7236
7237         case KEY_sprintf:
7238             LOP(OP_SPRINTF,XTERM);
7239
7240         case KEY_splice:
7241             LOP(OP_SPLICE,XTERM);
7242
7243         case KEY_sqrt:
7244             UNI(OP_SQRT);
7245
7246         case KEY_srand:
7247             UNI(OP_SRAND);
7248
7249         case KEY_stat:
7250             UNI(OP_STAT);
7251
7252         case KEY_study:
7253             UNI(OP_STUDY);
7254
7255         case KEY_substr:
7256             LOP(OP_SUBSTR,XTERM);
7257
7258         case KEY_format:
7259         case KEY_sub:
7260           really_sub:
7261             {
7262                 char tmpbuf[sizeof PL_tokenbuf];
7263                 SSize_t tboffset = 0;
7264                 expectation attrful;
7265                 bool have_name, have_proto;
7266                 const int key = tmp;
7267
7268 #ifdef PERL_MAD
7269                 SV *tmpwhite = 0;
7270
7271                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7272                 SV *subtoken = newSVpvn(tstart, s - tstart);
7273                 PL_thistoken = 0;
7274
7275                 d = s;
7276                 s = SKIPSPACE2(s,tmpwhite);
7277 #else
7278                 s = skipspace(s);
7279 #endif
7280
7281                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7282                     (*s == ':' && s[1] == ':'))
7283                 {
7284 #ifdef PERL_MAD
7285                     SV *nametoke = NULL;
7286 #endif
7287
7288                     PL_expect = XBLOCK;
7289                     attrful = XATTRBLOCK;
7290                     /* remember buffer pos'n for later force_word */
7291                     tboffset = s - PL_oldbufptr;
7292                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7293 #ifdef PERL_MAD
7294                     if (PL_madskills)
7295                         nametoke = newSVpvn(s, d - s);
7296 #endif
7297                     if (memchr(tmpbuf, ':', len))
7298                         sv_setpvn(PL_subname, tmpbuf, len);
7299                     else {
7300                         sv_setsv(PL_subname,PL_curstname);
7301                         sv_catpvs(PL_subname,"::");
7302                         sv_catpvn(PL_subname,tmpbuf,len);
7303                     }
7304                     have_name = TRUE;
7305
7306 #ifdef PERL_MAD
7307
7308                     start_force(0);
7309                     CURMAD('X', nametoke);
7310                     CURMAD('_', tmpwhite);
7311                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7312                                       FALSE, TRUE, TRUE);
7313
7314                     s = SKIPSPACE2(d,tmpwhite);
7315 #else
7316                     s = skipspace(d);
7317 #endif
7318                 }
7319                 else {
7320                     if (key == KEY_my)
7321                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7322                     PL_expect = XTERMBLOCK;
7323                     attrful = XATTRTERM;
7324                     sv_setpvs(PL_subname,"?");
7325                     have_name = FALSE;
7326                 }
7327
7328                 if (key == KEY_format) {
7329                     if (*s == '=')
7330                         PL_lex_formbrack = PL_lex_brackets + 1;
7331 #ifdef PERL_MAD
7332                     PL_thistoken = subtoken;
7333                     s = d;
7334 #else
7335                     if (have_name)
7336                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7337                                           FALSE, TRUE, TRUE);
7338 #endif
7339                     OPERATOR(FORMAT);
7340                 }
7341
7342                 /* Look for a prototype */
7343                 if (*s == '(') {
7344                     char *p;
7345                     bool bad_proto = FALSE;
7346                     bool in_brackets = FALSE;
7347                     char greedy_proto = ' ';
7348                     bool proto_after_greedy_proto = FALSE;
7349                     bool must_be_last = FALSE;
7350                     bool underscore = FALSE;
7351                     bool seen_underscore = FALSE;
7352                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
7353
7354                     s = scan_str(s,!!PL_madskills,FALSE);
7355                     if (!s)
7356                         Perl_croak(aTHX_ "Prototype not terminated");
7357                     /* strip spaces and check for bad characters */
7358                     d = SvPVX(PL_lex_stuff);
7359                     tmp = 0;
7360                     for (p = d; *p; ++p) {
7361                         if (!isSPACE(*p)) {
7362                             d[tmp++] = *p;
7363
7364                             if (warnsyntax) {
7365                                 if (must_be_last)
7366                                     proto_after_greedy_proto = TRUE;
7367                                 if (!strchr("$@%*;[]&\\_", *p)) {
7368                                     bad_proto = TRUE;
7369                                 }
7370                                 else {
7371                                     if ( underscore ) {
7372                                         if ( *p != ';' )
7373                                             bad_proto = TRUE;
7374                                         underscore = FALSE;
7375                                     }
7376                                     if ( *p == '[' ) {
7377                                         in_brackets = TRUE;
7378                                     }
7379                                     else if ( *p == ']' ) {
7380                                         in_brackets = FALSE;
7381                                     }
7382                                     else if ( (*p == '@' || *p == '%') &&
7383                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7384                                          !in_brackets ) {
7385                                         must_be_last = TRUE;
7386                                         greedy_proto = *p;
7387                                     }
7388                                     else if ( *p == '_' ) {
7389                                         underscore = seen_underscore = TRUE;
7390                                     }
7391                                 }
7392                             }
7393                         }
7394                     }
7395                     d[tmp] = '\0';
7396                     if (proto_after_greedy_proto)
7397                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7398                                     "Prototype after '%c' for %"SVf" : %s",
7399                                     greedy_proto, SVfARG(PL_subname), d);
7400                     if (bad_proto)
7401                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7402                                     "Illegal character %sin prototype for %"SVf" : %s",
7403                                     seen_underscore ? "after '_' " : "",
7404                                     SVfARG(PL_subname), d);
7405                     SvCUR_set(PL_lex_stuff, tmp);
7406                     have_proto = TRUE;
7407
7408 #ifdef PERL_MAD
7409                     start_force(0);
7410                     CURMAD('q', PL_thisopen);
7411                     CURMAD('_', tmpwhite);
7412                     CURMAD('=', PL_thisstuff);
7413                     CURMAD('Q', PL_thisclose);
7414                     NEXTVAL_NEXTTOKE.opval =
7415                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7416                     PL_lex_stuff = NULL;
7417                     force_next(THING);
7418
7419                     s = SKIPSPACE2(s,tmpwhite);
7420 #else
7421                     s = skipspace(s);
7422 #endif
7423                 }
7424                 else
7425                     have_proto = FALSE;
7426
7427                 if (*s == ':' && s[1] != ':')
7428                     PL_expect = attrful;
7429                 else if (*s != '{' && key == KEY_sub) {
7430                     if (!have_name)
7431                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7432                     else if (*s != ';')
7433                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7434                 }
7435
7436 #ifdef PERL_MAD
7437                 start_force(0);
7438                 if (tmpwhite) {
7439                     if (PL_madskills)
7440                         curmad('^', newSVpvs(""));
7441                     CURMAD('_', tmpwhite);
7442                 }
7443                 force_next(0);
7444
7445                 PL_thistoken = subtoken;
7446 #else
7447                 if (have_proto) {
7448                     NEXTVAL_NEXTTOKE.opval =
7449                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7450                     PL_lex_stuff = NULL;
7451                     force_next(THING);
7452                 }
7453 #endif
7454                 if (!have_name) {
7455                     if (PL_curstash)
7456                         sv_setpvs(PL_subname, "__ANON__");
7457                     else
7458                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7459                     TOKEN(ANONSUB);
7460                 }
7461 #ifndef PERL_MAD
7462                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7463                                   FALSE, TRUE, TRUE);
7464 #endif
7465                 if (key == KEY_my)
7466                     TOKEN(MYSUB);
7467                 TOKEN(SUB);
7468             }
7469
7470         case KEY_system:
7471             LOP(OP_SYSTEM,XREF);
7472
7473         case KEY_symlink:
7474             LOP(OP_SYMLINK,XTERM);
7475
7476         case KEY_syscall:
7477             LOP(OP_SYSCALL,XTERM);
7478
7479         case KEY_sysopen:
7480             LOP(OP_SYSOPEN,XTERM);
7481
7482         case KEY_sysseek:
7483             LOP(OP_SYSSEEK,XTERM);
7484
7485         case KEY_sysread:
7486             LOP(OP_SYSREAD,XTERM);
7487
7488         case KEY_syswrite:
7489             LOP(OP_SYSWRITE,XTERM);
7490
7491         case KEY_tr:
7492             s = scan_trans(s);
7493             TERM(sublex_start());
7494
7495         case KEY_tell:
7496             UNI(OP_TELL);
7497
7498         case KEY_telldir:
7499             UNI(OP_TELLDIR);
7500
7501         case KEY_tie:
7502             LOP(OP_TIE,XTERM);
7503
7504         case KEY_tied:
7505             UNI(OP_TIED);
7506
7507         case KEY_time:
7508             FUN0(OP_TIME);
7509
7510         case KEY_times:
7511             FUN0(OP_TMS);
7512
7513         case KEY_truncate:
7514             LOP(OP_TRUNCATE,XTERM);
7515
7516         case KEY_uc:
7517             UNI(OP_UC);
7518
7519         case KEY_ucfirst:
7520             UNI(OP_UCFIRST);
7521
7522         case KEY_untie:
7523             UNI(OP_UNTIE);
7524
7525         case KEY_until:
7526             pl_yylval.ival = CopLINE(PL_curcop);
7527             OPERATOR(UNTIL);
7528
7529         case KEY_unless:
7530             pl_yylval.ival = CopLINE(PL_curcop);
7531             OPERATOR(UNLESS);
7532
7533         case KEY_unlink:
7534             LOP(OP_UNLINK,XTERM);
7535
7536         case KEY_undef:
7537             UNIDOR(OP_UNDEF);
7538
7539         case KEY_unpack:
7540             LOP(OP_UNPACK,XTERM);
7541
7542         case KEY_utime:
7543             LOP(OP_UTIME,XTERM);
7544
7545         case KEY_umask:
7546             UNIDOR(OP_UMASK);
7547
7548         case KEY_unshift:
7549             LOP(OP_UNSHIFT,XTERM);
7550
7551         case KEY_use:
7552             s = tokenize_use(1, s);
7553             OPERATOR(USE);
7554
7555         case KEY_values:
7556             UNI(OP_VALUES);
7557
7558         case KEY_vec:
7559             LOP(OP_VEC,XTERM);
7560
7561         case KEY_when:
7562             pl_yylval.ival = CopLINE(PL_curcop);
7563             OPERATOR(WHEN);
7564
7565         case KEY_while:
7566             pl_yylval.ival = CopLINE(PL_curcop);
7567             OPERATOR(WHILE);
7568
7569         case KEY_warn:
7570             PL_hints |= HINT_BLOCK_SCOPE;
7571             LOP(OP_WARN,XTERM);
7572
7573         case KEY_wait:
7574             FUN0(OP_WAIT);
7575
7576         case KEY_waitpid:
7577             LOP(OP_WAITPID,XTERM);
7578
7579         case KEY_wantarray:
7580             FUN0(OP_WANTARRAY);
7581
7582         case KEY_write:
7583 #ifdef EBCDIC
7584         {
7585             char ctl_l[2];
7586             ctl_l[0] = toCTRL('L');
7587             ctl_l[1] = '\0';
7588             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7589         }
7590 #else
7591             /* Make sure $^L is defined */
7592             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7593 #endif
7594             UNI(OP_ENTERWRITE);
7595
7596         case KEY_x:
7597             if (PL_expect == XOPERATOR)
7598                 Mop(OP_REPEAT);
7599             check_uni();
7600             goto just_a_word;
7601
7602         case KEY_xor:
7603             pl_yylval.ival = OP_XOR;
7604             OPERATOR(OROP);
7605
7606         case KEY_y:
7607             s = scan_trans(s);
7608             TERM(sublex_start());
7609         }
7610     }}
7611 }
7612 #ifdef __SC__
7613 #pragma segment Main
7614 #endif
7615
7616 static int
7617 S_pending_ident(pTHX)
7618 {
7619     dVAR;
7620     register char *d;
7621     PADOFFSET tmp = 0;
7622     /* pit holds the identifier we read and pending_ident is reset */
7623     char pit = PL_pending_ident;
7624     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7625     /* All routes through this function want to know if there is a colon.  */
7626     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7627     PL_pending_ident = 0;
7628
7629     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7630     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7631           "### Pending identifier '%s'\n", PL_tokenbuf); });
7632
7633     /* if we're in a my(), we can't allow dynamics here.
7634        $foo'bar has already been turned into $foo::bar, so
7635        just check for colons.
7636
7637        if it's a legal name, the OP is a PADANY.
7638     */
7639     if (PL_in_my) {
7640         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7641             if (has_colon)
7642                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7643                                   "variable %s in \"our\"",
7644                                   PL_tokenbuf));
7645             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7646         }
7647         else {
7648             if (has_colon)
7649                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7650                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7651
7652             pl_yylval.opval = newOP(OP_PADANY, 0);
7653             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7654             return PRIVATEREF;
7655         }
7656     }
7657
7658     /*
7659        build the ops for accesses to a my() variable.
7660
7661        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7662        then used in a comparison.  This catches most, but not
7663        all cases.  For instance, it catches
7664            sort { my($a); $a <=> $b }
7665        but not
7666            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7667        (although why you'd do that is anyone's guess).
7668     */
7669
7670     if (!has_colon) {
7671         if (!PL_in_my)
7672             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7673         if (tmp != NOT_IN_PAD) {
7674             /* might be an "our" variable" */
7675             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7676                 /* build ops for a bareword */
7677                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7678                 HEK * const stashname = HvNAME_HEK(stash);
7679                 SV *  const sym = newSVhek(stashname);
7680                 sv_catpvs(sym, "::");
7681                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7682                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7683                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7684                 gv_fetchsv(sym,
7685                     (PL_in_eval
7686                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7687                         : GV_ADDMULTI
7688                     ),
7689                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7690                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7691                      : SVt_PVHV));
7692                 return WORD;
7693             }
7694
7695             /* if it's a sort block and they're naming $a or $b */
7696             if (PL_last_lop_op == OP_SORT &&
7697                 PL_tokenbuf[0] == '$' &&
7698                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7699                 && !PL_tokenbuf[2])
7700             {
7701                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7702                      d < PL_bufend && *d != '\n';
7703                      d++)
7704                 {
7705                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7706                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7707                               PL_tokenbuf);
7708                     }
7709                 }
7710             }
7711
7712             pl_yylval.opval = newOP(OP_PADANY, 0);
7713             pl_yylval.opval->op_targ = tmp;
7714             return PRIVATEREF;
7715         }
7716     }
7717
7718     /*
7719        Whine if they've said @foo in a doublequoted string,
7720        and @foo isn't a variable we can find in the symbol
7721        table.
7722     */
7723     if (ckWARN(WARN_AMBIGUOUS) &&
7724         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7725         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7726                                          SVt_PVAV);
7727         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7728                 /* DO NOT warn for @- and @+ */
7729                 && !( PL_tokenbuf[2] == '\0' &&
7730                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7731            )
7732         {
7733             /* Downgraded from fatal to warning 20000522 mjd */
7734             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7735                         "Possible unintended interpolation of %s in string",
7736                         PL_tokenbuf);
7737         }
7738     }
7739
7740     /* build ops for a bareword */
7741     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7742                                                       tokenbuf_len - 1));
7743     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7744     gv_fetchpvn_flags(
7745             PL_tokenbuf + 1, tokenbuf_len - 1,
7746             /* If the identifier refers to a stash, don't autovivify it.
7747              * Change 24660 had the side effect of causing symbol table
7748              * hashes to always be defined, even if they were freshly
7749              * created and the only reference in the entire program was
7750              * the single statement with the defined %foo::bar:: test.
7751              * It appears that all code in the wild doing this actually
7752              * wants to know whether sub-packages have been loaded, so
7753              * by avoiding auto-vivifying symbol tables, we ensure that
7754              * defined %foo::bar:: continues to be false, and the existing
7755              * tests still give the expected answers, even though what
7756              * they're actually testing has now changed subtly.
7757              */
7758             (*PL_tokenbuf == '%'
7759              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7760              && d[-1] == ':'
7761              ? 0
7762              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7763             ((PL_tokenbuf[0] == '$') ? SVt_PV
7764              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7765              : SVt_PVHV));
7766     return WORD;
7767 }
7768
7769 /*
7770  *  The following code was generated by perl_keyword.pl.
7771  */
7772
7773 I32
7774 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7775 {
7776     dVAR;
7777
7778     PERL_ARGS_ASSERT_KEYWORD;
7779
7780   switch (len)
7781   {
7782     case 1: /* 5 tokens of length 1 */
7783       switch (name[0])
7784       {
7785         case 'm':
7786           {                                       /* m          */
7787             return KEY_m;
7788           }
7789
7790         case 'q':
7791           {                                       /* q          */
7792             return KEY_q;
7793           }
7794
7795         case 's':
7796           {                                       /* s          */
7797             return KEY_s;
7798           }
7799
7800         case 'x':
7801           {                                       /* x          */
7802             return -KEY_x;
7803           }
7804
7805         case 'y':
7806           {                                       /* y          */
7807             return KEY_y;
7808           }
7809
7810         default:
7811           goto unknown;
7812       }
7813
7814     case 2: /* 18 tokens of length 2 */
7815       switch (name[0])
7816       {
7817         case 'd':
7818           if (name[1] == 'o')
7819           {                                       /* do         */
7820             return KEY_do;
7821           }
7822
7823           goto unknown;
7824
7825         case 'e':
7826           if (name[1] == 'q')
7827           {                                       /* eq         */
7828             return -KEY_eq;
7829           }
7830
7831           goto unknown;
7832
7833         case 'g':
7834           switch (name[1])
7835           {
7836             case 'e':
7837               {                                   /* ge         */
7838                 return -KEY_ge;
7839               }
7840
7841             case 't':
7842               {                                   /* gt         */
7843                 return -KEY_gt;
7844               }
7845
7846             default:
7847               goto unknown;
7848           }
7849
7850         case 'i':
7851           if (name[1] == 'f')
7852           {                                       /* if         */
7853             return KEY_if;
7854           }
7855
7856           goto unknown;
7857
7858         case 'l':
7859           switch (name[1])
7860           {
7861             case 'c':
7862               {                                   /* lc         */
7863                 return -KEY_lc;
7864               }
7865
7866             case 'e':
7867               {                                   /* le         */
7868                 return -KEY_le;
7869               }
7870
7871             case 't':
7872               {                                   /* lt         */
7873                 return -KEY_lt;
7874               }
7875
7876             default:
7877               goto unknown;
7878           }
7879
7880         case 'm':
7881           if (name[1] == 'y')
7882           {                                       /* my         */
7883             return KEY_my;
7884           }
7885
7886           goto unknown;
7887
7888         case 'n':
7889           switch (name[1])
7890           {
7891             case 'e':
7892               {                                   /* ne         */
7893                 return -KEY_ne;
7894               }
7895
7896             case 'o':
7897               {                                   /* no         */
7898                 return KEY_no;
7899               }
7900
7901             default:
7902               goto unknown;
7903           }
7904
7905         case 'o':
7906           if (name[1] == 'r')
7907           {                                       /* or         */
7908             return -KEY_or;
7909           }
7910
7911           goto unknown;
7912
7913         case 'q':
7914           switch (name[1])
7915           {
7916             case 'q':
7917               {                                   /* qq         */
7918                 return KEY_qq;
7919               }
7920
7921             case 'r':
7922               {                                   /* qr         */
7923                 return KEY_qr;
7924               }
7925
7926             case 'w':
7927               {                                   /* qw         */
7928                 return KEY_qw;
7929               }
7930
7931             case 'x':
7932               {                                   /* qx         */
7933                 return KEY_qx;
7934               }
7935
7936             default:
7937               goto unknown;
7938           }
7939
7940         case 't':
7941           if (name[1] == 'r')
7942           {                                       /* tr         */
7943             return KEY_tr;
7944           }
7945
7946           goto unknown;
7947
7948         case 'u':
7949           if (name[1] == 'c')
7950           {                                       /* uc         */
7951             return -KEY_uc;
7952           }
7953
7954           goto unknown;
7955
7956         default:
7957           goto unknown;
7958       }
7959
7960     case 3: /* 29 tokens of length 3 */
7961       switch (name[0])
7962       {
7963         case 'E':
7964           if (name[1] == 'N' &&
7965               name[2] == 'D')
7966           {                                       /* END        */
7967             return KEY_END;
7968           }
7969
7970           goto unknown;
7971
7972         case 'a':
7973           switch (name[1])
7974           {
7975             case 'b':
7976               if (name[2] == 's')
7977               {                                   /* abs        */
7978                 return -KEY_abs;
7979               }
7980
7981               goto unknown;
7982
7983             case 'n':
7984               if (name[2] == 'd')
7985               {                                   /* and        */
7986                 return -KEY_and;
7987               }
7988
7989               goto unknown;
7990
7991             default:
7992               goto unknown;
7993           }
7994
7995         case 'c':
7996           switch (name[1])
7997           {
7998             case 'h':
7999               if (name[2] == 'r')
8000               {                                   /* chr        */
8001                 return -KEY_chr;
8002               }
8003
8004               goto unknown;
8005
8006             case 'm':
8007               if (name[2] == 'p')
8008               {                                   /* cmp        */
8009                 return -KEY_cmp;
8010               }
8011
8012               goto unknown;
8013
8014             case 'o':
8015               if (name[2] == 's')
8016               {                                   /* cos        */
8017                 return -KEY_cos;
8018               }
8019
8020               goto unknown;
8021
8022             default:
8023               goto unknown;
8024           }
8025
8026         case 'd':
8027           if (name[1] == 'i' &&
8028               name[2] == 'e')
8029           {                                       /* die        */
8030             return -KEY_die;
8031           }
8032
8033           goto unknown;
8034
8035         case 'e':
8036           switch (name[1])
8037           {
8038             case 'o':
8039               if (name[2] == 'f')
8040               {                                   /* eof        */
8041                 return -KEY_eof;
8042               }
8043
8044               goto unknown;
8045
8046             case 'x':
8047               if (name[2] == 'p')
8048               {                                   /* exp        */
8049                 return -KEY_exp;
8050               }
8051
8052               goto unknown;
8053
8054             default:
8055               goto unknown;
8056           }
8057
8058         case 'f':
8059           if (name[1] == 'o' &&
8060               name[2] == 'r')
8061           {                                       /* for        */
8062             return KEY_for;
8063           }
8064
8065           goto unknown;
8066
8067         case 'h':
8068           if (name[1] == 'e' &&
8069               name[2] == 'x')
8070           {                                       /* hex        */
8071             return -KEY_hex;
8072           }
8073
8074           goto unknown;
8075
8076         case 'i':
8077           if (name[1] == 'n' &&
8078               name[2] == 't')
8079           {                                       /* int        */
8080             return -KEY_int;
8081           }
8082
8083           goto unknown;
8084
8085         case 'l':
8086           if (name[1] == 'o' &&
8087               name[2] == 'g')
8088           {                                       /* log        */
8089             return -KEY_log;
8090           }
8091
8092           goto unknown;
8093
8094         case 'm':
8095           if (name[1] == 'a' &&
8096               name[2] == 'p')
8097           {                                       /* map        */
8098             return KEY_map;
8099           }
8100
8101           goto unknown;
8102
8103         case 'n':
8104           if (name[1] == 'o' &&
8105               name[2] == 't')
8106           {                                       /* not        */
8107             return -KEY_not;
8108           }
8109
8110           goto unknown;
8111
8112         case 'o':
8113           switch (name[1])
8114           {
8115             case 'c':
8116               if (name[2] == 't')
8117               {                                   /* oct        */
8118                 return -KEY_oct;
8119               }
8120
8121               goto unknown;
8122
8123             case 'r':
8124               if (name[2] == 'd')
8125               {                                   /* ord        */
8126                 return -KEY_ord;
8127               }
8128
8129               goto unknown;
8130
8131             case 'u':
8132               if (name[2] == 'r')
8133               {                                   /* our        */
8134                 return KEY_our;
8135               }
8136
8137               goto unknown;
8138
8139             default:
8140               goto unknown;
8141           }
8142
8143         case 'p':
8144           if (name[1] == 'o')
8145           {
8146             switch (name[2])
8147             {
8148               case 'p':
8149                 {                                 /* pop        */
8150                   return -KEY_pop;
8151                 }
8152
8153               case 's':
8154                 {                                 /* pos        */
8155                   return KEY_pos;
8156                 }
8157
8158               default:
8159                 goto unknown;
8160             }
8161           }
8162
8163           goto unknown;
8164
8165         case 'r':
8166           if (name[1] == 'e' &&
8167               name[2] == 'f')
8168           {                                       /* ref        */
8169             return -KEY_ref;
8170           }
8171
8172           goto unknown;
8173
8174         case 's':
8175           switch (name[1])
8176           {
8177             case 'a':
8178               if (name[2] == 'y')
8179               {                                   /* say        */
8180                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8181               }
8182
8183               goto unknown;
8184
8185             case 'i':
8186               if (name[2] == 'n')
8187               {                                   /* sin        */
8188                 return -KEY_sin;
8189               }
8190
8191               goto unknown;
8192
8193             case 'u':
8194               if (name[2] == 'b')
8195               {                                   /* sub        */
8196                 return KEY_sub;
8197               }
8198
8199               goto unknown;
8200
8201             default:
8202               goto unknown;
8203           }
8204
8205         case 't':
8206           if (name[1] == 'i' &&
8207               name[2] == 'e')
8208           {                                       /* tie        */
8209             return KEY_tie;
8210           }
8211
8212           goto unknown;
8213
8214         case 'u':
8215           if (name[1] == 's' &&
8216               name[2] == 'e')
8217           {                                       /* use        */
8218             return KEY_use;
8219           }
8220
8221           goto unknown;
8222
8223         case 'v':
8224           if (name[1] == 'e' &&
8225               name[2] == 'c')
8226           {                                       /* vec        */
8227             return -KEY_vec;
8228           }
8229
8230           goto unknown;
8231
8232         case 'x':
8233           if (name[1] == 'o' &&
8234               name[2] == 'r')
8235           {                                       /* xor        */
8236             return -KEY_xor;
8237           }
8238
8239           goto unknown;
8240
8241         default:
8242           goto unknown;
8243       }
8244
8245     case 4: /* 41 tokens of length 4 */
8246       switch (name[0])
8247       {
8248         case 'C':
8249           if (name[1] == 'O' &&
8250               name[2] == 'R' &&
8251               name[3] == 'E')
8252           {                                       /* CORE       */
8253             return -KEY_CORE;
8254           }
8255
8256           goto unknown;
8257
8258         case 'I':
8259           if (name[1] == 'N' &&
8260               name[2] == 'I' &&
8261               name[3] == 'T')
8262           {                                       /* INIT       */
8263             return KEY_INIT;
8264           }
8265
8266           goto unknown;
8267
8268         case 'b':
8269           if (name[1] == 'i' &&
8270               name[2] == 'n' &&
8271               name[3] == 'd')
8272           {                                       /* bind       */
8273             return -KEY_bind;
8274           }
8275
8276           goto unknown;
8277
8278         case 'c':
8279           if (name[1] == 'h' &&
8280               name[2] == 'o' &&
8281               name[3] == 'p')
8282           {                                       /* chop       */
8283             return -KEY_chop;
8284           }
8285
8286           goto unknown;
8287
8288         case 'd':
8289           if (name[1] == 'u' &&
8290               name[2] == 'm' &&
8291               name[3] == 'p')
8292           {                                       /* dump       */
8293             return -KEY_dump;
8294           }
8295
8296           goto unknown;
8297
8298         case 'e':
8299           switch (name[1])
8300           {
8301             case 'a':
8302               if (name[2] == 'c' &&
8303                   name[3] == 'h')
8304               {                                   /* each       */
8305                 return -KEY_each;
8306               }
8307
8308               goto unknown;
8309
8310             case 'l':
8311               if (name[2] == 's' &&
8312                   name[3] == 'e')
8313               {                                   /* else       */
8314                 return KEY_else;
8315               }
8316
8317               goto unknown;
8318
8319             case 'v':
8320               if (name[2] == 'a' &&
8321                   name[3] == 'l')
8322               {                                   /* eval       */
8323                 return KEY_eval;
8324               }
8325
8326               goto unknown;
8327
8328             case 'x':
8329               switch (name[2])
8330               {
8331                 case 'e':
8332                   if (name[3] == 'c')
8333                   {                               /* exec       */
8334                     return -KEY_exec;
8335                   }
8336
8337                   goto unknown;
8338
8339                 case 'i':
8340                   if (name[3] == 't')
8341                   {                               /* exit       */
8342                     return -KEY_exit;
8343                   }
8344
8345                   goto unknown;
8346
8347                 default:
8348                   goto unknown;
8349               }
8350
8351             default:
8352               goto unknown;
8353           }
8354
8355         case 'f':
8356           if (name[1] == 'o' &&
8357               name[2] == 'r' &&
8358               name[3] == 'k')
8359           {                                       /* fork       */
8360             return -KEY_fork;
8361           }
8362
8363           goto unknown;
8364
8365         case 'g':
8366           switch (name[1])
8367           {
8368             case 'e':
8369               if (name[2] == 't' &&
8370                   name[3] == 'c')
8371               {                                   /* getc       */
8372                 return -KEY_getc;
8373               }
8374
8375               goto unknown;
8376
8377             case 'l':
8378               if (name[2] == 'o' &&
8379                   name[3] == 'b')
8380               {                                   /* glob       */
8381                 return KEY_glob;
8382               }
8383
8384               goto unknown;
8385
8386             case 'o':
8387               if (name[2] == 't' &&
8388                   name[3] == 'o')
8389               {                                   /* goto       */
8390                 return KEY_goto;
8391               }
8392
8393               goto unknown;
8394
8395             case 'r':
8396               if (name[2] == 'e' &&
8397                   name[3] == 'p')
8398               {                                   /* grep       */
8399                 return KEY_grep;
8400               }
8401
8402               goto unknown;
8403
8404             default:
8405               goto unknown;
8406           }
8407
8408         case 'j':
8409           if (name[1] == 'o' &&
8410               name[2] == 'i' &&
8411               name[3] == 'n')
8412           {                                       /* join       */
8413             return -KEY_join;
8414           }
8415
8416           goto unknown;
8417
8418         case 'k':
8419           switch (name[1])
8420           {
8421             case 'e':
8422               if (name[2] == 'y' &&
8423                   name[3] == 's')
8424               {                                   /* keys       */
8425                 return -KEY_keys;
8426               }
8427
8428               goto unknown;
8429
8430             case 'i':
8431               if (name[2] == 'l' &&
8432                   name[3] == 'l')
8433               {                                   /* kill       */
8434                 return -KEY_kill;
8435               }
8436
8437               goto unknown;
8438
8439             default:
8440               goto unknown;
8441           }
8442
8443         case 'l':
8444           switch (name[1])
8445           {
8446             case 'a':
8447               if (name[2] == 's' &&
8448                   name[3] == 't')
8449               {                                   /* last       */
8450                 return KEY_last;
8451               }
8452
8453               goto unknown;
8454
8455             case 'i':
8456               if (name[2] == 'n' &&
8457                   name[3] == 'k')
8458               {                                   /* link       */
8459                 return -KEY_link;
8460               }
8461
8462               goto unknown;
8463
8464             case 'o':
8465               if (name[2] == 'c' &&
8466                   name[3] == 'k')
8467               {                                   /* lock       */
8468                 return -KEY_lock;
8469               }
8470
8471               goto unknown;
8472
8473             default:
8474               goto unknown;
8475           }
8476
8477         case 'n':
8478           if (name[1] == 'e' &&
8479               name[2] == 'x' &&
8480               name[3] == 't')
8481           {                                       /* next       */
8482             return KEY_next;
8483           }
8484
8485           goto unknown;
8486
8487         case 'o':
8488           if (name[1] == 'p' &&
8489               name[2] == 'e' &&
8490               name[3] == 'n')
8491           {                                       /* open       */
8492             return -KEY_open;
8493           }
8494
8495           goto unknown;
8496
8497         case 'p':
8498           switch (name[1])
8499           {
8500             case 'a':
8501               if (name[2] == 'c' &&
8502                   name[3] == 'k')
8503               {                                   /* pack       */
8504                 return -KEY_pack;
8505               }
8506
8507               goto unknown;
8508
8509             case 'i':
8510               if (name[2] == 'p' &&
8511                   name[3] == 'e')
8512               {                                   /* pipe       */
8513                 return -KEY_pipe;
8514               }
8515
8516               goto unknown;
8517
8518             case 'u':
8519               if (name[2] == 's' &&
8520                   name[3] == 'h')
8521               {                                   /* push       */
8522                 return -KEY_push;
8523               }
8524
8525               goto unknown;
8526
8527             default:
8528               goto unknown;
8529           }
8530
8531         case 'r':
8532           switch (name[1])
8533           {
8534             case 'a':
8535               if (name[2] == 'n' &&
8536                   name[3] == 'd')
8537               {                                   /* rand       */
8538                 return -KEY_rand;
8539               }
8540
8541               goto unknown;
8542
8543             case 'e':
8544               switch (name[2])
8545               {
8546                 case 'a':
8547                   if (name[3] == 'd')
8548                   {                               /* read       */
8549                     return -KEY_read;
8550                   }
8551
8552                   goto unknown;
8553
8554                 case 'c':
8555                   if (name[3] == 'v')
8556                   {                               /* recv       */
8557                     return -KEY_recv;
8558                   }
8559
8560                   goto unknown;
8561
8562                 case 'd':
8563                   if (name[3] == 'o')
8564                   {                               /* redo       */
8565                     return KEY_redo;
8566                   }
8567
8568                   goto unknown;
8569
8570                 default:
8571                   goto unknown;
8572               }
8573
8574             default:
8575               goto unknown;
8576           }
8577
8578         case 's':
8579           switch (name[1])
8580           {
8581             case 'e':
8582               switch (name[2])
8583               {
8584                 case 'e':
8585                   if (name[3] == 'k')
8586                   {                               /* seek       */
8587                     return -KEY_seek;
8588                   }
8589
8590                   goto unknown;
8591
8592                 case 'n':
8593                   if (name[3] == 'd')
8594                   {                               /* send       */
8595                     return -KEY_send;
8596                   }
8597
8598                   goto unknown;
8599
8600                 default:
8601                   goto unknown;
8602               }
8603
8604             case 'o':
8605               if (name[2] == 'r' &&
8606                   name[3] == 't')
8607               {                                   /* sort       */
8608                 return KEY_sort;
8609               }
8610
8611               goto unknown;
8612
8613             case 'q':
8614               if (name[2] == 'r' &&
8615                   name[3] == 't')
8616               {                                   /* sqrt       */
8617                 return -KEY_sqrt;
8618               }
8619
8620               goto unknown;
8621
8622             case 't':
8623               if (name[2] == 'a' &&
8624                   name[3] == 't')
8625               {                                   /* stat       */
8626                 return -KEY_stat;
8627               }
8628
8629               goto unknown;
8630
8631             default:
8632               goto unknown;
8633           }
8634
8635         case 't':
8636           switch (name[1])
8637           {
8638             case 'e':
8639               if (name[2] == 'l' &&
8640                   name[3] == 'l')
8641               {                                   /* tell       */
8642                 return -KEY_tell;
8643               }
8644
8645               goto unknown;
8646
8647             case 'i':
8648               switch (name[2])
8649               {
8650                 case 'e':
8651                   if (name[3] == 'd')
8652                   {                               /* tied       */
8653                     return KEY_tied;
8654                   }
8655
8656                   goto unknown;
8657
8658                 case 'm':
8659                   if (name[3] == 'e')
8660                   {                               /* time       */
8661                     return -KEY_time;
8662                   }
8663
8664                   goto unknown;
8665
8666                 default:
8667                   goto unknown;
8668               }
8669
8670             default:
8671               goto unknown;
8672           }
8673
8674         case 'w':
8675           switch (name[1])
8676           {
8677             case 'a':
8678               switch (name[2])
8679               {
8680                 case 'i':
8681                   if (name[3] == 't')
8682                   {                               /* wait       */
8683                     return -KEY_wait;
8684                   }
8685
8686                   goto unknown;
8687
8688                 case 'r':
8689                   if (name[3] == 'n')
8690                   {                               /* warn       */
8691                     return -KEY_warn;
8692                   }
8693
8694                   goto unknown;
8695
8696                 default:
8697                   goto unknown;
8698               }
8699
8700             case 'h':
8701               if (name[2] == 'e' &&
8702                   name[3] == 'n')
8703               {                                   /* when       */
8704                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8705               }
8706
8707               goto unknown;
8708
8709             default:
8710               goto unknown;
8711           }
8712
8713         default:
8714           goto unknown;
8715       }
8716
8717     case 5: /* 39 tokens of length 5 */
8718       switch (name[0])
8719       {
8720         case 'B':
8721           if (name[1] == 'E' &&
8722               name[2] == 'G' &&
8723               name[3] == 'I' &&
8724               name[4] == 'N')
8725           {                                       /* BEGIN      */
8726             return KEY_BEGIN;
8727           }
8728
8729           goto unknown;
8730
8731         case 'C':
8732           if (name[1] == 'H' &&
8733               name[2] == 'E' &&
8734               name[3] == 'C' &&
8735               name[4] == 'K')
8736           {                                       /* CHECK      */
8737             return KEY_CHECK;
8738           }
8739
8740           goto unknown;
8741
8742         case 'a':
8743           switch (name[1])
8744           {
8745             case 'l':
8746               if (name[2] == 'a' &&
8747                   name[3] == 'r' &&
8748                   name[4] == 'm')
8749               {                                   /* alarm      */
8750                 return -KEY_alarm;
8751               }
8752
8753               goto unknown;
8754
8755             case 't':
8756               if (name[2] == 'a' &&
8757                   name[3] == 'n' &&
8758                   name[4] == '2')
8759               {                                   /* atan2      */
8760                 return -KEY_atan2;
8761               }
8762
8763               goto unknown;
8764
8765             default:
8766               goto unknown;
8767           }
8768
8769         case 'b':
8770           switch (name[1])
8771           {
8772             case 'l':
8773               if (name[2] == 'e' &&
8774                   name[3] == 's' &&
8775                   name[4] == 's')
8776               {                                   /* bless      */
8777                 return -KEY_bless;
8778               }
8779
8780               goto unknown;
8781
8782             case 'r':
8783               if (name[2] == 'e' &&
8784                   name[3] == 'a' &&
8785                   name[4] == 'k')
8786               {                                   /* break      */
8787                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8788               }
8789
8790               goto unknown;
8791
8792             default:
8793               goto unknown;
8794           }
8795
8796         case 'c':
8797           switch (name[1])
8798           {
8799             case 'h':
8800               switch (name[2])
8801               {
8802                 case 'd':
8803                   if (name[3] == 'i' &&
8804                       name[4] == 'r')
8805                   {                               /* chdir      */
8806                     return -KEY_chdir;
8807                   }
8808
8809                   goto unknown;
8810
8811                 case 'm':
8812                   if (name[3] == 'o' &&
8813                       name[4] == 'd')
8814                   {                               /* chmod      */
8815                     return -KEY_chmod;
8816                   }
8817
8818                   goto unknown;
8819
8820                 case 'o':
8821                   switch (name[3])
8822                   {
8823                     case 'm':
8824                       if (name[4] == 'p')
8825                       {                           /* chomp      */
8826                         return -KEY_chomp;
8827                       }
8828
8829                       goto unknown;
8830
8831                     case 'w':
8832                       if (name[4] == 'n')
8833                       {                           /* chown      */
8834                         return -KEY_chown;
8835                       }
8836
8837                       goto unknown;
8838
8839                     default:
8840                       goto unknown;
8841                   }
8842
8843                 default:
8844                   goto unknown;
8845               }
8846
8847             case 'l':
8848               if (name[2] == 'o' &&
8849                   name[3] == 's' &&
8850                   name[4] == 'e')
8851               {                                   /* close      */
8852                 return -KEY_close;
8853               }
8854
8855               goto unknown;
8856
8857             case 'r':
8858               if (name[2] == 'y' &&
8859                   name[3] == 'p' &&
8860                   name[4] == 't')
8861               {                                   /* crypt      */
8862                 return -KEY_crypt;
8863               }
8864
8865               goto unknown;
8866
8867             default:
8868               goto unknown;
8869           }
8870
8871         case 'e':
8872           if (name[1] == 'l' &&
8873               name[2] == 's' &&
8874               name[3] == 'i' &&
8875               name[4] == 'f')
8876           {                                       /* elsif      */
8877             return KEY_elsif;
8878           }
8879
8880           goto unknown;
8881
8882         case 'f':
8883           switch (name[1])
8884           {
8885             case 'c':
8886               if (name[2] == 'n' &&
8887                   name[3] == 't' &&
8888                   name[4] == 'l')
8889               {                                   /* fcntl      */
8890                 return -KEY_fcntl;
8891               }
8892
8893               goto unknown;
8894
8895             case 'l':
8896               if (name[2] == 'o' &&
8897                   name[3] == 'c' &&
8898                   name[4] == 'k')
8899               {                                   /* flock      */
8900                 return -KEY_flock;
8901               }
8902
8903               goto unknown;
8904
8905             default:
8906               goto unknown;
8907           }
8908
8909         case 'g':
8910           if (name[1] == 'i' &&
8911               name[2] == 'v' &&
8912               name[3] == 'e' &&
8913               name[4] == 'n')
8914           {                                       /* given      */
8915             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8916           }
8917
8918           goto unknown;
8919
8920         case 'i':
8921           switch (name[1])
8922           {
8923             case 'n':
8924               if (name[2] == 'd' &&
8925                   name[3] == 'e' &&
8926                   name[4] == 'x')
8927               {                                   /* index      */
8928                 return -KEY_index;
8929               }
8930
8931               goto unknown;
8932
8933             case 'o':
8934               if (name[2] == 'c' &&
8935                   name[3] == 't' &&
8936                   name[4] == 'l')
8937               {                                   /* ioctl      */
8938                 return -KEY_ioctl;
8939               }
8940
8941               goto unknown;
8942
8943             default:
8944               goto unknown;
8945           }
8946
8947         case 'l':
8948           switch (name[1])
8949           {
8950             case 'o':
8951               if (name[2] == 'c' &&
8952                   name[3] == 'a' &&
8953                   name[4] == 'l')
8954               {                                   /* local      */
8955                 return KEY_local;
8956               }
8957
8958               goto unknown;
8959
8960             case 's':
8961               if (name[2] == 't' &&
8962                   name[3] == 'a' &&
8963                   name[4] == 't')
8964               {                                   /* lstat      */
8965                 return -KEY_lstat;
8966               }
8967
8968               goto unknown;
8969
8970             default:
8971               goto unknown;
8972           }
8973
8974         case 'm':
8975           if (name[1] == 'k' &&
8976               name[2] == 'd' &&
8977               name[3] == 'i' &&
8978               name[4] == 'r')
8979           {                                       /* mkdir      */
8980             return -KEY_mkdir;
8981           }
8982
8983           goto unknown;
8984
8985         case 'p':
8986           if (name[1] == 'r' &&
8987               name[2] == 'i' &&
8988               name[3] == 'n' &&
8989               name[4] == 't')
8990           {                                       /* print      */
8991             return KEY_print;
8992           }
8993
8994           goto unknown;
8995
8996         case 'r':
8997           switch (name[1])
8998           {
8999             case 'e':
9000               if (name[2] == 's' &&
9001                   name[3] == 'e' &&
9002                   name[4] == 't')
9003               {                                   /* reset      */
9004                 return -KEY_reset;
9005               }
9006
9007               goto unknown;
9008
9009             case 'm':
9010               if (name[2] == 'd' &&
9011                   name[3] == 'i' &&
9012                   name[4] == 'r')
9013               {                                   /* rmdir      */
9014                 return -KEY_rmdir;
9015               }
9016
9017               goto unknown;
9018
9019             default:
9020               goto unknown;
9021           }
9022
9023         case 's':
9024           switch (name[1])
9025           {
9026             case 'e':
9027               if (name[2] == 'm' &&
9028                   name[3] == 'o' &&
9029                   name[4] == 'p')
9030               {                                   /* semop      */
9031                 return -KEY_semop;
9032               }
9033
9034               goto unknown;
9035
9036             case 'h':
9037               if (name[2] == 'i' &&
9038                   name[3] == 'f' &&
9039                   name[4] == 't')
9040               {                                   /* shift      */
9041                 return -KEY_shift;
9042               }
9043
9044               goto unknown;
9045
9046             case 'l':
9047               if (name[2] == 'e' &&
9048                   name[3] == 'e' &&
9049                   name[4] == 'p')
9050               {                                   /* sleep      */
9051                 return -KEY_sleep;
9052               }
9053
9054               goto unknown;
9055
9056             case 'p':
9057               if (name[2] == 'l' &&
9058                   name[3] == 'i' &&
9059                   name[4] == 't')
9060               {                                   /* split      */
9061                 return KEY_split;
9062               }
9063
9064               goto unknown;
9065
9066             case 'r':
9067               if (name[2] == 'a' &&
9068                   name[3] == 'n' &&
9069                   name[4] == 'd')
9070               {                                   /* srand      */
9071                 return -KEY_srand;
9072               }
9073
9074               goto unknown;
9075
9076             case 't':
9077               switch (name[2])
9078               {
9079                 case 'a':
9080                   if (name[3] == 't' &&
9081                       name[4] == 'e')
9082                   {                               /* state      */
9083                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9084                   }
9085
9086                   goto unknown;
9087
9088                 case 'u':
9089                   if (name[3] == 'd' &&
9090                       name[4] == 'y')
9091                   {                               /* study      */
9092                     return KEY_study;
9093                   }
9094
9095                   goto unknown;
9096
9097                 default:
9098                   goto unknown;
9099               }
9100
9101             default:
9102               goto unknown;
9103           }
9104
9105         case 't':
9106           if (name[1] == 'i' &&
9107               name[2] == 'm' &&
9108               name[3] == 'e' &&
9109               name[4] == 's')
9110           {                                       /* times      */
9111             return -KEY_times;
9112           }
9113
9114           goto unknown;
9115
9116         case 'u':
9117           switch (name[1])
9118           {
9119             case 'm':
9120               if (name[2] == 'a' &&
9121                   name[3] == 's' &&
9122                   name[4] == 'k')
9123               {                                   /* umask      */
9124                 return -KEY_umask;
9125               }
9126
9127               goto unknown;
9128
9129             case 'n':
9130               switch (name[2])
9131               {
9132                 case 'd':
9133                   if (name[3] == 'e' &&
9134                       name[4] == 'f')
9135                   {                               /* undef      */
9136                     return KEY_undef;
9137                   }
9138
9139                   goto unknown;
9140
9141                 case 't':
9142                   if (name[3] == 'i')
9143                   {
9144                     switch (name[4])
9145                     {
9146                       case 'e':
9147                         {                         /* untie      */
9148                           return KEY_untie;
9149                         }
9150
9151                       case 'l':
9152                         {                         /* until      */
9153                           return KEY_until;
9154                         }
9155
9156                       default:
9157                         goto unknown;
9158                     }
9159                   }
9160
9161                   goto unknown;
9162
9163                 default:
9164                   goto unknown;
9165               }
9166
9167             case 't':
9168               if (name[2] == 'i' &&
9169                   name[3] == 'm' &&
9170                   name[4] == 'e')
9171               {                                   /* utime      */
9172                 return -KEY_utime;
9173               }
9174
9175               goto unknown;
9176
9177             default:
9178               goto unknown;
9179           }
9180
9181         case 'w':
9182           switch (name[1])
9183           {
9184             case 'h':
9185               if (name[2] == 'i' &&
9186                   name[3] == 'l' &&
9187                   name[4] == 'e')
9188               {                                   /* while      */
9189                 return KEY_while;
9190               }
9191
9192               goto unknown;
9193
9194             case 'r':
9195               if (name[2] == 'i' &&
9196                   name[3] == 't' &&
9197                   name[4] == 'e')
9198               {                                   /* write      */
9199                 return -KEY_write;
9200               }
9201
9202               goto unknown;
9203
9204             default:
9205               goto unknown;
9206           }
9207
9208         default:
9209           goto unknown;
9210       }
9211
9212     case 6: /* 33 tokens of length 6 */
9213       switch (name[0])
9214       {
9215         case 'a':
9216           if (name[1] == 'c' &&
9217               name[2] == 'c' &&
9218               name[3] == 'e' &&
9219               name[4] == 'p' &&
9220               name[5] == 't')
9221           {                                       /* accept     */
9222             return -KEY_accept;
9223           }
9224
9225           goto unknown;
9226
9227         case 'c':
9228           switch (name[1])
9229           {
9230             case 'a':
9231               if (name[2] == 'l' &&
9232                   name[3] == 'l' &&
9233                   name[4] == 'e' &&
9234                   name[5] == 'r')
9235               {                                   /* caller     */
9236                 return -KEY_caller;
9237               }
9238
9239               goto unknown;
9240
9241             case 'h':
9242               if (name[2] == 'r' &&
9243                   name[3] == 'o' &&
9244                   name[4] == 'o' &&
9245                   name[5] == 't')
9246               {                                   /* chroot     */
9247                 return -KEY_chroot;
9248               }
9249
9250               goto unknown;
9251
9252             default:
9253               goto unknown;
9254           }
9255
9256         case 'd':
9257           if (name[1] == 'e' &&
9258               name[2] == 'l' &&
9259               name[3] == 'e' &&
9260               name[4] == 't' &&
9261               name[5] == 'e')
9262           {                                       /* delete     */
9263             return KEY_delete;
9264           }
9265
9266           goto unknown;
9267
9268         case 'e':
9269           switch (name[1])
9270           {
9271             case 'l':
9272               if (name[2] == 's' &&
9273                   name[3] == 'e' &&
9274                   name[4] == 'i' &&
9275                   name[5] == 'f')
9276               {                                   /* elseif     */
9277                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9278               }
9279
9280               goto unknown;
9281
9282             case 'x':
9283               if (name[2] == 'i' &&
9284                   name[3] == 's' &&
9285                   name[4] == 't' &&
9286                   name[5] == 's')
9287               {                                   /* exists     */
9288                 return KEY_exists;
9289               }
9290
9291               goto unknown;
9292
9293             default:
9294               goto unknown;
9295           }
9296
9297         case 'f':
9298           switch (name[1])
9299           {
9300             case 'i':
9301               if (name[2] == 'l' &&
9302                   name[3] == 'e' &&
9303                   name[4] == 'n' &&
9304                   name[5] == 'o')
9305               {                                   /* fileno     */
9306                 return -KEY_fileno;
9307               }
9308
9309               goto unknown;
9310
9311             case 'o':
9312               if (name[2] == 'r' &&
9313                   name[3] == 'm' &&
9314                   name[4] == 'a' &&
9315                   name[5] == 't')
9316               {                                   /* format     */
9317                 return KEY_format;
9318               }
9319
9320               goto unknown;
9321
9322             default:
9323               goto unknown;
9324           }
9325
9326         case 'g':
9327           if (name[1] == 'm' &&
9328               name[2] == 't' &&
9329               name[3] == 'i' &&
9330               name[4] == 'm' &&
9331               name[5] == 'e')
9332           {                                       /* gmtime     */
9333             return -KEY_gmtime;
9334           }
9335
9336           goto unknown;
9337
9338         case 'l':
9339           switch (name[1])
9340           {
9341             case 'e':
9342               if (name[2] == 'n' &&
9343                   name[3] == 'g' &&
9344                   name[4] == 't' &&
9345                   name[5] == 'h')
9346               {                                   /* length     */
9347                 return -KEY_length;
9348               }
9349
9350               goto unknown;
9351
9352             case 'i':
9353               if (name[2] == 's' &&
9354                   name[3] == 't' &&
9355                   name[4] == 'e' &&
9356                   name[5] == 'n')
9357               {                                   /* listen     */
9358                 return -KEY_listen;
9359               }
9360
9361               goto unknown;
9362
9363             default:
9364               goto unknown;
9365           }
9366
9367         case 'm':
9368           if (name[1] == 's' &&
9369               name[2] == 'g')
9370           {
9371             switch (name[3])
9372             {
9373               case 'c':
9374                 if (name[4] == 't' &&
9375                     name[5] == 'l')
9376                 {                                 /* msgctl     */
9377                   return -KEY_msgctl;
9378                 }
9379
9380                 goto unknown;
9381
9382               case 'g':
9383                 if (name[4] == 'e' &&
9384                     name[5] == 't')
9385                 {                                 /* msgget     */
9386                   return -KEY_msgget;
9387                 }
9388
9389                 goto unknown;
9390
9391               case 'r':
9392                 if (name[4] == 'c' &&
9393                     name[5] == 'v')
9394                 {                                 /* msgrcv     */
9395                   return -KEY_msgrcv;
9396                 }
9397
9398                 goto unknown;
9399
9400               case 's':
9401                 if (name[4] == 'n' &&
9402                     name[5] == 'd')
9403                 {                                 /* msgsnd     */
9404                   return -KEY_msgsnd;
9405                 }
9406
9407                 goto unknown;
9408
9409               default:
9410                 goto unknown;
9411             }
9412           }
9413
9414           goto unknown;
9415
9416         case 'p':
9417           if (name[1] == 'r' &&
9418               name[2] == 'i' &&
9419               name[3] == 'n' &&
9420               name[4] == 't' &&
9421               name[5] == 'f')
9422           {                                       /* printf     */
9423             return KEY_printf;
9424           }
9425
9426           goto unknown;
9427
9428         case 'r':
9429           switch (name[1])
9430           {
9431             case 'e':
9432               switch (name[2])
9433               {
9434                 case 'n':
9435                   if (name[3] == 'a' &&
9436                       name[4] == 'm' &&
9437                       name[5] == 'e')
9438                   {                               /* rename     */
9439                     return -KEY_rename;
9440                   }
9441
9442                   goto unknown;
9443
9444                 case 't':
9445                   if (name[3] == 'u' &&
9446                       name[4] == 'r' &&
9447                       name[5] == 'n')
9448                   {                               /* return     */
9449                     return KEY_return;
9450                   }
9451
9452                   goto unknown;
9453
9454                 default:
9455                   goto unknown;
9456               }
9457
9458             case 'i':
9459               if (name[2] == 'n' &&
9460                   name[3] == 'd' &&
9461                   name[4] == 'e' &&
9462                   name[5] == 'x')
9463               {                                   /* rindex     */
9464                 return -KEY_rindex;
9465               }
9466
9467               goto unknown;
9468
9469             default:
9470               goto unknown;
9471           }
9472
9473         case 's':
9474           switch (name[1])
9475           {
9476             case 'c':
9477               if (name[2] == 'a' &&
9478                   name[3] == 'l' &&
9479                   name[4] == 'a' &&
9480                   name[5] == 'r')
9481               {                                   /* scalar     */
9482                 return KEY_scalar;
9483               }
9484
9485               goto unknown;
9486
9487             case 'e':
9488               switch (name[2])
9489               {
9490                 case 'l':
9491                   if (name[3] == 'e' &&
9492                       name[4] == 'c' &&
9493                       name[5] == 't')
9494                   {                               /* select     */
9495                     return -KEY_select;
9496                   }
9497
9498                   goto unknown;
9499
9500                 case 'm':
9501                   switch (name[3])
9502                   {
9503                     case 'c':
9504                       if (name[4] == 't' &&
9505                           name[5] == 'l')
9506                       {                           /* semctl     */
9507                         return -KEY_semctl;
9508                       }
9509
9510                       goto unknown;
9511
9512                     case 'g':
9513                       if (name[4] == 'e' &&
9514                           name[5] == 't')
9515                       {                           /* semget     */
9516                         return -KEY_semget;
9517                       }
9518
9519                       goto unknown;
9520
9521                     default:
9522                       goto unknown;
9523                   }
9524
9525                 default:
9526                   goto unknown;
9527               }
9528
9529             case 'h':
9530               if (name[2] == 'm')
9531               {
9532                 switch (name[3])
9533                 {
9534                   case 'c':
9535                     if (name[4] == 't' &&
9536                         name[5] == 'l')
9537                     {                             /* shmctl     */
9538                       return -KEY_shmctl;
9539                     }
9540
9541                     goto unknown;
9542
9543                   case 'g':
9544                     if (name[4] == 'e' &&
9545                         name[5] == 't')
9546                     {                             /* shmget     */
9547                       return -KEY_shmget;
9548                     }
9549
9550                     goto unknown;
9551
9552                   default:
9553                     goto unknown;
9554                 }
9555               }
9556
9557               goto unknown;
9558
9559             case 'o':
9560               if (name[2] == 'c' &&
9561                   name[3] == 'k' &&
9562                   name[4] == 'e' &&
9563                   name[5] == 't')
9564               {                                   /* socket     */
9565                 return -KEY_socket;
9566               }
9567
9568               goto unknown;
9569
9570             case 'p':
9571               if (name[2] == 'l' &&
9572                   name[3] == 'i' &&
9573                   name[4] == 'c' &&
9574                   name[5] == 'e')
9575               {                                   /* splice     */
9576                 return -KEY_splice;
9577               }
9578
9579               goto unknown;
9580
9581             case 'u':
9582               if (name[2] == 'b' &&
9583                   name[3] == 's' &&
9584                   name[4] == 't' &&
9585                   name[5] == 'r')
9586               {                                   /* substr     */
9587                 return -KEY_substr;
9588               }
9589
9590               goto unknown;
9591
9592             case 'y':
9593               if (name[2] == 's' &&
9594                   name[3] == 't' &&
9595                   name[4] == 'e' &&
9596                   name[5] == 'm')
9597               {                                   /* system     */
9598                 return -KEY_system;
9599               }
9600
9601               goto unknown;
9602
9603             default:
9604               goto unknown;
9605           }
9606
9607         case 'u':
9608           if (name[1] == 'n')
9609           {
9610             switch (name[2])
9611             {
9612               case 'l':
9613                 switch (name[3])
9614                 {
9615                   case 'e':
9616                     if (name[4] == 's' &&
9617                         name[5] == 's')
9618                     {                             /* unless     */
9619                       return KEY_unless;
9620                     }
9621
9622                     goto unknown;
9623
9624                   case 'i':
9625                     if (name[4] == 'n' &&
9626                         name[5] == 'k')
9627                     {                             /* unlink     */
9628                       return -KEY_unlink;
9629                     }
9630
9631                     goto unknown;
9632
9633                   default:
9634                     goto unknown;
9635                 }
9636
9637               case 'p':
9638                 if (name[3] == 'a' &&
9639                     name[4] == 'c' &&
9640                     name[5] == 'k')
9641                 {                                 /* unpack     */
9642                   return -KEY_unpack;
9643                 }
9644
9645                 goto unknown;
9646
9647               default:
9648                 goto unknown;
9649             }
9650           }
9651
9652           goto unknown;
9653
9654         case 'v':
9655           if (name[1] == 'a' &&
9656               name[2] == 'l' &&
9657               name[3] == 'u' &&
9658               name[4] == 'e' &&
9659               name[5] == 's')
9660           {                                       /* values     */
9661             return -KEY_values;
9662           }
9663
9664           goto unknown;
9665
9666         default:
9667           goto unknown;
9668       }
9669
9670     case 7: /* 29 tokens of length 7 */
9671       switch (name[0])
9672       {
9673         case 'D':
9674           if (name[1] == 'E' &&
9675               name[2] == 'S' &&
9676               name[3] == 'T' &&
9677               name[4] == 'R' &&
9678               name[5] == 'O' &&
9679               name[6] == 'Y')
9680           {                                       /* DESTROY    */
9681             return KEY_DESTROY;
9682           }
9683
9684           goto unknown;
9685
9686         case '_':
9687           if (name[1] == '_' &&
9688               name[2] == 'E' &&
9689               name[3] == 'N' &&
9690               name[4] == 'D' &&
9691               name[5] == '_' &&
9692               name[6] == '_')
9693           {                                       /* __END__    */
9694             return KEY___END__;
9695           }
9696
9697           goto unknown;
9698
9699         case 'b':
9700           if (name[1] == 'i' &&
9701               name[2] == 'n' &&
9702               name[3] == 'm' &&
9703               name[4] == 'o' &&
9704               name[5] == 'd' &&
9705               name[6] == 'e')
9706           {                                       /* binmode    */
9707             return -KEY_binmode;
9708           }
9709
9710           goto unknown;
9711
9712         case 'c':
9713           if (name[1] == 'o' &&
9714               name[2] == 'n' &&
9715               name[3] == 'n' &&
9716               name[4] == 'e' &&
9717               name[5] == 'c' &&
9718               name[6] == 't')
9719           {                                       /* connect    */
9720             return -KEY_connect;
9721           }
9722
9723           goto unknown;
9724
9725         case 'd':
9726           switch (name[1])
9727           {
9728             case 'b':
9729               if (name[2] == 'm' &&
9730                   name[3] == 'o' &&
9731                   name[4] == 'p' &&
9732                   name[5] == 'e' &&
9733                   name[6] == 'n')
9734               {                                   /* dbmopen    */
9735                 return -KEY_dbmopen;
9736               }
9737
9738               goto unknown;
9739
9740             case 'e':
9741               if (name[2] == 'f')
9742               {
9743                 switch (name[3])
9744                 {
9745                   case 'a':
9746                     if (name[4] == 'u' &&
9747                         name[5] == 'l' &&
9748                         name[6] == 't')
9749                     {                             /* default    */
9750                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9751                     }
9752
9753                     goto unknown;
9754
9755                   case 'i':
9756                     if (name[4] == 'n' &&
9757                         name[5] == 'e' &&
9758                         name[6] == 'd')
9759                     {                             /* defined    */
9760                       return KEY_defined;
9761                     }
9762
9763                     goto unknown;
9764
9765                   default:
9766                     goto unknown;
9767                 }
9768               }
9769
9770               goto unknown;
9771
9772             default:
9773               goto unknown;
9774           }
9775
9776         case 'f':
9777           if (name[1] == 'o' &&
9778               name[2] == 'r' &&
9779               name[3] == 'e' &&
9780               name[4] == 'a' &&
9781               name[5] == 'c' &&
9782               name[6] == 'h')
9783           {                                       /* foreach    */
9784             return KEY_foreach;
9785           }
9786
9787           goto unknown;
9788
9789         case 'g':
9790           if (name[1] == 'e' &&
9791               name[2] == 't' &&
9792               name[3] == 'p')
9793           {
9794             switch (name[4])
9795             {
9796               case 'g':
9797                 if (name[5] == 'r' &&
9798                     name[6] == 'p')
9799                 {                                 /* getpgrp    */
9800                   return -KEY_getpgrp;
9801                 }
9802
9803                 goto unknown;
9804
9805               case 'p':
9806                 if (name[5] == 'i' &&
9807                     name[6] == 'd')
9808                 {                                 /* getppid    */
9809                   return -KEY_getppid;
9810                 }
9811
9812                 goto unknown;
9813
9814               default:
9815                 goto unknown;
9816             }
9817           }
9818
9819           goto unknown;
9820
9821         case 'l':
9822           if (name[1] == 'c' &&
9823               name[2] == 'f' &&
9824               name[3] == 'i' &&
9825               name[4] == 'r' &&
9826               name[5] == 's' &&
9827               name[6] == 't')
9828           {                                       /* lcfirst    */
9829             return -KEY_lcfirst;
9830           }
9831
9832           goto unknown;
9833
9834         case 'o':
9835           if (name[1] == 'p' &&
9836               name[2] == 'e' &&
9837               name[3] == 'n' &&
9838               name[4] == 'd' &&
9839               name[5] == 'i' &&
9840               name[6] == 'r')
9841           {                                       /* opendir    */
9842             return -KEY_opendir;
9843           }
9844
9845           goto unknown;
9846
9847         case 'p':
9848           if (name[1] == 'a' &&
9849               name[2] == 'c' &&
9850               name[3] == 'k' &&
9851               name[4] == 'a' &&
9852               name[5] == 'g' &&
9853               name[6] == 'e')
9854           {                                       /* package    */
9855             return KEY_package;
9856           }
9857
9858           goto unknown;
9859
9860         case 'r':
9861           if (name[1] == 'e')
9862           {
9863             switch (name[2])
9864             {
9865               case 'a':
9866                 if (name[3] == 'd' &&
9867                     name[4] == 'd' &&
9868                     name[5] == 'i' &&
9869                     name[6] == 'r')
9870                 {                                 /* readdir    */
9871                   return -KEY_readdir;
9872                 }
9873
9874                 goto unknown;
9875
9876               case 'q':
9877                 if (name[3] == 'u' &&
9878                     name[4] == 'i' &&
9879                     name[5] == 'r' &&
9880                     name[6] == 'e')
9881                 {                                 /* require    */
9882                   return KEY_require;
9883                 }
9884
9885                 goto unknown;
9886
9887               case 'v':
9888                 if (name[3] == 'e' &&
9889                     name[4] == 'r' &&
9890                     name[5] == 's' &&
9891                     name[6] == 'e')
9892                 {                                 /* reverse    */
9893                   return -KEY_reverse;
9894                 }
9895
9896                 goto unknown;
9897
9898               default:
9899                 goto unknown;
9900             }
9901           }
9902
9903           goto unknown;
9904
9905         case 's':
9906           switch (name[1])
9907           {
9908             case 'e':
9909               switch (name[2])
9910               {
9911                 case 'e':
9912                   if (name[3] == 'k' &&
9913                       name[4] == 'd' &&
9914                       name[5] == 'i' &&
9915                       name[6] == 'r')
9916                   {                               /* seekdir    */
9917                     return -KEY_seekdir;
9918                   }
9919
9920                   goto unknown;
9921
9922                 case 't':
9923                   if (name[3] == 'p' &&
9924                       name[4] == 'g' &&
9925                       name[5] == 'r' &&
9926                       name[6] == 'p')
9927                   {                               /* setpgrp    */
9928                     return -KEY_setpgrp;
9929                   }
9930
9931                   goto unknown;
9932
9933                 default:
9934                   goto unknown;
9935               }
9936
9937             case 'h':
9938               if (name[2] == 'm' &&
9939                   name[3] == 'r' &&
9940                   name[4] == 'e' &&
9941                   name[5] == 'a' &&
9942                   name[6] == 'd')
9943               {                                   /* shmread    */
9944                 return -KEY_shmread;
9945               }
9946
9947               goto unknown;
9948
9949             case 'p':
9950               if (name[2] == 'r' &&
9951                   name[3] == 'i' &&
9952                   name[4] == 'n' &&
9953                   name[5] == 't' &&
9954                   name[6] == 'f')
9955               {                                   /* sprintf    */
9956                 return -KEY_sprintf;
9957               }
9958
9959               goto unknown;
9960
9961             case 'y':
9962               switch (name[2])
9963               {
9964                 case 'm':
9965                   if (name[3] == 'l' &&
9966                       name[4] == 'i' &&
9967                       name[5] == 'n' &&
9968                       name[6] == 'k')
9969                   {                               /* symlink    */
9970                     return -KEY_symlink;
9971                   }
9972
9973                   goto unknown;
9974
9975                 case 's':
9976                   switch (name[3])
9977                   {
9978                     case 'c':
9979                       if (name[4] == 'a' &&
9980                           name[5] == 'l' &&
9981                           name[6] == 'l')
9982                       {                           /* syscall    */
9983                         return -KEY_syscall;
9984                       }
9985
9986                       goto unknown;
9987
9988                     case 'o':
9989                       if (name[4] == 'p' &&
9990                           name[5] == 'e' &&
9991                           name[6] == 'n')
9992                       {                           /* sysopen    */
9993                         return -KEY_sysopen;
9994                       }
9995
9996                       goto unknown;
9997
9998                     case 'r':
9999                       if (name[4] == 'e' &&
10000                           name[5] == 'a' &&
10001                           name[6] == 'd')
10002                       {                           /* sysread    */
10003                         return -KEY_sysread;
10004                       }
10005
10006                       goto unknown;
10007
10008                     case 's':
10009                       if (name[4] == 'e' &&
10010                           name[5] == 'e' &&
10011                           name[6] == 'k')
10012                       {                           /* sysseek    */
10013                         return -KEY_sysseek;
10014                       }
10015
10016                       goto unknown;
10017
10018                     default:
10019                       goto unknown;
10020                   }
10021
10022                 default:
10023                   goto unknown;
10024               }
10025
10026             default:
10027               goto unknown;
10028           }
10029
10030         case 't':
10031           if (name[1] == 'e' &&
10032               name[2] == 'l' &&
10033               name[3] == 'l' &&
10034               name[4] == 'd' &&
10035               name[5] == 'i' &&
10036               name[6] == 'r')
10037           {                                       /* telldir    */
10038             return -KEY_telldir;
10039           }
10040
10041           goto unknown;
10042
10043         case 'u':
10044           switch (name[1])
10045           {
10046             case 'c':
10047               if (name[2] == 'f' &&
10048                   name[3] == 'i' &&
10049                   name[4] == 'r' &&
10050                   name[5] == 's' &&
10051                   name[6] == 't')
10052               {                                   /* ucfirst    */
10053                 return -KEY_ucfirst;
10054               }
10055
10056               goto unknown;
10057
10058             case 'n':
10059               if (name[2] == 's' &&
10060                   name[3] == 'h' &&
10061                   name[4] == 'i' &&
10062                   name[5] == 'f' &&
10063                   name[6] == 't')
10064               {                                   /* unshift    */
10065                 return -KEY_unshift;
10066               }
10067
10068               goto unknown;
10069
10070             default:
10071               goto unknown;
10072           }
10073
10074         case 'w':
10075           if (name[1] == 'a' &&
10076               name[2] == 'i' &&
10077               name[3] == 't' &&
10078               name[4] == 'p' &&
10079               name[5] == 'i' &&
10080               name[6] == 'd')
10081           {                                       /* waitpid    */
10082             return -KEY_waitpid;
10083           }
10084
10085           goto unknown;
10086
10087         default:
10088           goto unknown;
10089       }
10090
10091     case 8: /* 26 tokens of length 8 */
10092       switch (name[0])
10093       {
10094         case 'A':
10095           if (name[1] == 'U' &&
10096               name[2] == 'T' &&
10097               name[3] == 'O' &&
10098               name[4] == 'L' &&
10099               name[5] == 'O' &&
10100               name[6] == 'A' &&
10101               name[7] == 'D')
10102           {                                       /* AUTOLOAD   */
10103             return KEY_AUTOLOAD;
10104           }
10105
10106           goto unknown;
10107
10108         case '_':
10109           if (name[1] == '_')
10110           {
10111             switch (name[2])
10112             {
10113               case 'D':
10114                 if (name[3] == 'A' &&
10115                     name[4] == 'T' &&
10116                     name[5] == 'A' &&
10117                     name[6] == '_' &&
10118                     name[7] == '_')
10119                 {                                 /* __DATA__   */
10120                   return KEY___DATA__;
10121                 }
10122
10123                 goto unknown;
10124
10125               case 'F':
10126                 if (name[3] == 'I' &&
10127                     name[4] == 'L' &&
10128                     name[5] == 'E' &&
10129                     name[6] == '_' &&
10130                     name[7] == '_')
10131                 {                                 /* __FILE__   */
10132                   return -KEY___FILE__;
10133                 }
10134
10135                 goto unknown;
10136
10137               case 'L':
10138                 if (name[3] == 'I' &&
10139                     name[4] == 'N' &&
10140                     name[5] == 'E' &&
10141                     name[6] == '_' &&
10142                     name[7] == '_')
10143                 {                                 /* __LINE__   */
10144                   return -KEY___LINE__;
10145                 }
10146
10147                 goto unknown;
10148
10149               default:
10150                 goto unknown;
10151             }
10152           }
10153
10154           goto unknown;
10155
10156         case 'c':
10157           switch (name[1])
10158           {
10159             case 'l':
10160               if (name[2] == 'o' &&
10161                   name[3] == 's' &&
10162                   name[4] == 'e' &&
10163                   name[5] == 'd' &&
10164                   name[6] == 'i' &&
10165                   name[7] == 'r')
10166               {                                   /* closedir   */
10167                 return -KEY_closedir;
10168               }
10169
10170               goto unknown;
10171
10172             case 'o':
10173               if (name[2] == 'n' &&
10174                   name[3] == 't' &&
10175                   name[4] == 'i' &&
10176                   name[5] == 'n' &&
10177                   name[6] == 'u' &&
10178                   name[7] == 'e')
10179               {                                   /* continue   */
10180                 return -KEY_continue;
10181               }
10182
10183               goto unknown;
10184
10185             default:
10186               goto unknown;
10187           }
10188
10189         case 'd':
10190           if (name[1] == 'b' &&
10191               name[2] == 'm' &&
10192               name[3] == 'c' &&
10193               name[4] == 'l' &&
10194               name[5] == 'o' &&
10195               name[6] == 's' &&
10196               name[7] == 'e')
10197           {                                       /* dbmclose   */
10198             return -KEY_dbmclose;
10199           }
10200
10201           goto unknown;
10202
10203         case 'e':
10204           if (name[1] == 'n' &&
10205               name[2] == 'd')
10206           {
10207             switch (name[3])
10208             {
10209               case 'g':
10210                 if (name[4] == 'r' &&
10211                     name[5] == 'e' &&
10212                     name[6] == 'n' &&
10213                     name[7] == 't')
10214                 {                                 /* endgrent   */
10215                   return -KEY_endgrent;
10216                 }
10217
10218                 goto unknown;
10219
10220               case 'p':
10221                 if (name[4] == 'w' &&
10222                     name[5] == 'e' &&
10223                     name[6] == 'n' &&
10224                     name[7] == 't')
10225                 {                                 /* endpwent   */
10226                   return -KEY_endpwent;
10227                 }
10228
10229                 goto unknown;
10230
10231               default:
10232                 goto unknown;
10233             }
10234           }
10235
10236           goto unknown;
10237
10238         case 'f':
10239           if (name[1] == 'o' &&
10240               name[2] == 'r' &&
10241               name[3] == 'm' &&
10242               name[4] == 'l' &&
10243               name[5] == 'i' &&
10244               name[6] == 'n' &&
10245               name[7] == 'e')
10246           {                                       /* formline   */
10247             return -KEY_formline;
10248           }
10249
10250           goto unknown;
10251
10252         case 'g':
10253           if (name[1] == 'e' &&
10254               name[2] == 't')
10255           {
10256             switch (name[3])
10257             {
10258               case 'g':
10259                 if (name[4] == 'r')
10260                 {
10261                   switch (name[5])
10262                   {
10263                     case 'e':
10264                       if (name[6] == 'n' &&
10265                           name[7] == 't')
10266                       {                           /* getgrent   */
10267                         return -KEY_getgrent;
10268                       }
10269
10270                       goto unknown;
10271
10272                     case 'g':
10273                       if (name[6] == 'i' &&
10274                           name[7] == 'd')
10275                       {                           /* getgrgid   */
10276                         return -KEY_getgrgid;
10277                       }
10278
10279                       goto unknown;
10280
10281                     case 'n':
10282                       if (name[6] == 'a' &&
10283                           name[7] == 'm')
10284                       {                           /* getgrnam   */
10285                         return -KEY_getgrnam;
10286                       }
10287
10288                       goto unknown;
10289
10290                     default:
10291                       goto unknown;
10292                   }
10293                 }
10294
10295                 goto unknown;
10296
10297               case 'l':
10298                 if (name[4] == 'o' &&
10299                     name[5] == 'g' &&
10300                     name[6] == 'i' &&
10301                     name[7] == 'n')
10302                 {                                 /* getlogin   */
10303                   return -KEY_getlogin;
10304                 }
10305
10306                 goto unknown;
10307
10308               case 'p':
10309                 if (name[4] == 'w')
10310                 {
10311                   switch (name[5])
10312                   {
10313                     case 'e':
10314                       if (name[6] == 'n' &&
10315                           name[7] == 't')
10316                       {                           /* getpwent   */
10317                         return -KEY_getpwent;
10318                       }
10319
10320                       goto unknown;
10321
10322                     case 'n':
10323                       if (name[6] == 'a' &&
10324                           name[7] == 'm')
10325                       {                           /* getpwnam   */
10326                         return -KEY_getpwnam;
10327                       }
10328
10329                       goto unknown;
10330
10331                     case 'u':
10332                       if (name[6] == 'i' &&
10333                           name[7] == 'd')
10334                       {                           /* getpwuid   */
10335                         return -KEY_getpwuid;
10336                       }
10337
10338                       goto unknown;
10339
10340                     default:
10341                       goto unknown;
10342                   }
10343                 }
10344
10345                 goto unknown;
10346
10347               default:
10348                 goto unknown;
10349             }
10350           }
10351
10352           goto unknown;
10353
10354         case 'r':
10355           if (name[1] == 'e' &&
10356               name[2] == 'a' &&
10357               name[3] == 'd')
10358           {
10359             switch (name[4])
10360             {
10361               case 'l':
10362                 if (name[5] == 'i' &&
10363                     name[6] == 'n')
10364                 {
10365                   switch (name[7])
10366                   {
10367                     case 'e':
10368                       {                           /* readline   */
10369                         return -KEY_readline;
10370                       }
10371
10372                     case 'k':
10373                       {                           /* readlink   */
10374                         return -KEY_readlink;
10375                       }
10376
10377                     default:
10378                       goto unknown;
10379                   }
10380                 }
10381
10382                 goto unknown;
10383
10384               case 'p':
10385                 if (name[5] == 'i' &&
10386                     name[6] == 'p' &&
10387                     name[7] == 'e')
10388                 {                                 /* readpipe   */
10389                   return -KEY_readpipe;
10390                 }
10391
10392                 goto unknown;
10393
10394               default:
10395                 goto unknown;
10396             }
10397           }
10398
10399           goto unknown;
10400
10401         case 's':
10402           switch (name[1])
10403           {
10404             case 'e':
10405               if (name[2] == 't')
10406               {
10407                 switch (name[3])
10408                 {
10409                   case 'g':
10410                     if (name[4] == 'r' &&
10411                         name[5] == 'e' &&
10412                         name[6] == 'n' &&
10413                         name[7] == 't')
10414                     {                             /* setgrent   */
10415                       return -KEY_setgrent;
10416                     }
10417
10418                     goto unknown;
10419
10420                   case 'p':
10421                     if (name[4] == 'w' &&
10422                         name[5] == 'e' &&
10423                         name[6] == 'n' &&
10424                         name[7] == 't')
10425                     {                             /* setpwent   */
10426                       return -KEY_setpwent;
10427                     }
10428
10429                     goto unknown;
10430
10431                   default:
10432                     goto unknown;
10433                 }
10434               }
10435
10436               goto unknown;
10437
10438             case 'h':
10439               switch (name[2])
10440               {
10441                 case 'm':
10442                   if (name[3] == 'w' &&
10443                       name[4] == 'r' &&
10444                       name[5] == 'i' &&
10445                       name[6] == 't' &&
10446                       name[7] == 'e')
10447                   {                               /* shmwrite   */
10448                     return -KEY_shmwrite;
10449                   }
10450
10451                   goto unknown;
10452
10453                 case 'u':
10454                   if (name[3] == 't' &&
10455                       name[4] == 'd' &&
10456                       name[5] == 'o' &&
10457                       name[6] == 'w' &&
10458                       name[7] == 'n')
10459                   {                               /* shutdown   */
10460                     return -KEY_shutdown;
10461                   }
10462
10463                   goto unknown;
10464
10465                 default:
10466                   goto unknown;
10467               }
10468
10469             case 'y':
10470               if (name[2] == 's' &&
10471                   name[3] == 'w' &&
10472                   name[4] == 'r' &&
10473                   name[5] == 'i' &&
10474                   name[6] == 't' &&
10475                   name[7] == 'e')
10476               {                                   /* syswrite   */
10477                 return -KEY_syswrite;
10478               }
10479
10480               goto unknown;
10481
10482             default:
10483               goto unknown;
10484           }
10485
10486         case 't':
10487           if (name[1] == 'r' &&
10488               name[2] == 'u' &&
10489               name[3] == 'n' &&
10490               name[4] == 'c' &&
10491               name[5] == 'a' &&
10492               name[6] == 't' &&
10493               name[7] == 'e')
10494           {                                       /* truncate   */
10495             return -KEY_truncate;
10496           }
10497
10498           goto unknown;
10499
10500         default:
10501           goto unknown;
10502       }
10503
10504     case 9: /* 9 tokens of length 9 */
10505       switch (name[0])
10506       {
10507         case 'U':
10508           if (name[1] == 'N' &&
10509               name[2] == 'I' &&
10510               name[3] == 'T' &&
10511               name[4] == 'C' &&
10512               name[5] == 'H' &&
10513               name[6] == 'E' &&
10514               name[7] == 'C' &&
10515               name[8] == 'K')
10516           {                                       /* UNITCHECK  */
10517             return KEY_UNITCHECK;
10518           }
10519
10520           goto unknown;
10521
10522         case 'e':
10523           if (name[1] == 'n' &&
10524               name[2] == 'd' &&
10525               name[3] == 'n' &&
10526               name[4] == 'e' &&
10527               name[5] == 't' &&
10528               name[6] == 'e' &&
10529               name[7] == 'n' &&
10530               name[8] == 't')
10531           {                                       /* endnetent  */
10532             return -KEY_endnetent;
10533           }
10534
10535           goto unknown;
10536
10537         case 'g':
10538           if (name[1] == 'e' &&
10539               name[2] == 't' &&
10540               name[3] == 'n' &&
10541               name[4] == 'e' &&
10542               name[5] == 't' &&
10543               name[6] == 'e' &&
10544               name[7] == 'n' &&
10545               name[8] == 't')
10546           {                                       /* getnetent  */
10547             return -KEY_getnetent;
10548           }
10549
10550           goto unknown;
10551
10552         case 'l':
10553           if (name[1] == 'o' &&
10554               name[2] == 'c' &&
10555               name[3] == 'a' &&
10556               name[4] == 'l' &&
10557               name[5] == 't' &&
10558               name[6] == 'i' &&
10559               name[7] == 'm' &&
10560               name[8] == 'e')
10561           {                                       /* localtime  */
10562             return -KEY_localtime;
10563           }
10564
10565           goto unknown;
10566
10567         case 'p':
10568           if (name[1] == 'r' &&
10569               name[2] == 'o' &&
10570               name[3] == 't' &&
10571               name[4] == 'o' &&
10572               name[5] == 't' &&
10573               name[6] == 'y' &&
10574               name[7] == 'p' &&
10575               name[8] == 'e')
10576           {                                       /* prototype  */
10577             return KEY_prototype;
10578           }
10579
10580           goto unknown;
10581
10582         case 'q':
10583           if (name[1] == 'u' &&
10584               name[2] == 'o' &&
10585               name[3] == 't' &&
10586               name[4] == 'e' &&
10587               name[5] == 'm' &&
10588               name[6] == 'e' &&
10589               name[7] == 't' &&
10590               name[8] == 'a')
10591           {                                       /* quotemeta  */
10592             return -KEY_quotemeta;
10593           }
10594
10595           goto unknown;
10596
10597         case 'r':
10598           if (name[1] == 'e' &&
10599               name[2] == 'w' &&
10600               name[3] == 'i' &&
10601               name[4] == 'n' &&
10602               name[5] == 'd' &&
10603               name[6] == 'd' &&
10604               name[7] == 'i' &&
10605               name[8] == 'r')
10606           {                                       /* rewinddir  */
10607             return -KEY_rewinddir;
10608           }
10609
10610           goto unknown;
10611
10612         case 's':
10613           if (name[1] == 'e' &&
10614               name[2] == 't' &&
10615               name[3] == 'n' &&
10616               name[4] == 'e' &&
10617               name[5] == 't' &&
10618               name[6] == 'e' &&
10619               name[7] == 'n' &&
10620               name[8] == 't')
10621           {                                       /* setnetent  */
10622             return -KEY_setnetent;
10623           }
10624
10625           goto unknown;
10626
10627         case 'w':
10628           if (name[1] == 'a' &&
10629               name[2] == 'n' &&
10630               name[3] == 't' &&
10631               name[4] == 'a' &&
10632               name[5] == 'r' &&
10633               name[6] == 'r' &&
10634               name[7] == 'a' &&
10635               name[8] == 'y')
10636           {                                       /* wantarray  */
10637             return -KEY_wantarray;
10638           }
10639
10640           goto unknown;
10641
10642         default:
10643           goto unknown;
10644       }
10645
10646     case 10: /* 9 tokens of length 10 */
10647       switch (name[0])
10648       {
10649         case 'e':
10650           if (name[1] == 'n' &&
10651               name[2] == 'd')
10652           {
10653             switch (name[3])
10654             {
10655               case 'h':
10656                 if (name[4] == 'o' &&
10657                     name[5] == 's' &&
10658                     name[6] == 't' &&
10659                     name[7] == 'e' &&
10660                     name[8] == 'n' &&
10661                     name[9] == 't')
10662                 {                                 /* endhostent */
10663                   return -KEY_endhostent;
10664                 }
10665
10666                 goto unknown;
10667
10668               case 's':
10669                 if (name[4] == 'e' &&
10670                     name[5] == 'r' &&
10671                     name[6] == 'v' &&
10672                     name[7] == 'e' &&
10673                     name[8] == 'n' &&
10674                     name[9] == 't')
10675                 {                                 /* endservent */
10676                   return -KEY_endservent;
10677                 }
10678
10679                 goto unknown;
10680
10681               default:
10682                 goto unknown;
10683             }
10684           }
10685
10686           goto unknown;
10687
10688         case 'g':
10689           if (name[1] == 'e' &&
10690               name[2] == 't')
10691           {
10692             switch (name[3])
10693             {
10694               case 'h':
10695                 if (name[4] == 'o' &&
10696                     name[5] == 's' &&
10697                     name[6] == 't' &&
10698                     name[7] == 'e' &&
10699                     name[8] == 'n' &&
10700                     name[9] == 't')
10701                 {                                 /* gethostent */
10702                   return -KEY_gethostent;
10703                 }
10704
10705                 goto unknown;
10706
10707               case 's':
10708                 switch (name[4])
10709                 {
10710                   case 'e':
10711                     if (name[5] == 'r' &&
10712                         name[6] == 'v' &&
10713                         name[7] == 'e' &&
10714                         name[8] == 'n' &&
10715                         name[9] == 't')
10716                     {                             /* getservent */
10717                       return -KEY_getservent;
10718                     }
10719
10720                     goto unknown;
10721
10722                   case 'o':
10723                     if (name[5] == 'c' &&
10724                         name[6] == 'k' &&
10725                         name[7] == 'o' &&
10726                         name[8] == 'p' &&
10727                         name[9] == 't')
10728                     {                             /* getsockopt */
10729                       return -KEY_getsockopt;
10730                     }
10731
10732                     goto unknown;
10733
10734                   default:
10735                     goto unknown;
10736                 }
10737
10738               default:
10739                 goto unknown;
10740             }
10741           }
10742
10743           goto unknown;
10744
10745         case 's':
10746           switch (name[1])
10747           {
10748             case 'e':
10749               if (name[2] == 't')
10750               {
10751                 switch (name[3])
10752                 {
10753                   case 'h':
10754                     if (name[4] == 'o' &&
10755                         name[5] == 's' &&
10756                         name[6] == 't' &&
10757                         name[7] == 'e' &&
10758                         name[8] == 'n' &&
10759                         name[9] == 't')
10760                     {                             /* sethostent */
10761                       return -KEY_sethostent;
10762                     }
10763
10764                     goto unknown;
10765
10766                   case 's':
10767                     switch (name[4])
10768                     {
10769                       case 'e':
10770                         if (name[5] == 'r' &&
10771                             name[6] == 'v' &&
10772                             name[7] == 'e' &&
10773                             name[8] == 'n' &&
10774                             name[9] == 't')
10775                         {                         /* setservent */
10776                           return -KEY_setservent;
10777                         }
10778
10779                         goto unknown;
10780
10781                       case 'o':
10782                         if (name[5] == 'c' &&
10783                             name[6] == 'k' &&
10784                             name[7] == 'o' &&
10785                             name[8] == 'p' &&
10786                             name[9] == 't')
10787                         {                         /* setsockopt */
10788                           return -KEY_setsockopt;
10789                         }
10790
10791                         goto unknown;
10792
10793                       default:
10794                         goto unknown;
10795                     }
10796
10797                   default:
10798                     goto unknown;
10799                 }
10800               }
10801
10802               goto unknown;
10803
10804             case 'o':
10805               if (name[2] == 'c' &&
10806                   name[3] == 'k' &&
10807                   name[4] == 'e' &&
10808                   name[5] == 't' &&
10809                   name[6] == 'p' &&
10810                   name[7] == 'a' &&
10811                   name[8] == 'i' &&
10812                   name[9] == 'r')
10813               {                                   /* socketpair */
10814                 return -KEY_socketpair;
10815               }
10816
10817               goto unknown;
10818
10819             default:
10820               goto unknown;
10821           }
10822
10823         default:
10824           goto unknown;
10825       }
10826
10827     case 11: /* 8 tokens of length 11 */
10828       switch (name[0])
10829       {
10830         case '_':
10831           if (name[1] == '_' &&
10832               name[2] == 'P' &&
10833               name[3] == 'A' &&
10834               name[4] == 'C' &&
10835               name[5] == 'K' &&
10836               name[6] == 'A' &&
10837               name[7] == 'G' &&
10838               name[8] == 'E' &&
10839               name[9] == '_' &&
10840               name[10] == '_')
10841           {                                       /* __PACKAGE__ */
10842             return -KEY___PACKAGE__;
10843           }
10844
10845           goto unknown;
10846
10847         case 'e':
10848           if (name[1] == 'n' &&
10849               name[2] == 'd' &&
10850               name[3] == 'p' &&
10851               name[4] == 'r' &&
10852               name[5] == 'o' &&
10853               name[6] == 't' &&
10854               name[7] == 'o' &&
10855               name[8] == 'e' &&
10856               name[9] == 'n' &&
10857               name[10] == 't')
10858           {                                       /* endprotoent */
10859             return -KEY_endprotoent;
10860           }
10861
10862           goto unknown;
10863
10864         case 'g':
10865           if (name[1] == 'e' &&
10866               name[2] == 't')
10867           {
10868             switch (name[3])
10869             {
10870               case 'p':
10871                 switch (name[4])
10872                 {
10873                   case 'e':
10874                     if (name[5] == 'e' &&
10875                         name[6] == 'r' &&
10876                         name[7] == 'n' &&
10877                         name[8] == 'a' &&
10878                         name[9] == 'm' &&
10879                         name[10] == 'e')
10880                     {                             /* getpeername */
10881                       return -KEY_getpeername;
10882                     }
10883
10884                     goto unknown;
10885
10886                   case 'r':
10887                     switch (name[5])
10888                     {
10889                       case 'i':
10890                         if (name[6] == 'o' &&
10891                             name[7] == 'r' &&
10892                             name[8] == 'i' &&
10893                             name[9] == 't' &&
10894                             name[10] == 'y')
10895                         {                         /* getpriority */
10896                           return -KEY_getpriority;
10897                         }
10898
10899                         goto unknown;
10900
10901                       case 'o':
10902                         if (name[6] == 't' &&
10903                             name[7] == 'o' &&
10904                             name[8] == 'e' &&
10905                             name[9] == 'n' &&
10906                             name[10] == 't')
10907                         {                         /* getprotoent */
10908                           return -KEY_getprotoent;
10909                         }
10910
10911                         goto unknown;
10912
10913                       default:
10914                         goto unknown;
10915                     }
10916
10917                   default:
10918                     goto unknown;
10919                 }
10920
10921               case 's':
10922                 if (name[4] == 'o' &&
10923                     name[5] == 'c' &&
10924                     name[6] == 'k' &&
10925                     name[7] == 'n' &&
10926                     name[8] == 'a' &&
10927                     name[9] == 'm' &&
10928                     name[10] == 'e')
10929                 {                                 /* getsockname */
10930                   return -KEY_getsockname;
10931                 }
10932
10933                 goto unknown;
10934
10935               default:
10936                 goto unknown;
10937             }
10938           }
10939
10940           goto unknown;
10941
10942         case 's':
10943           if (name[1] == 'e' &&
10944               name[2] == 't' &&
10945               name[3] == 'p' &&
10946               name[4] == 'r')
10947           {
10948             switch (name[5])
10949             {
10950               case 'i':
10951                 if (name[6] == 'o' &&
10952                     name[7] == 'r' &&
10953                     name[8] == 'i' &&
10954                     name[9] == 't' &&
10955                     name[10] == 'y')
10956                 {                                 /* setpriority */
10957                   return -KEY_setpriority;
10958                 }
10959
10960                 goto unknown;
10961
10962               case 'o':
10963                 if (name[6] == 't' &&
10964                     name[7] == 'o' &&
10965                     name[8] == 'e' &&
10966                     name[9] == 'n' &&
10967                     name[10] == 't')
10968                 {                                 /* setprotoent */
10969                   return -KEY_setprotoent;
10970                 }
10971
10972                 goto unknown;
10973
10974               default:
10975                 goto unknown;
10976             }
10977           }
10978
10979           goto unknown;
10980
10981         default:
10982           goto unknown;
10983       }
10984
10985     case 12: /* 2 tokens of length 12 */
10986       if (name[0] == 'g' &&
10987           name[1] == 'e' &&
10988           name[2] == 't' &&
10989           name[3] == 'n' &&
10990           name[4] == 'e' &&
10991           name[5] == 't' &&
10992           name[6] == 'b' &&
10993           name[7] == 'y')
10994       {
10995         switch (name[8])
10996         {
10997           case 'a':
10998             if (name[9] == 'd' &&
10999                 name[10] == 'd' &&
11000                 name[11] == 'r')
11001             {                                     /* getnetbyaddr */
11002               return -KEY_getnetbyaddr;
11003             }
11004
11005             goto unknown;
11006
11007           case 'n':
11008             if (name[9] == 'a' &&
11009                 name[10] == 'm' &&
11010                 name[11] == 'e')
11011             {                                     /* getnetbyname */
11012               return -KEY_getnetbyname;
11013             }
11014
11015             goto unknown;
11016
11017           default:
11018             goto unknown;
11019         }
11020       }
11021
11022       goto unknown;
11023
11024     case 13: /* 4 tokens of length 13 */
11025       if (name[0] == 'g' &&
11026           name[1] == 'e' &&
11027           name[2] == 't')
11028       {
11029         switch (name[3])
11030         {
11031           case 'h':
11032             if (name[4] == 'o' &&
11033                 name[5] == 's' &&
11034                 name[6] == 't' &&
11035                 name[7] == 'b' &&
11036                 name[8] == 'y')
11037             {
11038               switch (name[9])
11039               {
11040                 case 'a':
11041                   if (name[10] == 'd' &&
11042                       name[11] == 'd' &&
11043                       name[12] == 'r')
11044                   {                               /* gethostbyaddr */
11045                     return -KEY_gethostbyaddr;
11046                   }
11047
11048                   goto unknown;
11049
11050                 case 'n':
11051                   if (name[10] == 'a' &&
11052                       name[11] == 'm' &&
11053                       name[12] == 'e')
11054                   {                               /* gethostbyname */
11055                     return -KEY_gethostbyname;
11056                   }
11057
11058                   goto unknown;
11059
11060                 default:
11061                   goto unknown;
11062               }
11063             }
11064
11065             goto unknown;
11066
11067           case 's':
11068             if (name[4] == 'e' &&
11069                 name[5] == 'r' &&
11070                 name[6] == 'v' &&
11071                 name[7] == 'b' &&
11072                 name[8] == 'y')
11073             {
11074               switch (name[9])
11075               {
11076                 case 'n':
11077                   if (name[10] == 'a' &&
11078                       name[11] == 'm' &&
11079                       name[12] == 'e')
11080                   {                               /* getservbyname */
11081                     return -KEY_getservbyname;
11082                   }
11083
11084                   goto unknown;
11085
11086                 case 'p':
11087                   if (name[10] == 'o' &&
11088                       name[11] == 'r' &&
11089                       name[12] == 't')
11090                   {                               /* getservbyport */
11091                     return -KEY_getservbyport;
11092                   }
11093
11094                   goto unknown;
11095
11096                 default:
11097                   goto unknown;
11098               }
11099             }
11100
11101             goto unknown;
11102
11103           default:
11104             goto unknown;
11105         }
11106       }
11107
11108       goto unknown;
11109
11110     case 14: /* 1 tokens of length 14 */
11111       if (name[0] == 'g' &&
11112           name[1] == 'e' &&
11113           name[2] == 't' &&
11114           name[3] == 'p' &&
11115           name[4] == 'r' &&
11116           name[5] == 'o' &&
11117           name[6] == 't' &&
11118           name[7] == 'o' &&
11119           name[8] == 'b' &&
11120           name[9] == 'y' &&
11121           name[10] == 'n' &&
11122           name[11] == 'a' &&
11123           name[12] == 'm' &&
11124           name[13] == 'e')
11125       {                                           /* getprotobyname */
11126         return -KEY_getprotobyname;
11127       }
11128
11129       goto unknown;
11130
11131     case 16: /* 1 tokens of length 16 */
11132       if (name[0] == 'g' &&
11133           name[1] == 'e' &&
11134           name[2] == 't' &&
11135           name[3] == 'p' &&
11136           name[4] == 'r' &&
11137           name[5] == 'o' &&
11138           name[6] == 't' &&
11139           name[7] == 'o' &&
11140           name[8] == 'b' &&
11141           name[9] == 'y' &&
11142           name[10] == 'n' &&
11143           name[11] == 'u' &&
11144           name[12] == 'm' &&
11145           name[13] == 'b' &&
11146           name[14] == 'e' &&
11147           name[15] == 'r')
11148       {                                           /* getprotobynumber */
11149         return -KEY_getprotobynumber;
11150       }
11151
11152       goto unknown;
11153
11154     default:
11155       goto unknown;
11156   }
11157
11158 unknown:
11159   return 0;
11160 }
11161
11162 STATIC void
11163 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11164 {
11165     dVAR;
11166
11167     PERL_ARGS_ASSERT_CHECKCOMMA;
11168
11169     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11170         if (ckWARN(WARN_SYNTAX)) {
11171             int level = 1;
11172             const char *w;
11173             for (w = s+2; *w && level; w++) {
11174                 if (*w == '(')
11175                     ++level;
11176                 else if (*w == ')')
11177                     --level;
11178             }
11179             while (isSPACE(*w))
11180                 ++w;
11181             /* the list of chars below is for end of statements or
11182              * block / parens, boolean operators (&&, ||, //) and branch
11183              * constructs (or, and, if, until, unless, while, err, for).
11184              * Not a very solid hack... */
11185             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11186                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11187                             "%s (...) interpreted as function",name);
11188         }
11189     }
11190     while (s < PL_bufend && isSPACE(*s))
11191         s++;
11192     if (*s == '(')
11193         s++;
11194     while (s < PL_bufend && isSPACE(*s))
11195         s++;
11196     if (isIDFIRST_lazy_if(s,UTF)) {
11197         const char * const w = s++;
11198         while (isALNUM_lazy_if(s,UTF))
11199             s++;
11200         while (s < PL_bufend && isSPACE(*s))
11201             s++;
11202         if (*s == ',') {
11203             GV* gv;
11204             if (keyword(w, s - w, 0))
11205                 return;
11206
11207             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11208             if (gv && GvCVu(gv))
11209                 return;
11210             Perl_croak(aTHX_ "No comma allowed after %s", what);
11211         }
11212     }
11213 }
11214
11215 /* Either returns sv, or mortalizes sv and returns a new SV*.
11216    Best used as sv=new_constant(..., sv, ...).
11217    If s, pv are NULL, calls subroutine with one argument,
11218    and type is used with error messages only. */
11219
11220 STATIC SV *
11221 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11222                SV *sv, SV *pv, const char *type, STRLEN typelen)
11223 {
11224     dVAR; dSP;
11225     HV * const table = GvHV(PL_hintgv);          /* ^H */
11226     SV *res;
11227     SV **cvp;
11228     SV *cv, *typesv;
11229     const char *why1 = "", *why2 = "", *why3 = "";
11230
11231     PERL_ARGS_ASSERT_NEW_CONSTANT;
11232
11233     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11234         SV *msg;
11235
11236         why2 = (const char *)
11237             (strEQ(key,"charnames")
11238              ? "(possibly a missing \"use charnames ...\")"
11239              : "");
11240         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11241                             (type ? type: "undef"), why2);
11242
11243         /* This is convoluted and evil ("goto considered harmful")
11244          * but I do not understand the intricacies of all the different
11245          * failure modes of %^H in here.  The goal here is to make
11246          * the most probable error message user-friendly. --jhi */
11247
11248         goto msgdone;
11249
11250     report:
11251         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11252                             (type ? type: "undef"), why1, why2, why3);
11253     msgdone:
11254         yyerror(SvPVX_const(msg));
11255         SvREFCNT_dec(msg);
11256         return sv;
11257     }
11258     cvp = hv_fetch(table, key, keylen, FALSE);
11259     if (!cvp || !SvOK(*cvp)) {
11260         why1 = "$^H{";
11261         why2 = key;
11262         why3 = "} is not defined";
11263         goto report;
11264     }
11265     sv_2mortal(sv);                     /* Parent created it permanently */
11266     cv = *cvp;
11267     if (!pv && s)
11268         pv = newSVpvn_flags(s, len, SVs_TEMP);
11269     if (type && pv)
11270         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11271     else
11272         typesv = &PL_sv_undef;
11273
11274     PUSHSTACKi(PERLSI_OVERLOAD);
11275     ENTER ;
11276     SAVETMPS;
11277
11278     PUSHMARK(SP) ;
11279     EXTEND(sp, 3);
11280     if (pv)
11281         PUSHs(pv);
11282     PUSHs(sv);
11283     if (pv)
11284         PUSHs(typesv);
11285     PUTBACK;
11286     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11287
11288     SPAGAIN ;
11289
11290     /* Check the eval first */
11291     if (!PL_in_eval && SvTRUE(ERRSV)) {
11292         sv_catpvs(ERRSV, "Propagated");
11293         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11294         (void)POPs;
11295         res = SvREFCNT_inc_simple(sv);
11296     }
11297     else {
11298         res = POPs;
11299         SvREFCNT_inc_simple_void(res);
11300     }
11301
11302     PUTBACK ;
11303     FREETMPS ;
11304     LEAVE ;
11305     POPSTACK;
11306
11307     if (!SvOK(res)) {
11308         why1 = "Call to &{$^H{";
11309         why2 = key;
11310         why3 = "}} did not return a defined value";
11311         sv = res;
11312         goto report;
11313     }
11314
11315     return res;
11316 }
11317
11318 /* Returns a NUL terminated string, with the length of the string written to
11319    *slp
11320    */
11321 STATIC char *
11322 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11323 {
11324     dVAR;
11325     register char *d = dest;
11326     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11327
11328     PERL_ARGS_ASSERT_SCAN_WORD;
11329
11330     for (;;) {
11331         if (d >= e)
11332             Perl_croak(aTHX_ ident_too_long);
11333         if (isALNUM(*s))        /* UTF handled below */
11334             *d++ = *s++;
11335         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11336             *d++ = ':';
11337             *d++ = ':';
11338             s++;
11339         }
11340         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11341             *d++ = *s++;
11342             *d++ = *s++;
11343         }
11344         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11345             char *t = s + UTF8SKIP(s);
11346             size_t len;
11347             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11348                 t += UTF8SKIP(t);
11349             len = t - s;
11350             if (d + len > e)
11351                 Perl_croak(aTHX_ ident_too_long);
11352             Copy(s, d, len, char);
11353             d += len;
11354             s = t;
11355         }
11356         else {
11357             *d = '\0';
11358             *slp = d - dest;
11359             return s;
11360         }
11361     }
11362 }
11363
11364 STATIC char *
11365 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11366 {
11367     dVAR;
11368     char *bracket = NULL;
11369     char funny = *s++;
11370     register char *d = dest;
11371     register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
11372
11373     PERL_ARGS_ASSERT_SCAN_IDENT;
11374
11375     if (isSPACE(*s))
11376         s = PEEKSPACE(s);
11377     if (isDIGIT(*s)) {
11378         while (isDIGIT(*s)) {
11379             if (d >= e)
11380                 Perl_croak(aTHX_ ident_too_long);
11381             *d++ = *s++;
11382         }
11383     }
11384     else {
11385         for (;;) {
11386             if (d >= e)
11387                 Perl_croak(aTHX_ ident_too_long);
11388             if (isALNUM(*s))    /* UTF handled below */
11389                 *d++ = *s++;
11390             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11391                 *d++ = ':';
11392                 *d++ = ':';
11393                 s++;
11394             }
11395             else if (*s == ':' && s[1] == ':') {
11396                 *d++ = *s++;
11397                 *d++ = *s++;
11398             }
11399             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11400                 char *t = s + UTF8SKIP(s);
11401                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11402                     t += UTF8SKIP(t);
11403                 if (d + (t - s) > e)
11404                     Perl_croak(aTHX_ ident_too_long);
11405                 Copy(s, d, t - s, char);
11406                 d += t - s;
11407                 s = t;
11408             }
11409             else
11410                 break;
11411         }
11412     }
11413     *d = '\0';
11414     d = dest;
11415     if (*d) {
11416         if (PL_lex_state != LEX_NORMAL)
11417             PL_lex_state = LEX_INTERPENDMAYBE;
11418         return s;
11419     }
11420     if (*s == '$' && s[1] &&
11421         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11422     {
11423         return s;
11424     }
11425     if (*s == '{') {
11426         bracket = s;
11427         s++;
11428     }
11429     else if (ck_uni)
11430         check_uni();
11431     if (s < send)
11432         *d = *s++;
11433     d[1] = '\0';
11434     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11435         *d = toCTRL(*s);
11436         s++;
11437     }
11438     if (bracket) {
11439         if (isSPACE(s[-1])) {
11440             while (s < send) {
11441                 const char ch = *s++;
11442                 if (!SPACE_OR_TAB(ch)) {
11443                     *d = ch;
11444                     break;
11445                 }
11446             }
11447         }
11448         if (isIDFIRST_lazy_if(d,UTF)) {
11449             d++;
11450             if (UTF) {
11451                 char *end = s;
11452                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11453                     end += UTF8SKIP(end);
11454                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11455                         end += UTF8SKIP(end);
11456                 }
11457                 Copy(s, d, end - s, char);
11458                 d += end - s;
11459                 s = end;
11460             }
11461             else {
11462                 while ((isALNUM(*s) || *s == ':') && d < e)
11463                     *d++ = *s++;
11464                 if (d >= e)
11465                     Perl_croak(aTHX_ ident_too_long);
11466             }
11467             *d = '\0';
11468             while (s < send && SPACE_OR_TAB(*s))
11469                 s++;
11470             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11471                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11472                     const char * const brack =
11473                         (const char *)
11474                         ((*s == '[') ? "[...]" : "{...}");
11475                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11476                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11477                         funny, dest, brack, funny, dest, brack);
11478                 }
11479                 bracket++;
11480                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11481                 return s;
11482             }
11483         }
11484         /* Handle extended ${^Foo} variables
11485          * 1999-02-27 mjd-perl-patch@plover.com */
11486         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11487                  && isALNUM(*s))
11488         {
11489             d++;
11490             while (isALNUM(*s) && d < e) {
11491                 *d++ = *s++;
11492             }
11493             if (d >= e)
11494                 Perl_croak(aTHX_ ident_too_long);
11495             *d = '\0';
11496         }
11497         if (*s == '}') {
11498             s++;
11499             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11500                 PL_lex_state = LEX_INTERPEND;
11501                 PL_expect = XREF;
11502             }
11503             if (PL_lex_state == LEX_NORMAL) {
11504                 if (ckWARN(WARN_AMBIGUOUS) &&
11505                     (keyword(dest, d - dest, 0)
11506                      || get_cvn_flags(dest, d - dest, 0)))
11507                 {
11508                     if (funny == '#')
11509                         funny = '@';
11510                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11511                         "Ambiguous use of %c{%s} resolved to %c%s",
11512                         funny, dest, funny, dest);
11513                 }
11514             }
11515         }
11516         else {
11517             s = bracket;                /* let the parser handle it */
11518             *dest = '\0';
11519         }
11520     }
11521     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11522         PL_lex_state = LEX_INTERPEND;
11523     return s;
11524 }
11525
11526 static U32
11527 S_pmflag(U32 pmfl, const char ch) {
11528     switch (ch) {
11529         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11530     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11531     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11532     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11533     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11534     }
11535     return pmfl;
11536 }
11537
11538 void
11539 Perl_pmflag(pTHX_ U32* pmfl, int ch)
11540 {
11541     PERL_ARGS_ASSERT_PMFLAG;
11542
11543     Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11544                      "Perl_pmflag() is deprecated, and will be removed from the XS API");
11545
11546     if (ch<256) {
11547         *pmfl = S_pmflag(*pmfl, (char)ch);
11548     }
11549 }
11550
11551 STATIC char *
11552 S_scan_pat(pTHX_ char *start, I32 type)
11553 {
11554     dVAR;
11555     PMOP *pm;
11556     char *s = scan_str(start,!!PL_madskills,FALSE);
11557     const char * const valid_flags =
11558         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11559 #ifdef PERL_MAD
11560     char *modstart;
11561 #endif
11562
11563     PERL_ARGS_ASSERT_SCAN_PAT;
11564
11565     if (!s) {
11566         const char * const delimiter = skipspace(start);
11567         Perl_croak(aTHX_
11568                    (const char *)
11569                    (*delimiter == '?'
11570                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11571                     : "Search pattern not terminated" ));
11572     }
11573
11574     pm = (PMOP*)newPMOP(type, 0);
11575     if (PL_multi_open == '?') {
11576         /* This is the only point in the code that sets PMf_ONCE:  */
11577         pm->op_pmflags |= PMf_ONCE;
11578
11579         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11580            allows us to restrict the list needed by reset to just the ??
11581            matches.  */
11582         assert(type != OP_TRANS);
11583         if (PL_curstash) {
11584             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11585             U32 elements;
11586             if (!mg) {
11587                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11588                                  0);
11589             }
11590             elements = mg->mg_len / sizeof(PMOP**);
11591             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11592             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11593             mg->mg_len = elements * sizeof(PMOP**);
11594             PmopSTASH_set(pm,PL_curstash);
11595         }
11596     }
11597 #ifdef PERL_MAD
11598     modstart = s;
11599 #endif
11600     while (*s && strchr(valid_flags, *s))
11601         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11602 #ifdef PERL_MAD
11603     if (PL_madskills && modstart != s) {
11604         SV* tmptoken = newSVpvn(modstart, s - modstart);
11605         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11606     }
11607 #endif
11608     /* issue a warning if /c is specified,but /g is not */
11609     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11610     {
11611         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11612                        "Use of /c modifier is meaningless without /g" );
11613     }
11614
11615     PL_lex_op = (OP*)pm;
11616     pl_yylval.ival = OP_MATCH;
11617     return s;
11618 }
11619
11620 STATIC char *
11621 S_scan_subst(pTHX_ char *start)
11622 {
11623     dVAR;
11624     register char *s;
11625     register PMOP *pm;
11626     I32 first_start;
11627     I32 es = 0;
11628 #ifdef PERL_MAD
11629     char *modstart;
11630 #endif
11631
11632     PERL_ARGS_ASSERT_SCAN_SUBST;
11633
11634     pl_yylval.ival = OP_NULL;
11635
11636     s = scan_str(start,!!PL_madskills,FALSE);
11637
11638     if (!s)
11639         Perl_croak(aTHX_ "Substitution pattern not terminated");
11640
11641     if (s[-1] == PL_multi_open)
11642         s--;
11643 #ifdef PERL_MAD
11644     if (PL_madskills) {
11645         CURMAD('q', PL_thisopen);
11646         CURMAD('_', PL_thiswhite);
11647         CURMAD('E', PL_thisstuff);
11648         CURMAD('Q', PL_thisclose);
11649         PL_realtokenstart = s - SvPVX(PL_linestr);
11650     }
11651 #endif
11652
11653     first_start = PL_multi_start;
11654     s = scan_str(s,!!PL_madskills,FALSE);
11655     if (!s) {
11656         if (PL_lex_stuff) {
11657             SvREFCNT_dec(PL_lex_stuff);
11658             PL_lex_stuff = NULL;
11659         }
11660         Perl_croak(aTHX_ "Substitution replacement not terminated");
11661     }
11662     PL_multi_start = first_start;       /* so whole substitution is taken together */
11663
11664     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11665
11666 #ifdef PERL_MAD
11667     if (PL_madskills) {
11668         CURMAD('z', PL_thisopen);
11669         CURMAD('R', PL_thisstuff);
11670         CURMAD('Z', PL_thisclose);
11671     }
11672     modstart = s;
11673 #endif
11674
11675     while (*s) {
11676         if (*s == EXEC_PAT_MOD) {
11677             s++;
11678             es++;
11679         }
11680         else if (strchr(S_PAT_MODS, *s))
11681             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11682         else
11683             break;
11684     }
11685
11686 #ifdef PERL_MAD
11687     if (PL_madskills) {
11688         if (modstart != s)
11689             curmad('m', newSVpvn(modstart, s - modstart));
11690         append_madprops(PL_thismad, (OP*)pm, 0);
11691         PL_thismad = 0;
11692     }
11693 #endif
11694     if ((pm->op_pmflags & PMf_CONTINUE)) {
11695         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11696     }
11697
11698     if (es) {
11699         SV * const repl = newSVpvs("");
11700
11701         PL_sublex_info.super_bufptr = s;
11702         PL_sublex_info.super_bufend = PL_bufend;
11703         PL_multi_end = 0;
11704         pm->op_pmflags |= PMf_EVAL;
11705         while (es-- > 0) {
11706             if (es)
11707                 sv_catpvs(repl, "eval ");
11708             else
11709                 sv_catpvs(repl, "do ");
11710         }
11711         sv_catpvs(repl, "{");
11712         sv_catsv(repl, PL_lex_repl);
11713         if (strchr(SvPVX(PL_lex_repl), '#'))
11714             sv_catpvs(repl, "\n");
11715         sv_catpvs(repl, "}");
11716         SvEVALED_on(repl);
11717         SvREFCNT_dec(PL_lex_repl);
11718         PL_lex_repl = repl;
11719     }
11720
11721     PL_lex_op = (OP*)pm;
11722     pl_yylval.ival = OP_SUBST;
11723     return s;
11724 }
11725
11726 STATIC char *
11727 S_scan_trans(pTHX_ char *start)
11728 {
11729     dVAR;
11730     register char* s;
11731     OP *o;
11732     short *tbl;
11733     U8 squash;
11734     U8 del;
11735     U8 complement;
11736 #ifdef PERL_MAD
11737     char *modstart;
11738 #endif
11739
11740     PERL_ARGS_ASSERT_SCAN_TRANS;
11741
11742     pl_yylval.ival = OP_NULL;
11743
11744     s = scan_str(start,!!PL_madskills,FALSE);
11745     if (!s)
11746         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11747
11748     if (s[-1] == PL_multi_open)
11749         s--;
11750 #ifdef PERL_MAD
11751     if (PL_madskills) {
11752         CURMAD('q', PL_thisopen);
11753         CURMAD('_', PL_thiswhite);
11754         CURMAD('E', PL_thisstuff);
11755         CURMAD('Q', PL_thisclose);
11756         PL_realtokenstart = s - SvPVX(PL_linestr);
11757     }
11758 #endif
11759
11760     s = scan_str(s,!!PL_madskills,FALSE);
11761     if (!s) {
11762         if (PL_lex_stuff) {
11763             SvREFCNT_dec(PL_lex_stuff);
11764             PL_lex_stuff = NULL;
11765         }
11766         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11767     }
11768     if (PL_madskills) {
11769         CURMAD('z', PL_thisopen);
11770         CURMAD('R', PL_thisstuff);
11771         CURMAD('Z', PL_thisclose);
11772     }
11773
11774     complement = del = squash = 0;
11775 #ifdef PERL_MAD
11776     modstart = s;
11777 #endif
11778     while (1) {
11779         switch (*s) {
11780         case 'c':
11781             complement = OPpTRANS_COMPLEMENT;
11782             break;
11783         case 'd':
11784             del = OPpTRANS_DELETE;
11785             break;
11786         case 's':
11787             squash = OPpTRANS_SQUASH;
11788             break;
11789         default:
11790             goto no_more;
11791         }
11792         s++;
11793     }
11794   no_more:
11795
11796     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11797     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11798     o->op_private &= ~OPpTRANS_ALL;
11799     o->op_private |= del|squash|complement|
11800       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11801       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11802
11803     PL_lex_op = o;
11804     pl_yylval.ival = OP_TRANS;
11805
11806 #ifdef PERL_MAD
11807     if (PL_madskills) {
11808         if (modstart != s)
11809             curmad('m', newSVpvn(modstart, s - modstart));
11810         append_madprops(PL_thismad, o, 0);
11811         PL_thismad = 0;
11812     }
11813 #endif
11814
11815     return s;
11816 }
11817
11818 STATIC char *
11819 S_scan_heredoc(pTHX_ register char *s)
11820 {
11821     dVAR;
11822     SV *herewas;
11823     I32 op_type = OP_SCALAR;
11824     I32 len;
11825     SV *tmpstr;
11826     char term;
11827     const char *found_newline;
11828     register char *d;
11829     register char *e;
11830     char *peek;
11831     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11832 #ifdef PERL_MAD
11833     I32 stuffstart = s - SvPVX(PL_linestr);
11834     char *tstart;
11835
11836     PL_realtokenstart = -1;
11837 #endif
11838
11839     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11840
11841     s += 2;
11842     d = PL_tokenbuf;
11843     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11844     if (!outer)
11845         *d++ = '\n';
11846     peek = s;
11847     while (SPACE_OR_TAB(*peek))
11848         peek++;
11849     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11850         s = peek;
11851         term = *s++;
11852         s = delimcpy(d, e, s, PL_bufend, term, &len);
11853         d += len;
11854         if (s < PL_bufend)
11855             s++;
11856     }
11857     else {
11858         if (*s == '\\')
11859             s++, term = '\'';
11860         else
11861             term = '"';
11862         if (!isALNUM_lazy_if(s,UTF))
11863             deprecate("bare << to mean <<\"\"");
11864         for (; isALNUM_lazy_if(s,UTF); s++) {
11865             if (d < e)
11866                 *d++ = *s;
11867         }
11868     }
11869     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11870         Perl_croak(aTHX_ "Delimiter for here document is too long");
11871     *d++ = '\n';
11872     *d = '\0';
11873     len = d - PL_tokenbuf;
11874
11875 #ifdef PERL_MAD
11876     if (PL_madskills) {
11877         tstart = PL_tokenbuf + !outer;
11878         PL_thisclose = newSVpvn(tstart, len - !outer);
11879         tstart = SvPVX(PL_linestr) + stuffstart;
11880         PL_thisopen = newSVpvn(tstart, s - tstart);
11881         stuffstart = s - SvPVX(PL_linestr);
11882     }
11883 #endif
11884 #ifndef PERL_STRICT_CR
11885     d = strchr(s, '\r');
11886     if (d) {
11887         char * const olds = s;
11888         s = d;
11889         while (s < PL_bufend) {
11890             if (*s == '\r') {
11891                 *d++ = '\n';
11892                 if (*++s == '\n')
11893                     s++;
11894             }
11895             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11896                 *d++ = *s++;
11897                 s++;
11898             }
11899             else
11900                 *d++ = *s++;
11901         }
11902         *d = '\0';
11903         PL_bufend = d;
11904         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11905         s = olds;
11906     }
11907 #endif
11908 #ifdef PERL_MAD
11909     found_newline = 0;
11910 #endif
11911     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11912         herewas = newSVpvn(s,PL_bufend-s);
11913     }
11914     else {
11915 #ifdef PERL_MAD
11916         herewas = newSVpvn(s-1,found_newline-s+1);
11917 #else
11918         s--;
11919         herewas = newSVpvn(s,found_newline-s);
11920 #endif
11921     }
11922 #ifdef PERL_MAD
11923     if (PL_madskills) {
11924         tstart = SvPVX(PL_linestr) + stuffstart;
11925         if (PL_thisstuff)
11926             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11927         else
11928             PL_thisstuff = newSVpvn(tstart, s - tstart);
11929     }
11930 #endif
11931     s += SvCUR(herewas);
11932
11933 #ifdef PERL_MAD
11934     stuffstart = s - SvPVX(PL_linestr);
11935
11936     if (found_newline)
11937         s--;
11938 #endif
11939
11940     tmpstr = newSV_type(SVt_PVIV);
11941     SvGROW(tmpstr, 80);
11942     if (term == '\'') {
11943         op_type = OP_CONST;
11944         SvIV_set(tmpstr, -1);
11945     }
11946     else if (term == '`') {
11947         op_type = OP_BACKTICK;
11948         SvIV_set(tmpstr, '\\');
11949     }
11950
11951     CLINE;
11952     PL_multi_start = CopLINE(PL_curcop);
11953     PL_multi_open = PL_multi_close = '<';
11954     term = *PL_tokenbuf;
11955     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11956         char * const bufptr = PL_sublex_info.super_bufptr;
11957         char * const bufend = PL_sublex_info.super_bufend;
11958         char * const olds = s - SvCUR(herewas);
11959         s = strchr(bufptr, '\n');
11960         if (!s)
11961             s = bufend;
11962         d = s;
11963         while (s < bufend &&
11964           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11965             if (*s++ == '\n')
11966                 CopLINE_inc(PL_curcop);
11967         }
11968         if (s >= bufend) {
11969             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11970             missingterm(PL_tokenbuf);
11971         }
11972         sv_setpvn(herewas,bufptr,d-bufptr+1);
11973         sv_setpvn(tmpstr,d+1,s-d);
11974         s += len - 1;
11975         sv_catpvn(herewas,s,bufend-s);
11976         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11977
11978         s = olds;
11979         goto retval;
11980     }
11981     else if (!outer) {
11982         d = s;
11983         while (s < PL_bufend &&
11984           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11985             if (*s++ == '\n')
11986                 CopLINE_inc(PL_curcop);
11987         }
11988         if (s >= PL_bufend) {
11989             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11990             missingterm(PL_tokenbuf);
11991         }
11992         sv_setpvn(tmpstr,d+1,s-d);
11993 #ifdef PERL_MAD
11994         if (PL_madskills) {
11995             if (PL_thisstuff)
11996                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11997             else
11998                 PL_thisstuff = newSVpvn(d + 1, s - d);
11999             stuffstart = s - SvPVX(PL_linestr);
12000         }
12001 #endif
12002         s += len - 1;
12003         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12004
12005         sv_catpvn(herewas,s,PL_bufend-s);
12006         sv_setsv(PL_linestr,herewas);
12007         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12008         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12009         PL_last_lop = PL_last_uni = NULL;
12010     }
12011     else
12012         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12013     while (s >= PL_bufend) {    /* multiple line string? */
12014 #ifdef PERL_MAD
12015         if (PL_madskills) {
12016             tstart = SvPVX(PL_linestr) + stuffstart;
12017             if (PL_thisstuff)
12018                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12019             else
12020                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12021         }
12022 #endif
12023         PL_bufptr = s;
12024         CopLINE_inc(PL_curcop);
12025         if (!outer || !lex_next_chunk(0)) {
12026             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12027             missingterm(PL_tokenbuf);
12028         }
12029         CopLINE_dec(PL_curcop);
12030         s = PL_bufptr;
12031 #ifdef PERL_MAD
12032         stuffstart = s - SvPVX(PL_linestr);
12033 #endif
12034         CopLINE_inc(PL_curcop);
12035         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12036         PL_last_lop = PL_last_uni = NULL;
12037 #ifndef PERL_STRICT_CR
12038         if (PL_bufend - PL_linestart >= 2) {
12039             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12040                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12041             {
12042                 PL_bufend[-2] = '\n';
12043                 PL_bufend--;
12044                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12045             }
12046             else if (PL_bufend[-1] == '\r')
12047                 PL_bufend[-1] = '\n';
12048         }
12049         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12050             PL_bufend[-1] = '\n';
12051 #endif
12052         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12053             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12054             *(SvPVX(PL_linestr) + off ) = ' ';
12055             sv_catsv(PL_linestr,herewas);
12056             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12057             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12058         }
12059         else {
12060             s = PL_bufend;
12061             sv_catsv(tmpstr,PL_linestr);
12062         }
12063     }
12064     s++;
12065 retval:
12066     PL_multi_end = CopLINE(PL_curcop);
12067     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12068         SvPV_shrink_to_cur(tmpstr);
12069     }
12070     SvREFCNT_dec(herewas);
12071     if (!IN_BYTES) {
12072         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12073             SvUTF8_on(tmpstr);
12074         else if (PL_encoding)
12075             sv_recode_to_utf8(tmpstr, PL_encoding);
12076     }
12077     PL_lex_stuff = tmpstr;
12078     pl_yylval.ival = op_type;
12079     return s;
12080 }
12081
12082 /* scan_inputsymbol
12083    takes: current position in input buffer
12084    returns: new position in input buffer
12085    side-effects: pl_yylval and lex_op are set.
12086
12087    This code handles:
12088
12089    <>           read from ARGV
12090    <FH>         read from filehandle
12091    <pkg::FH>    read from package qualified filehandle
12092    <pkg'FH>     read from package qualified filehandle
12093    <$fh>        read from filehandle in $fh
12094    <*.h>        filename glob
12095
12096 */
12097
12098 STATIC char *
12099 S_scan_inputsymbol(pTHX_ char *start)
12100 {
12101     dVAR;
12102     register char *s = start;           /* current position in buffer */
12103     char *end;
12104     I32 len;
12105     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12106     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12107
12108     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12109
12110     end = strchr(s, '\n');
12111     if (!end)
12112         end = PL_bufend;
12113     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12114
12115     /* die if we didn't have space for the contents of the <>,
12116        or if it didn't end, or if we see a newline
12117     */
12118
12119     if (len >= (I32)sizeof PL_tokenbuf)
12120         Perl_croak(aTHX_ "Excessively long <> operator");
12121     if (s >= end)
12122         Perl_croak(aTHX_ "Unterminated <> operator");
12123
12124     s++;
12125
12126     /* check for <$fh>
12127        Remember, only scalar variables are interpreted as filehandles by
12128        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12129        treated as a glob() call.
12130        This code makes use of the fact that except for the $ at the front,
12131        a scalar variable and a filehandle look the same.
12132     */
12133     if (*d == '$' && d[1]) d++;
12134
12135     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12136     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12137         d++;
12138
12139     /* If we've tried to read what we allow filehandles to look like, and
12140        there's still text left, then it must be a glob() and not a getline.
12141        Use scan_str to pull out the stuff between the <> and treat it
12142        as nothing more than a string.
12143     */
12144
12145     if (d - PL_tokenbuf != len) {
12146         pl_yylval.ival = OP_GLOB;
12147         s = scan_str(start,!!PL_madskills,FALSE);
12148         if (!s)
12149            Perl_croak(aTHX_ "Glob not terminated");
12150         return s;
12151     }
12152     else {
12153         bool readline_overriden = FALSE;
12154         GV *gv_readline;
12155         GV **gvp;
12156         /* we're in a filehandle read situation */
12157         d = PL_tokenbuf;
12158
12159         /* turn <> into <ARGV> */
12160         if (!len)
12161             Copy("ARGV",d,5,char);
12162
12163         /* Check whether readline() is overriden */
12164         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12165         if ((gv_readline
12166                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12167                 ||
12168                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12169                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12170                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12171             readline_overriden = TRUE;
12172
12173         /* if <$fh>, create the ops to turn the variable into a
12174            filehandle
12175         */
12176         if (*d == '$') {
12177             /* try to find it in the pad for this block, otherwise find
12178                add symbol table ops
12179             */
12180             const PADOFFSET tmp = pad_findmy(d, len, 0);
12181             if (tmp != NOT_IN_PAD) {
12182                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12183                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12184                     HEK * const stashname = HvNAME_HEK(stash);
12185                     SV * const sym = sv_2mortal(newSVhek(stashname));
12186                     sv_catpvs(sym, "::");
12187                     sv_catpv(sym, d+1);
12188                     d = SvPVX(sym);
12189                     goto intro_sym;
12190                 }
12191                 else {
12192                     OP * const o = newOP(OP_PADSV, 0);
12193                     o->op_targ = tmp;
12194                     PL_lex_op = readline_overriden
12195                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12196                                 append_elem(OP_LIST, o,
12197                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12198                         : (OP*)newUNOP(OP_READLINE, 0, o);
12199                 }
12200             }
12201             else {
12202                 GV *gv;
12203                 ++d;
12204 intro_sym:
12205                 gv = gv_fetchpv(d,
12206                                 (PL_in_eval
12207                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12208                                  : GV_ADDMULTI),
12209                                 SVt_PV);
12210                 PL_lex_op = readline_overriden
12211                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12212                             append_elem(OP_LIST,
12213                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12214                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12215                     : (OP*)newUNOP(OP_READLINE, 0,
12216                             newUNOP(OP_RV2SV, 0,
12217                                 newGVOP(OP_GV, 0, gv)));
12218             }
12219             if (!readline_overriden)
12220                 PL_lex_op->op_flags |= OPf_SPECIAL;
12221             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12222             pl_yylval.ival = OP_NULL;
12223         }
12224
12225         /* If it's none of the above, it must be a literal filehandle
12226            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12227         else {
12228             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12229             PL_lex_op = readline_overriden
12230                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12231                         append_elem(OP_LIST,
12232                             newGVOP(OP_GV, 0, gv),
12233                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12234                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12235             pl_yylval.ival = OP_NULL;
12236         }
12237     }
12238
12239     return s;
12240 }
12241
12242
12243 /* scan_str
12244    takes: start position in buffer
12245           keep_quoted preserve \ on the embedded delimiter(s)
12246           keep_delims preserve the delimiters around the string
12247    returns: position to continue reading from buffer
12248    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12249         updates the read buffer.
12250
12251    This subroutine pulls a string out of the input.  It is called for:
12252         q               single quotes           q(literal text)
12253         '               single quotes           'literal text'
12254         qq              double quotes           qq(interpolate $here please)
12255         "               double quotes           "interpolate $here please"
12256         qx              backticks               qx(/bin/ls -l)
12257         `               backticks               `/bin/ls -l`
12258         qw              quote words             @EXPORT_OK = qw( func() $spam )
12259         m//             regexp match            m/this/
12260         s///            regexp substitute       s/this/that/
12261         tr///           string transliterate    tr/this/that/
12262         y///            string transliterate    y/this/that/
12263         ($*@)           sub prototypes          sub foo ($)
12264         (stuff)         sub attr parameters     sub foo : attr(stuff)
12265         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12266
12267    In most of these cases (all but <>, patterns and transliterate)
12268    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12269    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12270    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12271    calls scan_str().
12272
12273    It skips whitespace before the string starts, and treats the first
12274    character as the delimiter.  If the delimiter is one of ([{< then
12275    the corresponding "close" character )]}> is used as the closing
12276    delimiter.  It allows quoting of delimiters, and if the string has
12277    balanced delimiters ([{<>}]) it allows nesting.
12278
12279    On success, the SV with the resulting string is put into lex_stuff or,
12280    if that is already non-NULL, into lex_repl. The second case occurs only
12281    when parsing the RHS of the special constructs s/// and tr/// (y///).
12282    For convenience, the terminating delimiter character is stuffed into
12283    SvIVX of the SV.
12284 */
12285
12286 STATIC char *
12287 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12288 {
12289     dVAR;
12290     SV *sv;                             /* scalar value: string */
12291     const char *tmps;                   /* temp string, used for delimiter matching */
12292     register char *s = start;           /* current position in the buffer */
12293     register char term;                 /* terminating character */
12294     register char *to;                  /* current position in the sv's data */
12295     I32 brackets = 1;                   /* bracket nesting level */
12296     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12297     I32 termcode;                       /* terminating char. code */
12298     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12299     STRLEN termlen;                     /* length of terminating string */
12300     int last_off = 0;                   /* last position for nesting bracket */
12301 #ifdef PERL_MAD
12302     int stuffstart;
12303     char *tstart;
12304 #endif
12305
12306     PERL_ARGS_ASSERT_SCAN_STR;
12307
12308     /* skip space before the delimiter */
12309     if (isSPACE(*s)) {
12310         s = PEEKSPACE(s);
12311     }
12312
12313 #ifdef PERL_MAD
12314     if (PL_realtokenstart >= 0) {
12315         stuffstart = PL_realtokenstart;
12316         PL_realtokenstart = -1;
12317     }
12318     else
12319         stuffstart = start - SvPVX(PL_linestr);
12320 #endif
12321     /* mark where we are, in case we need to report errors */
12322     CLINE;
12323
12324     /* after skipping whitespace, the next character is the terminator */
12325     term = *s;
12326     if (!UTF) {
12327         termcode = termstr[0] = term;
12328         termlen = 1;
12329     }
12330     else {
12331         termcode = utf8_to_uvchr((U8*)s, &termlen);
12332         Copy(s, termstr, termlen, U8);
12333         if (!UTF8_IS_INVARIANT(term))
12334             has_utf8 = TRUE;
12335     }
12336
12337     /* mark where we are */
12338     PL_multi_start = CopLINE(PL_curcop);
12339     PL_multi_open = term;
12340
12341     /* find corresponding closing delimiter */
12342     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12343         termcode = termstr[0] = term = tmps[5];
12344
12345     PL_multi_close = term;
12346
12347     /* create a new SV to hold the contents.  79 is the SV's initial length.
12348        What a random number. */
12349     sv = newSV_type(SVt_PVIV);
12350     SvGROW(sv, 80);
12351     SvIV_set(sv, termcode);
12352     (void)SvPOK_only(sv);               /* validate pointer */
12353
12354     /* move past delimiter and try to read a complete string */
12355     if (keep_delims)
12356         sv_catpvn(sv, s, termlen);
12357     s += termlen;
12358 #ifdef PERL_MAD
12359     tstart = SvPVX(PL_linestr) + stuffstart;
12360     if (!PL_thisopen && !keep_delims) {
12361         PL_thisopen = newSVpvn(tstart, s - tstart);
12362         stuffstart = s - SvPVX(PL_linestr);
12363     }
12364 #endif
12365     for (;;) {
12366         if (PL_encoding && !UTF) {
12367             bool cont = TRUE;
12368
12369             while (cont) {
12370                 int offset = s - SvPVX_const(PL_linestr);
12371                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12372                                            &offset, (char*)termstr, termlen);
12373                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12374                 char * const svlast = SvEND(sv) - 1;
12375
12376                 for (; s < ns; s++) {
12377                     if (*s == '\n' && !PL_rsfp)
12378                         CopLINE_inc(PL_curcop);
12379                 }
12380                 if (!found)
12381                     goto read_more_line;
12382                 else {
12383                     /* handle quoted delimiters */
12384                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12385                         const char *t;
12386                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12387                             t--;
12388                         if ((svlast-1 - t) % 2) {
12389                             if (!keep_quoted) {
12390                                 *(svlast-1) = term;
12391                                 *svlast = '\0';
12392                                 SvCUR_set(sv, SvCUR(sv) - 1);
12393                             }
12394                             continue;
12395                         }
12396                     }
12397                     if (PL_multi_open == PL_multi_close) {
12398                         cont = FALSE;
12399                     }
12400                     else {
12401                         const char *t;
12402                         char *w;
12403                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12404                             /* At here, all closes are "was quoted" one,
12405                                so we don't check PL_multi_close. */
12406                             if (*t == '\\') {
12407                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12408                                     t++;
12409                                 else
12410                                     *w++ = *t++;
12411                             }
12412                             else if (*t == PL_multi_open)
12413                                 brackets++;
12414
12415                             *w = *t;
12416                         }
12417                         if (w < t) {
12418                             *w++ = term;
12419                             *w = '\0';
12420                             SvCUR_set(sv, w - SvPVX_const(sv));
12421                         }
12422                         last_off = w - SvPVX(sv);
12423                         if (--brackets <= 0)
12424                             cont = FALSE;
12425                     }
12426                 }
12427             }
12428             if (!keep_delims) {
12429                 SvCUR_set(sv, SvCUR(sv) - 1);
12430                 *SvEND(sv) = '\0';
12431             }
12432             break;
12433         }
12434
12435         /* extend sv if need be */
12436         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12437         /* set 'to' to the next character in the sv's string */
12438         to = SvPVX(sv)+SvCUR(sv);
12439
12440         /* if open delimiter is the close delimiter read unbridle */
12441         if (PL_multi_open == PL_multi_close) {
12442             for (; s < PL_bufend; s++,to++) {
12443                 /* embedded newlines increment the current line number */
12444                 if (*s == '\n' && !PL_rsfp)
12445                     CopLINE_inc(PL_curcop);
12446                 /* handle quoted delimiters */
12447                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12448                     if (!keep_quoted && s[1] == term)
12449                         s++;
12450                 /* any other quotes are simply copied straight through */
12451                     else
12452                         *to++ = *s++;
12453                 }
12454                 /* terminate when run out of buffer (the for() condition), or
12455                    have found the terminator */
12456                 else if (*s == term) {
12457                     if (termlen == 1)
12458                         break;
12459                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12460                         break;
12461                 }
12462                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12463                     has_utf8 = TRUE;
12464                 *to = *s;
12465             }
12466         }
12467
12468         /* if the terminator isn't the same as the start character (e.g.,
12469            matched brackets), we have to allow more in the quoting, and
12470            be prepared for nested brackets.
12471         */
12472         else {
12473             /* read until we run out of string, or we find the terminator */
12474             for (; s < PL_bufend; s++,to++) {
12475                 /* embedded newlines increment the line count */
12476                 if (*s == '\n' && !PL_rsfp)
12477                     CopLINE_inc(PL_curcop);
12478                 /* backslashes can escape the open or closing characters */
12479                 if (*s == '\\' && s+1 < PL_bufend) {
12480                     if (!keep_quoted &&
12481                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12482                         s++;
12483                     else
12484                         *to++ = *s++;
12485                 }
12486                 /* allow nested opens and closes */
12487                 else if (*s == PL_multi_close && --brackets <= 0)
12488                     break;
12489                 else if (*s == PL_multi_open)
12490                     brackets++;
12491                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12492                     has_utf8 = TRUE;
12493                 *to = *s;
12494             }
12495         }
12496         /* terminate the copied string and update the sv's end-of-string */
12497         *to = '\0';
12498         SvCUR_set(sv, to - SvPVX_const(sv));
12499
12500         /*
12501          * this next chunk reads more into the buffer if we're not done yet
12502          */
12503
12504         if (s < PL_bufend)
12505             break;              /* handle case where we are done yet :-) */
12506
12507 #ifndef PERL_STRICT_CR
12508         if (to - SvPVX_const(sv) >= 2) {
12509             if ((to[-2] == '\r' && to[-1] == '\n') ||
12510                 (to[-2] == '\n' && to[-1] == '\r'))
12511             {
12512                 to[-2] = '\n';
12513                 to--;
12514                 SvCUR_set(sv, to - SvPVX_const(sv));
12515             }
12516             else if (to[-1] == '\r')
12517                 to[-1] = '\n';
12518         }
12519         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12520             to[-1] = '\n';
12521 #endif
12522
12523      read_more_line:
12524         /* if we're out of file, or a read fails, bail and reset the current
12525            line marker so we can report where the unterminated string began
12526         */
12527 #ifdef PERL_MAD
12528         if (PL_madskills) {
12529             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12530             if (PL_thisstuff)
12531                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12532             else
12533                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12534         }
12535 #endif
12536         CopLINE_inc(PL_curcop);
12537         PL_bufptr = PL_bufend;
12538         if (!lex_next_chunk(0)) {
12539             sv_free(sv);
12540             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12541             return NULL;
12542         }
12543         s = PL_bufptr;
12544 #ifdef PERL_MAD
12545         stuffstart = 0;
12546 #endif
12547     }
12548
12549     /* at this point, we have successfully read the delimited string */
12550
12551     if (!PL_encoding || UTF) {
12552 #ifdef PERL_MAD
12553         if (PL_madskills) {
12554             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12555             const int len = s - tstart;
12556             if (PL_thisstuff)
12557                 sv_catpvn(PL_thisstuff, tstart, len);
12558             else
12559                 PL_thisstuff = newSVpvn(tstart, len);
12560             if (!PL_thisclose && !keep_delims)
12561                 PL_thisclose = newSVpvn(s,termlen);
12562         }
12563 #endif
12564
12565         if (keep_delims)
12566             sv_catpvn(sv, s, termlen);
12567         s += termlen;
12568     }
12569 #ifdef PERL_MAD
12570     else {
12571         if (PL_madskills) {
12572             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12573             const int len = s - tstart - termlen;
12574             if (PL_thisstuff)
12575                 sv_catpvn(PL_thisstuff, tstart, len);
12576             else
12577                 PL_thisstuff = newSVpvn(tstart, len);
12578             if (!PL_thisclose && !keep_delims)
12579                 PL_thisclose = newSVpvn(s - termlen,termlen);
12580         }
12581     }
12582 #endif
12583     if (has_utf8 || PL_encoding)
12584         SvUTF8_on(sv);
12585
12586     PL_multi_end = CopLINE(PL_curcop);
12587
12588     /* if we allocated too much space, give some back */
12589     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12590         SvLEN_set(sv, SvCUR(sv) + 1);
12591         SvPV_renew(sv, SvLEN(sv));
12592     }
12593
12594     /* decide whether this is the first or second quoted string we've read
12595        for this op
12596     */
12597
12598     if (PL_lex_stuff)
12599         PL_lex_repl = sv;
12600     else
12601         PL_lex_stuff = sv;
12602     return s;
12603 }
12604
12605 /*
12606   scan_num
12607   takes: pointer to position in buffer
12608   returns: pointer to new position in buffer
12609   side-effects: builds ops for the constant in pl_yylval.op
12610
12611   Read a number in any of the formats that Perl accepts:
12612
12613   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12614   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12615   0b[01](_?[01])*
12616   0[0-7](_?[0-7])*
12617   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12618
12619   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12620   thing it reads.
12621
12622   If it reads a number without a decimal point or an exponent, it will
12623   try converting the number to an integer and see if it can do so
12624   without loss of precision.
12625 */
12626
12627 char *
12628 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12629 {
12630     dVAR;
12631     register const char *s = start;     /* current position in buffer */
12632     register char *d;                   /* destination in temp buffer */
12633     register char *e;                   /* end of temp buffer */
12634     NV nv;                              /* number read, as a double */
12635     SV *sv = NULL;                      /* place to put the converted number */
12636     bool floatit;                       /* boolean: int or float? */
12637     const char *lastub = NULL;          /* position of last underbar */
12638     static char const number_too_long[] = "Number too long";
12639
12640     PERL_ARGS_ASSERT_SCAN_NUM;
12641
12642     /* We use the first character to decide what type of number this is */
12643
12644     switch (*s) {
12645     default:
12646       Perl_croak(aTHX_ "panic: scan_num");
12647
12648     /* if it starts with a 0, it could be an octal number, a decimal in
12649        0.13 disguise, or a hexadecimal number, or a binary number. */
12650     case '0':
12651         {
12652           /* variables:
12653              u          holds the "number so far"
12654              shift      the power of 2 of the base
12655                         (hex == 4, octal == 3, binary == 1)
12656              overflowed was the number more than we can hold?
12657
12658              Shift is used when we add a digit.  It also serves as an "are
12659              we in octal/hex/binary?" indicator to disallow hex characters
12660              when in octal mode.
12661            */
12662             NV n = 0.0;
12663             UV u = 0;
12664             I32 shift;
12665             bool overflowed = FALSE;
12666             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12667             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12668             static const char* const bases[5] =
12669               { "", "binary", "", "octal", "hexadecimal" };
12670             static const char* const Bases[5] =
12671               { "", "Binary", "", "Octal", "Hexadecimal" };
12672             static const char* const maxima[5] =
12673               { "",
12674                 "0b11111111111111111111111111111111",
12675                 "",
12676                 "037777777777",
12677                 "0xffffffff" };
12678             const char *base, *Base, *max;
12679
12680             /* check for hex */
12681             if (s[1] == 'x') {
12682                 shift = 4;
12683                 s += 2;
12684                 just_zero = FALSE;
12685             } else if (s[1] == 'b') {
12686                 shift = 1;
12687                 s += 2;
12688                 just_zero = FALSE;
12689             }
12690             /* check for a decimal in disguise */
12691             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12692                 goto decimal;
12693             /* so it must be octal */
12694             else {
12695                 shift = 3;
12696                 s++;
12697             }
12698
12699             if (*s == '_') {
12700                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12701                                "Misplaced _ in number");
12702                lastub = s++;
12703             }
12704
12705             base = bases[shift];
12706             Base = Bases[shift];
12707             max  = maxima[shift];
12708
12709             /* read the rest of the number */
12710             for (;;) {
12711                 /* x is used in the overflow test,
12712                    b is the digit we're adding on. */
12713                 UV x, b;
12714
12715                 switch (*s) {
12716
12717                 /* if we don't mention it, we're done */
12718                 default:
12719                     goto out;
12720
12721                 /* _ are ignored -- but warned about if consecutive */
12722                 case '_':
12723                     if (lastub && s == lastub + 1)
12724                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12725                                        "Misplaced _ in number");
12726                     lastub = s++;
12727                     break;
12728
12729                 /* 8 and 9 are not octal */
12730                 case '8': case '9':
12731                     if (shift == 3)
12732                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12733                     /* FALL THROUGH */
12734
12735                 /* octal digits */
12736                 case '2': case '3': case '4':
12737                 case '5': case '6': case '7':
12738                     if (shift == 1)
12739                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12740                     /* FALL THROUGH */
12741
12742                 case '0': case '1':
12743                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12744                     goto digit;
12745
12746                 /* hex digits */
12747                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12748                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12749                     /* make sure they said 0x */
12750                     if (shift != 4)
12751                         goto out;
12752                     b = (*s++ & 7) + 9;
12753
12754                     /* Prepare to put the digit we have onto the end
12755                        of the number so far.  We check for overflows.
12756                     */
12757
12758                   digit:
12759                     just_zero = FALSE;
12760                     if (!overflowed) {
12761                         x = u << shift; /* make room for the digit */
12762
12763                         if ((x >> shift) != u
12764                             && !(PL_hints & HINT_NEW_BINARY)) {
12765                             overflowed = TRUE;
12766                             n = (NV) u;
12767                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12768                                              "Integer overflow in %s number",
12769                                              base);
12770                         } else
12771                             u = x | b;          /* add the digit to the end */
12772                     }
12773                     if (overflowed) {
12774                         n *= nvshift[shift];
12775                         /* If an NV has not enough bits in its
12776                          * mantissa to represent an UV this summing of
12777                          * small low-order numbers is a waste of time
12778                          * (because the NV cannot preserve the
12779                          * low-order bits anyway): we could just
12780                          * remember when did we overflow and in the
12781                          * end just multiply n by the right
12782                          * amount. */
12783                         n += (NV) b;
12784                     }
12785                     break;
12786                 }
12787             }
12788
12789           /* if we get here, we had success: make a scalar value from
12790              the number.
12791           */
12792           out:
12793
12794             /* final misplaced underbar check */
12795             if (s[-1] == '_') {
12796                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12797             }
12798
12799             sv = newSV(0);
12800             if (overflowed) {
12801                 if (n > 4294967295.0)
12802                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12803                                    "%s number > %s non-portable",
12804                                    Base, max);
12805                 sv_setnv(sv, n);
12806             }
12807             else {
12808 #if UVSIZE > 4
12809                 if (u > 0xffffffff)
12810                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12811                                    "%s number > %s non-portable",
12812                                    Base, max);
12813 #endif
12814                 sv_setuv(sv, u);
12815             }
12816             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12817                 sv = new_constant(start, s - start, "integer",
12818                                   sv, NULL, NULL, 0);
12819             else if (PL_hints & HINT_NEW_BINARY)
12820                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12821         }
12822         break;
12823
12824     /*
12825       handle decimal numbers.
12826       we're also sent here when we read a 0 as the first digit
12827     */
12828     case '1': case '2': case '3': case '4': case '5':
12829     case '6': case '7': case '8': case '9': case '.':
12830       decimal:
12831         d = PL_tokenbuf;
12832         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12833         floatit = FALSE;
12834
12835         /* read next group of digits and _ and copy into d */
12836         while (isDIGIT(*s) || *s == '_') {
12837             /* skip underscores, checking for misplaced ones
12838                if -w is on
12839             */
12840             if (*s == '_') {
12841                 if (lastub && s == lastub + 1)
12842                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12843                                    "Misplaced _ in number");
12844                 lastub = s++;
12845             }
12846             else {
12847                 /* check for end of fixed-length buffer */
12848                 if (d >= e)
12849                     Perl_croak(aTHX_ number_too_long);
12850                 /* if we're ok, copy the character */
12851                 *d++ = *s++;
12852             }
12853         }
12854
12855         /* final misplaced underbar check */
12856         if (lastub && s == lastub + 1) {
12857             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12858         }
12859
12860         /* read a decimal portion if there is one.  avoid
12861            3..5 being interpreted as the number 3. followed
12862            by .5
12863         */
12864         if (*s == '.' && s[1] != '.') {
12865             floatit = TRUE;
12866             *d++ = *s++;
12867
12868             if (*s == '_') {
12869                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12870                                "Misplaced _ in number");
12871                 lastub = s;
12872             }
12873
12874             /* copy, ignoring underbars, until we run out of digits.
12875             */
12876             for (; isDIGIT(*s) || *s == '_'; s++) {
12877                 /* fixed length buffer check */
12878                 if (d >= e)
12879                     Perl_croak(aTHX_ number_too_long);
12880                 if (*s == '_') {
12881                    if (lastub && s == lastub + 1)
12882                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12883                                       "Misplaced _ in number");
12884                    lastub = s;
12885                 }
12886                 else
12887                     *d++ = *s;
12888             }
12889             /* fractional part ending in underbar? */
12890             if (s[-1] == '_') {
12891                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12892                                "Misplaced _ in number");
12893             }
12894             if (*s == '.' && isDIGIT(s[1])) {
12895                 /* oops, it's really a v-string, but without the "v" */
12896                 s = start;
12897                 goto vstring;
12898             }
12899         }
12900
12901         /* read exponent part, if present */
12902         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12903             floatit = TRUE;
12904             s++;
12905
12906             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12907             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12908
12909             /* stray preinitial _ */
12910             if (*s == '_') {
12911                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12912                                "Misplaced _ in number");
12913                 lastub = s++;
12914             }
12915
12916             /* allow positive or negative exponent */
12917             if (*s == '+' || *s == '-')
12918                 *d++ = *s++;
12919
12920             /* stray initial _ */
12921             if (*s == '_') {
12922                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12923                                "Misplaced _ in number");
12924                 lastub = s++;
12925             }
12926
12927             /* read digits of exponent */
12928             while (isDIGIT(*s) || *s == '_') {
12929                 if (isDIGIT(*s)) {
12930                     if (d >= e)
12931                         Perl_croak(aTHX_ number_too_long);
12932                     *d++ = *s++;
12933                 }
12934                 else {
12935                    if (((lastub && s == lastub + 1) ||
12936                         (!isDIGIT(s[1]) && s[1] != '_')))
12937                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12938                                       "Misplaced _ in number");
12939                    lastub = s++;
12940                 }
12941             }
12942         }
12943
12944
12945         /* make an sv from the string */
12946         sv = newSV(0);
12947
12948         /*
12949            We try to do an integer conversion first if no characters
12950            indicating "float" have been found.
12951          */
12952
12953         if (!floatit) {
12954             UV uv;
12955             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12956
12957             if (flags == IS_NUMBER_IN_UV) {
12958               if (uv <= IV_MAX)
12959                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12960               else
12961                 sv_setuv(sv, uv);
12962             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12963               if (uv <= (UV) IV_MIN)
12964                 sv_setiv(sv, -(IV)uv);
12965               else
12966                 floatit = TRUE;
12967             } else
12968               floatit = TRUE;
12969         }
12970         if (floatit) {
12971             /* terminate the string */
12972             *d = '\0';
12973             nv = Atof(PL_tokenbuf);
12974             sv_setnv(sv, nv);
12975         }
12976
12977         if ( floatit
12978              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12979             const char *const key = floatit ? "float" : "integer";
12980             const STRLEN keylen = floatit ? 5 : 7;
12981             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12982                                 key, keylen, sv, NULL, NULL, 0);
12983         }
12984         break;
12985
12986     /* if it starts with a v, it could be a v-string */
12987     case 'v':
12988 vstring:
12989                 sv = newSV(5); /* preallocate storage space */
12990                 s = scan_vstring(s, PL_bufend, sv);
12991         break;
12992     }
12993
12994     /* make the op for the constant and return */
12995
12996     if (sv)
12997         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12998     else
12999         lvalp->opval = NULL;
13000
13001     return (char *)s;
13002 }
13003
13004 STATIC char *
13005 S_scan_formline(pTHX_ register char *s)
13006 {
13007     dVAR;
13008     register char *eol;
13009     register char *t;
13010     SV * const stuff = newSVpvs("");
13011     bool needargs = FALSE;
13012     bool eofmt = FALSE;
13013 #ifdef PERL_MAD
13014     char *tokenstart = s;
13015     SV* savewhite = NULL;
13016
13017     if (PL_madskills) {
13018         savewhite = PL_thiswhite;
13019         PL_thiswhite = 0;
13020     }
13021 #endif
13022
13023     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13024
13025     while (!needargs) {
13026         if (*s == '.') {
13027             t = s+1;
13028 #ifdef PERL_STRICT_CR
13029             while (SPACE_OR_TAB(*t))
13030                 t++;
13031 #else
13032             while (SPACE_OR_TAB(*t) || *t == '\r')
13033                 t++;
13034 #endif
13035             if (*t == '\n' || t == PL_bufend) {
13036                 eofmt = TRUE;
13037                 break;
13038             }
13039         }
13040         if (PL_in_eval && !PL_rsfp) {
13041             eol = (char *) memchr(s,'\n',PL_bufend-s);
13042             if (!eol++)
13043                 eol = PL_bufend;
13044         }
13045         else
13046             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13047         if (*s != '#') {
13048             for (t = s; t < eol; t++) {
13049                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13050                     needargs = FALSE;
13051                     goto enough;        /* ~~ must be first line in formline */
13052                 }
13053                 if (*t == '@' || *t == '^')
13054                     needargs = TRUE;
13055             }
13056             if (eol > s) {
13057                 sv_catpvn(stuff, s, eol-s);
13058 #ifndef PERL_STRICT_CR
13059                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13060                     char *end = SvPVX(stuff) + SvCUR(stuff);
13061                     end[-2] = '\n';
13062                     end[-1] = '\0';
13063                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13064                 }
13065 #endif
13066             }
13067             else
13068               break;
13069         }
13070         s = (char*)eol;
13071         if (PL_rsfp) {
13072             bool got_some;
13073 #ifdef PERL_MAD
13074             if (PL_madskills) {
13075                 if (PL_thistoken)
13076                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13077                 else
13078                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13079             }
13080 #endif
13081             PL_bufptr = PL_bufend;
13082             CopLINE_inc(PL_curcop);
13083             got_some = lex_next_chunk(0);
13084             CopLINE_dec(PL_curcop);
13085             s = PL_bufptr;
13086 #ifdef PERL_MAD
13087             tokenstart = PL_bufptr;
13088 #endif
13089             if (!got_some)
13090                 break;
13091         }
13092         incline(s);
13093     }
13094   enough:
13095     if (SvCUR(stuff)) {
13096         PL_expect = XTERM;
13097         if (needargs) {
13098             PL_lex_state = LEX_NORMAL;
13099             start_force(PL_curforce);
13100             NEXTVAL_NEXTTOKE.ival = 0;
13101             force_next(',');
13102         }
13103         else
13104             PL_lex_state = LEX_FORMLINE;
13105         if (!IN_BYTES) {
13106             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13107                 SvUTF8_on(stuff);
13108             else if (PL_encoding)
13109                 sv_recode_to_utf8(stuff, PL_encoding);
13110         }
13111         start_force(PL_curforce);
13112         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13113         force_next(THING);
13114         start_force(PL_curforce);
13115         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13116         force_next(LSTOP);
13117     }
13118     else {
13119         SvREFCNT_dec(stuff);
13120         if (eofmt)
13121             PL_lex_formbrack = 0;
13122         PL_bufptr = s;
13123     }
13124 #ifdef PERL_MAD
13125     if (PL_madskills) {
13126         if (PL_thistoken)
13127             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13128         else
13129             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13130         PL_thiswhite = savewhite;
13131     }
13132 #endif
13133     return s;
13134 }
13135
13136 I32
13137 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13138 {
13139     dVAR;
13140     const I32 oldsavestack_ix = PL_savestack_ix;
13141     CV* const outsidecv = PL_compcv;
13142
13143     if (PL_compcv) {
13144         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13145     }
13146     SAVEI32(PL_subline);
13147     save_item(PL_subname);
13148     SAVESPTR(PL_compcv);
13149
13150     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13151     CvFLAGS(PL_compcv) |= flags;
13152
13153     PL_subline = CopLINE(PL_curcop);
13154     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13155     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13156     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13157
13158     return oldsavestack_ix;
13159 }
13160
13161 #ifdef __SC__
13162 #pragma segment Perl_yylex
13163 #endif
13164 static int
13165 S_yywarn(pTHX_ const char *const s)
13166 {
13167     dVAR;
13168
13169     PERL_ARGS_ASSERT_YYWARN;
13170
13171     PL_in_eval |= EVAL_WARNONLY;
13172     yyerror(s);
13173     PL_in_eval &= ~EVAL_WARNONLY;
13174     return 0;
13175 }
13176
13177 int
13178 Perl_yyerror(pTHX_ const char *const s)
13179 {
13180     dVAR;
13181     const char *where = NULL;
13182     const char *context = NULL;
13183     int contlen = -1;
13184     SV *msg;
13185     int yychar  = PL_parser->yychar;
13186
13187     PERL_ARGS_ASSERT_YYERROR;
13188
13189     if (!yychar || (yychar == ';' && !PL_rsfp))
13190         where = "at EOF";
13191     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13192       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13193       PL_oldbufptr != PL_bufptr) {
13194         /*
13195                 Only for NetWare:
13196                 The code below is removed for NetWare because it abends/crashes on NetWare
13197                 when the script has error such as not having the closing quotes like:
13198                     if ($var eq "value)
13199                 Checking of white spaces is anyway done in NetWare code.
13200         */
13201 #ifndef NETWARE
13202         while (isSPACE(*PL_oldoldbufptr))
13203             PL_oldoldbufptr++;
13204 #endif
13205         context = PL_oldoldbufptr;
13206         contlen = PL_bufptr - PL_oldoldbufptr;
13207     }
13208     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13209       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13210         /*
13211                 Only for NetWare:
13212                 The code below is removed for NetWare because it abends/crashes on NetWare
13213                 when the script has error such as not having the closing quotes like:
13214                     if ($var eq "value)
13215                 Checking of white spaces is anyway done in NetWare code.
13216         */
13217 #ifndef NETWARE
13218         while (isSPACE(*PL_oldbufptr))
13219             PL_oldbufptr++;
13220 #endif
13221         context = PL_oldbufptr;
13222         contlen = PL_bufptr - PL_oldbufptr;
13223     }
13224     else if (yychar > 255)
13225         where = "next token ???";
13226     else if (yychar == -2) { /* YYEMPTY */
13227         if (PL_lex_state == LEX_NORMAL ||
13228            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13229             where = "at end of line";
13230         else if (PL_lex_inpat)
13231             where = "within pattern";
13232         else
13233             where = "within string";
13234     }
13235     else {
13236         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13237         if (yychar < 32)
13238             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13239         else if (isPRINT_LC(yychar)) {
13240             const char string = yychar;
13241             sv_catpvn(where_sv, &string, 1);
13242         }
13243         else
13244             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13245         where = SvPVX_const(where_sv);
13246     }
13247     msg = sv_2mortal(newSVpv(s, 0));
13248     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13249         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13250     if (context)
13251         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13252     else
13253         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13254     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13255         Perl_sv_catpvf(aTHX_ msg,
13256         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13257                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13258         PL_multi_end = 0;
13259     }
13260     if (PL_in_eval & EVAL_WARNONLY) {
13261         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13262     }
13263     else
13264         qerror(msg);
13265     if (PL_error_count >= 10) {
13266         if (PL_in_eval && SvCUR(ERRSV))
13267             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13268                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13269         else
13270             Perl_croak(aTHX_ "%s has too many errors.\n",
13271             OutCopFILE(PL_curcop));
13272     }
13273     PL_in_my = 0;
13274     PL_in_my_stash = NULL;
13275     return 0;
13276 }
13277 #ifdef __SC__
13278 #pragma segment Main
13279 #endif
13280
13281 STATIC char*
13282 S_swallow_bom(pTHX_ U8 *s)
13283 {
13284     dVAR;
13285     const STRLEN slen = SvCUR(PL_linestr);
13286
13287     PERL_ARGS_ASSERT_SWALLOW_BOM;
13288
13289     switch (s[0]) {
13290     case 0xFF:
13291         if (s[1] == 0xFE) {
13292             /* UTF-16 little-endian? (or UTF32-LE?) */
13293             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13294                 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
13295 #ifndef PERL_NO_UTF16_FILTER
13296             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
13297             s += 2;
13298             if (PL_bufend > (char*)s) {
13299                 s = add_utf16_textfilter(s, TRUE);
13300             }
13301 #else
13302             Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
13303 #endif
13304         }
13305         break;
13306     case 0xFE:
13307         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13308 #ifndef PERL_NO_UTF16_FILTER
13309             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13310             s += 2;
13311             if (PL_bufend > (char *)s) {
13312                 s = add_utf16_textfilter(s, FALSE);
13313             }
13314 #else
13315             Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
13316 #endif
13317         }
13318         break;
13319     case 0xEF:
13320         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13321             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13322             s += 3;                      /* UTF-8 */
13323         }
13324         break;
13325     case 0:
13326         if (slen > 3) {
13327              if (s[1] == 0) {
13328                   if (s[2] == 0xFE && s[3] == 0xFF) {
13329                        /* UTF-32 big-endian */
13330                        Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
13331                   }
13332              }
13333              else if (s[2] == 0 && s[3] != 0) {
13334                   /* Leading bytes
13335                    * 00 xx 00 xx
13336                    * are a good indicator of UTF-16BE. */
13337                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13338                 s = add_utf16_textfilter(s, FALSE);
13339              }
13340         }
13341 #ifdef EBCDIC
13342     case 0xDD:
13343         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13344             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13345             s += 4;                      /* UTF-8 */
13346         }
13347         break;
13348 #endif
13349
13350     default:
13351          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13352                   /* Leading bytes
13353                    * xx 00 xx 00
13354                    * are a good indicator of UTF-16LE. */
13355               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13356               s = add_utf16_textfilter(s, TRUE);
13357          }
13358     }
13359     return (char*)s;
13360 }
13361
13362
13363 #ifndef PERL_NO_UTF16_FILTER
13364 static I32
13365 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13366 {
13367     dVAR;
13368     SV *const filter = FILTER_DATA(idx);
13369     /* We re-use this each time round, throwing the contents away before we
13370        return.  */
13371     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13372     SV *const utf8_buffer = filter;
13373     IV status = IoPAGE(filter);
13374     const bool reverse = (bool) IoLINES(filter);
13375     I32 retval;
13376
13377     /* As we're automatically added, at the lowest level, and hence only called
13378        from this file, we can be sure that we're not called in block mode. Hence
13379        don't bother writing code to deal with block mode.  */
13380     if (maxlen) {
13381         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13382     }
13383     if (status < 0) {
13384         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13385     }
13386     DEBUG_P(PerlIO_printf(Perl_debug_log,
13387                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13388                           FPTR2DPTR(void *, S_utf16_textfilter),
13389                           reverse ? 'l' : 'b', idx, maxlen, status,
13390                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13391
13392     while (1) {
13393         STRLEN chars;
13394         STRLEN have;
13395         I32 newlen;
13396         U8 *end;
13397         /* First, look in our buffer of existing UTF-8 data:  */
13398         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13399
13400         if (nl) {
13401             ++nl;
13402         } else if (status == 0) {
13403             /* EOF */
13404             IoPAGE(filter) = 0;
13405             nl = SvEND(utf8_buffer);
13406         }
13407         if (nl) {
13408             STRLEN got = nl - SvPVX(utf8_buffer);
13409             /* Did we have anything to append?  */
13410             retval = got != 0;
13411             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13412             /* Everything else in this code works just fine if SVp_POK isn't
13413                set.  This, however, needs it, and we need it to work, else
13414                we loop infinitely because the buffer is never consumed.  */
13415             sv_chop(utf8_buffer, nl);
13416             break;
13417         }
13418
13419         /* OK, not a complete line there, so need to read some more UTF-16.
13420            Read an extra octect if the buffer currently has an odd number. */
13421         while (1) {
13422             if (status <= 0)
13423                 break;
13424             if (SvCUR(utf16_buffer) >= 2) {
13425                 /* Location of the high octet of the last complete code point.
13426                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13427                    *coupled* with all the benefits of partial reads and
13428                    endianness.  */
13429                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13430                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13431
13432                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13433                     break;
13434                 }
13435
13436                 /* We have the first half of a surrogate. Read more.  */
13437                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13438             }
13439
13440             status = FILTER_READ(idx + 1, utf16_buffer,
13441                                  160 + (SvCUR(utf16_buffer) & 1));
13442             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13443             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13444             if (status < 0) {
13445                 /* Error */
13446                 IoPAGE(filter) = status;
13447                 return status;
13448             }
13449         }
13450
13451         chars = SvCUR(utf16_buffer) >> 1;
13452         have = SvCUR(utf8_buffer);
13453         SvGROW(utf8_buffer, have + chars * 3 + 1);
13454
13455         if (reverse) {
13456             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13457                                          (U8*)SvPVX_const(utf8_buffer) + have,
13458                                          chars * 2, &newlen);
13459         } else {
13460             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13461                                 (U8*)SvPVX_const(utf8_buffer) + have,
13462                                 chars * 2, &newlen);
13463         }
13464         SvCUR_set(utf8_buffer, have + newlen);
13465         *end = '\0';
13466
13467         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13468            it's private to us, and utf16_to_utf8{,reversed} take a
13469            (pointer,length) pair, rather than a NUL-terminated string.  */
13470         if(SvCUR(utf16_buffer) & 1) {
13471             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13472             SvCUR_set(utf16_buffer, 1);
13473         } else {
13474             SvCUR_set(utf16_buffer, 0);
13475         }
13476     }
13477     DEBUG_P(PerlIO_printf(Perl_debug_log,
13478                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13479                           status,
13480                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13481     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13482     return retval;
13483 }
13484
13485 static U8 *
13486 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13487 {
13488     SV *filter = filter_add(S_utf16_textfilter, NULL);
13489
13490     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13491     sv_setpvs(filter, "");
13492     IoLINES(filter) = reversed;
13493     IoPAGE(filter) = 1; /* Not EOF */
13494
13495     /* Sadly, we have to return a valid pointer, come what may, so we have to
13496        ignore any error return from this.  */
13497     SvCUR_set(PL_linestr, 0);
13498     if (FILTER_READ(0, PL_linestr, 0)) {
13499         SvUTF8_on(PL_linestr);
13500     } else {
13501         SvUTF8_on(PL_linestr);
13502     }
13503     PL_bufend = SvEND(PL_linestr);
13504     return (U8*)SvPVX(PL_linestr);
13505 }
13506 #endif
13507
13508 /*
13509 Returns a pointer to the next character after the parsed
13510 vstring, as well as updating the passed in sv.
13511
13512 Function must be called like
13513
13514         sv = newSV(5);
13515         s = scan_vstring(s,e,sv);
13516
13517 where s and e are the start and end of the string.
13518 The sv should already be large enough to store the vstring
13519 passed in, for performance reasons.
13520
13521 */
13522
13523 char *
13524 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13525 {
13526     dVAR;
13527     const char *pos = s;
13528     const char *start = s;
13529
13530     PERL_ARGS_ASSERT_SCAN_VSTRING;
13531
13532     if (*pos == 'v') pos++;  /* get past 'v' */
13533     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13534         pos++;
13535     if ( *pos != '.') {
13536         /* this may not be a v-string if followed by => */
13537         const char *next = pos;
13538         while (next < e && isSPACE(*next))
13539             ++next;
13540         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13541             /* return string not v-string */
13542             sv_setpvn(sv,(char *)s,pos-s);
13543             return (char *)pos;
13544         }
13545     }
13546
13547     if (!isALPHA(*pos)) {
13548         U8 tmpbuf[UTF8_MAXBYTES+1];
13549
13550         if (*s == 'v')
13551             s++;  /* get past 'v' */
13552
13553         sv_setpvs(sv, "");
13554
13555         for (;;) {
13556             /* this is atoi() that tolerates underscores */
13557             U8 *tmpend;
13558             UV rev = 0;
13559             const char *end = pos;
13560             UV mult = 1;
13561             while (--end >= s) {
13562                 if (*end != '_') {
13563                     const UV orev = rev;
13564                     rev += (*end - '0') * mult;
13565                     mult *= 10;
13566                     if (orev > rev)
13567                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13568                                          "Integer overflow in decimal number");
13569                 }
13570             }
13571 #ifdef EBCDIC
13572             if (rev > 0x7FFFFFFF)
13573                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13574 #endif
13575             /* Append native character for the rev point */
13576             tmpend = uvchr_to_utf8(tmpbuf, rev);
13577             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13578             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13579                  SvUTF8_on(sv);
13580             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13581                  s = ++pos;
13582             else {
13583                  s = pos;
13584                  break;
13585             }
13586             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13587                  pos++;
13588         }
13589         SvPOK_on(sv);
13590         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13591         SvRMAGICAL_on(sv);
13592     }
13593     return (char *)s;
13594 }
13595
13596 int
13597 Perl_keyword_plugin_standard(pTHX_
13598         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13599 {
13600     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13601     PERL_UNUSED_CONTEXT;
13602     PERL_UNUSED_ARG(keyword_ptr);
13603     PERL_UNUSED_ARG(keyword_len);
13604     PERL_UNUSED_ARG(op_ptr);
13605     return KEYWORD_PLUGIN_DECLINE;
13606 }
13607
13608 /*
13609  * Local variables:
13610  * c-indentation-style: bsd
13611  * c-basic-offset: 4
13612  * indent-tabs-mode: t
13613  * End:
13614  *
13615  * ex: set ts=8 sts=4 sw=4 noet:
13616  */