toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 105 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
 302 enum token_type {
 303     TOKENTYPE_NONE,
 304     TOKENTYPE_IVAL,
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,
 307     TOKENTYPE_OPVAL,
 308     TOKENTYPE_GVVAL
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;
 313     enum token_type type;
 314     const char *name;
 315 } const debug_tokens[] =
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
 412         else if ((char)rv > ' ' && (char)rv < '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440
 441             }
 442             else
 443                 sv_catpvs(report, "(opval=null)");
 444             break;
 445         }
 446         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 447     };
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV* const tmp = newSVpvs("");
 458
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 462     SvREFCNT_dec(tmp);
 463 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 478  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 479  */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr == '=') {
 486         PL_bufptr++;
 487         if (toketype == ANDAND)
 488             pl_yylval.ival = OP_ANDASSIGN;
 489         else if (toketype == OROR)
 490             pl_yylval.ival = OP_ORASSIGN;
 491         else if (toketype == DORDOR)
 492             pl_yylval.ival = OP_DORASSIGN;
 493         toketype = ASSIGNOP;
 494     }
 495     return toketype;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * When Perl expects an operator and finds something else, no_op
 501  * prints the warning.  It always prints "<something> found where
 502  * operator expected.  It prints "Missing semicolon on previous line?"
 503  * if the surprise occurs at the start of the line.  "do you need to
 504  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 505  * where the compiler doesn't know if foo is a method call or a function.
 506  * It prints "Missing operator before end of line" if there's nothing
 507  * after the missing operator, or "... before <...>" if there is something
 508  * after the missing operator.
 509  */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const oldbp = PL_bufptr;
 516     const bool is_first = (PL_oldbufptr == PL_linestart);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (!s)
 521         s = oldbp;
 522     else
 523         PL_bufptr = s;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (is_first)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t;
 531             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 532                 NOOP;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= oldbp);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 542         }
 543     }
 544     PL_bufptr = oldbp;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with NULL then it cauterizes the line buffer.
 551  * If we're in a delimited string and the delimiter is a control
 552  * character, it's reformatted into a two-char sequence like ^C.
 553  * This is fatal.
 554  */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char tmpbuf[3];
 561     char q;
 562     if (s) {
 563         char * const nl = strrchr(s,'\n');
 564         if (nl)
 565             *nl = '\0';
 566     }
 567     else if (isCNTRL(PL_multi_close)) {
 568         *tmpbuf = '^';
 569         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 570         tmpbuf[2] = '\0';
 571         s = tmpbuf;
 572     }
 573     else {
 574         *tmpbuf = (char)PL_multi_close;
 575         tmpbuf[1] = '\0';
 576         s = tmpbuf;
 577     }
 578     q = strchr(s,'"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 585 /* The longest string we pass in.  */
 586 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Check whether the named feature is enabled.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     HV * const hinthv = GvHV(PL_hintgv);
 597     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 598
 599     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 600
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(&he_name[8], name, namelen);
 603
 604     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
 613 static void
 614 strip_return(SV *sv)
 615 {
 616     register const char *s = SvPVX_const(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620
 621     /* outer loop optimized to do nothing if there are no CR-LFs */
 622     while (s < e) {
 623         if (*s++ == '\r' && *s == '\n') {
 624             /* hit a CR-LF, need to copy the rest */
 625             register char *d = s - 1;
 626             *d++ = *s++;
 627             while (s < e) {
 628                 if (*s == '\r' && s[1] == '\n')
 629                     s++;
 630                 *d++ = *s++;
 631             }
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
 641     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 642     if (count > 0 && !maxlen)
 643         strip_return(sv);
 644     return count;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PERL_ARGS_ASSERT_PARSER_FREE;
 741
 742     PL_curcop = parser->saved_curcop;
 743     SvREFCNT_dec(parser->linestr);
 744
 745     if (parser->rsfp == PerlIO_stdin())
 746         PerlIO_clearerr(parser->rsfp);
 747     else if (parser->rsfp && (!parser->old_parser ||
 748                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 749         PerlIO_close(parser->rsfp);
 750     SvREFCNT_dec(parser->rsfp_filters);
 751
 752     Safefree(parser->stack);
 753     Safefree(parser->lex_brackstack);
 754     Safefree(parser->lex_casestack);
 755     PL_parser = parser->old_parser;
 756     Safefree(parser);
 757 }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
 865     return UTF;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV *linestr;
 889     char *buf;
 890     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 891     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
 892     linestr = PL_parser->linestr;
 893     buf = SvPVX(linestr);
 894     if (len <= SvLEN(linestr))
 895         return buf;
 896     bufend_pos = PL_parser->bufend - buf;
 897     bufptr_pos = PL_parser->bufptr - buf;
 898     oldbufptr_pos = PL_parser->oldbufptr - buf;
 899     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 900     linestart_pos = PL_parser->linestart - buf;
 901     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 902     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 903     buf = sv_grow(linestr, len);
 904     PL_parser->bufend = buf + bufend_pos;
 905     PL_parser->bufptr = buf + bufptr_pos;
 906     PL_parser->oldbufptr = buf + oldbufptr_pos;
 907     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 908     PL_parser->linestart = buf + linestart_pos;
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = buf + last_uni_pos;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = buf + last_lop_pos;
 913     return buf;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 940 {
 941     dVAR;
 942     char *bufptr;
 943     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
 944     if (flags & ~(LEX_STUFF_UTF8))
 945         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
 946     if (UTF) {
 947         if (flags & LEX_STUFF_UTF8) {
 948             goto plain_copy;
 949         } else {
 950             STRLEN highhalf = 0;
 951             const char *p, *e = pv+len;
 952             for (p = pv; p != e; p++)
 953                 highhalf += !!(((U8)*p) & 0x80);
 954             if (!highhalf)
 955                 goto plain_copy;
 956             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
 957             bufptr = PL_parser->bufptr;
 958             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 959             SvCUR_set(PL_parser->linestr,
 960                 SvCUR(PL_parser->linestr) + len+highhalf);
 961             PL_parser->bufend += len+highhalf;
 962             for (p = pv; p != e; p++) {
 963                 U8 c = (U8)*p;
 964                 if (c & 0x80) {
 965                     *bufptr++ = (char)(0xc0 | (c >> 6));
 966                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 967                 } else {
 968                     *bufptr++ = (char)c;
 969                 }
 970             }
 971         }
 972     } else {
 973         if (flags & LEX_STUFF_UTF8) {
 974             STRLEN highhalf = 0;
 975             const char *p, *e = pv+len;
 976             for (p = pv; p != e; p++) {
 977                 U8 c = (U8)*p;
 978                 if (c >= 0xc4) {
 979                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 980                                 "non-Latin-1 character into Latin-1 input");
 981                 } else if (c >= 0xc2 && p+1 != e &&
 982                             (((U8)p[1]) & 0xc0) == 0x80) {
 983                     p++;
 984                     highhalf++;
 985                 } else if (c >= 0x80) {
 986                     /* malformed UTF-8 */
 987                     ENTER;
 988                     SAVESPTR(PL_warnhook);
 989                     PL_warnhook = PERL_WARNHOOK_FATAL;
 990                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 991                     LEAVE;
 992                 }
 993             }
 994             if (!highhalf)
 995                 goto plain_copy;
 996             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
 997             bufptr = PL_parser->bufptr;
 998             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 999             SvCUR_set(PL_parser->linestr,
1000                 SvCUR(PL_parser->linestr) + len-highhalf);
1001             PL_parser->bufend += len-highhalf;
1002             for (p = pv; p != e; p++) {
1003                 U8 c = (U8)*p;
1004                 if (c & 0x80) {
1005                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006                     p++;
1007                 } else {
1008                     *bufptr++ = (char)c;
1009                 }
1010             }
1011         } else {
1012             plain_copy:
1013             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1014             bufptr = PL_parser->bufptr;
1015             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1016             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1017             PL_parser->bufend += len;
1018             Copy(pv, bufptr, len, char);
1019         }
1020     }
1021 }
1022
1023 /*
1024 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1025
1026 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1027 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1028 reallocating the buffer if necessary.  This means that lexing code that
1029 runs later will see the characters as if they had appeared in the input.
1030 It is not recommended to do this as part of normal parsing, and most
1031 uses of this facility run the risk of the inserted characters being
1032 interpreted in an unintended manner.
1033
1034 The string to be inserted is the string value of I<sv>.  The characters
1035 are recoded for the lexer buffer, according to how the buffer is currently
1036 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1037 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1038 need to construct a scalar.
1039
1040 =cut
1041 */
1042
1043 void
1044 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1045 {
1046     char *pv;
1047     STRLEN len;
1048     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1049     if (flags)
1050         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1051     pv = SvPV(sv, len);
1052     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1053 }
1054
1055 /*
1056 =for apidoc Amx|void|lex_unstuff|char *ptr
1057
1058 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1059 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1060 This hides the discarded text from any lexing code that runs later,
1061 as if the text had never appeared.
1062
1063 This is not the normal way to consume lexed text.  For that, use
1064 L</lex_read_to>.
1065
1066 =cut
1067 */
1068
1069 void
1070 Perl_lex_unstuff(pTHX_ char *ptr)
1071 {
1072     char *buf, *bufend;
1073     STRLEN unstuff_len;
1074     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1075     buf = PL_parser->bufptr;
1076     if (ptr < buf)
1077         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1078     if (ptr == buf)
1079         return;
1080     bufend = PL_parser->bufend;
1081     if (ptr > bufend)
1082         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1083     unstuff_len = ptr - buf;
1084     Move(ptr, buf, bufend+1-ptr, char);
1085     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1086     PL_parser->bufend = bufend - unstuff_len;
1087 }
1088
1089 /*
1090 =for apidoc Amx|void|lex_read_to|char *ptr
1091
1092 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1093 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1094 performing the correct bookkeeping whenever a newline character is passed.
1095 This is the normal way to consume lexed text.
1096
1097 Interpretation of the buffer's octets can be abstracted out by
1098 using the slightly higher-level functions L</lex_peek_unichar> and
1099 L</lex_read_unichar>.
1100
1101 =cut
1102 */
1103
1104 void
1105 Perl_lex_read_to(pTHX_ char *ptr)
1106 {
1107     char *s;
1108     PERL_ARGS_ASSERT_LEX_READ_TO;
1109     s = PL_parser->bufptr;
1110     if (ptr < s || ptr > PL_parser->bufend)
1111         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1112     for (; s != ptr; s++)
1113         if (*s == '\n') {
1114             CopLINE_inc(PL_curcop);
1115             PL_parser->linestart = s+1;
1116         }
1117     PL_parser->bufptr = ptr;
1118 }
1119
1120 /*
1121 =for apidoc Amx|void|lex_discard_to|char *ptr
1122
1123 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1124 up to I<ptr>.  The remaining content of the buffer will be moved, and
1125 all pointers into the buffer updated appropriately.  I<ptr> must not
1126 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1127 it is not permitted to discard text that has yet to be lexed.
1128
1129 Normally it is not necessarily to do this directly, because it suffices to
1130 use the implicit discarding behaviour of L</lex_next_chunk> and things
1131 based on it.  However, if a token stretches across multiple lines,
1132 and the lexing code has kept multiple lines of text in the buffer for
1133 that purpose, then after completion of the token it would be wise to
1134 explicitly discard the now-unneeded earlier lines, to avoid future
1135 multi-line tokens growing the buffer without bound.
1136
1137 =cut
1138 */
1139
1140 void
1141 Perl_lex_discard_to(pTHX_ char *ptr)
1142 {
1143     char *buf;
1144     STRLEN discard_len;
1145     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1146     buf = SvPVX(PL_parser->linestr);
1147     if (ptr < buf)
1148         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1149     if (ptr == buf)
1150         return;
1151     if (ptr > PL_parser->bufptr)
1152         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1153     discard_len = ptr - buf;
1154     if (PL_parser->oldbufptr < ptr)
1155         PL_parser->oldbufptr = ptr;
1156     if (PL_parser->oldoldbufptr < ptr)
1157         PL_parser->oldoldbufptr = ptr;
1158     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1159         PL_parser->last_uni = NULL;
1160     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1161         PL_parser->last_lop = NULL;
1162     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1163     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1164     PL_parser->bufend -= discard_len;
1165     PL_parser->bufptr -= discard_len;
1166     PL_parser->oldbufptr -= discard_len;
1167     PL_parser->oldoldbufptr -= discard_len;
1168     if (PL_parser->last_uni)
1169         PL_parser->last_uni -= discard_len;
1170     if (PL_parser->last_lop)
1171         PL_parser->last_lop -= discard_len;
1172 }
1173
1174 /*
1175 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1176
1177 Reads in the next chunk of text to be lexed, appending it to
1178 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1179 looked to the end of the current chunk and wants to know more.  It is
1180 usual, but not necessary, for lexing to have consumed the entirety of
1181 the current chunk at this time.
1182
1183 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1184 chunk (i.e., the current chunk has been entirely consumed), normally the
1185 current chunk will be discarded at the same time that the new chunk is
1186 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1187 will not be discarded.  If the current chunk has not been entirely
1188 consumed, then it will not be discarded regardless of the flag.
1189
1190 Returns true if some new text was added to the buffer, or false if the
1191 buffer has reached the end of the input text.
1192
1193 =cut
1194 */
1195
1196 #define LEX_FAKE_EOF 0x80000000
1197
1198 bool
1199 Perl_lex_next_chunk(pTHX_ U32 flags)
1200 {
1201     SV *linestr;
1202     char *buf;
1203     STRLEN old_bufend_pos, new_bufend_pos;
1204     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1205     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1206     bool got_some_for_debugger = 0;
1207     bool got_some;
1208     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1209         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1210     linestr = PL_parser->linestr;
1211     buf = SvPVX(linestr);
1212     if (!(flags & LEX_KEEP_PREVIOUS) &&
1213             PL_parser->bufptr == PL_parser->bufend) {
1214         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1215         linestart_pos = 0;
1216         if (PL_parser->last_uni != PL_parser->bufend)
1217             PL_parser->last_uni = NULL;
1218         if (PL_parser->last_lop != PL_parser->bufend)
1219             PL_parser->last_lop = NULL;
1220         last_uni_pos = last_lop_pos = 0;
1221         *buf = 0;
1222         SvCUR(linestr) = 0;
1223     } else {
1224         old_bufend_pos = PL_parser->bufend - buf;
1225         bufptr_pos = PL_parser->bufptr - buf;
1226         oldbufptr_pos = PL_parser->oldbufptr - buf;
1227         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1228         linestart_pos = PL_parser->linestart - buf;
1229         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1230         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1231     }
1232     if (flags & LEX_FAKE_EOF) {
1233         goto eof;
1234     } else if (!PL_parser->rsfp) {
1235         got_some = 0;
1236     } else if (filter_gets(linestr, old_bufend_pos)) {
1237         got_some = 1;
1238         got_some_for_debugger = 1;
1239     } else {
1240         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1241             sv_setpvs(linestr, "");
1242         eof:
1243         /* End of real input.  Close filehandle (unless it was STDIN),
1244          * then add implicit termination.
1245          */
1246         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1247             PerlIO_clearerr(PL_parser->rsfp);
1248         else if (PL_parser->rsfp)
1249             (void)PerlIO_close(PL_parser->rsfp);
1250         PL_parser->rsfp = NULL;
1251         PL_doextract = FALSE;
1252 #ifdef PERL_MAD
1253         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1254             PL_faketokens = 1;
1255 #endif
1256         if (!PL_in_eval && PL_minus_p) {
1257             sv_catpvs(linestr,
1258                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1259             PL_minus_n = PL_minus_p = 0;
1260         } else if (!PL_in_eval && PL_minus_n) {
1261             sv_catpvs(linestr, /*{*/";}");
1262             PL_minus_n = 0;
1263         } else
1264             sv_catpvs(linestr, ";");
1265         got_some = 1;
1266     }
1267     buf = SvPVX(linestr);
1268     new_bufend_pos = SvCUR(linestr);
1269     PL_parser->bufend = buf + new_bufend_pos;
1270     PL_parser->bufptr = buf + bufptr_pos;
1271     PL_parser->oldbufptr = buf + oldbufptr_pos;
1272     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1273     PL_parser->linestart = buf + linestart_pos;
1274     if (PL_parser->last_uni)
1275         PL_parser->last_uni = buf + last_uni_pos;
1276     if (PL_parser->last_lop)
1277         PL_parser->last_lop = buf + last_lop_pos;
1278     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1279             PL_curstash != PL_debstash) {
1280         /* debugger active and we're not compiling the debugger code,
1281          * so store the line into the debugger's array of lines
1282          */
1283         update_debugger_info(NULL, buf+old_bufend_pos,
1284             new_bufend_pos-old_bufend_pos);
1285     }
1286     return got_some;
1287 }
1288
1289 /*
1290 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1291
1292 Looks ahead one (Unicode) character in the text currently being lexed.
1293 Returns the codepoint (unsigned integer value) of the next character,
1294 or -1 if lexing has reached the end of the input text.  To consume the
1295 peeked character, use L</lex_read_unichar>.
1296
1297 If the next character is in (or extends into) the next chunk of input
1298 text, the next chunk will be read in.  Normally the current chunk will be
1299 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1300 then the current chunk will not be discarded.
1301
1302 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1303 is encountered, an exception is generated.
1304
1305 =cut
1306 */
1307
1308 I32
1309 Perl_lex_peek_unichar(pTHX_ U32 flags)
1310 {
1311     dVAR;
1312     char *s, *bufend;
1313     if (flags & ~(LEX_KEEP_PREVIOUS))
1314         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1315     s = PL_parser->bufptr;
1316     bufend = PL_parser->bufend;
1317     if (UTF) {
1318         U8 head;
1319         I32 unichar;
1320         STRLEN len, retlen;
1321         if (s == bufend) {
1322             if (!lex_next_chunk(flags))
1323                 return -1;
1324             s = PL_parser->bufptr;
1325             bufend = PL_parser->bufend;
1326         }
1327         head = (U8)*s;
1328         if (!(head & 0x80))
1329             return head;
1330         if (head & 0x40) {
1331             len = PL_utf8skip[head];
1332             while ((STRLEN)(bufend-s) < len) {
1333                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1334                     break;
1335                 s = PL_parser->bufptr;
1336                 bufend = PL_parser->bufend;
1337             }
1338         }
1339         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1340         if (retlen == (STRLEN)-1) {
1341             /* malformed UTF-8 */
1342             ENTER;
1343             SAVESPTR(PL_warnhook);
1344             PL_warnhook = PERL_WARNHOOK_FATAL;
1345             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1346             LEAVE;
1347         }
1348         return unichar;
1349     } else {
1350         if (s == bufend) {
1351             if (!lex_next_chunk(flags))
1352                 return -1;
1353             s = PL_parser->bufptr;
1354         }
1355         return (U8)*s;
1356     }
1357 }
1358
1359 /*
1360 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1361
1362 Reads the next (Unicode) character in the text currently being lexed.
1363 Returns the codepoint (unsigned integer value) of the character read,
1364 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1365 if lexing has reached the end of the input text.  To non-destructively
1366 examine the next character, use L</lex_peek_unichar> instead.
1367
1368 If the next character is in (or extends into) the next chunk of input
1369 text, the next chunk will be read in.  Normally the current chunk will be
1370 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1371 then the current chunk will not be discarded.
1372
1373 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1374 is encountered, an exception is generated.
1375
1376 =cut
1377 */
1378
1379 I32
1380 Perl_lex_read_unichar(pTHX_ U32 flags)
1381 {
1382     I32 c;
1383     if (flags & ~(LEX_KEEP_PREVIOUS))
1384         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1385     c = lex_peek_unichar(flags);
1386     if (c != -1) {
1387         if (c == '\n')
1388             CopLINE_inc(PL_curcop);
1389         PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1390     }
1391     return c;
1392 }
1393
1394 /*
1395 =for apidoc Amx|void|lex_read_space|U32 flags
1396
1397 Reads optional spaces, in Perl style, in the text currently being
1398 lexed.  The spaces may include ordinary whitespace characters and
1399 Perl-style comments.  C<#line> directives are processed if encountered.
1400 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1401 at a non-space character (or the end of the input text).
1402
1403 If spaces extend into the next chunk of input text, the next chunk will
1404 be read in.  Normally the current chunk will be discarded at the same
1405 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1406 chunk will not be discarded.
1407
1408 =cut
1409 */
1410
1411 #define LEX_NO_NEXT_CHUNK 0x80000000
1412
1413 void
1414 Perl_lex_read_space(pTHX_ U32 flags)
1415 {
1416     char *s, *bufend;
1417     bool need_incline = 0;
1418     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1419         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1420 #ifdef PERL_MAD
1421     if (PL_skipwhite) {
1422         sv_free(PL_skipwhite);
1423         PL_skipwhite = NULL;
1424     }
1425     if (PL_madskills)
1426         PL_skipwhite = newSVpvs("");
1427 #endif /* PERL_MAD */
1428     s = PL_parser->bufptr;
1429     bufend = PL_parser->bufend;
1430     while (1) {
1431         char c = *s;
1432         if (c == '#') {
1433             do {
1434                 c = *++s;
1435             } while (!(c == '\n' || (c == 0 && s == bufend)));
1436         } else if (c == '\n') {
1437             s++;
1438             PL_parser->linestart = s;
1439             if (s == bufend)
1440                 need_incline = 1;
1441             else
1442                 incline(s);
1443         } else if (isSPACE(c)) {
1444             s++;
1445         } else if (c == 0 && s == bufend) {
1446             bool got_more;
1447 #ifdef PERL_MAD
1448             if (PL_madskills)
1449                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1450 #endif /* PERL_MAD */
1451             if (flags & LEX_NO_NEXT_CHUNK)
1452                 break;
1453             PL_parser->bufptr = s;
1454             CopLINE_inc(PL_curcop);
1455             got_more = lex_next_chunk(flags);
1456             CopLINE_dec(PL_curcop);
1457             s = PL_parser->bufptr;
1458             bufend = PL_parser->bufend;
1459             if (!got_more)
1460                 break;
1461             if (need_incline && PL_parser->rsfp) {
1462                 incline(s);
1463                 need_incline = 0;
1464             }
1465         } else {
1466             break;
1467         }
1468     }
1469 #ifdef PERL_MAD
1470     if (PL_madskills)
1471         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1472 #endif /* PERL_MAD */
1473     PL_parser->bufptr = s;
1474 }
1475
1476 /*
1477  * S_incline
1478  * This subroutine has nothing to do with tilting, whether at windmills
1479  * or pinball tables.  Its name is short for "increment line".  It
1480  * increments the current line number in CopLINE(PL_curcop) and checks
1481  * to see whether the line starts with a comment of the form
1482  *    # line 500 "foo.pm"
1483  * If so, it sets the current line number and file to the values in the comment.
1484  */
1485
1486 STATIC void
1487 S_incline(pTHX_ const char *s)
1488 {
1489     dVAR;
1490     const char *t;
1491     const char *n;
1492     const char *e;
1493
1494     PERL_ARGS_ASSERT_INCLINE;
1495
1496     CopLINE_inc(PL_curcop);
1497     if (*s++ != '#')
1498         return;
1499     while (SPACE_OR_TAB(*s))
1500         s++;
1501     if (strnEQ(s, "line", 4))
1502         s += 4;
1503     else
1504         return;
1505     if (SPACE_OR_TAB(*s))
1506         s++;
1507     else
1508         return;
1509     while (SPACE_OR_TAB(*s))
1510         s++;
1511     if (!isDIGIT(*s))
1512         return;
1513
1514     n = s;
1515     while (isDIGIT(*s))
1516         s++;
1517     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1518         return;
1519     while (SPACE_OR_TAB(*s))
1520         s++;
1521     if (*s == '"' && (t = strchr(s+1, '"'))) {
1522         s++;
1523         e = t + 1;
1524     }
1525     else {
1526         t = s;
1527         while (!isSPACE(*t))
1528             t++;
1529         e = t;
1530     }
1531     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1532         e++;
1533     if (*e != '\n' && *e != '\0')
1534         return;         /* false alarm */
1535
1536     if (t - s > 0) {
1537         const STRLEN len = t - s;
1538 #ifndef USE_ITHREADS
1539         SV *const temp_sv = CopFILESV(PL_curcop);
1540         const char *cf;
1541         STRLEN tmplen;
1542
1543         if (temp_sv) {
1544             cf = SvPVX(temp_sv);
1545             tmplen = SvCUR(temp_sv);
1546         } else {
1547             cf = NULL;
1548             tmplen = 0;
1549         }
1550
1551         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1552             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1553              * to *{"::_<newfilename"} */
1554             /* However, the long form of evals is only turned on by the
1555                debugger - usually they're "(eval %lu)" */
1556             char smallbuf[128];
1557             char *tmpbuf;
1558             GV **gvp;
1559             STRLEN tmplen2 = len;
1560             if (tmplen + 2 <= sizeof smallbuf)
1561                 tmpbuf = smallbuf;
1562             else
1563                 Newx(tmpbuf, tmplen + 2, char);
1564             tmpbuf[0] = '_';
1565             tmpbuf[1] = '<';
1566             memcpy(tmpbuf + 2, cf, tmplen);
1567             tmplen += 2;
1568             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1569             if (gvp) {
1570                 char *tmpbuf2;
1571                 GV *gv2;
1572
1573                 if (tmplen2 + 2 <= sizeof smallbuf)
1574                     tmpbuf2 = smallbuf;
1575                 else
1576                     Newx(tmpbuf2, tmplen2 + 2, char);
1577
1578                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1579                     /* Either they malloc'd it, or we malloc'd it,
1580                        so no prefix is present in ours.  */
1581                     tmpbuf2[0] = '_';
1582                     tmpbuf2[1] = '<';
1583                 }
1584
1585                 memcpy(tmpbuf2 + 2, s, tmplen2);
1586                 tmplen2 += 2;
1587
1588                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1589                 if (!isGV(gv2)) {
1590                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1591                     /* adjust ${"::_<newfilename"} to store the new file name */
1592                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1593                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1594                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1595                 }
1596
1597                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1598             }
1599             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1600         }
1601 #endif
1602         CopFILE_free(PL_curcop);
1603         CopFILE_setn(PL_curcop, s, len);
1604     }
1605     CopLINE_set(PL_curcop, atoi(n)-1);
1606 }
1607
1608 #ifdef PERL_MAD
1609 /* skip space before PL_thistoken */
1610
1611 STATIC char *
1612 S_skipspace0(pTHX_ register char *s)
1613 {
1614     PERL_ARGS_ASSERT_SKIPSPACE0;
1615
1616     s = skipspace(s);
1617     if (!PL_madskills)
1618         return s;
1619     if (PL_skipwhite) {
1620         if (!PL_thiswhite)
1621             PL_thiswhite = newSVpvs("");
1622         sv_catsv(PL_thiswhite, PL_skipwhite);
1623         sv_free(PL_skipwhite);
1624         PL_skipwhite = 0;
1625     }
1626     PL_realtokenstart = s - SvPVX(PL_linestr);
1627     return s;
1628 }
1629
1630 /* skip space after PL_thistoken */
1631
1632 STATIC char *
1633 S_skipspace1(pTHX_ register char *s)
1634 {
1635     const char *start = s;
1636     I32 startoff = start - SvPVX(PL_linestr);
1637
1638     PERL_ARGS_ASSERT_SKIPSPACE1;
1639
1640     s = skipspace(s);
1641     if (!PL_madskills)
1642         return s;
1643     start = SvPVX(PL_linestr) + startoff;
1644     if (!PL_thistoken && PL_realtokenstart >= 0) {
1645         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1646         PL_thistoken = newSVpvn(tstart, start - tstart);
1647     }
1648     PL_realtokenstart = -1;
1649     if (PL_skipwhite) {
1650         if (!PL_nextwhite)
1651             PL_nextwhite = newSVpvs("");
1652         sv_catsv(PL_nextwhite, PL_skipwhite);
1653         sv_free(PL_skipwhite);
1654         PL_skipwhite = 0;
1655     }
1656     return s;
1657 }
1658
1659 STATIC char *
1660 S_skipspace2(pTHX_ register char *s, SV **svp)
1661 {
1662     char *start;
1663     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1664     const I32 startoff = s - SvPVX(PL_linestr);
1665
1666     PERL_ARGS_ASSERT_SKIPSPACE2;
1667
1668     s = skipspace(s);
1669     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1670     if (!PL_madskills || !svp)
1671         return s;
1672     start = SvPVX(PL_linestr) + startoff;
1673     if (!PL_thistoken && PL_realtokenstart >= 0) {
1674         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1675         PL_thistoken = newSVpvn(tstart, start - tstart);
1676         PL_realtokenstart = -1;
1677     }
1678     if (PL_skipwhite) {
1679         if (!*svp)
1680             *svp = newSVpvs("");
1681         sv_setsv(*svp, PL_skipwhite);
1682         sv_free(PL_skipwhite);
1683         PL_skipwhite = 0;
1684     }
1685
1686     return s;
1687 }
1688 #endif
1689
1690 STATIC void
1691 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1692 {
1693     AV *av = CopFILEAVx(PL_curcop);
1694     if (av) {
1695         SV * const sv = newSV_type(SVt_PVMG);
1696         if (orig_sv)
1697             sv_setsv(sv, orig_sv);
1698         else
1699             sv_setpvn(sv, buf, len);
1700         (void)SvIOK_on(sv);
1701         SvIV_set(sv, 0);
1702         av_store(av, (I32)CopLINE(PL_curcop), sv);
1703     }
1704 }
1705
1706 /*
1707  * S_skipspace
1708  * Called to gobble the appropriate amount and type of whitespace.
1709  * Skips comments as well.
1710  */
1711
1712 STATIC char *
1713 S_skipspace(pTHX_ register char *s)
1714 {
1715 #ifdef PERL_MAD
1716     char *start = s;
1717 #endif /* PERL_MAD */
1718     PERL_ARGS_ASSERT_SKIPSPACE;
1719 #ifdef PERL_MAD
1720     if (PL_skipwhite) {
1721         sv_free(PL_skipwhite);
1722         PL_skipwhite = NULL;
1723     }
1724 #endif /* PERL_MAD */
1725     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1726         while (s < PL_bufend && SPACE_OR_TAB(*s))
1727             s++;
1728     } else {
1729         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1730         PL_bufptr = s;
1731         lex_read_space(LEX_KEEP_PREVIOUS |
1732                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1733                     LEX_NO_NEXT_CHUNK : 0));
1734         s = PL_bufptr;
1735         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1736         if (PL_linestart > PL_bufptr)
1737             PL_bufptr = PL_linestart;
1738         return s;
1739     }
1740 #ifdef PERL_MAD
1741     if (PL_madskills)
1742         PL_skipwhite = newSVpvn(start, s-start);
1743 #endif /* PERL_MAD */
1744     return s;
1745 }
1746
1747 /*
1748  * S_check_uni
1749  * Check the unary operators to ensure there's no ambiguity in how they're
1750  * used.  An ambiguous piece of code would be:
1751  *     rand + 5
1752  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1753  * the +5 is its argument.
1754  */
1755
1756 STATIC void
1757 S_check_uni(pTHX)
1758 {
1759     dVAR;
1760     const char *s;
1761     const char *t;
1762
1763     if (PL_oldoldbufptr != PL_last_uni)
1764         return;
1765     while (isSPACE(*PL_last_uni))
1766         PL_last_uni++;
1767     s = PL_last_uni;
1768     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1769         s++;
1770     if ((t = strchr(s, '(')) && t < PL_bufptr)
1771         return;
1772
1773     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1774                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1775                      (int)(s - PL_last_uni), PL_last_uni);
1776 }
1777
1778 /*
1779  * LOP : macro to build a list operator.  Its behaviour has been replaced
1780  * with a subroutine, S_lop() for which LOP is just another name.
1781  */
1782
1783 #define LOP(f,x) return lop(f,x,s)
1784
1785 /*
1786  * S_lop
1787  * Build a list operator (or something that might be one).  The rules:
1788  *  - if we have a next token, then it's a list operator [why?]
1789  *  - if the next thing is an opening paren, then it's a function
1790  *  - else it's a list operator
1791  */
1792
1793 STATIC I32
1794 S_lop(pTHX_ I32 f, int x, char *s)
1795 {
1796     dVAR;
1797
1798     PERL_ARGS_ASSERT_LOP;
1799
1800     pl_yylval.ival = f;
1801     CLINE;
1802     PL_expect = x;
1803     PL_bufptr = s;
1804     PL_last_lop = PL_oldbufptr;
1805     PL_last_lop_op = (OPCODE)f;
1806 #ifdef PERL_MAD
1807     if (PL_lasttoke)
1808         return REPORT(LSTOP);
1809 #else
1810     if (PL_nexttoke)
1811         return REPORT(LSTOP);
1812 #endif
1813     if (*s == '(')
1814         return REPORT(FUNC);
1815     s = PEEKSPACE(s);
1816     if (*s == '(')
1817         return REPORT(FUNC);
1818     else
1819         return REPORT(LSTOP);
1820 }
1821
1822 #ifdef PERL_MAD
1823  /*
1824  * S_start_force
1825  * Sets up for an eventual force_next().  start_force(0) basically does
1826  * an unshift, while start_force(-1) does a push.  yylex removes items
1827  * on the "pop" end.
1828  */
1829
1830 STATIC void
1831 S_start_force(pTHX_ int where)
1832 {
1833     int i;
1834
1835     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1836         where = PL_lasttoke;
1837     assert(PL_curforce < 0 || PL_curforce == where);
1838     if (PL_curforce != where) {
1839         for (i = PL_lasttoke; i > where; --i) {
1840             PL_nexttoke[i] = PL_nexttoke[i-1];
1841         }
1842         PL_lasttoke++;
1843     }
1844     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1845         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1846     PL_curforce = where;
1847     if (PL_nextwhite) {
1848         if (PL_madskills)
1849             curmad('^', newSVpvs(""));
1850         CURMAD('_', PL_nextwhite);
1851     }
1852 }
1853
1854 STATIC void
1855 S_curmad(pTHX_ char slot, SV *sv)
1856 {
1857     MADPROP **where;
1858
1859     if (!sv)
1860         return;
1861     if (PL_curforce < 0)
1862         where = &PL_thismad;
1863     else
1864         where = &PL_nexttoke[PL_curforce].next_mad;
1865
1866     if (PL_faketokens)
1867         sv_setpvs(sv, "");
1868     else {
1869         if (!IN_BYTES) {
1870             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1871                 SvUTF8_on(sv);
1872             else if (PL_encoding) {
1873                 sv_recode_to_utf8(sv, PL_encoding);
1874             }
1875         }
1876     }
1877
1878     /* keep a slot open for the head of the list? */
1879     if (slot != '_' && *where && (*where)->mad_key == '^') {
1880         (*where)->mad_key = slot;
1881         sv_free(MUTABLE_SV(((*where)->mad_val)));
1882         (*where)->mad_val = (void*)sv;
1883     }
1884     else
1885         addmad(newMADsv(slot, sv), where, 0);
1886 }
1887 #else
1888 #  define start_force(where)    NOOP
1889 #  define curmad(slot, sv)      NOOP
1890 #endif
1891
1892 /*
1893  * S_force_next
1894  * When the lexer realizes it knows the next token (for instance,
1895  * it is reordering tokens for the parser) then it can call S_force_next
1896  * to know what token to return the next time the lexer is called.  Caller
1897  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1898  * and possibly PL_expect to ensure the lexer handles the token correctly.
1899  */
1900
1901 STATIC void
1902 S_force_next(pTHX_ I32 type)
1903 {
1904     dVAR;
1905 #ifdef DEBUGGING
1906     if (DEBUG_T_TEST) {
1907         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1908         tokereport(type, &NEXTVAL_NEXTTOKE);
1909     }
1910 #endif
1911 #ifdef PERL_MAD
1912     if (PL_curforce < 0)
1913         start_force(PL_lasttoke);
1914     PL_nexttoke[PL_curforce].next_type = type;
1915     if (PL_lex_state != LEX_KNOWNEXT)
1916         PL_lex_defer = PL_lex_state;
1917     PL_lex_state = LEX_KNOWNEXT;
1918     PL_lex_expect = PL_expect;
1919     PL_curforce = -1;
1920 #else
1921     PL_nexttype[PL_nexttoke] = type;
1922     PL_nexttoke++;
1923     if (PL_lex_state != LEX_KNOWNEXT) {
1924         PL_lex_defer = PL_lex_state;
1925         PL_lex_expect = PL_expect;
1926         PL_lex_state = LEX_KNOWNEXT;
1927     }
1928 #endif
1929 }
1930
1931 STATIC SV *
1932 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1933 {
1934     dVAR;
1935     SV * const sv = newSVpvn_utf8(start, len,
1936                                   !IN_BYTES
1937                                   && UTF
1938                                   && !is_ascii_string((const U8*)start, len)
1939                                   && is_utf8_string((const U8*)start, len));
1940     return sv;
1941 }
1942
1943 /*
1944  * S_force_word
1945  * When the lexer knows the next thing is a word (for instance, it has
1946  * just seen -> and it knows that the next char is a word char, then
1947  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1948  * lookahead.
1949  *
1950  * Arguments:
1951  *   char *start : buffer position (must be within PL_linestr)
1952  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1953  *   int check_keyword : if true, Perl checks to make sure the word isn't
1954  *       a keyword (do this if the word is a label, e.g. goto FOO)
1955  *   int allow_pack : if true, : characters will also be allowed (require,
1956  *       use, etc. do this)
1957  *   int allow_initial_tick : used by the "sub" lexer only.
1958  */
1959
1960 STATIC char *
1961 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1962 {
1963     dVAR;
1964     register char *s;
1965     STRLEN len;
1966
1967     PERL_ARGS_ASSERT_FORCE_WORD;
1968
1969     start = SKIPSPACE1(start);
1970     s = start;
1971     if (isIDFIRST_lazy_if(s,UTF) ||
1972         (allow_pack && *s == ':') ||
1973         (allow_initial_tick && *s == '\'') )
1974     {
1975         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1976         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1977             return start;
1978         start_force(PL_curforce);
1979         if (PL_madskills)
1980             curmad('X', newSVpvn(start,s-start));
1981         if (token == METHOD) {
1982             s = SKIPSPACE1(s);
1983             if (*s == '(')
1984                 PL_expect = XTERM;
1985             else {
1986                 PL_expect = XOPERATOR;
1987             }
1988         }
1989         if (PL_madskills)
1990             curmad('g', newSVpvs( "forced" ));
1991         NEXTVAL_NEXTTOKE.opval
1992             = (OP*)newSVOP(OP_CONST,0,
1993                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1994         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1995         force_next(token);
1996     }
1997     return s;
1998 }
1999
2000 /*
2001  * S_force_ident
2002  * Called when the lexer wants $foo *foo &foo etc, but the program
2003  * text only contains the "foo" portion.  The first argument is a pointer
2004  * to the "foo", and the second argument is the type symbol to prefix.
2005  * Forces the next token to be a "WORD".
2006  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2007  */
2008
2009 STATIC void
2010 S_force_ident(pTHX_ register const char *s, int kind)
2011 {
2012     dVAR;
2013
2014     PERL_ARGS_ASSERT_FORCE_IDENT;
2015
2016     if (*s) {
2017         const STRLEN len = strlen(s);
2018         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2019         start_force(PL_curforce);
2020         NEXTVAL_NEXTTOKE.opval = o;
2021         force_next(WORD);
2022         if (kind) {
2023             o->op_private = OPpCONST_ENTERED;
2024             /* XXX see note in pp_entereval() for why we forgo typo
2025                warnings if the symbol must be introduced in an eval.
2026                GSAR 96-10-12 */
2027             gv_fetchpvn_flags(s, len,
2028                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2029                               : GV_ADD,
2030                               kind == '$' ? SVt_PV :
2031                               kind == '@' ? SVt_PVAV :
2032                               kind == '%' ? SVt_PVHV :
2033                               SVt_PVGV
2034                               );
2035         }
2036     }
2037 }
2038
2039 NV
2040 Perl_str_to_version(pTHX_ SV *sv)
2041 {
2042     NV retval = 0.0;
2043     NV nshift = 1.0;
2044     STRLEN len;
2045     const char *start = SvPV_const(sv,len);
2046     const char * const end = start + len;
2047     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2048
2049     PERL_ARGS_ASSERT_STR_TO_VERSION;
2050
2051     while (start < end) {
2052         STRLEN skip;
2053         UV n;
2054         if (utf)
2055             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2056         else {
2057             n = *(U8*)start;
2058             skip = 1;
2059         }
2060         retval += ((NV)n)/nshift;
2061         start += skip;
2062         nshift *= 1000;
2063     }
2064     return retval;
2065 }
2066
2067 /*
2068  * S_force_version
2069  * Forces the next token to be a version number.
2070  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071  * and if "guessing" is TRUE, then no new token is created (and the caller
2072  * must use an alternative parsing method).
2073  */
2074
2075 STATIC char *
2076 S_force_version(pTHX_ char *s, int guessing)
2077 {
2078     dVAR;
2079     OP *version = NULL;
2080     char *d;
2081 #ifdef PERL_MAD
2082     I32 startoff = s - SvPVX(PL_linestr);
2083 #endif
2084
2085     PERL_ARGS_ASSERT_FORCE_VERSION;
2086
2087     s = SKIPSPACE1(s);
2088
2089     d = s;
2090     if (*d == 'v')
2091         d++;
2092     if (isDIGIT(*d)) {
2093         while (isDIGIT(*d) || *d == '_' || *d == '.')
2094             d++;
2095 #ifdef PERL_MAD
2096         if (PL_madskills) {
2097             start_force(PL_curforce);
2098             curmad('X', newSVpvn(s,d-s));
2099         }
2100 #endif
2101         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
2102             SV *ver;
2103 #ifdef USE_LOCALE_NUMERIC
2104             char *loc = setlocale(LC_NUMERIC, "C");
2105 #endif
2106             s = scan_num(s, &pl_yylval);
2107 #ifdef USE_LOCALE_NUMERIC
2108             setlocale(LC_NUMERIC, loc);
2109 #endif
2110             version = pl_yylval.opval;
2111             ver = cSVOPx(version)->op_sv;
2112             if (SvPOK(ver) && !SvNIOK(ver)) {
2113                 SvUPGRADE(ver, SVt_PVNV);
2114                 SvNV_set(ver, str_to_version(ver));
2115                 SvNOK_on(ver);          /* hint that it is a version */
2116             }
2117         }
2118         else if (guessing) {
2119 #ifdef PERL_MAD
2120             if (PL_madskills) {
2121                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2122                 PL_nextwhite = 0;
2123                 s = SvPVX(PL_linestr) + startoff;
2124             }
2125 #endif
2126             return s;
2127         }
2128     }
2129
2130 #ifdef PERL_MAD
2131     if (PL_madskills && !version) {
2132         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2133         PL_nextwhite = 0;
2134         s = SvPVX(PL_linestr) + startoff;
2135     }
2136 #endif
2137     /* NOTE: The parser sees the package name and the VERSION swapped */
2138     start_force(PL_curforce);
2139     NEXTVAL_NEXTTOKE.opval = version;
2140     force_next(WORD);
2141
2142     return s;
2143 }
2144
2145 /*
2146  * S_force_strict_version
2147  * Forces the next token to be a version number using strict syntax rules.
2148  */
2149
2150 STATIC char *
2151 S_force_strict_version(pTHX_ char *s)
2152 {
2153     dVAR;
2154     OP *version = NULL;
2155 #ifdef PERL_MAD
2156     I32 startoff = s - SvPVX(PL_linestr);
2157 #endif
2158     const char *errstr = NULL;
2159
2160     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2161
2162     while (isSPACE(*s)) /* leading whitespace */
2163         s++;
2164
2165     if (is_STRICT_VERSION(s,&errstr)) {
2166         SV *ver = newSV(0);
2167         s = (char *)scan_version(s, ver, 0);
2168         version = newSVOP(OP_CONST, 0, ver);
2169     }
2170     else if ( (*s != ';' && *s != '}' ) && (s = SKIPSPACE1(s), (*s != ';' && *s !='}' ))) {
2171         PL_bufptr = s;
2172         if (errstr)
2173             yyerror(errstr); /* version required */
2174         return s;
2175     }
2176
2177 #ifdef PERL_MAD
2178     if (PL_madskills && !version) {
2179         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2180         PL_nextwhite = 0;
2181         s = SvPVX(PL_linestr) + startoff;
2182     }
2183 #endif
2184     /* NOTE: The parser sees the package name and the VERSION swapped */
2185     start_force(PL_curforce);
2186     NEXTVAL_NEXTTOKE.opval = version;
2187     force_next(WORD);
2188
2189     return s;
2190 }
2191
2192 /*
2193  * S_tokeq
2194  * Tokenize a quoted string passed in as an SV.  It finds the next
2195  * chunk, up to end of string or a backslash.  It may make a new
2196  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2197  * turns \\ into \.
2198  */
2199
2200 STATIC SV *
2201 S_tokeq(pTHX_ SV *sv)
2202 {
2203     dVAR;
2204     register char *s;
2205     register char *send;
2206     register char *d;
2207     STRLEN len = 0;
2208     SV *pv = sv;
2209
2210     PERL_ARGS_ASSERT_TOKEQ;
2211
2212     if (!SvLEN(sv))
2213         goto finish;
2214
2215     s = SvPV_force(sv, len);
2216     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2217         goto finish;
2218     send = s + len;
2219     while (s < send && *s != '\\')
2220         s++;
2221     if (s == send)
2222         goto finish;
2223     d = s;
2224     if ( PL_hints & HINT_NEW_STRING ) {
2225         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2226     }
2227     while (s < send) {
2228         if (*s == '\\') {
2229             if (s + 1 < send && (s[1] == '\\'))
2230                 s++;            /* all that, just for this */
2231         }
2232         *d++ = *s++;
2233     }
2234     *d = '\0';
2235     SvCUR_set(sv, d - SvPVX_const(sv));
2236   finish:
2237     if ( PL_hints & HINT_NEW_STRING )
2238        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2239     return sv;
2240 }
2241
2242 /*
2243  * Now come three functions related to double-quote context,
2244  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2245  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2246  * interact with PL_lex_state, and create fake ( ... ) argument lists
2247  * to handle functions and concatenation.
2248  * They assume that whoever calls them will be setting up a fake
2249  * join call, because each subthing puts a ',' after it.  This lets
2250  *   "lower \luPpEr"
2251  * become
2252  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2253  *
2254  * (I'm not sure whether the spurious commas at the end of lcfirst's
2255  * arguments and join's arguments are created or not).
2256  */
2257
2258 /*
2259  * S_sublex_start
2260  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2261  *
2262  * Pattern matching will set PL_lex_op to the pattern-matching op to
2263  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2264  *
2265  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2266  *
2267  * Everything else becomes a FUNC.
2268  *
2269  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2270  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2271  * call to S_sublex_push().
2272  */
2273
2274 STATIC I32
2275 S_sublex_start(pTHX)
2276 {
2277     dVAR;
2278     register const I32 op_type = pl_yylval.ival;
2279
2280     if (op_type == OP_NULL) {
2281         pl_yylval.opval = PL_lex_op;
2282         PL_lex_op = NULL;
2283         return THING;
2284     }
2285     if (op_type == OP_CONST || op_type == OP_READLINE) {
2286         SV *sv = tokeq(PL_lex_stuff);
2287
2288         if (SvTYPE(sv) == SVt_PVIV) {
2289             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2290             STRLEN len;
2291             const char * const p = SvPV_const(sv, len);
2292             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2293             SvREFCNT_dec(sv);
2294             sv = nsv;
2295         }
2296         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2297         PL_lex_stuff = NULL;
2298         /* Allow <FH> // "foo" */
2299         if (op_type == OP_READLINE)
2300             PL_expect = XTERMORDORDOR;
2301         return THING;
2302     }
2303     else if (op_type == OP_BACKTICK && PL_lex_op) {
2304         /* readpipe() vas overriden */
2305         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2306         pl_yylval.opval = PL_lex_op;
2307         PL_lex_op = NULL;
2308         PL_lex_stuff = NULL;
2309         return THING;
2310     }
2311
2312     PL_sublex_info.super_state = PL_lex_state;
2313     PL_sublex_info.sub_inwhat = (U16)op_type;
2314     PL_sublex_info.sub_op = PL_lex_op;
2315     PL_lex_state = LEX_INTERPPUSH;
2316
2317     PL_expect = XTERM;
2318     if (PL_lex_op) {
2319         pl_yylval.opval = PL_lex_op;
2320         PL_lex_op = NULL;
2321         return PMFUNC;
2322     }
2323     else
2324         return FUNC;
2325 }
2326
2327 /*
2328  * S_sublex_push
2329  * Create a new scope to save the lexing state.  The scope will be
2330  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2331  * to the uc, lc, etc. found before.
2332  * Sets PL_lex_state to LEX_INTERPCONCAT.
2333  */
2334
2335 STATIC I32
2336 S_sublex_push(pTHX)
2337 {
2338     dVAR;
2339     ENTER;
2340
2341     PL_lex_state = PL_sublex_info.super_state;
2342     SAVEBOOL(PL_lex_dojoin);
2343     SAVEI32(PL_lex_brackets);
2344     SAVEI32(PL_lex_casemods);
2345     SAVEI32(PL_lex_starts);
2346     SAVEI8(PL_lex_state);
2347     SAVEVPTR(PL_lex_inpat);
2348     SAVEI16(PL_lex_inwhat);
2349     SAVECOPLINE(PL_curcop);
2350     SAVEPPTR(PL_bufptr);
2351     SAVEPPTR(PL_bufend);
2352     SAVEPPTR(PL_oldbufptr);
2353     SAVEPPTR(PL_oldoldbufptr);
2354     SAVEPPTR(PL_last_lop);
2355     SAVEPPTR(PL_last_uni);
2356     SAVEPPTR(PL_linestart);
2357     SAVESPTR(PL_linestr);
2358     SAVEGENERICPV(PL_lex_brackstack);
2359     SAVEGENERICPV(PL_lex_casestack);
2360
2361     PL_linestr = PL_lex_stuff;
2362     PL_lex_stuff = NULL;
2363
2364     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2365         = SvPVX(PL_linestr);
2366     PL_bufend += SvCUR(PL_linestr);
2367     PL_last_lop = PL_last_uni = NULL;
2368     SAVEFREESV(PL_linestr);
2369
2370     PL_lex_dojoin = FALSE;
2371     PL_lex_brackets = 0;
2372     Newx(PL_lex_brackstack, 120, char);
2373     Newx(PL_lex_casestack, 12, char);
2374     PL_lex_casemods = 0;
2375     *PL_lex_casestack = '\0';
2376     PL_lex_starts = 0;
2377     PL_lex_state = LEX_INTERPCONCAT;
2378     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2379
2380     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2381     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2382         PL_lex_inpat = PL_sublex_info.sub_op;
2383     else
2384         PL_lex_inpat = NULL;
2385
2386     return '(';
2387 }
2388
2389 /*
2390  * S_sublex_done
2391  * Restores lexer state after a S_sublex_push.
2392  */
2393
2394 STATIC I32
2395 S_sublex_done(pTHX)
2396 {
2397     dVAR;
2398     if (!PL_lex_starts++) {
2399         SV * const sv = newSVpvs("");
2400         if (SvUTF8(PL_linestr))
2401             SvUTF8_on(sv);
2402         PL_expect = XOPERATOR;
2403         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2404         return THING;
2405     }
2406
2407     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2408         PL_lex_state = LEX_INTERPCASEMOD;
2409         return yylex();
2410     }
2411
2412     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2413     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2414         PL_linestr = PL_lex_repl;
2415         PL_lex_inpat = 0;
2416         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2417         PL_bufend += SvCUR(PL_linestr);
2418         PL_last_lop = PL_last_uni = NULL;
2419         SAVEFREESV(PL_linestr);
2420         PL_lex_dojoin = FALSE;
2421         PL_lex_brackets = 0;
2422         PL_lex_casemods = 0;
2423         *PL_lex_casestack = '\0';
2424         PL_lex_starts = 0;
2425         if (SvEVALED(PL_lex_repl)) {
2426             PL_lex_state = LEX_INTERPNORMAL;
2427             PL_lex_starts++;
2428             /*  we don't clear PL_lex_repl here, so that we can check later
2429                 whether this is an evalled subst; that means we rely on the
2430                 logic to ensure sublex_done() is called again only via the
2431                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2432         }
2433         else {
2434             PL_lex_state = LEX_INTERPCONCAT;
2435             PL_lex_repl = NULL;
2436         }
2437         return ',';
2438     }
2439     else {
2440 #ifdef PERL_MAD
2441         if (PL_madskills) {
2442             if (PL_thiswhite) {
2443                 if (!PL_endwhite)
2444                     PL_endwhite = newSVpvs("");
2445                 sv_catsv(PL_endwhite, PL_thiswhite);
2446                 PL_thiswhite = 0;
2447             }
2448             if (PL_thistoken)
2449                 sv_setpvs(PL_thistoken,"");
2450             else
2451                 PL_realtokenstart = -1;
2452         }
2453 #endif
2454         LEAVE;
2455         PL_bufend = SvPVX(PL_linestr);
2456         PL_bufend += SvCUR(PL_linestr);
2457         PL_expect = XOPERATOR;
2458         PL_sublex_info.sub_inwhat = 0;
2459         return ')';
2460     }
2461 }
2462
2463 /*
2464   scan_const
2465
2466   Extracts a pattern, double-quoted string, or transliteration.  This
2467   is terrifying code.
2468
2469   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2470   processing a pattern (PL_lex_inpat is true), a transliteration
2471   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2472
2473   Returns a pointer to the character scanned up to. If this is
2474   advanced from the start pointer supplied (i.e. if anything was
2475   successfully parsed), will leave an OP for the substring scanned
2476   in pl_yylval. Caller must intuit reason for not parsing further
2477   by looking at the next characters herself.
2478
2479   In patterns:
2480     backslashes:
2481       constants: \N{NAME} only
2482       case and quoting: \U \Q \E
2483     stops on @ and $, but not for $ as tail anchor
2484
2485   In transliterations:
2486     characters are VERY literal, except for - not at the start or end
2487     of the string, which indicates a range. If the range is in bytes,
2488     scan_const expands the range to the full set of intermediate
2489     characters. If the range is in utf8, the hyphen is replaced with
2490     a certain range mark which will be handled by pmtrans() in op.c.
2491
2492   In double-quoted strings:
2493     backslashes:
2494       double-quoted style: \r and \n
2495       constants: \x31, etc.
2496       deprecated backrefs: \1 (in substitution replacements)
2497       case and quoting: \U \Q \E
2498     stops on @ and $
2499
2500   scan_const does *not* construct ops to handle interpolated strings.
2501   It stops processing as soon as it finds an embedded $ or @ variable
2502   and leaves it to the caller to work out what's going on.
2503
2504   embedded arrays (whether in pattern or not) could be:
2505       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2506
2507   $ in double-quoted strings must be the symbol of an embedded scalar.
2508
2509   $ in pattern could be $foo or could be tail anchor.  Assumption:
2510   it's a tail anchor if $ is the last thing in the string, or if it's
2511   followed by one of "()| \r\n\t"
2512
2513   \1 (backreferences) are turned into $1
2514
2515   The structure of the code is
2516       while (there's a character to process) {
2517           handle transliteration ranges
2518           skip regexp comments /(?#comment)/ and codes /(?{code})/
2519           skip #-initiated comments in //x patterns
2520           check for embedded arrays
2521           check for embedded scalars
2522           if (backslash) {
2523               deprecate \1 in substitution replacements
2524               handle string-changing backslashes \l \U \Q \E, etc.
2525               switch (what was escaped) {
2526                   handle \- in a transliteration (becomes a literal -)
2527                   if a pattern and not \N{, go treat as regular character
2528                   handle \132 (octal characters)
2529                   handle \x15 and \x{1234} (hex characters)
2530                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2531                   handle \cV (control characters)
2532                   handle printf-style backslashes (\f, \r, \n, etc)
2533               } (end switch)
2534               continue
2535           } (end if backslash)
2536           handle regular character
2537     } (end while character to read)
2538
2539 */
2540
2541 STATIC char *
2542 S_scan_const(pTHX_ char *start)
2543 {
2544     dVAR;
2545     register char *send = PL_bufend;            /* end of the constant */
2546     SV *sv = newSV(send - start);               /* sv for the constant.  See
2547                                                    note below on sizing. */
2548     register char *s = start;                   /* start of the constant */
2549     register char *d = SvPVX(sv);               /* destination for copies */
2550     bool dorange = FALSE;                       /* are we in a translit range? */
2551     bool didrange = FALSE;                      /* did we just finish a range? */
2552     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2553     I32  this_utf8 = UTF;                       /* Is the source string assumed
2554                                                    to be UTF8?  But, this can
2555                                                    show as true when the source
2556                                                    isn't utf8, as for example
2557                                                    when it is entirely composed
2558                                                    of hex constants */
2559
2560     /* Note on sizing:  The scanned constant is placed into sv, which is
2561      * initialized by newSV() assuming one byte of output for every byte of
2562      * input.  This routine expects newSV() to allocate an extra byte for a
2563      * trailing NUL, which this routine will append if it gets to the end of
2564      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2565      * CAPITAL LETTER A}), or more output than input if the constant ends up
2566      * recoded to utf8, but each time a construct is found that might increase
2567      * the needed size, SvGROW() is called.  Its size parameter each time is
2568      * based on the best guess estimate at the time, namely the length used so
2569      * far, plus the length the current construct will occupy, plus room for
2570      * the trailing NUL, plus one byte for every input byte still unscanned */
2571
2572     UV uv;
2573 #ifdef EBCDIC
2574     UV literal_endpoint = 0;
2575     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2576 #endif
2577
2578     PERL_ARGS_ASSERT_SCAN_CONST;
2579
2580     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2581         /* If we are doing a trans and we know we want UTF8 set expectation */
2582         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2583         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2584     }
2585
2586
2587     while (s < send || dorange) {
2588
2589         /* get transliterations out of the way (they're most literal) */
2590         if (PL_lex_inwhat == OP_TRANS) {
2591             /* expand a range A-Z to the full set of characters.  AIE! */
2592             if (dorange) {
2593                 I32 i;                          /* current expanded character */
2594                 I32 min;                        /* first character in range */
2595                 I32 max;                        /* last character in range */
2596
2597 #ifdef EBCDIC
2598                 UV uvmax = 0;
2599 #endif
2600
2601                 if (has_utf8
2602 #ifdef EBCDIC
2603                     && !native_range
2604 #endif
2605                     ) {
2606                     char * const c = (char*)utf8_hop((U8*)d, -1);
2607                     char *e = d++;
2608                     while (e-- > c)
2609                         *(e + 1) = *e;
2610                     *c = (char)UTF_TO_NATIVE(0xff);
2611                     /* mark the range as done, and continue */
2612                     dorange = FALSE;
2613                     didrange = TRUE;
2614                     continue;
2615                 }
2616
2617                 i = d - SvPVX_const(sv);                /* remember current offset */
2618 #ifdef EBCDIC
2619                 SvGROW(sv,
2620                        SvLEN(sv) + (has_utf8 ?
2621                                     (512 - UTF_CONTINUATION_MARK +
2622                                      UNISKIP(0x100))
2623                                     : 256));
2624                 /* How many two-byte within 0..255: 128 in UTF-8,
2625                  * 96 in UTF-8-mod. */
2626 #else
2627                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2628 #endif
2629                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2630 #ifdef EBCDIC
2631                 if (has_utf8) {
2632                     int j;
2633                     for (j = 0; j <= 1; j++) {
2634                         char * const c = (char*)utf8_hop((U8*)d, -1);
2635                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2636                         if (j)
2637                             min = (U8)uv;
2638                         else if (uv < 256)
2639                             max = (U8)uv;
2640                         else {
2641                             max = (U8)0xff; /* only to \xff */
2642                             uvmax = uv; /* \x{100} to uvmax */
2643                         }
2644                         d = c; /* eat endpoint chars */
2645                      }
2646                 }
2647                else {
2648 #endif
2649                    d -= 2;              /* eat the first char and the - */
2650                    min = (U8)*d;        /* first char in range */
2651                    max = (U8)d[1];      /* last char in range  */
2652 #ifdef EBCDIC
2653                }
2654 #endif
2655
2656                 if (min > max) {
2657                     Perl_croak(aTHX_
2658                                "Invalid range \"%c-%c\" in transliteration operator",
2659                                (char)min, (char)max);
2660                 }
2661
2662 #ifdef EBCDIC
2663                 if (literal_endpoint == 2 &&
2664                     ((isLOWER(min) && isLOWER(max)) ||
2665                      (isUPPER(min) && isUPPER(max)))) {
2666                     if (isLOWER(min)) {
2667                         for (i = min; i <= max; i++)
2668                             if (isLOWER(i))
2669                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2670                     } else {
2671                         for (i = min; i <= max; i++)
2672                             if (isUPPER(i))
2673                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2674                     }
2675                 }
2676                 else
2677 #endif
2678                     for (i = min; i <= max; i++)
2679 #ifdef EBCDIC
2680                         if (has_utf8) {
2681                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2682                             if (UNI_IS_INVARIANT(ch))
2683                                 *d++ = (U8)i;
2684                             else {
2685                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2686                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2687                             }
2688                         }
2689                         else
2690 #endif
2691                             *d++ = (char)i;
2692
2693 #ifdef EBCDIC
2694                 if (uvmax) {
2695                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2696                     if (uvmax > 0x101)
2697                         *d++ = (char)UTF_TO_NATIVE(0xff);
2698                     if (uvmax > 0x100)
2699                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2700                 }
2701 #endif
2702
2703                 /* mark the range as done, and continue */
2704                 dorange = FALSE;
2705                 didrange = TRUE;
2706 #ifdef EBCDIC
2707                 literal_endpoint = 0;
2708 #endif
2709                 continue;
2710             }
2711
2712             /* range begins (ignore - as first or last char) */
2713             else if (*s == '-' && s+1 < send  && s != start) {
2714                 if (didrange) {
2715                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2716                 }
2717                 if (has_utf8
2718 #ifdef EBCDIC
2719                     && !native_range
2720 #endif
2721                     ) {
2722                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2723                     s++;
2724                     continue;
2725                 }
2726                 dorange = TRUE;
2727                 s++;
2728             }
2729             else {
2730                 didrange = FALSE;
2731 #ifdef EBCDIC
2732                 literal_endpoint = 0;
2733                 native_range = TRUE;
2734 #endif
2735             }
2736         }
2737
2738         /* if we get here, we're not doing a transliteration */
2739
2740         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2741            except for the last char, which will be done separately. */
2742         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2743             if (s[2] == '#') {
2744                 while (s+1 < send && *s != ')')
2745                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2746             }
2747             else if (s[2] == '{' /* This should match regcomp.c */
2748                     || (s[2] == '?' && s[3] == '{'))
2749             {
2750                 I32 count = 1;
2751                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2752                 char c;
2753
2754                 while (count && (c = *regparse)) {
2755                     if (c == '\\' && regparse[1])
2756                         regparse++;
2757                     else if (c == '{')
2758                         count++;
2759                     else if (c == '}')
2760                         count--;
2761                     regparse++;
2762                 }
2763                 if (*regparse != ')')
2764                     regparse--;         /* Leave one char for continuation. */
2765                 while (s < regparse)
2766                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2767             }
2768         }
2769
2770         /* likewise skip #-initiated comments in //x patterns */
2771         else if (*s == '#' && PL_lex_inpat &&
2772           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2773             while (s+1 < send && *s != '\n')
2774                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2775         }
2776
2777         /* check for embedded arrays
2778            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2779            */
2780         else if (*s == '@' && s[1]) {
2781             if (isALNUM_lazy_if(s+1,UTF))
2782                 break;
2783             if (strchr(":'{$", s[1]))
2784                 break;
2785             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2786                 break; /* in regexp, neither @+ nor @- are interpolated */
2787         }
2788
2789         /* check for embedded scalars.  only stop if we're sure it's a
2790            variable.
2791         */
2792         else if (*s == '$') {
2793             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2794                 break;
2795             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2796                 if (s[1] == '\\') {
2797                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2798                                    "Possible unintended interpolation of $\\ in regex");
2799                 }
2800                 break;          /* in regexp, $ might be tail anchor */
2801             }
2802         }
2803
2804         /* End of else if chain - OP_TRANS rejoin rest */
2805
2806         /* backslashes */
2807         if (*s == '\\' && s+1 < send) {
2808             char* e;    /* Can be used for ending '}', etc. */
2809
2810             s++;
2811
2812             /* deprecate \1 in strings and substitution replacements */
2813             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2814                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2815             {
2816                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2817                 *--s = '$';
2818                 break;
2819             }
2820
2821             /* string-change backslash escapes */
2822             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2823                 --s;
2824                 break;
2825             }
2826             /* In a pattern, process \N, but skip any other backslash escapes.
2827              * This is because we don't want to translate an escape sequence
2828              * into a meta symbol and have the regex compiler use the meta
2829              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2830              * in spite of this, we do have to process \N here while the proper
2831              * charnames handler is in scope.  See bugs #56444 and #62056.
2832              * There is a complication because \N in a pattern may also stand
2833              * for 'match a non-nl', and not mean a charname, in which case its
2834              * processing should be deferred to the regex compiler.  To be a
2835              * charname it must be followed immediately by a '{', and not look
2836              * like \N followed by a curly quantifier, i.e., not something like
2837              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2838              * quantifier */
2839             else if (PL_lex_inpat
2840                     && (*s != 'N'
2841                         || s[1] != '{'
2842                         || regcurly(s + 1)))
2843             {
2844                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2845                 goto default_action;
2846             }
2847
2848             switch (*s) {
2849
2850             /* quoted - in transliterations */
2851             case '-':
2852                 if (PL_lex_inwhat == OP_TRANS) {
2853                     *d++ = *s++;
2854                     continue;
2855                 }
2856                 /* FALL THROUGH */
2857             default:
2858                 {
2859                     if ((isALPHA(*s) || isDIGIT(*s)))
2860                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2861                                        "Unrecognized escape \\%c passed through",
2862                                        *s);
2863                     /* default action is to copy the quoted character */
2864                     goto default_action;
2865                 }
2866
2867             /* eg. \132 indicates the octal constant 0x132 */
2868             case '0': case '1': case '2': case '3':
2869             case '4': case '5': case '6': case '7':
2870                 {
2871                     I32 flags = 0;
2872                     STRLEN len = 3;
2873                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2874                     s += len;
2875                 }
2876                 goto NUM_ESCAPE_INSERT;
2877
2878             /* eg. \x24 indicates the hex constant 0x24 */
2879             case 'x':
2880                 ++s;
2881                 if (*s == '{') {
2882                     char* const e = strchr(s, '}');
2883                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2884                       PERL_SCAN_DISALLOW_PREFIX;
2885                     STRLEN len;
2886
2887                     ++s;
2888                     if (!e) {
2889                         yyerror("Missing right brace on \\x{}");
2890                         continue;
2891                     }
2892                     len = e - s;
2893                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2894                     s = e + 1;
2895                 }
2896                 else {
2897                     {
2898                         STRLEN len = 2;
2899                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2900                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2901                         s += len;
2902                     }
2903                 }
2904
2905               NUM_ESCAPE_INSERT:
2906                 /* Insert oct or hex escaped character.  There will always be
2907                  * enough room in sv since such escapes will be longer than any
2908                  * UTF-8 sequence they can end up as, except if they force us
2909                  * to recode the rest of the string into utf8 */
2910
2911                 /* Here uv is the ordinal of the next character being added in
2912                  * unicode (converted from native). */
2913                 if (!UNI_IS_INVARIANT(uv)) {
2914                     if (!has_utf8 && uv > 255) {
2915                         /* Might need to recode whatever we have accumulated so
2916                          * far if it contains any chars variant in utf8 or
2917                          * utf-ebcdic. */
2918
2919                         SvCUR_set(sv, d - SvPVX_const(sv));
2920                         SvPOK_on(sv);
2921                         *d = '\0';
2922                         /* See Note on sizing above.  */
2923                         sv_utf8_upgrade_flags_grow(sv,
2924                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2925                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2926                         d = SvPVX(sv) + SvCUR(sv);
2927                         has_utf8 = TRUE;
2928                     }
2929
2930                     if (has_utf8) {
2931                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2932                         if (PL_lex_inwhat == OP_TRANS &&
2933                             PL_sublex_info.sub_op) {
2934                             PL_sublex_info.sub_op->op_private |=
2935                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2936                                              : OPpTRANS_TO_UTF);
2937                         }
2938 #ifdef EBCDIC
2939                         if (uv > 255 && !dorange)
2940                             native_range = FALSE;
2941 #endif
2942                     }
2943                     else {
2944                         *d++ = (char)uv;
2945                     }
2946                 }
2947                 else {
2948                     *d++ = (char) uv;
2949                 }
2950                 continue;
2951
2952             case 'N':
2953                 /* In a non-pattern \N must be a named character, like \N{LATIN
2954                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
2955                  * mean to match a non-newline.  For non-patterns, named
2956                  * characters are converted to their string equivalents. In
2957                  * patterns, named characters are not converted to their
2958                  * ultimate forms for the same reasons that other escapes
2959                  * aren't.  Instead, they are converted to the \N{U+...} form
2960                  * to get the value from the charnames that is in effect right
2961                  * now, while preserving the fact that it was a named character
2962                  * so that the regex compiler knows this */
2963
2964                 /* This section of code doesn't generally use the
2965                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
2966                  * a close examination of this macro and determined it is a
2967                  * no-op except on utfebcdic variant characters.  Every
2968                  * character generated by this that would normally need to be
2969                  * enclosed by this macro is invariant, so the macro is not
2970                  * needed, and would complicate use of copy(). There are other
2971                  * parts of this file where the macro is used inconsistently,
2972                  * but are saved by it being a no-op */
2973
2974                 /* The structure of this section of code (besides checking for
2975                  * errors and upgrading to utf8) is:
2976                  *  Further disambiguate between the two meanings of \N, and if
2977                  *      not a charname, go process it elsewhere
2978                  *  If of form \N{U+...}, pass it through if a pattern;
2979                  *      otherwise convert to utf8
2980                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2981                  *  pattern; otherwise convert to utf8 */
2982
2983                 /* Here, s points to the 'N'; the test below is guaranteed to
2984                  * succeed if we are being called on a pattern as we already
2985                  * know from a test above that the next character is a '{'.
2986                  * On a non-pattern \N must mean 'named sequence, which
2987                  * requires braces */
2988                 s++;
2989                 if (*s != '{') {
2990                     yyerror("Missing braces on \\N{}");
2991                     continue;
2992                 }
2993                 s++;
2994
2995                 /* If there is no matching '}', it is an error. */
2996                 if (! (e = strchr(s, '}'))) {
2997                     if (! PL_lex_inpat) {
2998                         yyerror("Missing right brace on \\N{}");
2999                     } else {
3000                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3001                     }
3002                     continue;
3003                 }
3004
3005                 /* Here it looks like a named character */
3006
3007                 if (PL_lex_inpat) {
3008
3009                     /* XXX This block is temporary code.  \N{} implies that the
3010                      * pattern is to have Unicode semantics, and therefore
3011                      * currently has to be encoded in utf8.  By putting it in
3012                      * utf8 now, we save a whole pass in the regular expression
3013                      * compiler.  Once that code is changed so Unicode
3014                      * semantics doesn't necessarily have to be in utf8, this
3015                      * block should be removed */
3016                     if (!has_utf8) {
3017                         SvCUR_set(sv, d - SvPVX_const(sv));
3018                         SvPOK_on(sv);
3019                         *d = '\0';
3020                         /* See Note on sizing above.  */
3021                         sv_utf8_upgrade_flags_grow(sv,
3022                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3023                                         /* 5 = '\N{' + cur char + NUL */
3024                                         (STRLEN)(send - s) + 5);
3025                         d = SvPVX(sv) + SvCUR(sv);
3026                         has_utf8 = TRUE;
3027                     }
3028                 }
3029
3030                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3031                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3032                                 | PERL_SCAN_DISALLOW_PREFIX;
3033                     STRLEN len;
3034
3035                     /* For \N{U+...}, the '...' is a unicode value even on
3036                      * EBCDIC machines */
3037                     s += 2;         /* Skip to next char after the 'U+' */
3038                     len = e - s;
3039                     uv = grok_hex(s, &len, &flags, NULL);
3040                     if (len == 0 || len != (STRLEN)(e - s)) {
3041                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3042                         s = e + 1;
3043                         continue;
3044                     }
3045
3046                     if (PL_lex_inpat) {
3047
3048                         /* Pass through to the regex compiler unchanged.  The
3049                          * reason we evaluated the number above is to make sure
3050                          * there wasn't a syntax error. */
3051                         s -= 5;     /* Include the '\N{U+' */
3052                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3053                         d += e - s + 1;
3054                     }
3055                     else {  /* Not a pattern: convert the hex to string */
3056
3057                          /* If destination is not in utf8, unconditionally
3058                           * recode it to be so.  This is because \N{} implies
3059                           * Unicode semantics, and scalars have to be in utf8
3060                           * to guarantee those semantics */
3061                         if (! has_utf8) {
3062                             SvCUR_set(sv, d - SvPVX_const(sv));
3063                             SvPOK_on(sv);
3064                             *d = '\0';
3065                             /* See Note on sizing above.  */
3066                             sv_utf8_upgrade_flags_grow(
3067                                         sv,
3068                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3069                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3070                             d = SvPVX(sv) + SvCUR(sv);
3071                             has_utf8 = TRUE;
3072                         }
3073
3074                         /* Add the string to the output */
3075                         if (UNI_IS_INVARIANT(uv)) {
3076                             *d++ = (char) uv;
3077                         }
3078                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3079                     }
3080                 }
3081                 else { /* Here is \N{NAME} but not \N{U+...}. */
3082
3083                     SV *res;            /* result from charnames */
3084                     const char *str;    /* the string in 'res' */
3085                     STRLEN len;         /* its length */
3086
3087                     /* Get the value for NAME */
3088                     res = newSVpvn(s, e - s);
3089                     res = new_constant( NULL, 0, "charnames",
3090                                         /* includes all of: \N{...} */
3091                                         res, NULL, s - 3, e - s + 4 );
3092
3093                     /* Most likely res will be in utf8 already since the
3094                      * standard charnames uses pack U, but a custom translator
3095                      * can leave it otherwise, so make sure.  XXX This can be
3096                      * revisited to not have charnames use utf8 for characters
3097                      * that don't need it when regexes don't have to be in utf8
3098                      * for Unicode semantics.  If doing so, remember EBCDIC */
3099                     sv_utf8_upgrade(res);
3100                     str = SvPV_const(res, len);
3101
3102                     /* Don't accept malformed input */
3103                     if (! is_utf8_string((U8 *) str, len)) {
3104                         yyerror("Malformed UTF-8 returned by \\N");
3105                     }
3106                     else if (PL_lex_inpat) {
3107
3108                         if (! len) { /* The name resolved to an empty string */
3109                             Copy("\\N{}", d, 4, char);
3110                             d += 4;
3111                         }
3112                         else {
3113                             /* In order to not lose information for the regex
3114                             * compiler, pass the result in the specially made
3115                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3116                             * the code points in hex of each character
3117                             * returned by charnames */
3118
3119                             const char *str_end = str + len;
3120                             STRLEN char_length;     /* cur char's byte length */
3121                             STRLEN output_length;   /* and the number of bytes
3122                                                        after this is translated
3123                                                        into hex digits */
3124                             const STRLEN off = d - SvPVX_const(sv);
3125
3126                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3127                              * max('U+', '.'); and 1 for NUL */
3128                             char hex_string[2 * UTF8_MAXBYTES + 5];
3129
3130                             /* Get the first character of the result. */
3131                             U32 uv = utf8n_to_uvuni((U8 *) str,
3132                                                     len,
3133                                                     &char_length,
3134                                                     UTF8_ALLOW_ANYUV);
3135
3136                             /* The call to is_utf8_string() above hopefully
3137                              * guarantees that there won't be an error.  But
3138                              * it's easy here to make sure.  The function just
3139                              * above warns and returns 0 if invalid utf8, but
3140                              * it can also return 0 if the input is validly a
3141                              * NUL. Disambiguate */
3142                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3143                                 uv = UNICODE_REPLACEMENT;
3144                             }
3145
3146                             /* Convert first code point to hex, including the
3147                              * boiler plate before it */
3148                             sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3149                             output_length = strlen(hex_string);
3150
3151                             /* Make sure there is enough space to hold it */
3152                             d = off + SvGROW(sv, off
3153                                                  + output_length
3154                                                  + (STRLEN)(send - e)
3155                                                  + 2);  /* '}' + NUL */
3156                             /* And output it */
3157                             Copy(hex_string, d, output_length, char);
3158                             d += output_length;
3159
3160                             /* For each subsequent character, append dot and
3161                              * its ordinal in hex */
3162                             while ((str += char_length) < str_end) {
3163                                 const STRLEN off = d - SvPVX_const(sv);
3164                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3165                                                         str_end - str,
3166                                                         &char_length,
3167                                                         UTF8_ALLOW_ANYUV);
3168                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3169                                     uv = UNICODE_REPLACEMENT;
3170                                 }
3171
3172                                 sprintf(hex_string, ".%X", (unsigned int) uv);
3173                                 output_length = strlen(hex_string);
3174
3175                                 d = off + SvGROW(sv, off
3176                                                      + output_length
3177                                                      + (STRLEN)(send - e)
3178                                                      + 2);      /* '}' +  NUL */
3179                                 Copy(hex_string, d, output_length, char);
3180                                 d += output_length;
3181                             }
3182
3183                             *d++ = '}'; /* Done.  Add the trailing brace */
3184                         }
3185                     }
3186                     else { /* Here, not in a pattern.  Convert the name to a
3187                             * string. */
3188
3189                          /* If destination is not in utf8, unconditionally
3190                           * recode it to be so.  This is because \N{} implies
3191                           * Unicode semantics, and scalars have to be in utf8
3192                           * to guarantee those semantics */
3193                         if (! has_utf8) {
3194                             SvCUR_set(sv, d - SvPVX_const(sv));
3195                             SvPOK_on(sv);
3196                             *d = '\0';
3197                             /* See Note on sizing above.  */
3198                             sv_utf8_upgrade_flags_grow(sv,
3199                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3200                                                 len + (STRLEN)(send - s) + 1);
3201                             d = SvPVX(sv) + SvCUR(sv);
3202                             has_utf8 = TRUE;
3203                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3204
3205                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3206                              * set correctly here). */
3207                             const STRLEN off = d - SvPVX_const(sv);
3208                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3209                         }
3210                         Copy(str, d, len, char);
3211                         d += len;
3212                     }
3213                     SvREFCNT_dec(res);
3214
3215                     /* Deprecate non-approved name syntax */
3216                     if (ckWARN_d(WARN_DEPRECATED)) {
3217                         bool problematic = FALSE;
3218                         char* i = s;
3219
3220                         /* For non-ut8 input, look to see that the first
3221                          * character is an alpha, then loop through the rest
3222                          * checking that each is a continuation */
3223                         if (! this_utf8) {
3224                             if (! isALPHAU(*i)) problematic = TRUE;
3225                             else for (i = s + 1; i < e; i++) {
3226                                 if (isCHARNAME_CONT(*i)) continue;
3227                                 problematic = TRUE;
3228                                 break;
3229                             }
3230                         }
3231                         else {
3232                             /* Similarly for utf8.  For invariants can check
3233                              * directly.  We accept anything above the latin1
3234                              * range because it is immaterial to Perl if it is
3235                              * correct or not, and is expensive to check.  But
3236                              * it is fairly easy in the latin1 range to convert
3237                              * the variants into a single character and check
3238                              * those */
3239                             if (UTF8_IS_INVARIANT(*i)) {
3240                                 if (! isALPHAU(*i)) problematic = TRUE;
3241                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3242                                 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3243                                                                             *(i+1)))))
3244                                 {
3245                                     problematic = TRUE;
3246                                 }
3247                             }
3248                             if (! problematic) for (i = s + UTF8SKIP(s);
3249                                                     i < e;
3250                                                     i+= UTF8SKIP(i))
3251                             {
3252                                 if (UTF8_IS_INVARIANT(*i)) {
3253                                     if (isCHARNAME_CONT(*i)) continue;
3254                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3255                                     continue;
3256                                 } else if (isCHARNAME_CONT(
3257                                             UNI_TO_NATIVE(
3258                                             UTF8_ACCUMULATE(*i, *(i+1)))))
3259                                 {
3260                                     continue;
3261                                 }
3262                                 problematic = TRUE;
3263                                 break;
3264                             }
3265                         }
3266                         if (problematic) {
3267                             /* The e-i passed to the final %.*s makes sure that
3268                              * should the trailing NUL be missing that this
3269                              * print won't run off the end of the string */
3270                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3271                                 "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s", i - s + 1, s, e - i, i + 1);
3272                         }
3273                     }
3274                 } /* End \N{NAME} */
3275 #ifdef EBCDIC
3276                 if (!dorange)
3277                     native_range = FALSE; /* \N{} is defined to be Unicode */
3278 #endif
3279                 s = e + 1;  /* Point to just after the '}' */
3280                 continue;
3281
3282             /* \c is a control character */
3283             case 'c':
3284                 s++;
3285                 if (s < send) {
3286                     *d++ = grok_bslash_c(*s++, 1);
3287                 }
3288                 else {
3289                     yyerror("Missing control char name in \\c");
3290                 }
3291                 continue;
3292
3293             /* printf-style backslashes, formfeeds, newlines, etc */
3294             case 'b':
3295                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3296                 break;
3297             case 'n':
3298                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3299                 break;
3300             case 'r':
3301                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3302                 break;
3303             case 'f':
3304                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3305                 break;
3306             case 't':
3307                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3308                 break;
3309             case 'e':
3310                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3311                 break;
3312             case 'a':
3313                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3314                 break;
3315             } /* end switch */
3316
3317             s++;
3318             continue;
3319         } /* end if (backslash) */
3320 #ifdef EBCDIC
3321         else
3322             literal_endpoint++;
3323 #endif
3324
3325     default_action:
3326         /* If we started with encoded form, or already know we want it,
3327            then encode the next character */
3328         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3329             STRLEN len  = 1;
3330
3331
3332             /* One might think that it is wasted effort in the case of the
3333              * source being utf8 (this_utf8 == TRUE) to take the next character
3334              * in the source, convert it to an unsigned value, and then convert
3335              * it back again.  But the source has not been validated here.  The
3336              * routine that does the conversion checks for errors like
3337              * malformed utf8 */
3338
3339             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3340             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3341             if (!has_utf8) {
3342                 SvCUR_set(sv, d - SvPVX_const(sv));
3343                 SvPOK_on(sv);
3344                 *d = '\0';
3345                 /* See Note on sizing above.  */
3346                 sv_utf8_upgrade_flags_grow(sv,
3347                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3348                                         need + (STRLEN)(send - s) + 1);
3349                 d = SvPVX(sv) + SvCUR(sv);
3350                 has_utf8 = TRUE;
3351             } else if (need > len) {
3352                 /* encoded value larger than old, may need extra space (NOTE:
3353                  * SvCUR() is not set correctly here).   See Note on sizing
3354                  * above.  */
3355                 const STRLEN off = d - SvPVX_const(sv);
3356                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3357             }
3358             s += len;
3359
3360             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3361 #ifdef EBCDIC
3362             if (uv > 255 && !dorange)
3363                 native_range = FALSE;
3364 #endif
3365         }
3366         else {
3367             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3368         }
3369     } /* while loop to process each character */
3370
3371     /* terminate the string and set up the sv */
3372     *d = '\0';
3373     SvCUR_set(sv, d - SvPVX_const(sv));
3374     if (SvCUR(sv) >= SvLEN(sv))
3375         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3376
3377     SvPOK_on(sv);
3378     if (PL_encoding && !has_utf8) {
3379         sv_recode_to_utf8(sv, PL_encoding);
3380         if (SvUTF8(sv))
3381             has_utf8 = TRUE;
3382     }
3383     if (has_utf8) {
3384         SvUTF8_on(sv);
3385         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3386             PL_sublex_info.sub_op->op_private |=
3387                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3388         }
3389     }
3390
3391     /* shrink the sv if we allocated more than we used */
3392     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3393         SvPV_shrink_to_cur(sv);
3394     }
3395
3396     /* return the substring (via pl_yylval) only if we parsed anything */
3397     if (s > PL_bufptr) {
3398         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3399             const char *const key = PL_lex_inpat ? "qr" : "q";
3400             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3401             const char *type;
3402             STRLEN typelen;
3403
3404             if (PL_lex_inwhat == OP_TRANS) {
3405                 type = "tr";
3406                 typelen = 2;
3407             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3408                 type = "s";
3409                 typelen = 1;
3410             } else  {
3411                 type = "qq";
3412                 typelen = 2;
3413             }
3414
3415             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3416                                 type, typelen);
3417         }
3418         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3419     } else
3420         SvREFCNT_dec(sv);
3421     return s;
3422 }
3423
3424 /* S_intuit_more
3425  * Returns TRUE if there's more to the expression (e.g., a subscript),
3426  * FALSE otherwise.
3427  *
3428  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3429  *
3430  * ->[ and ->{ return TRUE
3431  * { and [ outside a pattern are always subscripts, so return TRUE
3432  * if we're outside a pattern and it's not { or [, then return FALSE
3433  * if we're in a pattern and the first char is a {
3434  *   {4,5} (any digits around the comma) returns FALSE
3435  * if we're in a pattern and the first char is a [
3436  *   [] returns FALSE
3437  *   [SOMETHING] has a funky algorithm to decide whether it's a
3438  *      character class or not.  It has to deal with things like
3439  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3440  * anything else returns TRUE
3441  */
3442
3443 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3444
3445 STATIC int
3446 S_intuit_more(pTHX_ register char *s)
3447 {
3448     dVAR;
3449
3450     PERL_ARGS_ASSERT_INTUIT_MORE;
3451
3452     if (PL_lex_brackets)
3453         return TRUE;
3454     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3455         return TRUE;
3456     if (*s != '{' && *s != '[')
3457         return FALSE;
3458     if (!PL_lex_inpat)
3459         return TRUE;
3460
3461     /* In a pattern, so maybe we have {n,m}. */
3462     if (*s == '{') {
3463         s++;
3464         if (!isDIGIT(*s))
3465             return TRUE;
3466         while (isDIGIT(*s))
3467             s++;
3468         if (*s == ',')
3469             s++;
3470         while (isDIGIT(*s))
3471             s++;
3472         if (*s == '}')
3473             return FALSE;
3474         return TRUE;
3475
3476     }
3477
3478     /* On the other hand, maybe we have a character class */
3479
3480     s++;
3481     if (*s == ']' || *s == '^')
3482         return FALSE;
3483     else {
3484         /* this is terrifying, and it works */
3485         int weight = 2;         /* let's weigh the evidence */
3486         char seen[256];
3487         unsigned char un_char = 255, last_un_char;
3488         const char * const send = strchr(s,']');
3489         char tmpbuf[sizeof PL_tokenbuf * 4];
3490
3491         if (!send)              /* has to be an expression */
3492             return TRUE;
3493
3494         Zero(seen,256,char);
3495         if (*s == '$')
3496             weight -= 3;
3497         else if (isDIGIT(*s)) {
3498             if (s[1] != ']') {
3499                 if (isDIGIT(s[1]) && s[2] == ']')
3500                     weight -= 10;
3501             }
3502             else
3503                 weight -= 100;
3504         }
3505         for (; s < send; s++) {
3506             last_un_char = un_char;
3507             un_char = (unsigned char)*s;
3508             switch (*s) {
3509             case '@':
3510             case '&':
3511             case '$':
3512                 weight -= seen[un_char] * 10;
3513                 if (isALNUM_lazy_if(s+1,UTF)) {
3514                     int len;
3515                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3516                     len = (int)strlen(tmpbuf);
3517                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3518                         weight -= 100;
3519                     else
3520                         weight -= 10;
3521                 }
3522                 else if (*s == '$' && s[1] &&
3523                   strchr("[#!%*<>()-=",s[1])) {
3524                     if (/*{*/ strchr("])} =",s[2]))
3525                         weight -= 10;
3526                     else
3527                         weight -= 1;
3528                 }
3529                 break;
3530             case '\\':
3531                 un_char = 254;
3532                 if (s[1]) {
3533                     if (strchr("wds]",s[1]))
3534                         weight += 100;
3535                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3536                         weight += 1;
3537                     else if (strchr("rnftbxcav",s[1]))
3538                         weight += 40;
3539                     else if (isDIGIT(s[1])) {
3540                         weight += 40;
3541                         while (s[1] && isDIGIT(s[1]))
3542                             s++;
3543                     }
3544                 }
3545                 else
3546                     weight += 100;
3547                 break;
3548             case '-':
3549                 if (s[1] == '\\')
3550                     weight += 50;
3551                 if (strchr("aA01! ",last_un_char))
3552                     weight += 30;
3553                 if (strchr("zZ79~",s[1]))
3554                     weight += 30;
3555                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3556                     weight -= 5;        /* cope with negative subscript */
3557                 break;
3558             default:
3559                 if (!isALNUM(last_un_char)
3560                     && !(last_un_char == '$' || last_un_char == '@'
3561                          || last_un_char == '&')
3562                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3563                     char *d = tmpbuf;
3564                     while (isALPHA(*s))
3565                         *d++ = *s++;
3566                     *d = '\0';
3567                     if (keyword(tmpbuf, d - tmpbuf, 0))
3568                         weight -= 150;
3569                 }
3570                 if (un_char == last_un_char + 1)
3571                     weight += 5;
3572                 weight -= seen[un_char];
3573                 break;
3574             }
3575             seen[un_char]++;
3576         }
3577         if (weight >= 0)        /* probably a character class */
3578             return FALSE;
3579     }
3580
3581     return TRUE;
3582 }
3583
3584 /*
3585  * S_intuit_method
3586  *
3587  * Does all the checking to disambiguate
3588  *   foo bar
3589  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3590  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3591  *
3592  * First argument is the stuff after the first token, e.g. "bar".
3593  *
3594  * Not a method if bar is a filehandle.
3595  * Not a method if foo is a subroutine prototyped to take a filehandle.
3596  * Not a method if it's really "Foo $bar"
3597  * Method if it's "foo $bar"
3598  * Not a method if it's really "print foo $bar"
3599  * Method if it's really "foo package::" (interpreted as package->foo)
3600  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3601  * Not a method if bar is a filehandle or package, but is quoted with
3602  *   =>
3603  */
3604
3605 STATIC int
3606 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3607 {
3608     dVAR;
3609     char *s = start + (*start == '$');
3610     char tmpbuf[sizeof PL_tokenbuf];
3611     STRLEN len;
3612     GV* indirgv;
3613 #ifdef PERL_MAD
3614     int soff;
3615 #endif
3616
3617     PERL_ARGS_ASSERT_INTUIT_METHOD;
3618
3619     if (gv) {
3620         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3621             return 0;
3622         if (cv) {
3623             if (SvPOK(cv)) {
3624                 const char *proto = SvPVX_const(cv);
3625                 if (proto) {
3626                     if (*proto == ';')
3627                         proto++;
3628                     if (*proto == '*')
3629                         return 0;
3630                 }
3631             }
3632         } else
3633             gv = NULL;
3634     }
3635     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3636     /* start is the beginning of the possible filehandle/object,
3637      * and s is the end of it
3638      * tmpbuf is a copy of it
3639      */
3640
3641     if (*start == '$') {
3642         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3643                 isUPPER(*PL_tokenbuf))
3644             return 0;
3645 #ifdef PERL_MAD
3646         len = start - SvPVX(PL_linestr);
3647 #endif
3648         s = PEEKSPACE(s);
3649 #ifdef PERL_MAD
3650         start = SvPVX(PL_linestr) + len;
3651 #endif
3652         PL_bufptr = start;
3653         PL_expect = XREF;
3654         return *s == '(' ? FUNCMETH : METHOD;
3655     }
3656     if (!keyword(tmpbuf, len, 0)) {
3657         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3658             len -= 2;
3659             tmpbuf[len] = '\0';
3660 #ifdef PERL_MAD
3661             soff = s - SvPVX(PL_linestr);
3662 #endif
3663             goto bare_package;
3664         }
3665         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3666         if (indirgv && GvCVu(indirgv))
3667             return 0;
3668         /* filehandle or package name makes it a method */
3669         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3670 #ifdef PERL_MAD
3671             soff = s - SvPVX(PL_linestr);
3672 #endif
3673             s = PEEKSPACE(s);
3674             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3675                 return 0;       /* no assumptions -- "=>" quotes bearword */
3676       bare_package:
3677             start_force(PL_curforce);
3678             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3679                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3680             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3681             if (PL_madskills)
3682                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3683             PL_expect = XTERM;
3684             force_next(WORD);
3685             PL_bufptr = s;
3686 #ifdef PERL_MAD
3687             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3688 #endif
3689             return *s == '(' ? FUNCMETH : METHOD;
3690         }
3691     }
3692     return 0;
3693 }
3694
3695 /* Encoded script support. filter_add() effectively inserts a
3696  * 'pre-processing' function into the current source input stream.
3697  * Note that the filter function only applies to the current source file
3698  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3699  *
3700  * The datasv parameter (which may be NULL) can be used to pass
3701  * private data to this instance of the filter. The filter function
3702  * can recover the SV using the FILTER_DATA macro and use it to
3703  * store private buffers and state information.
3704  *
3705  * The supplied datasv parameter is upgraded to a PVIO type
3706  * and the IoDIRP/IoANY field is used to store the function pointer,
3707  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3708  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3709  * private use must be set using malloc'd pointers.
3710  */
3711
3712 SV *
3713 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3714 {
3715     dVAR;
3716     if (!funcp)
3717         return NULL;
3718
3719     if (!PL_parser)
3720         return NULL;
3721
3722     if (!PL_rsfp_filters)
3723         PL_rsfp_filters = newAV();
3724     if (!datasv)
3725         datasv = newSV(0);
3726     SvUPGRADE(datasv, SVt_PVIO);
3727     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3728     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3729     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3730                           FPTR2DPTR(void *, IoANY(datasv)),
3731                           SvPV_nolen(datasv)));
3732     av_unshift(PL_rsfp_filters, 1);
3733     av_store(PL_rsfp_filters, 0, datasv) ;
3734     return(datasv);
3735 }
3736
3737
3738 /* Delete most recently added instance of this filter function. */
3739 void
3740 Perl_filter_del(pTHX_ filter_t funcp)
3741 {
3742     dVAR;
3743     SV *datasv;
3744
3745     PERL_ARGS_ASSERT_FILTER_DEL;
3746
3747 #ifdef DEBUGGING
3748     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3749                           FPTR2DPTR(void*, funcp)));
3750 #endif
3751     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3752         return;
3753     /* if filter is on top of stack (usual case) just pop it off */
3754     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3755     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3756         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3757         IoANY(datasv) = (void *)NULL;
3758         sv_free(av_pop(PL_rsfp_filters));
3759
3760         return;
3761     }
3762     /* we need to search for the correct entry and clear it     */
3763     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3764 }
3765
3766
3767 /* Invoke the idxth filter function for the current rsfp.        */
3768 /* maxlen 0 = read one text line */
3769 I32
3770 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3771 {
3772     dVAR;
3773     filter_t funcp;
3774     SV *datasv = NULL;
3775     /* This API is bad. It should have been using unsigned int for maxlen.
3776        Not sure if we want to change the API, but if not we should sanity
3777        check the value here.  */
3778     const unsigned int correct_length
3779         = maxlen < 0 ?
3780 #ifdef PERL_MICRO
3781         0x7FFFFFFF
3782 #else
3783         INT_MAX
3784 #endif
3785         : maxlen;
3786
3787     PERL_ARGS_ASSERT_FILTER_READ;
3788
3789     if (!PL_parser || !PL_rsfp_filters)
3790         return -1;
3791     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3792         /* Provide a default input filter to make life easy.    */
3793         /* Note that we append to the line. This is handy.      */
3794         DEBUG_P(PerlIO_printf(Perl_debug_log,
3795                               "filter_read %d: from rsfp\n", idx));
3796         if (correct_length) {
3797             /* Want a block */
3798             int len ;
3799             const int old_len = SvCUR(buf_sv);
3800
3801             /* ensure buf_sv is large enough */
3802             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3803             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3804                                    correct_length)) <= 0) {
3805                 if (PerlIO_error(PL_rsfp))
3806                     return -1;          /* error */
3807                 else
3808                     return 0 ;          /* end of file */
3809             }
3810             SvCUR_set(buf_sv, old_len + len) ;
3811             SvPVX(buf_sv)[old_len + len] = '\0';
3812         } else {
3813             /* Want a line */
3814             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3815                 if (PerlIO_error(PL_rsfp))
3816                     return -1;          /* error */
3817                 else
3818                     return 0 ;          /* end of file */
3819             }
3820         }
3821         return SvCUR(buf_sv);
3822     }
3823     /* Skip this filter slot if filter has been deleted */
3824     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3825         DEBUG_P(PerlIO_printf(Perl_debug_log,
3826                               "filter_read %d: skipped (filter deleted)\n",
3827                               idx));
3828         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3829     }
3830     /* Get function pointer hidden within datasv        */
3831     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3832     DEBUG_P(PerlIO_printf(Perl_debug_log,
3833                           "filter_read %d: via function %p (%s)\n",
3834                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3835     /* Call function. The function is expected to       */
3836     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3837     /* Return: <0:error, =0:eof, >0:not eof             */
3838     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3839 }
3840
3841 STATIC char *
3842 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3843 {
3844     dVAR;
3845
3846     PERL_ARGS_ASSERT_FILTER_GETS;
3847
3848 #ifdef PERL_CR_FILTER
3849     if (!PL_rsfp_filters) {
3850         filter_add(S_cr_textfilter,NULL);
3851     }
3852 #endif
3853     if (PL_rsfp_filters) {
3854         if (!append)
3855             SvCUR_set(sv, 0);   /* start with empty line        */
3856         if (FILTER_READ(0, sv, 0) > 0)
3857             return ( SvPVX(sv) ) ;
3858         else
3859             return NULL ;
3860     }
3861     else
3862         return (sv_gets(sv, PL_rsfp, append));
3863 }
3864
3865 STATIC HV *
3866 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3867 {
3868     dVAR;
3869     GV *gv;
3870
3871     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3872
3873     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3874         return PL_curstash;
3875
3876     if (len > 2 &&
3877         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3878         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3879     {
3880         return GvHV(gv);                        /* Foo:: */
3881     }
3882
3883     /* use constant CLASS => 'MyClass' */
3884     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3885     if (gv && GvCV(gv)) {
3886         SV * const sv = cv_const_sv(GvCV(gv));
3887         if (sv)
3888             pkgname = SvPV_const(sv, len);
3889     }
3890
3891     return gv_stashpvn(pkgname, len, 0);
3892 }
3893
3894 /*
3895  * S_readpipe_override
3896  * Check whether readpipe() is overriden, and generates the appropriate
3897  * optree, provided sublex_start() is called afterwards.
3898  */
3899 STATIC void
3900 S_readpipe_override(pTHX)
3901 {
3902     GV **gvp;
3903     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3904     pl_yylval.ival = OP_BACKTICK;
3905     if ((gv_readpipe
3906                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3907             ||
3908             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3909              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3910              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3911     {
3912         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3913             append_elem(OP_LIST,
3914                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3915                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3916     }
3917 }
3918
3919 #ifdef PERL_MAD
3920  /*
3921  * Perl_madlex
3922  * The intent of this yylex wrapper is to minimize the changes to the
3923  * tokener when we aren't interested in collecting madprops.  It remains
3924  * to be seen how successful this strategy will be...
3925  */
3926
3927 int
3928 Perl_madlex(pTHX)
3929 {
3930     int optype;
3931     char *s = PL_bufptr;
3932
3933     /* make sure PL_thiswhite is initialized */
3934     PL_thiswhite = 0;
3935     PL_thismad = 0;
3936
3937     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3938     if (PL_pending_ident)
3939         return S_pending_ident(aTHX);
3940
3941     /* previous token ate up our whitespace? */
3942     if (!PL_lasttoke && PL_nextwhite) {
3943         PL_thiswhite = PL_nextwhite;
3944         PL_nextwhite = 0;
3945     }
3946
3947     /* isolate the token, and figure out where it is without whitespace */
3948     PL_realtokenstart = -1;
3949     PL_thistoken = 0;
3950     optype = yylex();
3951     s = PL_bufptr;
3952     assert(PL_curforce < 0);
3953
3954     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3955         if (!PL_thistoken) {
3956             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3957                 PL_thistoken = newSVpvs("");
3958             else {
3959                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3960                 PL_thistoken = newSVpvn(tstart, s - tstart);
3961             }
3962         }
3963         if (PL_thismad) /* install head */
3964             CURMAD('X', PL_thistoken);
3965     }
3966
3967     /* last whitespace of a sublex? */
3968     if (optype == ')' && PL_endwhite) {
3969         CURMAD('X', PL_endwhite);
3970     }
3971
3972     if (!PL_thismad) {
3973
3974         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3975         if (!PL_thiswhite && !PL_endwhite && !optype) {
3976             sv_free(PL_thistoken);
3977             PL_thistoken = 0;
3978             return 0;
3979         }
3980
3981         /* put off final whitespace till peg */
3982         if (optype == ';' && !PL_rsfp) {
3983             PL_nextwhite = PL_thiswhite;
3984             PL_thiswhite = 0;
3985         }
3986         else if (PL_thisopen) {
3987             CURMAD('q', PL_thisopen);
3988             if (PL_thistoken)
3989                 sv_free(PL_thistoken);
3990             PL_thistoken = 0;
3991         }
3992         else {
3993             /* Store actual token text as madprop X */
3994             CURMAD('X', PL_thistoken);
3995         }
3996
3997         if (PL_thiswhite) {
3998             /* add preceding whitespace as madprop _ */
3999             CURMAD('_', PL_thiswhite);
4000         }
4001
4002         if (PL_thisstuff) {
4003             /* add quoted material as madprop = */
4004             CURMAD('=', PL_thisstuff);
4005         }
4006
4007         if (PL_thisclose) {
4008             /* add terminating quote as madprop Q */
4009             CURMAD('Q', PL_thisclose);
4010         }
4011     }
4012
4013     /* special processing based on optype */
4014
4015     switch (optype) {
4016
4017     /* opval doesn't need a TOKEN since it can already store mp */
4018     case WORD:
4019     case METHOD:
4020     case FUNCMETH:
4021     case THING:
4022     case PMFUNC:
4023     case PRIVATEREF:
4024     case FUNC0SUB:
4025     case UNIOPSUB:
4026     case LSTOPSUB:
4027         if (pl_yylval.opval)
4028             append_madprops(PL_thismad, pl_yylval.opval, 0);
4029         PL_thismad = 0;
4030         return optype;
4031
4032     /* fake EOF */
4033     case 0:
4034         optype = PEG;
4035         if (PL_endwhite) {
4036             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4037             PL_endwhite = 0;
4038         }
4039         break;
4040
4041     case ']':
4042     case '}':
4043         if (PL_faketokens)
4044             break;
4045         /* remember any fake bracket that lexer is about to discard */
4046         if (PL_lex_brackets == 1 &&
4047             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4048         {
4049             s = PL_bufptr;
4050             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4051                 s++;
4052             if (*s == '}') {
4053                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4054                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4055                 PL_thiswhite = 0;
4056                 PL_bufptr = s - 1;
4057                 break;  /* don't bother looking for trailing comment */
4058             }
4059             else
4060                 s = PL_bufptr;
4061         }
4062         if (optype == ']')
4063             break;
4064         /* FALLTHROUGH */
4065
4066     /* attach a trailing comment to its statement instead of next token */
4067     case ';':
4068         if (PL_faketokens)
4069             break;
4070         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4071             s = PL_bufptr;
4072             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4073                 s++;
4074             if (*s == '\n' || *s == '#') {
4075                 while (s < PL_bufend && *s != '\n')
4076                     s++;
4077                 if (s < PL_bufend)
4078                     s++;
4079                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4080                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4081                 PL_thiswhite = 0;
4082                 PL_bufptr = s;
4083             }
4084         }
4085         break;
4086
4087     /* pval */
4088     case LABEL:
4089         break;
4090
4091     /* ival */
4092     default:
4093         break;
4094
4095     }
4096
4097     /* Create new token struct.  Note: opvals return early above. */
4098     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4099     PL_thismad = 0;
4100     return optype;
4101 }
4102 #endif
4103
4104 STATIC char *
4105 S_tokenize_use(pTHX_ int is_use, char *s) {
4106     dVAR;
4107
4108     PERL_ARGS_ASSERT_TOKENIZE_USE;
4109
4110     if (PL_expect != XSTATE)
4111         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4112                     is_use ? "use" : "no"));
4113     s = SKIPSPACE1(s);
4114     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4115         s = force_version(s, TRUE);
4116         if (*s == ';' || *s == '}'
4117                 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4118             start_force(PL_curforce);
4119             NEXTVAL_NEXTTOKE.opval = NULL;
4120             force_next(WORD);
4121         }
4122         else if (*s == 'v') {
4123             s = force_word(s,WORD,FALSE,TRUE,FALSE);
4124             s = force_version(s, FALSE);
4125         }
4126     }
4127     else {
4128         s = force_word(s,WORD,FALSE,TRUE,FALSE);
4129         s = force_version(s, FALSE);
4130     }
4131     pl_yylval.ival = is_use;
4132     return s;
4133 }
4134 #ifdef DEBUGGING
4135     static const char* const exp_name[] =
4136         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4137           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4138         };
4139 #endif
4140
4141 /*
4142   yylex
4143
4144   Works out what to call the token just pulled out of the input
4145   stream.  The yacc parser takes care of taking the ops we return and
4146   stitching them into a tree.
4147
4148   Returns:
4149     PRIVATEREF
4150
4151   Structure:
4152       if read an identifier
4153           if we're in a my declaration
4154               croak if they tried to say my($foo::bar)
4155               build the ops for a my() declaration
4156           if it's an access to a my() variable
4157               are we in a sort block?
4158                   croak if my($a); $a <=> $b
4159               build ops for access to a my() variable
4160           if in a dq string, and they've said @foo and we can't find @foo
4161               croak
4162           build ops for a bareword
4163       if we already built the token before, use it.
4164 */
4165
4166
4167 #ifdef __SC__
4168 #pragma segment Perl_yylex
4169 #endif
4170 int
4171 Perl_yylex(pTHX)
4172 {
4173     dVAR;
4174     register char *s = PL_bufptr;
4175     register char *d;
4176     STRLEN len;
4177     bool bof = FALSE;
4178     U32 fake_eof = 0;
4179
4180     /* orig_keyword, gvp, and gv are initialized here because
4181      * jump to the label just_a_word_zero can bypass their
4182      * initialization later. */
4183     I32 orig_keyword = 0;
4184     GV *gv = NULL;
4185     GV **gvp = NULL;
4186
4187     DEBUG_T( {
4188         SV* tmp = newSVpvs("");
4189         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4190             (IV)CopLINE(PL_curcop),
4191             lex_state_names[PL_lex_state],
4192             exp_name[PL_expect],
4193             pv_display(tmp, s, strlen(s), 0, 60));
4194         SvREFCNT_dec(tmp);
4195     } );
4196     /* check if there's an identifier for us to look at */
4197     if (PL_pending_ident)
4198         return REPORT(S_pending_ident(aTHX));
4199
4200     /* no identifier pending identification */
4201
4202     switch (PL_lex_state) {
4203 #ifdef COMMENTARY
4204     case LEX_NORMAL:            /* Some compilers will produce faster */
4205     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4206         break;
4207 #endif
4208
4209     /* when we've already built the next token, just pull it out of the queue */
4210     case LEX_KNOWNEXT:
4211 #ifdef PERL_MAD
4212         PL_lasttoke--;
4213         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4214         if (PL_madskills) {
4215             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4216             PL_nexttoke[PL_lasttoke].next_mad = 0;
4217             if (PL_thismad && PL_thismad->mad_key == '_') {
4218                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4219                 PL_thismad->mad_val = 0;
4220                 mad_free(PL_thismad);
4221                 PL_thismad = 0;
4222             }
4223         }
4224         if (!PL_lasttoke) {
4225             PL_lex_state = PL_lex_defer;
4226             PL_expect = PL_lex_expect;
4227             PL_lex_defer = LEX_NORMAL;
4228             if (!PL_nexttoke[PL_lasttoke].next_type)
4229                 return yylex();
4230         }
4231 #else
4232         PL_nexttoke--;
4233         pl_yylval = PL_nextval[PL_nexttoke];
4234         if (!PL_nexttoke) {
4235             PL_lex_state = PL_lex_defer;
4236             PL_expect = PL_lex_expect;
4237             PL_lex_defer = LEX_NORMAL;
4238         }
4239 #endif
4240 #ifdef PERL_MAD
4241         /* FIXME - can these be merged?  */
4242         return(PL_nexttoke[PL_lasttoke].next_type);
4243 #else
4244         return REPORT(PL_nexttype[PL_nexttoke]);
4245 #endif
4246
4247     /* interpolated case modifiers like \L \U, including \Q and \E.
4248        when we get here, PL_bufptr is at the \
4249     */
4250     case LEX_INTERPCASEMOD:
4251 #ifdef DEBUGGING
4252         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4253             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4254 #endif
4255         /* handle \E or end of string */
4256         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4257             /* if at a \E */
4258             if (PL_lex_casemods) {
4259                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4260                 PL_lex_casestack[PL_lex_casemods] = '\0';
4261
4262                 if (PL_bufptr != PL_bufend
4263                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4264                     PL_bufptr += 2;
4265                     PL_lex_state = LEX_INTERPCONCAT;
4266 #ifdef PERL_MAD
4267                     if (PL_madskills)
4268                         PL_thistoken = newSVpvs("\\E");
4269 #endif
4270                 }
4271                 return REPORT(')');
4272             }
4273 #ifdef PERL_MAD
4274             while (PL_bufptr != PL_bufend &&
4275               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4276                 if (!PL_thiswhite)
4277                     PL_thiswhite = newSVpvs("");
4278                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4279                 PL_bufptr += 2;
4280             }
4281 #else
4282             if (PL_bufptr != PL_bufend)
4283                 PL_bufptr += 2;
4284 #endif
4285             PL_lex_state = LEX_INTERPCONCAT;
4286             return yylex();
4287         }
4288         else {
4289             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4290               "### Saw case modifier\n"); });
4291             s = PL_bufptr + 1;
4292             if (s[1] == '\\' && s[2] == 'E') {
4293 #ifdef PERL_MAD
4294                 if (!PL_thiswhite)
4295                     PL_thiswhite = newSVpvs("");
4296                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4297 #endif
4298                 PL_bufptr = s + 3;
4299                 PL_lex_state = LEX_INTERPCONCAT;
4300                 return yylex();
4301             }
4302             else {
4303                 I32 tmp;
4304                 if (!PL_madskills) /* when just compiling don't need correct */
4305                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4306                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4307                 if ((*s == 'L' || *s == 'U') &&
4308                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4309                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4310                     return REPORT(')');
4311                 }
4312                 if (PL_lex_casemods > 10)
4313                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4314                 PL_lex_casestack[PL_lex_casemods++] = *s;
4315                 PL_lex_casestack[PL_lex_casemods] = '\0';
4316                 PL_lex_state = LEX_INTERPCONCAT;
4317                 start_force(PL_curforce);
4318                 NEXTVAL_NEXTTOKE.ival = 0;
4319                 force_next('(');
4320                 start_force(PL_curforce);
4321                 if (*s == 'l')
4322                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4323                 else if (*s == 'u')
4324                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4325                 else if (*s == 'L')
4326                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4327                 else if (*s == 'U')
4328                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4329                 else if (*s == 'Q')
4330                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4331                 else
4332                     Perl_croak(aTHX_ "panic: yylex");
4333                 if (PL_madskills) {
4334                     SV* const tmpsv = newSVpvs("\\ ");
4335                     /* replace the space with the character we want to escape
4336                      */
4337                     SvPVX(tmpsv)[1] = *s;
4338                     curmad('_', tmpsv);
4339                 }
4340                 PL_bufptr = s + 1;
4341             }
4342             force_next(FUNC);
4343             if (PL_lex_starts) {
4344                 s = PL_bufptr;
4345                 PL_lex_starts = 0;
4346 #ifdef PERL_MAD
4347                 if (PL_madskills) {
4348                     if (PL_thistoken)
4349                         sv_free(PL_thistoken);
4350                     PL_thistoken = newSVpvs("");
4351                 }
4352 #endif
4353                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4354                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4355                     OPERATOR(',');
4356                 else
4357                     Aop(OP_CONCAT);
4358             }
4359             else
4360                 return yylex();
4361         }
4362
4363     case LEX_INTERPPUSH:
4364         return REPORT(sublex_push());
4365
4366     case LEX_INTERPSTART:
4367         if (PL_bufptr == PL_bufend)
4368             return REPORT(sublex_done());
4369         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4370               "### Interpolated variable\n"); });
4371         PL_expect = XTERM;
4372         PL_lex_dojoin = (*PL_bufptr == '@');
4373         PL_lex_state = LEX_INTERPNORMAL;
4374         if (PL_lex_dojoin) {
4375             start_force(PL_curforce);
4376             NEXTVAL_NEXTTOKE.ival = 0;
4377             force_next(',');
4378             start_force(PL_curforce);
4379             force_ident("\"", '$');
4380             start_force(PL_curforce);
4381             NEXTVAL_NEXTTOKE.ival = 0;
4382             force_next('$');
4383             start_force(PL_curforce);
4384             NEXTVAL_NEXTTOKE.ival = 0;
4385             force_next('(');
4386             start_force(PL_curforce);
4387             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4388             force_next(FUNC);
4389         }
4390         if (PL_lex_starts++) {
4391             s = PL_bufptr;
4392 #ifdef PERL_MAD
4393             if (PL_madskills) {
4394                 if (PL_thistoken)
4395                     sv_free(PL_thistoken);
4396                 PL_thistoken = newSVpvs("");
4397             }
4398 #endif
4399             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4400             if (!PL_lex_casemods && PL_lex_inpat)
4401                 OPERATOR(',');
4402             else
4403                 Aop(OP_CONCAT);
4404         }
4405         return yylex();
4406
4407     case LEX_INTERPENDMAYBE:
4408         if (intuit_more(PL_bufptr)) {
4409             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4410             break;
4411         }
4412         /* FALL THROUGH */
4413
4414     case LEX_INTERPEND:
4415         if (PL_lex_dojoin) {
4416             PL_lex_dojoin = FALSE;
4417             PL_lex_state = LEX_INTERPCONCAT;
4418 #ifdef PERL_MAD
4419             if (PL_madskills) {
4420                 if (PL_thistoken)
4421                     sv_free(PL_thistoken);
4422                 PL_thistoken = newSVpvs("");
4423             }
4424 #endif
4425             return REPORT(')');
4426         }
4427         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4428             && SvEVALED(PL_lex_repl))
4429         {
4430             if (PL_bufptr != PL_bufend)
4431                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4432             PL_lex_repl = NULL;
4433         }
4434         /* FALLTHROUGH */
4435     case LEX_INTERPCONCAT:
4436 #ifdef DEBUGGING
4437         if (PL_lex_brackets)
4438             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4439 #endif
4440         if (PL_bufptr == PL_bufend)
4441             return REPORT(sublex_done());
4442
4443         if (SvIVX(PL_linestr) == '\'') {
4444             SV *sv = newSVsv(PL_linestr);
4445             if (!PL_lex_inpat)
4446                 sv = tokeq(sv);
4447             else if ( PL_hints & HINT_NEW_RE )
4448                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4449             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4450             s = PL_bufend;
4451         }
4452         else {
4453             s = scan_const(PL_bufptr);
4454             if (*s == '\\')
4455                 PL_lex_state = LEX_INTERPCASEMOD;
4456             else
4457                 PL_lex_state = LEX_INTERPSTART;
4458         }
4459
4460         if (s != PL_bufptr) {
4461             start_force(PL_curforce);
4462             if (PL_madskills) {
4463                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4464             }
4465             NEXTVAL_NEXTTOKE = pl_yylval;
4466             PL_expect = XTERM;
4467             force_next(THING);
4468             if (PL_lex_starts++) {
4469 #ifdef PERL_MAD
4470                 if (PL_madskills) {
4471                     if (PL_thistoken)
4472                         sv_free(PL_thistoken);
4473                     PL_thistoken = newSVpvs("");
4474                 }
4475 #endif
4476                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4477                 if (!PL_lex_casemods && PL_lex_inpat)
4478                     OPERATOR(',');
4479                 else
4480                     Aop(OP_CONCAT);
4481             }
4482             else {
4483                 PL_bufptr = s;
4484                 return yylex();
4485             }
4486         }
4487
4488         return yylex();
4489     case LEX_FORMLINE:
4490         PL_lex_state = LEX_NORMAL;
4491         s = scan_formline(PL_bufptr);
4492         if (!PL_lex_formbrack)
4493             goto rightbracket;
4494         OPERATOR(';');
4495     }
4496
4497     s = PL_bufptr;
4498     PL_oldoldbufptr = PL_oldbufptr;
4499     PL_oldbufptr = s;
4500
4501   retry:
4502 #ifdef PERL_MAD
4503     if (PL_thistoken) {
4504         sv_free(PL_thistoken);
4505         PL_thistoken = 0;
4506     }
4507     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4508 #endif
4509     switch (*s) {
4510     default:
4511         if (isIDFIRST_lazy_if(s,UTF))
4512             goto keylookup;
4513         {
4514         unsigned char c = *s;
4515         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4516         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4517             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4518         } else {
4519             d = PL_linestart;
4520         }
4521         *s = '\0';
4522         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4523     }
4524     case 4:
4525     case 26:
4526         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4527     case 0:
4528 #ifdef PERL_MAD
4529         if (PL_madskills)
4530             PL_faketokens = 0;
4531 #endif
4532         if (!PL_rsfp) {
4533             PL_last_uni = 0;
4534             PL_last_lop = 0;
4535             if (PL_lex_brackets) {
4536                 yyerror((const char *)
4537                         (PL_lex_formbrack
4538                          ? "Format not terminated"
4539                          : "Missing right curly or square bracket"));
4540             }
4541             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4542                         "### Tokener got EOF\n");
4543             } );
4544             TOKEN(0);
4545         }
4546         if (s++ < PL_bufend)
4547             goto retry;                 /* ignore stray nulls */
4548         PL_last_uni = 0;
4549         PL_last_lop = 0;
4550         if (!PL_in_eval && !PL_preambled) {
4551             PL_preambled = TRUE;
4552 #ifdef PERL_MAD
4553             if (PL_madskills)
4554                 PL_faketokens = 1;
4555 #endif
4556             if (PL_perldb) {
4557                 /* Generate a string of Perl code to load the debugger.
4558                  * If PERL5DB is set, it will return the contents of that,
4559                  * otherwise a compile-time require of perl5db.pl.  */
4560
4561                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4562
4563                 if (pdb) {
4564                     sv_setpv(PL_linestr, pdb);
4565                     sv_catpvs(PL_linestr,";");
4566                 } else {
4567                     SETERRNO(0,SS_NORMAL);
4568                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4569                 }
4570             } else
4571                 sv_setpvs(PL_linestr,"");
4572             if (PL_preambleav) {
4573                 SV **svp = AvARRAY(PL_preambleav);
4574                 SV **const end = svp + AvFILLp(PL_preambleav);
4575                 while(svp <= end) {
4576                     sv_catsv(PL_linestr, *svp);
4577                     ++svp;
4578                     sv_catpvs(PL_linestr, ";");
4579                 }
4580                 sv_free(MUTABLE_SV(PL_preambleav));
4581                 PL_preambleav = NULL;
4582             }
4583             if (PL_minus_E)
4584                 sv_catpvs(PL_linestr,
4585                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4586             if (PL_minus_n || PL_minus_p) {
4587                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4588                 if (PL_minus_l)
4589                     sv_catpvs(PL_linestr,"chomp;");
4590                 if (PL_minus_a) {
4591                     if (PL_minus_F) {
4592                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4593                              || *PL_splitstr == '"')
4594                               && strchr(PL_splitstr + 1, *PL_splitstr))
4595                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4596                         else {
4597                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4598                                bytes can be used as quoting characters.  :-) */
4599                             const char *splits = PL_splitstr;
4600                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4601                             do {
4602                                 /* Need to \ \s  */
4603                                 if (*splits == '\\')
4604                                     sv_catpvn(PL_linestr, splits, 1);
4605                                 sv_catpvn(PL_linestr, splits, 1);
4606                             } while (*splits++);
4607                             /* This loop will embed the trailing NUL of
4608                                PL_linestr as the last thing it does before
4609                                terminating.  */
4610                             sv_catpvs(PL_linestr, ");");
4611                         }
4612                     }
4613                     else
4614                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4615                 }
4616             }
4617             sv_catpvs(PL_linestr, "\n");
4618             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4619             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4620             PL_last_lop = PL_last_uni = NULL;
4621             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4622                 update_debugger_info(PL_linestr, NULL, 0);
4623             goto retry;
4624         }
4625         do {
4626             fake_eof = 0;
4627             bof = PL_rsfp ? TRUE : FALSE;
4628             if (0) {
4629               fake_eof:
4630                 fake_eof = LEX_FAKE_EOF;
4631             }
4632             PL_bufptr = PL_bufend;
4633             CopLINE_inc(PL_curcop);
4634             if (!lex_next_chunk(fake_eof)) {
4635                 CopLINE_dec(PL_curcop);
4636                 s = PL_bufptr;
4637                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4638             }
4639             CopLINE_dec(PL_curcop);
4640 #ifdef PERL_MAD
4641             if (!PL_rsfp)
4642                 PL_realtokenstart = -1;
4643 #endif
4644             s = PL_bufptr;
4645             /* If it looks like the start of a BOM or raw UTF-16,
4646              * check if it in fact is. */
4647             if (bof && PL_rsfp &&
4648                      (*s == 0 ||
4649                       *(U8*)s == 0xEF ||
4650                       *(U8*)s >= 0xFE ||
4651                       s[1] == 0)) {
4652                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4653                 if (bof) {
4654                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4655                     s = swallow_bom((U8*)s);
4656                 }
4657             }
4658             if (PL_doextract) {
4659                 /* Incest with pod. */
4660 #ifdef PERL_MAD
4661                 if (PL_madskills)
4662                     sv_catsv(PL_thiswhite, PL_linestr);
4663 #endif
4664                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4665                     sv_setpvs(PL_linestr, "");
4666                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4667                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4668                     PL_last_lop = PL_last_uni = NULL;
4669                     PL_doextract = FALSE;
4670                 }
4671             }
4672             if (PL_rsfp)
4673                 incline(s);
4674         } while (PL_doextract);
4675         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4676         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4677         PL_last_lop = PL_last_uni = NULL;
4678         if (CopLINE(PL_curcop) == 1) {
4679             while (s < PL_bufend && isSPACE(*s))
4680                 s++;
4681             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4682                 s++;
4683 #ifdef PERL_MAD
4684             if (PL_madskills)
4685                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4686 #endif
4687             d = NULL;
4688             if (!PL_in_eval) {
4689                 if (*s == '#' && *(s+1) == '!')
4690                     d = s + 2;
4691 #ifdef ALTERNATE_SHEBANG
4692                 else {
4693                     static char const as[] = ALTERNATE_SHEBANG;
4694                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4695                         d = s + (sizeof(as) - 1);
4696                 }
4697 #endif /* ALTERNATE_SHEBANG */
4698             }
4699             if (d) {
4700                 char *ipath;
4701                 char *ipathend;
4702
4703                 while (isSPACE(*d))
4704                     d++;
4705                 ipath = d;
4706                 while (*d && !isSPACE(*d))
4707                     d++;
4708                 ipathend = d;
4709
4710 #ifdef ARG_ZERO_IS_SCRIPT
4711                 if (ipathend > ipath) {
4712                     /*
4713                      * HP-UX (at least) sets argv[0] to the script name,
4714                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4715                      * at least, set argv[0] to the basename of the Perl
4716                      * interpreter. So, having found "#!", we'll set it right.
4717                      */
4718                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4719                                                     SVt_PV)); /* $^X */
4720                     assert(SvPOK(x) || SvGMAGICAL(x));
4721                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4722                         sv_setpvn(x, ipath, ipathend - ipath);
4723                         SvSETMAGIC(x);
4724                     }
4725                     else {
4726                         STRLEN blen;
4727                         STRLEN llen;
4728                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4729                         const char * const lstart = SvPV_const(x,llen);
4730                         if (llen < blen) {
4731                             bstart += blen - llen;
4732                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4733                                 sv_setpvn(x, ipath, ipathend - ipath);
4734                                 SvSETMAGIC(x);
4735                             }
4736                         }
4737                     }
4738                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4739                 }
4740 #endif /* ARG_ZERO_IS_SCRIPT */
4741
4742                 /*
4743                  * Look for options.
4744                  */
4745                 d = instr(s,"perl -");
4746                 if (!d) {
4747                     d = instr(s,"perl");
4748 #if defined(DOSISH)
4749                     /* avoid getting into infinite loops when shebang
4750                      * line contains "Perl" rather than "perl" */
4751                     if (!d) {
4752                         for (d = ipathend-4; d >= ipath; --d) {
4753                             if ((*d == 'p' || *d == 'P')
4754                                 && !ibcmp(d, "perl", 4))
4755                             {
4756                                 break;
4757                             }
4758                         }
4759                         if (d < ipath)
4760                             d = NULL;
4761                     }
4762 #endif
4763                 }
4764 #ifdef ALTERNATE_SHEBANG
4765                 /*
4766                  * If the ALTERNATE_SHEBANG on this system starts with a
4767                  * character that can be part of a Perl expression, then if
4768                  * we see it but not "perl", we're probably looking at the
4769                  * start of Perl code, not a request to hand off to some
4770                  * other interpreter.  Similarly, if "perl" is there, but
4771                  * not in the first 'word' of the line, we assume the line
4772                  * contains the start of the Perl program.
4773                  */
4774                 if (d && *s != '#') {
4775                     const char *c = ipath;
4776                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4777                         c++;
4778                     if (c < d)
4779                         d = NULL;       /* "perl" not in first word; ignore */
4780                     else
4781                         *s = '#';       /* Don't try to parse shebang line */
4782                 }
4783 #endif /* ALTERNATE_SHEBANG */
4784                 if (!d &&
4785                     *s == '#' &&
4786                     ipathend > ipath &&
4787                     !PL_minus_c &&
4788                     !instr(s,"indir") &&
4789                     instr(PL_origargv[0],"perl"))
4790                 {
4791                     dVAR;
4792                     char **newargv;
4793
4794                     *ipathend = '\0';
4795                     s = ipathend + 1;
4796                     while (s < PL_bufend && isSPACE(*s))
4797                         s++;
4798                     if (s < PL_bufend) {
4799                         Newx(newargv,PL_origargc+3,char*);
4800                         newargv[1] = s;
4801                         while (s < PL_bufend && !isSPACE(*s))
4802                             s++;
4803                         *s = '\0';
4804                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4805                     }
4806                     else
4807                         newargv = PL_origargv;
4808                     newargv[0] = ipath;
4809                     PERL_FPU_PRE_EXEC
4810                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4811                     PERL_FPU_POST_EXEC
4812                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4813                 }
4814                 if (d) {
4815                     while (*d && !isSPACE(*d))
4816                         d++;
4817                     while (SPACE_OR_TAB(*d))
4818                         d++;
4819
4820                     if (*d++ == '-') {
4821                         const bool switches_done = PL_doswitches;
4822                         const U32 oldpdb = PL_perldb;
4823                         const bool oldn = PL_minus_n;
4824                         const bool oldp = PL_minus_p;
4825                         const char *d1 = d;
4826
4827                         do {
4828                             bool baduni = FALSE;
4829                             if (*d1 == 'C') {
4830                                 const char *d2 = d1 + 1;
4831                                 if (parse_unicode_opts((const char **)&d2)
4832                                     != PL_unicode)
4833                                     baduni = TRUE;
4834                             }
4835                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4836                                 const char * const m = d1;
4837                                 while (*d1 && !isSPACE(*d1))
4838                                     d1++;
4839                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4840                                       (int)(d1 - m), m);
4841                             }
4842                             d1 = moreswitches(d1);
4843                         } while (d1);
4844                         if (PL_doswitches && !switches_done) {
4845                             int argc = PL_origargc;
4846                             char **argv = PL_origargv;
4847                             do {
4848                                 argc--,argv++;
4849                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4850                             init_argv_symbols(argc,argv);
4851                         }
4852                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4853                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4854                               /* if we have already added "LINE: while (<>) {",
4855                                  we must not do it again */
4856                         {
4857                             sv_setpvs(PL_linestr, "");
4858                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4859                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4860                             PL_last_lop = PL_last_uni = NULL;
4861                             PL_preambled = FALSE;
4862                             if (PERLDB_LINE || PERLDB_SAVESRC)
4863                                 (void)gv_fetchfile(PL_origfilename);
4864                             goto retry;
4865                         }
4866                     }
4867                 }
4868             }
4869         }
4870         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4871             PL_bufptr = s;
4872             PL_lex_state = LEX_FORMLINE;
4873             return yylex();
4874         }
4875         goto retry;
4876     case '\r':
4877 #ifdef PERL_STRICT_CR
4878         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4879         Perl_croak(aTHX_
4880       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4881 #endif
4882     case ' ': case '\t': case '\f': case 013:
4883 #ifdef PERL_MAD
4884         PL_realtokenstart = -1;
4885         if (!PL_thiswhite)
4886             PL_thiswhite = newSVpvs("");
4887         sv_catpvn(PL_thiswhite, s, 1);
4888 #endif
4889         s++;
4890         goto retry;
4891     case '#':
4892     case '\n':
4893 #ifdef PERL_MAD
4894         PL_realtokenstart = -1;
4895         if (PL_madskills)
4896             PL_faketokens = 0;
4897 #endif
4898         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4899             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4900                 /* handle eval qq[#line 1 "foo"\n ...] */
4901                 CopLINE_dec(PL_curcop);
4902                 incline(s);
4903             }
4904             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4905                 s = SKIPSPACE0(s);
4906                 if (!PL_in_eval || PL_rsfp)
4907                     incline(s);
4908             }
4909             else {
4910                 d = s;
4911                 while (d < PL_bufend && *d != '\n')
4912                     d++;
4913                 if (d < PL_bufend)
4914                     d++;
4915                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4916                   Perl_croak(aTHX_ "panic: input overflow");
4917 #ifdef PERL_MAD
4918                 if (PL_madskills)
4919                     PL_thiswhite = newSVpvn(s, d - s);
4920 #endif
4921                 s = d;
4922                 incline(s);
4923             }
4924             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4925                 PL_bufptr = s;
4926                 PL_lex_state = LEX_FORMLINE;
4927                 return yylex();
4928             }
4929         }
4930         else {
4931 #ifdef PERL_MAD
4932             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4933                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4934                     PL_faketokens = 0;
4935                     s = SKIPSPACE0(s);
4936                     TOKEN(PEG); /* make sure any #! line is accessible */
4937                 }
4938                 s = SKIPSPACE0(s);
4939             }
4940             else {
4941 /*              if (PL_madskills && PL_lex_formbrack) { */
4942                     d = s;
4943                     while (d < PL_bufend && *d != '\n')
4944                         d++;
4945                     if (d < PL_bufend)
4946                         d++;
4947                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4948                       Perl_croak(aTHX_ "panic: input overflow");
4949                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4950                         if (!PL_thiswhite)
4951                             PL_thiswhite = newSVpvs("");
4952                         if (CopLINE(PL_curcop) == 1) {
4953                             sv_setpvs(PL_thiswhite, "");
4954                             PL_faketokens = 0;
4955                         }
4956                         sv_catpvn(PL_thiswhite, s, d - s);
4957                     }
4958                     s = d;
4959 /*              }
4960                 *s = '\0';
4961                 PL_bufend = s; */
4962             }
4963 #else
4964             *s = '\0';
4965             PL_bufend = s;
4966 #endif
4967         }
4968         goto retry;
4969     case '-':
4970         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4971             I32 ftst = 0;
4972             char tmp;
4973
4974             s++;
4975             PL_bufptr = s;
4976             tmp = *s++;
4977
4978             while (s < PL_bufend && SPACE_OR_TAB(*s))
4979                 s++;
4980
4981             if (strnEQ(s,"=>",2)) {
4982                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4983                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4984                 OPERATOR('-');          /* unary minus */
4985             }
4986             PL_last_uni = PL_oldbufptr;
4987             switch (tmp) {
4988             case 'r': ftst = OP_FTEREAD;        break;
4989             case 'w': ftst = OP_FTEWRITE;       break;
4990             case 'x': ftst = OP_FTEEXEC;        break;
4991             case 'o': ftst = OP_FTEOWNED;       break;
4992             case 'R': ftst = OP_FTRREAD;        break;
4993             case 'W': ftst = OP_FTRWRITE;       break;
4994             case 'X': ftst = OP_FTREXEC;        break;
4995             case 'O': ftst = OP_FTROWNED;       break;
4996             case 'e': ftst = OP_FTIS;           break;
4997             case 'z': ftst = OP_FTZERO;         break;
4998             case 's': ftst = OP_FTSIZE;         break;
4999             case 'f': ftst = OP_FTFILE;         break;
5000             case 'd': ftst = OP_FTDIR;          break;
5001             case 'l': ftst = OP_FTLINK;         break;
5002             case 'p': ftst = OP_FTPIPE;         break;
5003             case 'S': ftst = OP_FTSOCK;         break;
5004             case 'u': ftst = OP_FTSUID;         break;
5005             case 'g': ftst = OP_FTSGID;         break;
5006             case 'k': ftst = OP_FTSVTX;         break;
5007             case 'b': ftst = OP_FTBLK;          break;
5008             case 'c': ftst = OP_FTCHR;          break;
5009             case 't': ftst = OP_FTTTY;          break;
5010             case 'T': ftst = OP_FTTEXT;         break;
5011             case 'B': ftst = OP_FTBINARY;       break;
5012             case 'M': case 'A': case 'C':
5013                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5014                 switch (tmp) {
5015                 case 'M': ftst = OP_FTMTIME;    break;
5016                 case 'A': ftst = OP_FTATIME;    break;
5017                 case 'C': ftst = OP_FTCTIME;    break;
5018                 default:                        break;
5019                 }
5020                 break;
5021             default:
5022                 break;
5023             }
5024             if (ftst) {
5025                 PL_last_lop_op = (OPCODE)ftst;
5026                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5027                         "### Saw file test %c\n", (int)tmp);
5028                 } );
5029                 FTST(ftst);
5030             }
5031             else {
5032                 /* Assume it was a minus followed by a one-letter named
5033                  * subroutine call (or a -bareword), then. */
5034                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5035                         "### '-%c' looked like a file test but was not\n",
5036                         (int) tmp);
5037                 } );
5038                 s = --PL_bufptr;
5039             }
5040         }
5041         {
5042             const char tmp = *s++;
5043             if (*s == tmp) {
5044                 s++;
5045                 if (PL_expect == XOPERATOR)
5046                     TERM(POSTDEC);
5047                 else
5048                     OPERATOR(PREDEC);
5049             }
5050             else if (*s == '>') {
5051                 s++;
5052                 s = SKIPSPACE1(s);
5053                 if (isIDFIRST_lazy_if(s,UTF)) {
5054                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5055                     TOKEN(ARROW);
5056                 }
5057                 else if (*s == '$')
5058                     OPERATOR(ARROW);
5059                 else
5060                     TERM(ARROW);
5061             }
5062             if (PL_expect == XOPERATOR)
5063                 Aop(OP_SUBTRACT);
5064             else {
5065                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5066                     check_uni();
5067                 OPERATOR('-');          /* unary minus */
5068             }
5069         }
5070
5071     case '+':
5072         {
5073             const char tmp = *s++;
5074             if (*s == tmp) {
5075                 s++;
5076                 if (PL_expect == XOPERATOR)
5077                     TERM(POSTINC);
5078                 else
5079                     OPERATOR(PREINC);
5080             }
5081             if (PL_expect == XOPERATOR)
5082                 Aop(OP_ADD);
5083             else {
5084                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5085                     check_uni();
5086                 OPERATOR('+');
5087             }
5088         }
5089
5090     case '*':
5091         if (PL_expect != XOPERATOR) {
5092             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5093             PL_expect = XOPERATOR;
5094             force_ident(PL_tokenbuf, '*');
5095             if (!*PL_tokenbuf)
5096                 PREREF('*');
5097             TERM('*');
5098         }
5099         s++;
5100         if (*s == '*') {
5101             s++;
5102             PWop(OP_POW);
5103         }
5104         Mop(OP_MULTIPLY);
5105
5106     case '%':
5107         if (PL_expect == XOPERATOR) {
5108             ++s;
5109             Mop(OP_MODULO);
5110         }
5111         PL_tokenbuf[0] = '%';
5112         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5113                 sizeof PL_tokenbuf - 1, FALSE);
5114         if (!PL_tokenbuf[1]) {
5115             PREREF('%');
5116         }
5117         PL_pending_ident = '%';
5118         TERM('%');
5119
5120     case '^':
5121         s++;
5122         BOop(OP_BIT_XOR);
5123     case '[':
5124         PL_lex_brackets++;
5125         {
5126             const char tmp = *s++;
5127             OPERATOR(tmp);
5128         }
5129     case '~':
5130         if (s[1] == '~'
5131             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5132         {
5133             s += 2;
5134             Eop(OP_SMARTMATCH);
5135         }
5136     case ',':
5137         {
5138             const char tmp = *s++;
5139             OPERATOR(tmp);
5140         }
5141     case ':':
5142         if (s[1] == ':') {
5143             len = 0;
5144             goto just_a_word_zero_gv;
5145         }
5146         s++;
5147         switch (PL_expect) {
5148             OP *attrs;
5149 #ifdef PERL_MAD
5150             I32 stuffstart;
5151 #endif
5152         case XOPERATOR:
5153             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5154                 break;
5155             PL_bufptr = s;      /* update in case we back off */
5156             if (*s == '=') {
5157                 deprecate(":= for an empty attribute list");
5158             }
5159             goto grabattrs;
5160         case XATTRBLOCK:
5161             PL_expect = XBLOCK;
5162             goto grabattrs;
5163         case XATTRTERM:
5164             PL_expect = XTERMBLOCK;
5165          grabattrs:
5166 #ifdef PERL_MAD
5167             stuffstart = s - SvPVX(PL_linestr) - 1;
5168 #endif
5169             s = PEEKSPACE(s);
5170             attrs = NULL;
5171             while (isIDFIRST_lazy_if(s,UTF)) {
5172                 I32 tmp;
5173                 SV *sv;
5174                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5175                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5176                     if (tmp < 0) tmp = -tmp;
5177                     switch (tmp) {
5178                     case KEY_or:
5179                     case KEY_and:
5180                     case KEY_for:
5181                     case KEY_foreach:
5182                     case KEY_unless:
5183                     case KEY_if:
5184                     case KEY_while:
5185                     case KEY_until:
5186                         goto got_attrs;
5187                     default:
5188                         break;
5189                     }
5190                 }
5191                 sv = newSVpvn(s, len);
5192                 if (*d == '(') {
5193                     d = scan_str(d,TRUE,TRUE);
5194                     if (!d) {
5195                         /* MUST advance bufptr here to avoid bogus
5196                            "at end of line" context messages from yyerror().
5197                          */
5198                         PL_bufptr = s + len;
5199                         yyerror("Unterminated attribute parameter in attribute list");
5200                         if (attrs)
5201                             op_free(attrs);
5202                         sv_free(sv);
5203                         return REPORT(0);       /* EOF indicator */
5204                     }
5205                 }
5206                 if (PL_lex_stuff) {
5207                     sv_catsv(sv, PL_lex_stuff);
5208                     attrs = append_elem(OP_LIST, attrs,
5209                                         newSVOP(OP_CONST, 0, sv));
5210                     SvREFCNT_dec(PL_lex_stuff);
5211                     PL_lex_stuff = NULL;
5212                 }
5213                 else {
5214                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5215                         sv_free(sv);
5216                         if (PL_in_my == KEY_our) {
5217                             deprecate(":unique");
5218                         }
5219                         else
5220                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5221                     }
5222
5223                     /* NOTE: any CV attrs applied here need to be part of
5224                        the CVf_BUILTIN_ATTRS define in cv.h! */
5225                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5226                         sv_free(sv);
5227                         CvLVALUE_on(PL_compcv);
5228                     }
5229                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5230                         sv_free(sv);
5231                         deprecate(":locked");
5232                     }
5233                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5234                         sv_free(sv);
5235                         CvMETHOD_on(PL_compcv);
5236                     }
5237                     /* After we've set the flags, it could be argued that
5238                        we don't need to do the attributes.pm-based setting
5239                        process, and shouldn't bother appending recognized
5240                        flags.  To experiment with that, uncomment the
5241                        following "else".  (Note that's already been
5242                        uncommented.  That keeps the above-applied built-in
5243                        attributes from being intercepted (and possibly
5244                        rejected) by a package's attribute routines, but is
5245                        justified by the performance win for the common case
5246                        of applying only built-in attributes.) */
5247                     else
5248                         attrs = append_elem(OP_LIST, attrs,
5249                                             newSVOP(OP_CONST, 0,
5250                                                     sv));
5251                 }
5252                 s = PEEKSPACE(d);
5253                 if (*s == ':' && s[1] != ':')
5254                     s = PEEKSPACE(s+1);
5255                 else if (s == d)
5256                     break;      /* require real whitespace or :'s */
5257                 /* XXX losing whitespace on sequential attributes here */
5258             }
5259             {
5260                 const char tmp
5261                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5262                 if (*s != ';' && *s != '}' && *s != tmp
5263                     && (tmp != '=' || *s != ')')) {
5264                     const char q = ((*s == '\'') ? '"' : '\'');
5265                     /* If here for an expression, and parsed no attrs, back
5266                        off. */
5267                     if (tmp == '=' && !attrs) {
5268                         s = PL_bufptr;
5269                         break;
5270                     }
5271                     /* MUST advance bufptr here to avoid bogus "at end of line"
5272                        context messages from yyerror().
5273                     */
5274                     PL_bufptr = s;
5275                     yyerror( (const char *)
5276                              (*s
5277                               ? Perl_form(aTHX_ "Invalid separator character "
5278                                           "%c%c%c in attribute list", q, *s, q)
5279                               : "Unterminated attribute list" ) );
5280                     if (attrs)
5281                         op_free(attrs);
5282                     OPERATOR(':');
5283                 }
5284             }
5285         got_attrs:
5286             if (attrs) {
5287                 start_force(PL_curforce);
5288                 NEXTVAL_NEXTTOKE.opval = attrs;
5289                 CURMAD('_', PL_nextwhite);
5290                 force_next(THING);
5291             }
5292 #ifdef PERL_MAD
5293             if (PL_madskills) {
5294                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5295                                      (s - SvPVX(PL_linestr)) - stuffstart);
5296             }
5297 #endif
5298             TOKEN(COLONATTR);
5299         }
5300         OPERATOR(':');
5301     case '(':
5302         s++;
5303         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5304             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5305         else
5306             PL_expect = XTERM;
5307         s = SKIPSPACE1(s);
5308         TOKEN('(');
5309     case ';':
5310         CLINE;
5311         {
5312             const char tmp = *s++;
5313             OPERATOR(tmp);
5314         }
5315     case ')':
5316         {
5317             const char tmp = *s++;
5318             s = SKIPSPACE1(s);
5319             if (*s == '{')
5320                 PREBLOCK(tmp);
5321             TERM(tmp);
5322         }
5323     case ']':
5324         s++;
5325         if (PL_lex_brackets <= 0)
5326             yyerror("Unmatched right square bracket");
5327         else
5328             --PL_lex_brackets;
5329         if (PL_lex_state == LEX_INTERPNORMAL) {
5330             if (PL_lex_brackets == 0) {
5331                 if (*s == '-' && s[1] == '>')
5332                     PL_lex_state = LEX_INTERPENDMAYBE;
5333                 else if (*s != '[' && *s != '{')
5334                     PL_lex_state = LEX_INTERPEND;
5335             }
5336         }
5337         TERM(']');
5338     case '{':
5339       leftbracket:
5340         s++;
5341         if (PL_lex_brackets > 100) {
5342             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5343         }
5344         switch (PL_expect) {
5345         case XTERM:
5346             if (PL_lex_formbrack) {
5347                 s--;
5348                 PRETERMBLOCK(DO);
5349             }
5350             if (PL_oldoldbufptr == PL_last_lop)
5351                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5352             else
5353                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5354             OPERATOR(HASHBRACK);
5355         case XOPERATOR:
5356             while (s < PL_bufend && SPACE_OR_TAB(*s))
5357                 s++;
5358             d = s;
5359             PL_tokenbuf[0] = '\0';
5360             if (d < PL_bufend && *d == '-') {
5361                 PL_tokenbuf[0] = '-';
5362                 d++;
5363                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5364                     d++;
5365             }
5366             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5367                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5368                               FALSE, &len);
5369                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5370                     d++;
5371                 if (*d == '}') {
5372                     const char minus = (PL_tokenbuf[0] == '-');
5373                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5374                     if (minus)
5375                         force_next('-');
5376                 }
5377             }
5378             /* FALL THROUGH */
5379         case XATTRBLOCK:
5380         case XBLOCK:
5381             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5382             PL_expect = XSTATE;
5383             break;
5384         case XATTRTERM:
5385         case XTERMBLOCK:
5386             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5387             PL_expect = XSTATE;
5388             break;
5389         default: {
5390                 const char *t;
5391                 if (PL_oldoldbufptr == PL_last_lop)
5392                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5393                 else
5394                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5395                 s = SKIPSPACE1(s);
5396                 if (*s == '}') {
5397                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5398                         PL_expect = XTERM;
5399                         /* This hack is to get the ${} in the message. */
5400                         PL_bufptr = s+1;
5401                         yyerror("syntax error");
5402                         break;
5403                     }
5404                     OPERATOR(HASHBRACK);
5405                 }
5406                 /* This hack serves to disambiguate a pair of curlies
5407                  * as being a block or an anon hash.  Normally, expectation
5408                  * determines that, but in cases where we're not in a
5409                  * position to expect anything in particular (like inside
5410                  * eval"") we have to resolve the ambiguity.  This code
5411                  * covers the case where the first term in the curlies is a
5412                  * quoted string.  Most other cases need to be explicitly
5413                  * disambiguated by prepending a "+" before the opening
5414                  * curly in order to force resolution as an anon hash.
5415                  *
5416                  * XXX should probably propagate the outer expectation
5417                  * into eval"" to rely less on this hack, but that could
5418                  * potentially break current behavior of eval"".
5419                  * GSAR 97-07-21
5420                  */
5421                 t = s;
5422                 if (*s == '\'' || *s == '"' || *s == '`') {
5423                     /* common case: get past first string, handling escapes */
5424                     for (t++; t < PL_bufend && *t != *s;)
5425                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5426                             t++;
5427                     t++;
5428                 }
5429                 else if (*s == 'q') {
5430                     if (++t < PL_bufend
5431                         && (!isALNUM(*t)
5432                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5433                                 && !isALNUM(*t))))
5434                     {
5435                         /* skip q//-like construct */
5436                         const char *tmps;
5437                         char open, close, term;
5438                         I32 brackets = 1;
5439
5440                         while (t < PL_bufend && isSPACE(*t))
5441                             t++;
5442                         /* check for q => */
5443                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5444                             OPERATOR(HASHBRACK);
5445                         }
5446                         term = *t;
5447                         open = term;
5448                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5449                             term = tmps[5];
5450                         close = term;
5451                         if (open == close)
5452                             for (t++; t < PL_bufend; t++) {
5453                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5454                                     t++;
5455                                 else if (*t == open)
5456                                     break;
5457                             }
5458                         else {
5459                             for (t++; t < PL_bufend; t++) {
5460                                 if (*t == '\\' && t+1 < PL_bufend)
5461                                     t++;
5462                                 else if (*t == close && --brackets <= 0)
5463                                     break;
5464                                 else if (*t == open)
5465                                     brackets++;
5466                             }
5467                         }
5468                         t++;
5469                     }
5470                     else
5471                         /* skip plain q word */
5472                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5473                              t += UTF8SKIP(t);
5474                 }
5475                 else if (isALNUM_lazy_if(t,UTF)) {
5476                     t += UTF8SKIP(t);
5477                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5478                          t += UTF8SKIP(t);
5479                 }
5480                 while (t < PL_bufend && isSPACE(*t))
5481                     t++;
5482                 /* if comma follows first term, call it an anon hash */
5483                 /* XXX it could be a comma expression with loop modifiers */
5484                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5485                                    || (*t == '=' && t[1] == '>')))
5486                     OPERATOR(HASHBRACK);
5487                 if (PL_expect == XREF)
5488                     PL_expect = XTERM;
5489                 else {
5490                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5491                     PL_expect = XSTATE;
5492                 }
5493             }
5494             break;
5495         }
5496         pl_yylval.ival = CopLINE(PL_curcop);
5497         if (isSPACE(*s) || *s == '#')
5498             PL_copline = NOLINE;   /* invalidate current command line number */
5499         TOKEN('{');
5500     case '}':
5501       rightbracket:
5502         s++;
5503         if (PL_lex_brackets <= 0)
5504             yyerror("Unmatched right curly bracket");
5505         else
5506             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5507         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5508             PL_lex_formbrack = 0;
5509         if (PL_lex_state == LEX_INTERPNORMAL) {
5510             if (PL_lex_brackets == 0) {
5511                 if (PL_expect & XFAKEBRACK) {
5512                     PL_expect &= XENUMMASK;
5513                     PL_lex_state = LEX_INTERPEND;
5514                     PL_bufptr = s;
5515 #if 0
5516                     if (PL_madskills) {
5517                         if (!PL_thiswhite)
5518                             PL_thiswhite = newSVpvs("");
5519                         sv_catpvs(PL_thiswhite,"}");
5520                     }
5521 #endif
5522                     return yylex();     /* ignore fake brackets */
5523                 }
5524                 if (*s == '-' && s[1] == '>')
5525                     PL_lex_state = LEX_INTERPENDMAYBE;
5526                 else if (*s != '[' && *s != '{')
5527                     PL_lex_state = LEX_INTERPEND;
5528             }
5529         }
5530         if (PL_expect & XFAKEBRACK) {
5531             PL_expect &= XENUMMASK;
5532             PL_bufptr = s;
5533             return yylex();             /* ignore fake brackets */
5534         }
5535         start_force(PL_curforce);
5536         if (PL_madskills) {
5537             curmad('X', newSVpvn(s-1,1));
5538             CURMAD('_', PL_thiswhite);
5539         }
5540         force_next('}');
5541 #ifdef PERL_MAD
5542         if (!PL_thistoken)
5543             PL_thistoken = newSVpvs("");
5544 #endif
5545         TOKEN(';');
5546     case '&':
5547         s++;
5548         if (*s++ == '&')
5549             AOPERATOR(ANDAND);
5550         s--;
5551         if (PL_expect == XOPERATOR) {
5552             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5553                 && isIDFIRST_lazy_if(s,UTF))
5554             {
5555                 CopLINE_dec(PL_curcop);
5556                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5557                 CopLINE_inc(PL_curcop);
5558             }
5559             BAop(OP_BIT_AND);
5560         }
5561
5562         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5563         if (*PL_tokenbuf) {
5564             PL_expect = XOPERATOR;
5565             force_ident(PL_tokenbuf, '&');
5566         }
5567         else
5568             PREREF('&');
5569         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5570         TERM('&');
5571
5572     case '|':
5573         s++;
5574         if (*s++ == '|')
5575             AOPERATOR(OROR);
5576         s--;
5577         BOop(OP_BIT_OR);
5578     case '=':
5579         s++;
5580         {
5581             const char tmp = *s++;
5582             if (tmp == '=')
5583                 Eop(OP_EQ);
5584             if (tmp == '>')
5585                 OPERATOR(',');
5586             if (tmp == '~')
5587                 PMop(OP_MATCH);
5588             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5589                 && strchr("+-*/%.^&|<",tmp))
5590                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5591                             "Reversed %c= operator",(int)tmp);
5592             s--;
5593             if (PL_expect == XSTATE && isALPHA(tmp) &&
5594                 (s == PL_linestart+1 || s[-2] == '\n') )
5595                 {
5596                     if (PL_in_eval && !PL_rsfp) {
5597                         d = PL_bufend;
5598                         while (s < d) {
5599                             if (*s++ == '\n') {
5600                                 incline(s);
5601                                 if (strnEQ(s,"=cut",4)) {
5602                                     s = strchr(s,'\n');
5603                                     if (s)
5604                                         s++;
5605                                     else
5606                                         s = d;
5607                                     incline(s);
5608                                     goto retry;
5609                                 }
5610                             }
5611                         }
5612                         goto retry;
5613                     }
5614 #ifdef PERL_MAD
5615                     if (PL_madskills) {
5616                         if (!PL_thiswhite)
5617                             PL_thiswhite = newSVpvs("");
5618                         sv_catpvn(PL_thiswhite, PL_linestart,
5619                                   PL_bufend - PL_linestart);
5620                     }
5621 #endif
5622                     s = PL_bufend;
5623                     PL_doextract = TRUE;
5624                     goto retry;
5625                 }
5626         }
5627         if (PL_lex_brackets < PL_lex_formbrack) {
5628             const char *t = s;
5629 #ifdef PERL_STRICT_CR
5630             while (SPACE_OR_TAB(*t))
5631 #else
5632             while (SPACE_OR_TAB(*t) || *t == '\r')
5633 #endif
5634                 t++;
5635             if (*t == '\n' || *t == '#') {
5636                 s--;
5637                 PL_expect = XBLOCK;
5638                 goto leftbracket;
5639             }
5640         }
5641         pl_yylval.ival = 0;
5642         OPERATOR(ASSIGNOP);
5643     case '!':
5644         s++;
5645         {
5646             const char tmp = *s++;
5647             if (tmp == '=') {
5648                 /* was this !=~ where !~ was meant?
5649                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5650
5651                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5652                     const char *t = s+1;
5653
5654                     while (t < PL_bufend && isSPACE(*t))
5655                         ++t;
5656
5657                     if (*t == '/' || *t == '?' ||
5658                         ((*t == 'm' || *t == 's' || *t == 'y')
5659                          && !isALNUM(t[1])) ||
5660                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5661                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5662                                     "!=~ should be !~");
5663                 }
5664                 Eop(OP_NE);
5665             }
5666             if (tmp == '~')
5667                 PMop(OP_NOT);
5668         }
5669         s--;
5670         OPERATOR('!');
5671     case '<':
5672         if (PL_expect != XOPERATOR) {
5673             if (s[1] != '<' && !strchr(s,'>'))
5674                 check_uni();
5675             if (s[1] == '<')
5676                 s = scan_heredoc(s);
5677             else
5678                 s = scan_inputsymbol(s);
5679             TERM(sublex_start());
5680         }
5681         s++;
5682         {
5683             char tmp = *s++;
5684             if (tmp == '<')
5685                 SHop(OP_LEFT_SHIFT);
5686             if (tmp == '=') {
5687                 tmp = *s++;
5688                 if (tmp == '>')
5689                     Eop(OP_NCMP);
5690                 s--;
5691                 Rop(OP_LE);
5692             }
5693         }
5694         s--;
5695         Rop(OP_LT);
5696     case '>':
5697         s++;
5698         {
5699             const char tmp = *s++;
5700             if (tmp == '>')
5701                 SHop(OP_RIGHT_SHIFT);
5702             else if (tmp == '=')
5703                 Rop(OP_GE);
5704         }
5705         s--;
5706         Rop(OP_GT);
5707
5708     case '$':
5709         CLINE;
5710
5711         if (PL_expect == XOPERATOR) {
5712             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5713                 return deprecate_commaless_var_list();
5714             }
5715         }
5716
5717         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
5718             PL_tokenbuf[0] = '@';
5719             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5720                            sizeof PL_tokenbuf - 1, FALSE);
5721             if (PL_expect == XOPERATOR)
5722                 no_op("Array length", s);
5723             if (!PL_tokenbuf[1])
5724                 PREREF(DOLSHARP);
5725             PL_expect = XOPERATOR;
5726             PL_pending_ident = '#';
5727             TOKEN(DOLSHARP);
5728         }
5729
5730         PL_tokenbuf[0] = '$';
5731         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5732                        sizeof PL_tokenbuf - 1, FALSE);
5733         if (PL_expect == XOPERATOR)
5734             no_op("Scalar", s);
5735         if (!PL_tokenbuf[1]) {
5736             if (s == PL_bufend)
5737                 yyerror("Final $ should be \\$ or $name");
5738             PREREF('$');
5739         }
5740
5741         /* This kludge not intended to be bulletproof. */
5742         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5743             pl_yylval.opval = newSVOP(OP_CONST, 0,
5744                                    newSViv(CopARYBASE_get(&PL_compiling)));
5745             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5746             TERM(THING);
5747         }
5748
5749         d = s;
5750         {
5751             const char tmp = *s;
5752             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5753                 s = SKIPSPACE1(s);
5754
5755             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5756                 && intuit_more(s)) {
5757                 if (*s == '[') {
5758                     PL_tokenbuf[0] = '@';
5759                     if (ckWARN(WARN_SYNTAX)) {
5760                         char *t = s+1;
5761
5762                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5763                             t++;
5764                         if (*t++ == ',') {
5765                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5766                             while (t < PL_bufend && *t != ']')
5767                                 t++;
5768                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5769                                         "Multidimensional syntax %.*s not supported",
5770                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5771                         }
5772                     }
5773                 }
5774                 else if (*s == '{') {
5775                     char *t;
5776                     PL_tokenbuf[0] = '%';
5777                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5778                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5779                         {
5780                             char tmpbuf[sizeof PL_tokenbuf];
5781                             do {
5782                                 t++;
5783                             } while (isSPACE(*t));
5784                             if (isIDFIRST_lazy_if(t,UTF)) {
5785                                 STRLEN len;
5786                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5787                                               &len);
5788                                 while (isSPACE(*t))
5789                                     t++;
5790                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5791                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5792                                                 "You need to quote \"%s\"",
5793                                                 tmpbuf);
5794                             }
5795                         }
5796                 }
5797             }
5798
5799             PL_expect = XOPERATOR;
5800             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5801                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5802                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5803                     PL_expect = XOPERATOR;
5804                 else if (strchr("$@\"'`q", *s))
5805                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5806                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5807                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5808                 else if (isIDFIRST_lazy_if(s,UTF)) {
5809                     char tmpbuf[sizeof PL_tokenbuf];
5810                     int t2;
5811                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5812                     if ((t2 = keyword(tmpbuf, len, 0))) {
5813                         /* binary operators exclude handle interpretations */
5814                         switch (t2) {
5815                         case -KEY_x:
5816                         case -KEY_eq:
5817                         case -KEY_ne:
5818                         case -KEY_gt:
5819                         case -KEY_lt:
5820                         case -KEY_ge:
5821                         case -KEY_le:
5822                         case -KEY_cmp:
5823                             break;
5824                         default:
5825                             PL_expect = XTERM;  /* e.g. print $fh length() */
5826                             break;
5827                         }
5828                     }
5829                     else {
5830                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5831                     }
5832                 }
5833                 else if (isDIGIT(*s))
5834                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5835                 else if (*s == '.' && isDIGIT(s[1]))
5836                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5837                 else if ((*s == '?' || *s == '-' || *s == '+')
5838                          && !isSPACE(s[1]) && s[1] != '=')
5839                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5840                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5841                          && s[1] != '/')
5842                     PL_expect = XTERM;          /* e.g. print $fh /.../
5843                                                    XXX except DORDOR operator
5844                                                 */
5845                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5846                          && s[2] != '=')
5847                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5848             }
5849         }
5850         PL_pending_ident = '$';
5851         TOKEN('$');
5852
5853     case '@':
5854         if (PL_expect == XOPERATOR)
5855             no_op("Array", s);
5856         PL_tokenbuf[0] = '@';
5857         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5858         if (!PL_tokenbuf[1]) {
5859             PREREF('@');
5860         }
5861         if (PL_lex_state == LEX_NORMAL)
5862             s = SKIPSPACE1(s);
5863         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5864             if (*s == '{')
5865                 PL_tokenbuf[0] = '%';
5866
5867             /* Warn about @ where they meant $. */
5868             if (*s == '[' || *s == '{') {
5869                 if (ckWARN(WARN_SYNTAX)) {
5870                     const char *t = s + 1;
5871                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5872                         t++;
5873                     if (*t == '}' || *t == ']') {
5874                         t++;
5875                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5876                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5877                             "Scalar value %.*s better written as $%.*s",
5878                             (int)(t-PL_bufptr), PL_bufptr,
5879                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5880                     }
5881                 }
5882             }
5883         }
5884         PL_pending_ident = '@';
5885         TERM('@');
5886
5887      case '/':                  /* may be division, defined-or, or pattern */
5888         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5889             s += 2;
5890             AOPERATOR(DORDOR);
5891         }
5892      case '?':                  /* may either be conditional or pattern */
5893         if (PL_expect == XOPERATOR) {
5894              char tmp = *s++;
5895              if(tmp == '?') {
5896                 OPERATOR('?');
5897              }
5898              else {
5899                  tmp = *s++;
5900                  if(tmp == '/') {
5901                      /* A // operator. */
5902                     AOPERATOR(DORDOR);
5903                  }
5904                  else {
5905                      s--;
5906                      Mop(OP_DIVIDE);
5907                  }
5908              }
5909          }
5910          else {
5911              /* Disable warning on "study /blah/" */
5912              if (PL_oldoldbufptr == PL_last_uni
5913               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5914                   || memNE(PL_last_uni, "study", 5)
5915                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5916               ))
5917                  check_uni();
5918              s = scan_pat(s,OP_MATCH);
5919              TERM(sublex_start());
5920          }
5921
5922     case '.':
5923         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5924 #ifdef PERL_STRICT_CR
5925             && s[1] == '\n'
5926 #else
5927             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5928 #endif
5929             && (s == PL_linestart || s[-1] == '\n') )
5930         {
5931             PL_lex_formbrack = 0;
5932             PL_expect = XSTATE;
5933             goto rightbracket;
5934         }
5935         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5936             s += 3;
5937             OPERATOR(YADAYADA);
5938         }
5939         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5940             char tmp = *s++;
5941             if (*s == tmp) {
5942                 s++;
5943                 if (*s == tmp) {
5944                     s++;
5945                     pl_yylval.ival = OPf_SPECIAL;
5946                 }
5947                 else
5948                     pl_yylval.ival = 0;
5949                 OPERATOR(DOTDOT);
5950             }
5951             Aop(OP_CONCAT);
5952         }
5953         /* FALL THROUGH */
5954     case '0': case '1': case '2': case '3': case '4':
5955     case '5': case '6': case '7': case '8': case '9':
5956         s = scan_num(s, &pl_yylval);
5957         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5958         if (PL_expect == XOPERATOR)
5959             no_op("Number",s);
5960         TERM(THING);
5961
5962     case '\'':
5963         s = scan_str(s,!!PL_madskills,FALSE);
5964         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5965         if (PL_expect == XOPERATOR) {
5966             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5967                 return deprecate_commaless_var_list();
5968             }
5969             else
5970                 no_op("String",s);
5971         }
5972         if (!s)
5973             missingterm(NULL);
5974         pl_yylval.ival = OP_CONST;
5975         TERM(sublex_start());
5976
5977     case '"':
5978         s = scan_str(s,!!PL_madskills,FALSE);
5979         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5980         if (PL_expect == XOPERATOR) {
5981             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5982                 return deprecate_commaless_var_list();
5983             }
5984             else
5985                 no_op("String",s);
5986         }
5987         if (!s)
5988             missingterm(NULL);
5989         pl_yylval.ival = OP_CONST;
5990         /* FIXME. I think that this can be const if char *d is replaced by
5991            more localised variables.  */
5992         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5993             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5994                 pl_yylval.ival = OP_STRINGIFY;
5995                 break;
5996             }
5997         }
5998         TERM(sublex_start());
5999
6000     case '`':
6001         s = scan_str(s,!!PL_madskills,FALSE);
6002         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6003         if (PL_expect == XOPERATOR)
6004             no_op("Backticks",s);
6005         if (!s)
6006             missingterm(NULL);
6007         readpipe_override();
6008         TERM(sublex_start());
6009
6010     case '\\':
6011         s++;
6012         if (PL_lex_inwhat && isDIGIT(*s))
6013             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6014                            *s, *s);
6015         if (PL_expect == XOPERATOR)
6016             no_op("Backslash",s);
6017         OPERATOR(REFGEN);
6018
6019     case 'v':
6020         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6021             char *start = s + 2;
6022             while (isDIGIT(*start) || *start == '_')
6023                 start++;
6024             if (*start == '.' && isDIGIT(start[1])) {
6025                 s = scan_num(s, &pl_yylval);
6026                 TERM(THING);
6027             }
6028             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6029             else if (!isALPHA(*start) && (PL_expect == XTERM
6030                         || PL_expect == XREF || PL_expect == XSTATE
6031                         || PL_expect == XTERMORDORDOR)) {
6032                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6033                 if (!gv) {
6034                     s = scan_num(s, &pl_yylval);
6035                     TERM(THING);
6036                 }
6037             }
6038         }
6039         goto keylookup;
6040     case 'x':
6041         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6042             s++;
6043             Mop(OP_REPEAT);
6044         }
6045         goto keylookup;
6046
6047     case '_':
6048     case 'a': case 'A':
6049     case 'b': case 'B':
6050     case 'c': case 'C':
6051     case 'd': case 'D':
6052     case 'e': case 'E':
6053     case 'f': case 'F':
6054     case 'g': case 'G':
6055     case 'h': case 'H':
6056     case 'i': case 'I':
6057     case 'j': case 'J':
6058     case 'k': case 'K':
6059     case 'l': case 'L':
6060     case 'm': case 'M':
6061     case 'n': case 'N':
6062     case 'o': case 'O':
6063     case 'p': case 'P':
6064     case 'q': case 'Q':
6065     case 'r': case 'R':
6066     case 's': case 'S':
6067     case 't': case 'T':
6068     case 'u': case 'U':
6069               case 'V':
6070     case 'w': case 'W':
6071               case 'X':
6072     case 'y': case 'Y':
6073     case 'z': case 'Z':
6074
6075       keylookup: {
6076         bool anydelim;
6077         I32 tmp;
6078
6079         orig_keyword = 0;
6080         gv = NULL;
6081         gvp = NULL;
6082
6083         PL_bufptr = s;
6084         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6085
6086         /* Some keywords can be followed by any delimiter, including ':' */
6087         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6088                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6089                              (PL_tokenbuf[0] == 'q' &&
6090                               strchr("qwxr", PL_tokenbuf[1])))));
6091
6092         /* x::* is just a word, unless x is "CORE" */
6093         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6094             goto just_a_word;
6095
6096         d = s;
6097         while (d < PL_bufend && isSPACE(*d))
6098                 d++;    /* no comments skipped here, or s### is misparsed */
6099
6100         /* Is this a word before a => operator? */
6101         if (*d == '=' && d[1] == '>') {
6102             CLINE;
6103             pl_yylval.opval
6104                 = (OP*)newSVOP(OP_CONST, 0,
6105                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6106             pl_yylval.opval->op_private = OPpCONST_BARE;
6107             TERM(WORD);
6108         }
6109
6110         /* Check for plugged-in keyword */
6111         {
6112             OP *o;
6113             int result;
6114             char *saved_bufptr = PL_bufptr;
6115             PL_bufptr = s;
6116             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6117             s = PL_bufptr;
6118             if (result == KEYWORD_PLUGIN_DECLINE) {
6119                 /* not a plugged-in keyword */
6120                 PL_bufptr = saved_bufptr;
6121             } else if (result == KEYWORD_PLUGIN_STMT) {
6122                 pl_yylval.opval = o;
6123                 CLINE;
6124                 PL_expect = XSTATE;
6125                 return REPORT(PLUGSTMT);
6126             } else if (result == KEYWORD_PLUGIN_EXPR) {
6127                 pl_yylval.opval = o;
6128                 CLINE;
6129                 PL_expect = XOPERATOR;
6130                 return REPORT(PLUGEXPR);
6131             } else {
6132                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6133                                         PL_tokenbuf);
6134             }
6135         }
6136
6137         /* Check for built-in keyword */
6138         tmp = keyword(PL_tokenbuf, len, 0);
6139
6140         /* Is this a label? */
6141         if (!anydelim && PL_expect == XSTATE
6142               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6143             s = d + 1;
6144             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6145             CLINE;
6146             TOKEN(LABEL);
6147         }
6148
6149         if (tmp < 0) {                  /* second-class keyword? */
6150             GV *ogv = NULL;     /* override (winner) */
6151             GV *hgv = NULL;     /* hidden (loser) */
6152             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6153                 CV *cv;
6154                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6155                     (cv = GvCVu(gv)))
6156                 {
6157                     if (GvIMPORTED_CV(gv))
6158                         ogv = gv;
6159                     else if (! CvMETHOD(cv))
6160                         hgv = gv;
6161                 }
6162                 if (!ogv &&
6163                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6164                     (gv = *gvp) && isGV_with_GP(gv) &&
6165                     GvCVu(gv) && GvIMPORTED_CV(gv))
6166                 {
6167                     ogv = gv;
6168                 }
6169             }
6170             if (ogv) {
6171                 orig_keyword = tmp;
6172                 tmp = 0;                /* overridden by import or by GLOBAL */
6173             }
6174             else if (gv && !gvp
6175                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6176                      && GvCVu(gv))
6177             {
6178                 tmp = 0;                /* any sub overrides "weak" keyword */
6179             }
6180             else {                      /* no override */
6181                 tmp = -tmp;
6182                 if (tmp == KEY_dump) {
6183                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6184                                    "dump() better written as CORE::dump()");
6185                 }
6186                 gv = NULL;
6187                 gvp = 0;
6188                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6189                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6190                                    "Ambiguous call resolved as CORE::%s(), %s",
6191                                    GvENAME(hgv), "qualify as such or use &");
6192             }
6193         }
6194
6195       reserved_word:
6196         switch (tmp) {
6197
6198         default:                        /* not a keyword */
6199             /* Trade off - by using this evil construction we can pull the
6200                variable gv into the block labelled keylookup. If not, then
6201                we have to give it function scope so that the goto from the
6202                earlier ':' case doesn't bypass the initialisation.  */
6203             if (0) {
6204             just_a_word_zero_gv:
6205                 gv = NULL;
6206                 gvp = NULL;
6207                 orig_keyword = 0;
6208             }
6209           just_a_word: {
6210                 SV *sv;
6211                 int pkgname = 0;
6212                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6213                 OP *rv2cv_op;
6214                 CV *cv;
6215 #ifdef PERL_MAD
6216                 SV *nextPL_nextwhite = 0;
6217 #endif
6218
6219
6220                 /* Get the rest if it looks like a package qualifier */
6221
6222                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6223                     STRLEN morelen;
6224                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6225                                   TRUE, &morelen);
6226                     if (!morelen)
6227                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6228                                 *s == '\'' ? "'" : "::");
6229                     len += morelen;
6230                     pkgname = 1;
6231                 }
6232
6233                 if (PL_expect == XOPERATOR) {
6234                     if (PL_bufptr == PL_linestart) {
6235                         CopLINE_dec(PL_curcop);
6236                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6237                         CopLINE_inc(PL_curcop);
6238                     }
6239                     else
6240                         no_op("Bareword",s);
6241                 }
6242
6243                 /* Look for a subroutine with this name in current package,
6244                    unless name is "Foo::", in which case Foo is a bearword
6245                    (and a package name). */
6246
6247                 if (len > 2 && !PL_madskills &&
6248                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6249                 {
6250                     if (ckWARN(WARN_BAREWORD)
6251                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6252                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6253                             "Bareword \"%s\" refers to nonexistent package",
6254                              PL_tokenbuf);
6255                     len -= 2;
6256                     PL_tokenbuf[len] = '\0';
6257                     gv = NULL;
6258                     gvp = 0;
6259                 }
6260                 else {
6261                     if (!gv) {
6262                         /* Mustn't actually add anything to a symbol table.
6263                            But also don't want to "initialise" any placeholder
6264                            constants that might already be there into full
6265                            blown PVGVs with attached PVCV.  */
6266                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6267                                                GV_NOADD_NOINIT, SVt_PVCV);
6268                     }
6269                     len = 0;
6270                 }
6271
6272                 /* if we saw a global override before, get the right name */
6273
6274                 if (gvp) {
6275                     sv = newSVpvs("CORE::GLOBAL::");
6276                     sv_catpv(sv,PL_tokenbuf);
6277                 }
6278                 else {
6279                     /* If len is 0, newSVpv does strlen(), which is correct.
6280                        If len is non-zero, then it will be the true length,
6281                        and so the scalar will be created correctly.  */
6282                     sv = newSVpv(PL_tokenbuf,len);
6283                 }
6284 #ifdef PERL_MAD
6285                 if (PL_madskills && !PL_thistoken) {
6286                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6287                     PL_thistoken = newSVpvn(start,s - start);
6288                     PL_realtokenstart = s - SvPVX(PL_linestr);
6289                 }
6290 #endif
6291
6292                 /* Presume this is going to be a bareword of some sort. */
6293
6294                 CLINE;
6295                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6296                 pl_yylval.opval->op_private = OPpCONST_BARE;
6297                 /* UTF-8 package name? */
6298                 if (UTF && !IN_BYTES &&
6299                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
6300                     SvUTF8_on(sv);
6301
6302                 /* And if "Foo::", then that's what it certainly is. */
6303
6304                 if (len)
6305                     goto safe_bareword;
6306
6307                 cv = NULL;
6308                 {
6309                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6310                     const_op->op_private = OPpCONST_BARE;
6311                     rv2cv_op = newCVREF(0, const_op);
6312                 }
6313                 if (rv2cv_op->op_type == OP_RV2CV &&
6314                         (rv2cv_op->op_flags & OPf_KIDS)) {
6315                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6316                     switch (rv_op->op_type) {
6317                         case OP_CONST: {
6318                             SV *sv = cSVOPx_sv(rv_op);
6319                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6320                                 cv = (CV*)SvRV(sv);
6321                         } break;
6322                         case OP_GV: {
6323                             GV *gv = cGVOPx_gv(rv_op);
6324                             CV *maybe_cv = GvCVu(gv);
6325                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6326                                 cv = maybe_cv;
6327                         } break;
6328                     }
6329                 }
6330
6331                 /* See if it's the indirect object for a list operator. */
6332
6333                 if (PL_oldoldbufptr &&
6334                     PL_oldoldbufptr < PL_bufptr &&
6335                     (PL_oldoldbufptr == PL_last_lop
6336                      || PL_oldoldbufptr == PL_last_uni) &&
6337                     /* NO SKIPSPACE BEFORE HERE! */
6338                     (PL_expect == XREF ||
6339                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6340                 {
6341                     bool immediate_paren = *s == '(';
6342
6343                     /* (Now we can afford to cross potential line boundary.) */
6344                     s = SKIPSPACE2(s,nextPL_nextwhite);
6345 #ifdef PERL_MAD
6346                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6347 #endif
6348
6349                     /* Two barewords in a row may indicate method call. */
6350
6351                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6352                         (tmp = intuit_method(s, gv, cv))) {
6353                         op_free(rv2cv_op);
6354                         return REPORT(tmp);
6355                     }
6356
6357                     /* If not a declared subroutine, it's an indirect object. */
6358                     /* (But it's an indir obj regardless for sort.) */
6359                     /* Also, if "_" follows a filetest operator, it's a bareword */
6360
6361                     if (
6362                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6363                          (!cv &&
6364                         (PL_last_lop_op != OP_MAPSTART &&
6365                          PL_last_lop_op != OP_GREPSTART))))
6366                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6367                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6368                        )
6369                     {
6370                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6371                         goto bareword;
6372                     }
6373                 }
6374
6375                 PL_expect = XOPERATOR;
6376 #ifdef PERL_MAD
6377                 if (isSPACE(*s))
6378                     s = SKIPSPACE2(s,nextPL_nextwhite);
6379                 PL_nextwhite = nextPL_nextwhite;
6380 #else
6381                 s = skipspace(s);
6382 #endif
6383
6384                 /* Is this a word before a => operator? */
6385                 if (*s == '=' && s[1] == '>' && !pkgname) {
6386                     op_free(rv2cv_op);
6387                     CLINE;
6388                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6389                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6390                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6391                     TERM(WORD);
6392                 }
6393
6394                 /* If followed by a paren, it's certainly a subroutine. */
6395                 if (*s == '(') {
6396                     CLINE;
6397                     if (cv) {
6398                         d = s + 1;
6399                         while (SPACE_OR_TAB(*d))
6400                             d++;
6401                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6402                             s = d + 1;
6403                             goto its_constant;
6404                         }
6405                     }
6406 #ifdef PERL_MAD
6407                     if (PL_madskills) {
6408                         PL_nextwhite = PL_thiswhite;
6409                         PL_thiswhite = 0;
6410                     }
6411                     start_force(PL_curforce);
6412 #endif
6413                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6414                     PL_expect = XOPERATOR;
6415 #ifdef PERL_MAD
6416                     if (PL_madskills) {
6417                         PL_nextwhite = nextPL_nextwhite;
6418                         curmad('X', PL_thistoken);
6419                         PL_thistoken = newSVpvs("");
6420                     }
6421 #endif
6422                     op_free(rv2cv_op);
6423                     force_next(WORD);
6424                     pl_yylval.ival = 0;
6425                     TOKEN('&');
6426                 }
6427
6428                 /* If followed by var or block, call it a method (unless sub) */
6429
6430                 if ((*s == '$' || *s == '{') && !cv) {
6431                     op_free(rv2cv_op);
6432                     PL_last_lop = PL_oldbufptr;
6433                     PL_last_lop_op = OP_METHOD;
6434                     PREBLOCK(METHOD);
6435                 }
6436
6437                 /* If followed by a bareword, see if it looks like indir obj. */
6438
6439                 if (!orig_keyword
6440                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6441                         && (tmp = intuit_method(s, gv, cv))) {
6442                     op_free(rv2cv_op);
6443                     return REPORT(tmp);
6444                 }
6445
6446                 /* Not a method, so call it a subroutine (if defined) */
6447
6448                 if (cv) {
6449                     if (lastchar == '-')
6450                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6451                                          "Ambiguous use of -%s resolved as -&%s()",
6452                                          PL_tokenbuf, PL_tokenbuf);
6453                     /* Check for a constant sub */
6454                     if ((sv = cv_const_sv(cv))) {
6455                   its_constant:
6456                         op_free(rv2cv_op);
6457                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6458                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6459                         pl_yylval.opval->op_private = 0;
6460                         TOKEN(WORD);
6461                     }
6462
6463                     op_free(pl_yylval.opval);
6464                     pl_yylval.opval = rv2cv_op;
6465                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6466                     PL_last_lop = PL_oldbufptr;
6467                     PL_last_lop_op = OP_ENTERSUB;
6468                     /* Is there a prototype? */
6469                     if (
6470 #ifdef PERL_MAD
6471                         cv &&
6472 #endif
6473                         SvPOK(cv))
6474                     {
6475                         STRLEN protolen;
6476                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6477                         if (!protolen)
6478                             TERM(FUNC0SUB);
6479                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6480                             OPERATOR(UNIOPSUB);
6481                         while (*proto == ';')
6482                             proto++;
6483                         if (*proto == '&' && *s == '{') {
6484                             if (PL_curstash)
6485                                 sv_setpvs(PL_subname, "__ANON__");
6486                             else
6487                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6488                             PREBLOCK(LSTOPSUB);
6489                         }
6490                     }
6491 #ifdef PERL_MAD
6492                     {
6493                         if (PL_madskills) {
6494                             PL_nextwhite = PL_thiswhite;
6495                             PL_thiswhite = 0;
6496                         }
6497                         start_force(PL_curforce);
6498                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6499                         PL_expect = XTERM;
6500                         if (PL_madskills) {
6501                             PL_nextwhite = nextPL_nextwhite;
6502                             curmad('X', PL_thistoken);
6503                             PL_thistoken = newSVpvs("");
6504                         }
6505                         force_next(WORD);
6506                         TOKEN(NOAMP);
6507                     }
6508                 }
6509
6510                 /* Guess harder when madskills require "best effort". */
6511                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6512                     int probable_sub = 0;
6513                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6514                         probable_sub = 1;
6515                     else if (isALPHA(*s)) {
6516                         char tmpbuf[1024];
6517                         STRLEN tmplen;
6518                         d = s;
6519                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6520                         if (!keyword(tmpbuf, tmplen, 0))
6521                             probable_sub = 1;
6522                         else {
6523                             while (d < PL_bufend && isSPACE(*d))
6524                                 d++;
6525                             if (*d == '=' && d[1] == '>')
6526                                 probable_sub = 1;
6527                         }
6528                     }
6529                     if (probable_sub) {
6530                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6531                         op_free(pl_yylval.opval);
6532                         pl_yylval.opval = rv2cv_op;
6533                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6534                         PL_last_lop = PL_oldbufptr;
6535                         PL_last_lop_op = OP_ENTERSUB;
6536                         PL_nextwhite = PL_thiswhite;
6537                         PL_thiswhite = 0;
6538                         start_force(PL_curforce);
6539                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6540                         PL_expect = XTERM;
6541                         PL_nextwhite = nextPL_nextwhite;
6542                         curmad('X', PL_thistoken);
6543                         PL_thistoken = newSVpvs("");
6544                         force_next(WORD);
6545                         TOKEN(NOAMP);
6546                     }
6547 #else
6548                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6549                     PL_expect = XTERM;
6550                     force_next(WORD);
6551                     TOKEN(NOAMP);
6552 #endif
6553                 }
6554
6555                 /* Call it a bare word */
6556
6557                 if (PL_hints & HINT_STRICT_SUBS)
6558                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6559                 else {
6560                 bareword:
6561                     /* after "print" and similar functions (corresponding to
6562                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6563                      * a filehandle should be subject to "strict subs".
6564                      * Likewise for the optional indirect-object argument to system
6565                      * or exec, which can't be a bareword */
6566                     if ((PL_last_lop_op == OP_PRINT
6567                             || PL_last_lop_op == OP_PRTF
6568                             || PL_last_lop_op == OP_SAY
6569                             || PL_last_lop_op == OP_SYSTEM
6570                             || PL_last_lop_op == OP_EXEC)
6571                             && (PL_hints & HINT_STRICT_SUBS))
6572                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6573                     if (lastchar != '-') {
6574                         if (ckWARN(WARN_RESERVED)) {
6575                             d = PL_tokenbuf;
6576                             while (isLOWER(*d))
6577                                 d++;
6578                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6579                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6580                                        PL_tokenbuf);
6581                         }
6582                     }
6583                 }
6584                 op_free(rv2cv_op);
6585
6586             safe_bareword:
6587                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6588                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6589                                      "Operator or semicolon missing before %c%s",
6590                                      lastchar, PL_tokenbuf);
6591                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6592                                      "Ambiguous use of %c resolved as operator %c",
6593                                      lastchar, lastchar);
6594                 }
6595                 TOKEN(WORD);
6596             }
6597
6598         case KEY___FILE__:
6599             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6600                                         newSVpv(CopFILE(PL_curcop),0));
6601             TERM(THING);
6602
6603         case KEY___LINE__:
6604             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6605                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6606             TERM(THING);
6607
6608         case KEY___PACKAGE__:
6609             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6610                                         (PL_curstash
6611                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6612                                          : &PL_sv_undef));
6613             TERM(THING);
6614
6615         case KEY___DATA__:
6616         case KEY___END__: {
6617             GV *gv;
6618             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6619                 const char *pname = "main";
6620                 if (PL_tokenbuf[2] == 'D')
6621                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6622                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6623                                 SVt_PVIO);
6624                 GvMULTI_on(gv);
6625                 if (!GvIO(gv))
6626                     GvIOp(gv) = newIO();
6627                 IoIFP(GvIOp(gv)) = PL_rsfp;
6628 #if defined(HAS_FCNTL) && defined(F_SETFD)
6629                 {
6630                     const int fd = PerlIO_fileno(PL_rsfp);
6631                     fcntl(fd,F_SETFD,fd >= 3);
6632                 }
6633 #endif
6634                 /* Mark this internal pseudo-handle as clean */
6635                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6636                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6637                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6638                 else
6639                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6640 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6641                 /* if the script was opened in binmode, we need to revert
6642                  * it to text mode for compatibility; but only iff it has CRs
6643                  * XXX this is a questionable hack at best. */
6644                 if (PL_bufend-PL_bufptr > 2
6645                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6646                 {
6647                     Off_t loc = 0;
6648                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6649                         loc = PerlIO_tell(PL_rsfp);
6650                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6651                     }
6652 #ifdef NETWARE
6653                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6654 #else
6655                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6656 #endif  /* NETWARE */
6657 #ifdef PERLIO_IS_STDIO /* really? */
6658 #  if defined(__BORLANDC__)
6659                         /* XXX see note in do_binmode() */
6660                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6661 #  endif
6662 #endif
6663                         if (loc > 0)
6664                             PerlIO_seek(PL_rsfp, loc, 0);
6665                     }
6666                 }
6667 #endif
6668 #ifdef PERLIO_LAYERS
6669                 if (!IN_BYTES) {
6670                     if (UTF)
6671                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6672                     else if (PL_encoding) {
6673                         SV *name;
6674                         dSP;
6675                         ENTER;
6676                         SAVETMPS;
6677                         PUSHMARK(sp);
6678                         EXTEND(SP, 1);
6679                         XPUSHs(PL_encoding);
6680                         PUTBACK;
6681                         call_method("name", G_SCALAR);
6682                         SPAGAIN;
6683                         name = POPs;
6684                         PUTBACK;
6685                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6686                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6687                                                       SVfARG(name)));
6688                         FREETMPS;
6689                         LEAVE;
6690                     }
6691                 }
6692 #endif
6693 #ifdef PERL_MAD
6694                 if (PL_madskills) {
6695                     if (PL_realtokenstart >= 0) {
6696                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6697                         if (!PL_endwhite)
6698                             PL_endwhite = newSVpvs("");
6699                         sv_catsv(PL_endwhite, PL_thiswhite);
6700                         PL_thiswhite = 0;
6701                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6702                         PL_realtokenstart = -1;
6703                     }
6704                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6705                            != NULL) ;
6706                 }
6707 #endif
6708                 PL_rsfp = NULL;
6709             }
6710             goto fake_eof;
6711         }
6712
6713         case KEY_AUTOLOAD:
6714         case KEY_DESTROY:
6715         case KEY_BEGIN:
6716         case KEY_UNITCHECK:
6717         case KEY_CHECK:
6718         case KEY_INIT:
6719         case KEY_END:
6720             if (PL_expect == XSTATE) {
6721                 s = PL_bufptr;
6722                 goto really_sub;
6723             }
6724             goto just_a_word;
6725
6726         case KEY_CORE:
6727             if (*s == ':' && s[1] == ':') {
6728                 s += 2;
6729                 d = s;
6730                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6731                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6732                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6733                 if (tmp < 0)
6734                     tmp = -tmp;
6735                 else if (tmp == KEY_require || tmp == KEY_do)
6736                     /* that's a way to remember we saw "CORE::" */
6737                     orig_keyword = tmp;
6738                 goto reserved_word;
6739             }
6740             goto just_a_word;
6741
6742         case KEY_abs:
6743             UNI(OP_ABS);
6744
6745         case KEY_alarm:
6746             UNI(OP_ALARM);
6747
6748         case KEY_accept:
6749             LOP(OP_ACCEPT,XTERM);
6750
6751         case KEY_and:
6752             OPERATOR(ANDOP);
6753
6754         case KEY_atan2:
6755             LOP(OP_ATAN2,XTERM);
6756
6757         case KEY_bind:
6758             LOP(OP_BIND,XTERM);
6759
6760         case KEY_binmode:
6761             LOP(OP_BINMODE,XTERM);
6762
6763         case KEY_bless:
6764             LOP(OP_BLESS,XTERM);
6765
6766         case KEY_break:
6767             FUN0(OP_BREAK);
6768
6769         case KEY_chop:
6770             UNI(OP_CHOP);
6771
6772         case KEY_continue:
6773             /* When 'use switch' is in effect, continue has a dual
6774                life as a control operator. */
6775             {
6776                 if (!FEATURE_IS_ENABLED("switch"))
6777                     PREBLOCK(CONTINUE);
6778                 else {
6779                     /* We have to disambiguate the two senses of
6780                       "continue". If the next token is a '{' then
6781                       treat it as the start of a continue block;
6782                       otherwise treat it as a control operator.
6783                      */
6784                     s = skipspace(s);
6785                     if (*s == '{')
6786             PREBLOCK(CONTINUE);
6787                     else
6788                         FUN0(OP_CONTINUE);
6789                 }
6790             }
6791
6792         case KEY_chdir:
6793             /* may use HOME */
6794             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6795             UNI(OP_CHDIR);
6796
6797         case KEY_close:
6798             UNI(OP_CLOSE);
6799
6800         case KEY_closedir:
6801             UNI(OP_CLOSEDIR);
6802
6803         case KEY_cmp:
6804             Eop(OP_SCMP);
6805
6806         case KEY_caller:
6807             UNI(OP_CALLER);
6808
6809         case KEY_crypt:
6810 #ifdef FCRYPT
6811             if (!PL_cryptseen) {
6812                 PL_cryptseen = TRUE;
6813                 init_des();
6814             }
6815 #endif
6816             LOP(OP_CRYPT,XTERM);
6817
6818         case KEY_chmod:
6819             LOP(OP_CHMOD,XTERM);
6820
6821         case KEY_chown:
6822             LOP(OP_CHOWN,XTERM);
6823
6824         case KEY_connect:
6825             LOP(OP_CONNECT,XTERM);
6826
6827         case KEY_chr:
6828             UNI(OP_CHR);
6829
6830         case KEY_cos:
6831             UNI(OP_COS);
6832
6833         case KEY_chroot:
6834             UNI(OP_CHROOT);
6835
6836         case KEY_default:
6837             PREBLOCK(DEFAULT);
6838
6839         case KEY_do:
6840             s = SKIPSPACE1(s);
6841             if (*s == '{')
6842                 PRETERMBLOCK(DO);
6843             if (*s != '\'')
6844                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6845             if (orig_keyword == KEY_do) {
6846                 orig_keyword = 0;
6847                 pl_yylval.ival = 1;
6848             }
6849             else
6850                 pl_yylval.ival = 0;
6851             OPERATOR(DO);
6852
6853         case KEY_die:
6854             PL_hints |= HINT_BLOCK_SCOPE;
6855             LOP(OP_DIE,XTERM);
6856
6857         case KEY_defined:
6858             UNI(OP_DEFINED);
6859
6860         case KEY_delete:
6861             UNI(OP_DELETE);
6862
6863         case KEY_dbmopen:
6864             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6865             LOP(OP_DBMOPEN,XTERM);
6866
6867         case KEY_dbmclose:
6868             UNI(OP_DBMCLOSE);
6869
6870         case KEY_dump:
6871             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6872             LOOPX(OP_DUMP);
6873
6874         case KEY_else:
6875             PREBLOCK(ELSE);
6876
6877         case KEY_elsif:
6878             pl_yylval.ival = CopLINE(PL_curcop);
6879             OPERATOR(ELSIF);
6880
6881         case KEY_eq:
6882             Eop(OP_SEQ);
6883
6884         case KEY_exists:
6885             UNI(OP_EXISTS);
6886
6887         case KEY_exit:
6888             if (PL_madskills)
6889                 UNI(OP_INT);
6890             UNI(OP_EXIT);
6891
6892         case KEY_eval:
6893             s = SKIPSPACE1(s);
6894             if (*s == '{') { /* block eval */
6895                 PL_expect = XTERMBLOCK;
6896                 UNIBRACK(OP_ENTERTRY);
6897             }
6898             else { /* string eval */
6899                 PL_expect = XTERM;
6900                 UNIBRACK(OP_ENTEREVAL);
6901             }
6902
6903         case KEY_eof:
6904             UNI(OP_EOF);
6905
6906         case KEY_exp:
6907             UNI(OP_EXP);
6908
6909         case KEY_each:
6910             UNI(OP_EACH);
6911
6912         case KEY_exec:
6913             LOP(OP_EXEC,XREF);
6914
6915         case KEY_endhostent:
6916             FUN0(OP_EHOSTENT);
6917
6918         case KEY_endnetent:
6919             FUN0(OP_ENETENT);
6920
6921         case KEY_endservent:
6922             FUN0(OP_ESERVENT);
6923
6924         case KEY_endprotoent:
6925             FUN0(OP_EPROTOENT);
6926
6927         case KEY_endpwent:
6928             FUN0(OP_EPWENT);
6929
6930         case KEY_endgrent:
6931             FUN0(OP_EGRENT);
6932
6933         case KEY_for:
6934         case KEY_foreach:
6935             pl_yylval.ival = CopLINE(PL_curcop);
6936             s = SKIPSPACE1(s);
6937             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6938                 char *p = s;
6939 #ifdef PERL_MAD
6940                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6941 #endif
6942
6943                 if ((PL_bufend - p) >= 3 &&
6944                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6945                     p += 2;
6946                 else if ((PL_bufend - p) >= 4 &&
6947                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6948                     p += 3;
6949                 p = PEEKSPACE(p);
6950                 if (isIDFIRST_lazy_if(p,UTF)) {
6951                     p = scan_ident(p, PL_bufend,
6952                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6953                     p = PEEKSPACE(p);
6954                 }
6955                 if (*p != '$')
6956                     Perl_croak(aTHX_ "Missing $ on loop variable");
6957 #ifdef PERL_MAD
6958                 s = SvPVX(PL_linestr) + soff;
6959 #endif
6960             }
6961             OPERATOR(FOR);
6962
6963         case KEY_formline:
6964             LOP(OP_FORMLINE,XTERM);
6965
6966         case KEY_fork:
6967             FUN0(OP_FORK);
6968
6969         case KEY_fcntl:
6970             LOP(OP_FCNTL,XTERM);
6971
6972         case KEY_fileno:
6973             UNI(OP_FILENO);
6974
6975         case KEY_flock:
6976             LOP(OP_FLOCK,XTERM);
6977
6978         case KEY_gt:
6979             Rop(OP_SGT);
6980
6981         case KEY_ge:
6982             Rop(OP_SGE);
6983
6984         case KEY_grep:
6985             LOP(OP_GREPSTART, XREF);
6986
6987         case KEY_goto:
6988             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6989             LOOPX(OP_GOTO);
6990
6991         case KEY_gmtime:
6992             UNI(OP_GMTIME);
6993
6994         case KEY_getc:
6995             UNIDOR(OP_GETC);
6996
6997         case KEY_getppid:
6998             FUN0(OP_GETPPID);
6999
7000         case KEY_getpgrp:
7001             UNI(OP_GETPGRP);
7002
7003         case KEY_getpriority:
7004             LOP(OP_GETPRIORITY,XTERM);
7005
7006         case KEY_getprotobyname:
7007             UNI(OP_GPBYNAME);
7008
7009         case KEY_getprotobynumber:
7010             LOP(OP_GPBYNUMBER,XTERM);
7011
7012         case KEY_getprotoent:
7013             FUN0(OP_GPROTOENT);
7014
7015         case KEY_getpwent:
7016             FUN0(OP_GPWENT);
7017
7018         case KEY_getpwnam:
7019             UNI(OP_GPWNAM);
7020
7021         case KEY_getpwuid:
7022             UNI(OP_GPWUID);
7023
7024         case KEY_getpeername:
7025             UNI(OP_GETPEERNAME);
7026
7027         case KEY_gethostbyname:
7028             UNI(OP_GHBYNAME);
7029
7030         case KEY_gethostbyaddr:
7031             LOP(OP_GHBYADDR,XTERM);
7032
7033         case KEY_gethostent:
7034             FUN0(OP_GHOSTENT);
7035
7036         case KEY_getnetbyname:
7037             UNI(OP_GNBYNAME);
7038
7039         case KEY_getnetbyaddr:
7040             LOP(OP_GNBYADDR,XTERM);
7041
7042         case KEY_getnetent:
7043             FUN0(OP_GNETENT);
7044
7045         case KEY_getservbyname:
7046             LOP(OP_GSBYNAME,XTERM);
7047
7048         case KEY_getservbyport:
7049             LOP(OP_GSBYPORT,XTERM);
7050
7051         case KEY_getservent:
7052             FUN0(OP_GSERVENT);
7053
7054         case KEY_getsockname:
7055             UNI(OP_GETSOCKNAME);
7056
7057         case KEY_getsockopt:
7058             LOP(OP_GSOCKOPT,XTERM);
7059
7060         case KEY_getgrent:
7061             FUN0(OP_GGRENT);
7062
7063         case KEY_getgrnam:
7064             UNI(OP_GGRNAM);
7065
7066         case KEY_getgrgid:
7067             UNI(OP_GGRGID);
7068
7069         case KEY_getlogin:
7070             FUN0(OP_GETLOGIN);
7071
7072         case KEY_given:
7073             pl_yylval.ival = CopLINE(PL_curcop);
7074             OPERATOR(GIVEN);
7075
7076         case KEY_glob:
7077             LOP(OP_GLOB,XTERM);
7078
7079         case KEY_hex:
7080             UNI(OP_HEX);
7081
7082         case KEY_if:
7083             pl_yylval.ival = CopLINE(PL_curcop);
7084             OPERATOR(IF);
7085
7086         case KEY_index:
7087             LOP(OP_INDEX,XTERM);
7088
7089         case KEY_int:
7090             UNI(OP_INT);
7091
7092         case KEY_ioctl:
7093             LOP(OP_IOCTL,XTERM);
7094
7095         case KEY_join:
7096             LOP(OP_JOIN,XTERM);
7097
7098         case KEY_keys:
7099             UNI(OP_KEYS);
7100
7101         case KEY_kill:
7102             LOP(OP_KILL,XTERM);
7103
7104         case KEY_last:
7105             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7106             LOOPX(OP_LAST);
7107
7108         case KEY_lc:
7109             UNI(OP_LC);
7110
7111         case KEY_lcfirst:
7112             UNI(OP_LCFIRST);
7113
7114         case KEY_local:
7115             pl_yylval.ival = 0;
7116             OPERATOR(LOCAL);
7117
7118         case KEY_length:
7119             UNI(OP_LENGTH);
7120
7121         case KEY_lt:
7122             Rop(OP_SLT);
7123
7124         case KEY_le:
7125             Rop(OP_SLE);
7126
7127         case KEY_localtime:
7128             UNI(OP_LOCALTIME);
7129
7130         case KEY_log:
7131             UNI(OP_LOG);
7132
7133         case KEY_link:
7134             LOP(OP_LINK,XTERM);
7135
7136         case KEY_listen:
7137             LOP(OP_LISTEN,XTERM);
7138
7139         case KEY_lock:
7140             UNI(OP_LOCK);
7141
7142         case KEY_lstat:
7143             UNI(OP_LSTAT);
7144
7145         case KEY_m:
7146             s = scan_pat(s,OP_MATCH);
7147             TERM(sublex_start());
7148
7149         case KEY_map:
7150             LOP(OP_MAPSTART, XREF);
7151
7152         case KEY_mkdir:
7153             LOP(OP_MKDIR,XTERM);
7154
7155         case KEY_msgctl:
7156             LOP(OP_MSGCTL,XTERM);
7157
7158         case KEY_msgget:
7159             LOP(OP_MSGGET,XTERM);
7160
7161         case KEY_msgrcv:
7162             LOP(OP_MSGRCV,XTERM);
7163
7164         case KEY_msgsnd:
7165             LOP(OP_MSGSND,XTERM);
7166
7167         case KEY_our:
7168         case KEY_my:
7169         case KEY_state:
7170             PL_in_my = (U16)tmp;
7171             s = SKIPSPACE1(s);
7172             if (isIDFIRST_lazy_if(s,UTF)) {
7173 #ifdef PERL_MAD
7174                 char* start = s;
7175 #endif
7176                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7177                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7178                     goto really_sub;
7179                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7180                 if (!PL_in_my_stash) {
7181                     char tmpbuf[1024];
7182                     PL_bufptr = s;
7183                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7184                     yyerror(tmpbuf);
7185                 }
7186 #ifdef PERL_MAD
7187                 if (PL_madskills) {     /* just add type to declarator token */
7188                     sv_catsv(PL_thistoken, PL_nextwhite);
7189                     PL_nextwhite = 0;
7190                     sv_catpvn(PL_thistoken, start, s - start);
7191                 }
7192 #endif
7193             }
7194             pl_yylval.ival = 1;
7195             OPERATOR(MY);
7196
7197         case KEY_next:
7198             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7199             LOOPX(OP_NEXT);
7200
7201         case KEY_ne:
7202             Eop(OP_SNE);
7203
7204         case KEY_no:
7205             s = tokenize_use(0, s);
7206             OPERATOR(USE);
7207
7208         case KEY_not:
7209             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7210                 FUN1(OP_NOT);
7211             else
7212                 OPERATOR(NOTOP);
7213
7214         case KEY_open:
7215             s = SKIPSPACE1(s);
7216             if (isIDFIRST_lazy_if(s,UTF)) {
7217                 const char *t;
7218                 for (d = s; isALNUM_lazy_if(d,UTF);)
7219                     d++;
7220                 for (t=d; isSPACE(*t);)
7221                     t++;
7222                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7223                     /* [perl #16184] */
7224                     && !(t[0] == '=' && t[1] == '>')
7225                 ) {
7226                     int parms_len = (int)(d-s);
7227                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7228                            "Precedence problem: open %.*s should be open(%.*s)",
7229                             parms_len, s, parms_len, s);
7230                 }
7231             }
7232             LOP(OP_OPEN,XTERM);
7233
7234         case KEY_or:
7235             pl_yylval.ival = OP_OR;
7236             OPERATOR(OROP);
7237
7238         case KEY_ord:
7239             UNI(OP_ORD);
7240
7241         case KEY_oct:
7242             UNI(OP_OCT);
7243
7244         case KEY_opendir:
7245             LOP(OP_OPEN_DIR,XTERM);
7246
7247         case KEY_print:
7248             checkcomma(s,PL_tokenbuf,"filehandle");
7249             LOP(OP_PRINT,XREF);
7250
7251         case KEY_printf:
7252             checkcomma(s,PL_tokenbuf,"filehandle");
7253             LOP(OP_PRTF,XREF);
7254
7255         case KEY_prototype:
7256             UNI(OP_PROTOTYPE);
7257
7258         case KEY_push:
7259             LOP(OP_PUSH,XTERM);
7260
7261         case KEY_pop:
7262             UNIDOR(OP_POP);
7263
7264         case KEY_pos:
7265             UNIDOR(OP_POS);
7266
7267         case KEY_pack:
7268             LOP(OP_PACK,XTERM);
7269
7270         case KEY_package:
7271             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7272             s = SKIPSPACE1(s);
7273             s = force_strict_version(s);
7274             OPERATOR(PACKAGE);
7275
7276         case KEY_pipe:
7277             LOP(OP_PIPE_OP,XTERM);
7278
7279         case KEY_q:
7280             s = scan_str(s,!!PL_madskills,FALSE);
7281             if (!s)
7282                 missingterm(NULL);
7283             pl_yylval.ival = OP_CONST;
7284             TERM(sublex_start());
7285
7286         case KEY_quotemeta:
7287             UNI(OP_QUOTEMETA);
7288
7289         case KEY_qw:
7290             s = scan_str(s,!!PL_madskills,FALSE);
7291             if (!s)
7292                 missingterm(NULL);
7293             PL_expect = XOPERATOR;
7294             force_next(')');
7295             if (SvCUR(PL_lex_stuff)) {
7296                 OP *words = NULL;
7297                 int warned = 0;
7298                 d = SvPV_force(PL_lex_stuff, len);
7299                 while (len) {
7300                     for (; isSPACE(*d) && len; --len, ++d)
7301                         /**/;
7302                     if (len) {
7303                         SV *sv;
7304                         const char *b = d;
7305                         if (!warned && ckWARN(WARN_QW)) {
7306                             for (; !isSPACE(*d) && len; --len, ++d) {
7307                                 if (*d == ',') {
7308                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7309                                         "Possible attempt to separate words with commas");
7310                                     ++warned;
7311                                 }
7312                                 else if (*d == '#') {
7313                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7314                                         "Possible attempt to put comments in qw() list");
7315                                     ++warned;
7316                                 }
7317                             }
7318                         }
7319                         else {
7320                             for (; !isSPACE(*d) && len; --len, ++d)
7321                                 /**/;
7322                         }
7323                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7324                         words = append_elem(OP_LIST, words,
7325                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7326                     }
7327                 }
7328                 if (words) {
7329                     start_force(PL_curforce);
7330                     NEXTVAL_NEXTTOKE.opval = words;
7331                     force_next(THING);
7332                 }
7333             }
7334             if (PL_lex_stuff) {
7335                 SvREFCNT_dec(PL_lex_stuff);
7336                 PL_lex_stuff = NULL;
7337             }
7338             PL_expect = XTERM;
7339             TOKEN('(');
7340
7341         case KEY_qq:
7342             s = scan_str(s,!!PL_madskills,FALSE);
7343             if (!s)
7344                 missingterm(NULL);
7345             pl_yylval.ival = OP_STRINGIFY;
7346             if (SvIVX(PL_lex_stuff) == '\'')
7347                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7348             TERM(sublex_start());
7349
7350         case KEY_qr:
7351             s = scan_pat(s,OP_QR);
7352             TERM(sublex_start());
7353
7354         case KEY_qx:
7355             s = scan_str(s,!!PL_madskills,FALSE);
7356             if (!s)
7357                 missingterm(NULL);
7358             readpipe_override();
7359             TERM(sublex_start());
7360
7361         case KEY_return:
7362             OLDLOP(OP_RETURN);
7363
7364         case KEY_require:
7365             s = SKIPSPACE1(s);
7366             if (isDIGIT(*s)) {
7367                 s = force_version(s, FALSE);
7368             }
7369             else if (*s != 'v' || !isDIGIT(s[1])
7370                     || (s = force_version(s, TRUE), *s == 'v'))
7371             {
7372                 *PL_tokenbuf = '\0';
7373                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7374                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7375                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7376                 else if (*s == '<')
7377                     yyerror("<> should be quotes");
7378             }
7379             if (orig_keyword == KEY_require) {
7380                 orig_keyword = 0;
7381                 pl_yylval.ival = 1;
7382             }
7383             else
7384                 pl_yylval.ival = 0;
7385             PL_expect = XTERM;
7386             PL_bufptr = s;
7387             PL_last_uni = PL_oldbufptr;
7388             PL_last_lop_op = OP_REQUIRE;
7389             s = skipspace(s);
7390             return REPORT( (int)REQUIRE );
7391
7392         case KEY_reset:
7393             UNI(OP_RESET);
7394
7395         case KEY_redo:
7396             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7397             LOOPX(OP_REDO);
7398
7399         case KEY_rename:
7400             LOP(OP_RENAME,XTERM);
7401
7402         case KEY_rand:
7403             UNI(OP_RAND);
7404
7405         case KEY_rmdir:
7406             UNI(OP_RMDIR);
7407
7408         case KEY_rindex:
7409             LOP(OP_RINDEX,XTERM);
7410
7411         case KEY_read:
7412             LOP(OP_READ,XTERM);
7413
7414         case KEY_readdir:
7415             UNI(OP_READDIR);
7416
7417         case KEY_readline:
7418             UNIDOR(OP_READLINE);
7419
7420         case KEY_readpipe:
7421             UNIDOR(OP_BACKTICK);
7422
7423         case KEY_rewinddir:
7424             UNI(OP_REWINDDIR);
7425
7426         case KEY_recv:
7427             LOP(OP_RECV,XTERM);
7428
7429         case KEY_reverse:
7430             LOP(OP_REVERSE,XTERM);
7431
7432         case KEY_readlink:
7433             UNIDOR(OP_READLINK);
7434
7435         case KEY_ref:
7436             UNI(OP_REF);
7437
7438         case KEY_s:
7439             s = scan_subst(s);
7440             if (pl_yylval.opval)
7441                 TERM(sublex_start());
7442             else
7443                 TOKEN(1);       /* force error */
7444
7445         case KEY_say:
7446             checkcomma(s,PL_tokenbuf,"filehandle");
7447             LOP(OP_SAY,XREF);
7448
7449         case KEY_chomp:
7450             UNI(OP_CHOMP);
7451
7452         case KEY_scalar:
7453             UNI(OP_SCALAR);
7454
7455         case KEY_select:
7456             LOP(OP_SELECT,XTERM);
7457
7458         case KEY_seek:
7459             LOP(OP_SEEK,XTERM);
7460
7461         case KEY_semctl:
7462             LOP(OP_SEMCTL,XTERM);
7463
7464         case KEY_semget:
7465             LOP(OP_SEMGET,XTERM);
7466
7467         case KEY_semop:
7468             LOP(OP_SEMOP,XTERM);
7469
7470         case KEY_send:
7471             LOP(OP_SEND,XTERM);
7472
7473         case KEY_setpgrp:
7474             LOP(OP_SETPGRP,XTERM);
7475
7476         case KEY_setpriority:
7477             LOP(OP_SETPRIORITY,XTERM);
7478
7479         case KEY_sethostent:
7480             UNI(OP_SHOSTENT);
7481
7482         case KEY_setnetent:
7483             UNI(OP_SNETENT);
7484
7485         case KEY_setservent:
7486             UNI(OP_SSERVENT);
7487
7488         case KEY_setprotoent:
7489             UNI(OP_SPROTOENT);
7490
7491         case KEY_setpwent:
7492             FUN0(OP_SPWENT);
7493
7494         case KEY_setgrent:
7495             FUN0(OP_SGRENT);
7496
7497         case KEY_seekdir:
7498             LOP(OP_SEEKDIR,XTERM);
7499
7500         case KEY_setsockopt:
7501             LOP(OP_SSOCKOPT,XTERM);
7502
7503         case KEY_shift:
7504             UNIDOR(OP_SHIFT);
7505
7506         case KEY_shmctl:
7507             LOP(OP_SHMCTL,XTERM);
7508
7509         case KEY_shmget:
7510             LOP(OP_SHMGET,XTERM);
7511
7512         case KEY_shmread:
7513             LOP(OP_SHMREAD,XTERM);
7514
7515         case KEY_shmwrite:
7516             LOP(OP_SHMWRITE,XTERM);
7517
7518         case KEY_shutdown:
7519             LOP(OP_SHUTDOWN,XTERM);
7520
7521         case KEY_sin:
7522             UNI(OP_SIN);
7523
7524         case KEY_sleep:
7525             UNI(OP_SLEEP);
7526
7527         case KEY_socket:
7528             LOP(OP_SOCKET,XTERM);
7529
7530         case KEY_socketpair:
7531             LOP(OP_SOCKPAIR,XTERM);
7532
7533         case KEY_sort:
7534             checkcomma(s,PL_tokenbuf,"subroutine name");
7535             s = SKIPSPACE1(s);
7536             if (*s == ';' || *s == ')')         /* probably a close */
7537                 Perl_croak(aTHX_ "sort is now a reserved word");
7538             PL_expect = XTERM;
7539             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7540             LOP(OP_SORT,XREF);
7541
7542         case KEY_split:
7543             LOP(OP_SPLIT,XTERM);
7544
7545         case KEY_sprintf:
7546             LOP(OP_SPRINTF,XTERM);
7547
7548         case KEY_splice:
7549             LOP(OP_SPLICE,XTERM);
7550
7551         case KEY_sqrt:
7552             UNI(OP_SQRT);
7553
7554         case KEY_srand:
7555             UNI(OP_SRAND);
7556
7557         case KEY_stat:
7558             UNI(OP_STAT);
7559
7560         case KEY_study:
7561             UNI(OP_STUDY);
7562
7563         case KEY_substr:
7564             LOP(OP_SUBSTR,XTERM);
7565
7566         case KEY_format:
7567         case KEY_sub:
7568           really_sub:
7569             {
7570                 char tmpbuf[sizeof PL_tokenbuf];
7571                 SSize_t tboffset = 0;
7572                 expectation attrful;
7573                 bool have_name, have_proto;
7574                 const int key = tmp;
7575
7576 #ifdef PERL_MAD
7577                 SV *tmpwhite = 0;
7578
7579                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7580                 SV *subtoken = newSVpvn(tstart, s - tstart);
7581                 PL_thistoken = 0;
7582
7583                 d = s;
7584                 s = SKIPSPACE2(s,tmpwhite);
7585 #else
7586                 s = skipspace(s);
7587 #endif
7588
7589                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7590                     (*s == ':' && s[1] == ':'))
7591                 {
7592 #ifdef PERL_MAD
7593                     SV *nametoke = NULL;
7594 #endif
7595
7596                     PL_expect = XBLOCK;
7597                     attrful = XATTRBLOCK;
7598                     /* remember buffer pos'n for later force_word */
7599                     tboffset = s - PL_oldbufptr;
7600                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7601 #ifdef PERL_MAD
7602                     if (PL_madskills)
7603                         nametoke = newSVpvn(s, d - s);
7604 #endif
7605                     if (memchr(tmpbuf, ':', len))
7606                         sv_setpvn(PL_subname, tmpbuf, len);
7607                     else {
7608                         sv_setsv(PL_subname,PL_curstname);
7609                         sv_catpvs(PL_subname,"::");
7610                         sv_catpvn(PL_subname,tmpbuf,len);
7611                     }
7612                     have_name = TRUE;
7613
7614 #ifdef PERL_MAD
7615
7616                     start_force(0);
7617                     CURMAD('X', nametoke);
7618                     CURMAD('_', tmpwhite);
7619                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7620                                       FALSE, TRUE, TRUE);
7621
7622                     s = SKIPSPACE2(d,tmpwhite);
7623 #else
7624                     s = skipspace(d);
7625 #endif
7626                 }
7627                 else {
7628                     if (key == KEY_my)
7629                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7630                     PL_expect = XTERMBLOCK;
7631                     attrful = XATTRTERM;
7632                     sv_setpvs(PL_subname,"?");
7633                     have_name = FALSE;
7634                 }
7635
7636                 if (key == KEY_format) {
7637                     if (*s == '=')
7638                         PL_lex_formbrack = PL_lex_brackets + 1;
7639 #ifdef PERL_MAD
7640                     PL_thistoken = subtoken;
7641                     s = d;
7642 #else
7643                     if (have_name)
7644                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7645                                           FALSE, TRUE, TRUE);
7646 #endif
7647                     OPERATOR(FORMAT);
7648                 }
7649
7650                 /* Look for a prototype */
7651                 if (*s == '(') {
7652                     char *p;
7653                     bool bad_proto = FALSE;
7654                     bool in_brackets = FALSE;
7655                     char greedy_proto = ' ';
7656                     bool proto_after_greedy_proto = FALSE;
7657                     bool must_be_last = FALSE;
7658                     bool underscore = FALSE;
7659                     bool seen_underscore = FALSE;
7660                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7661
7662                     s = scan_str(s,!!PL_madskills,FALSE);
7663                     if (!s)
7664                         Perl_croak(aTHX_ "Prototype not terminated");
7665                     /* strip spaces and check for bad characters */
7666                     d = SvPVX(PL_lex_stuff);
7667                     tmp = 0;
7668                     for (p = d; *p; ++p) {
7669                         if (!isSPACE(*p)) {
7670                             d[tmp++] = *p;
7671
7672                             if (warnillegalproto) {
7673                                 if (must_be_last)
7674                                     proto_after_greedy_proto = TRUE;
7675                                 if (!strchr("$@%*;[]&\\_", *p)) {
7676                                     bad_proto = TRUE;
7677                                 }
7678                                 else {
7679                                     if ( underscore ) {
7680                                         if ( *p != ';' )
7681                                             bad_proto = TRUE;
7682                                         underscore = FALSE;
7683                                     }
7684                                     if ( *p == '[' ) {
7685                                         in_brackets = TRUE;
7686                                     }
7687                                     else if ( *p == ']' ) {
7688                                         in_brackets = FALSE;
7689                                     }
7690                                     else if ( (*p == '@' || *p == '%') &&
7691                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7692                                          !in_brackets ) {
7693                                         must_be_last = TRUE;
7694                                         greedy_proto = *p;
7695                                     }
7696                                     else if ( *p == '_' ) {
7697                                         underscore = seen_underscore = TRUE;
7698                                     }
7699                                 }
7700                             }
7701                         }
7702                     }
7703                     d[tmp] = '\0';
7704                     if (proto_after_greedy_proto)
7705                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7706                                     "Prototype after '%c' for %"SVf" : %s",
7707                                     greedy_proto, SVfARG(PL_subname), d);
7708                     if (bad_proto)
7709                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7710                                     "Illegal character %sin prototype for %"SVf" : %s",
7711                                     seen_underscore ? "after '_' " : "",
7712                                     SVfARG(PL_subname), d);
7713                     SvCUR_set(PL_lex_stuff, tmp);
7714                     have_proto = TRUE;
7715
7716 #ifdef PERL_MAD
7717                     start_force(0);
7718                     CURMAD('q', PL_thisopen);
7719                     CURMAD('_', tmpwhite);
7720                     CURMAD('=', PL_thisstuff);
7721                     CURMAD('Q', PL_thisclose);
7722                     NEXTVAL_NEXTTOKE.opval =
7723                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7724                     PL_lex_stuff = NULL;
7725                     force_next(THING);
7726
7727                     s = SKIPSPACE2(s,tmpwhite);
7728 #else
7729                     s = skipspace(s);
7730 #endif
7731                 }
7732                 else
7733                     have_proto = FALSE;
7734
7735                 if (*s == ':' && s[1] != ':')
7736                     PL_expect = attrful;
7737                 else if (*s != '{' && key == KEY_sub) {
7738                     if (!have_name)
7739                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7740                     else if (*s != ';' && *s != '}')
7741                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7742                 }
7743
7744 #ifdef PERL_MAD
7745                 start_force(0);
7746                 if (tmpwhite) {
7747                     if (PL_madskills)
7748                         curmad('^', newSVpvs(""));
7749                     CURMAD('_', tmpwhite);
7750                 }
7751                 force_next(0);
7752
7753                 PL_thistoken = subtoken;
7754 #else
7755                 if (have_proto) {
7756                     NEXTVAL_NEXTTOKE.opval =
7757                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7758                     PL_lex_stuff = NULL;
7759                     force_next(THING);
7760                 }
7761 #endif
7762                 if (!have_name) {
7763                     if (PL_curstash)
7764                         sv_setpvs(PL_subname, "__ANON__");
7765                     else
7766                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7767                     TOKEN(ANONSUB);
7768                 }
7769 #ifndef PERL_MAD
7770                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7771                                   FALSE, TRUE, TRUE);
7772 #endif
7773                 if (key == KEY_my)
7774                     TOKEN(MYSUB);
7775                 TOKEN(SUB);
7776             }
7777
7778         case KEY_system:
7779             LOP(OP_SYSTEM,XREF);
7780
7781         case KEY_symlink:
7782             LOP(OP_SYMLINK,XTERM);
7783
7784         case KEY_syscall:
7785             LOP(OP_SYSCALL,XTERM);
7786
7787         case KEY_sysopen:
7788             LOP(OP_SYSOPEN,XTERM);
7789
7790         case KEY_sysseek:
7791             LOP(OP_SYSSEEK,XTERM);
7792
7793         case KEY_sysread:
7794             LOP(OP_SYSREAD,XTERM);
7795
7796         case KEY_syswrite:
7797             LOP(OP_SYSWRITE,XTERM);
7798
7799         case KEY_tr:
7800             s = scan_trans(s);
7801             TERM(sublex_start());
7802
7803         case KEY_tell:
7804             UNI(OP_TELL);
7805
7806         case KEY_telldir:
7807             UNI(OP_TELLDIR);
7808
7809         case KEY_tie:
7810             LOP(OP_TIE,XTERM);
7811
7812         case KEY_tied:
7813             UNI(OP_TIED);
7814
7815         case KEY_time:
7816             FUN0(OP_TIME);
7817
7818         case KEY_times:
7819             FUN0(OP_TMS);
7820
7821         case KEY_truncate:
7822             LOP(OP_TRUNCATE,XTERM);
7823
7824         case KEY_uc:
7825             UNI(OP_UC);
7826
7827         case KEY_ucfirst:
7828             UNI(OP_UCFIRST);
7829
7830         case KEY_untie:
7831             UNI(OP_UNTIE);
7832
7833         case KEY_until:
7834             pl_yylval.ival = CopLINE(PL_curcop);
7835             OPERATOR(UNTIL);
7836
7837         case KEY_unless:
7838             pl_yylval.ival = CopLINE(PL_curcop);
7839             OPERATOR(UNLESS);
7840
7841         case KEY_unlink:
7842             LOP(OP_UNLINK,XTERM);
7843
7844         case KEY_undef:
7845             UNIDOR(OP_UNDEF);
7846
7847         case KEY_unpack:
7848             LOP(OP_UNPACK,XTERM);
7849
7850         case KEY_utime:
7851             LOP(OP_UTIME,XTERM);
7852
7853         case KEY_umask:
7854             UNIDOR(OP_UMASK);
7855
7856         case KEY_unshift:
7857             LOP(OP_UNSHIFT,XTERM);
7858
7859         case KEY_use:
7860             s = tokenize_use(1, s);
7861             OPERATOR(USE);
7862
7863         case KEY_values:
7864             UNI(OP_VALUES);
7865
7866         case KEY_vec:
7867             LOP(OP_VEC,XTERM);
7868
7869         case KEY_when:
7870             pl_yylval.ival = CopLINE(PL_curcop);
7871             OPERATOR(WHEN);
7872
7873         case KEY_while:
7874             pl_yylval.ival = CopLINE(PL_curcop);
7875             OPERATOR(WHILE);
7876
7877         case KEY_warn:
7878             PL_hints |= HINT_BLOCK_SCOPE;
7879             LOP(OP_WARN,XTERM);
7880
7881         case KEY_wait:
7882             FUN0(OP_WAIT);
7883
7884         case KEY_waitpid:
7885             LOP(OP_WAITPID,XTERM);
7886
7887         case KEY_wantarray:
7888             FUN0(OP_WANTARRAY);
7889
7890         case KEY_write:
7891 #ifdef EBCDIC
7892         {
7893             char ctl_l[2];
7894             ctl_l[0] = toCTRL('L');
7895             ctl_l[1] = '\0';
7896             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7897         }
7898 #else
7899             /* Make sure $^L is defined */
7900             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7901 #endif
7902             UNI(OP_ENTERWRITE);
7903
7904         case KEY_x:
7905             if (PL_expect == XOPERATOR)
7906                 Mop(OP_REPEAT);
7907             check_uni();
7908             goto just_a_word;
7909
7910         case KEY_xor:
7911             pl_yylval.ival = OP_XOR;
7912             OPERATOR(OROP);
7913
7914         case KEY_y:
7915             s = scan_trans(s);
7916             TERM(sublex_start());
7917         }
7918     }}
7919 }
7920 #ifdef __SC__
7921 #pragma segment Main
7922 #endif
7923
7924 static int
7925 S_pending_ident(pTHX)
7926 {
7927     dVAR;
7928     register char *d;
7929     PADOFFSET tmp = 0;
7930     /* pit holds the identifier we read and pending_ident is reset */
7931     char pit = PL_pending_ident;
7932     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7933     /* All routes through this function want to know if there is a colon.  */
7934     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7935     PL_pending_ident = 0;
7936
7937     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7938     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7939           "### Pending identifier '%s'\n", PL_tokenbuf); });
7940
7941     /* if we're in a my(), we can't allow dynamics here.
7942        $foo'bar has already been turned into $foo::bar, so
7943        just check for colons.
7944
7945        if it's a legal name, the OP is a PADANY.
7946     */
7947     if (PL_in_my) {
7948         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7949             if (has_colon)
7950                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7951                                   "variable %s in \"our\"",
7952                                   PL_tokenbuf));
7953             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7954         }
7955         else {
7956             if (has_colon)
7957                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7958                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7959
7960             pl_yylval.opval = newOP(OP_PADANY, 0);
7961             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7962             return PRIVATEREF;
7963         }
7964     }
7965
7966     /*
7967        build the ops for accesses to a my() variable.
7968
7969        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7970        then used in a comparison.  This catches most, but not
7971        all cases.  For instance, it catches
7972            sort { my($a); $a <=> $b }
7973        but not
7974            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7975        (although why you'd do that is anyone's guess).
7976     */
7977
7978     if (!has_colon) {
7979         if (!PL_in_my)
7980             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
7981         if (tmp != NOT_IN_PAD) {
7982             /* might be an "our" variable" */
7983             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7984                 /* build ops for a bareword */
7985                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7986                 HEK * const stashname = HvNAME_HEK(stash);
7987                 SV *  const sym = newSVhek(stashname);
7988                 sv_catpvs(sym, "::");
7989                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7990                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7991                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7992                 gv_fetchsv(sym,
7993                     (PL_in_eval
7994                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7995                         : GV_ADDMULTI
7996                     ),
7997                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7998                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7999                      : SVt_PVHV));
8000                 return WORD;
8001             }
8002
8003             /* if it's a sort block and they're naming $a or $b */
8004             if (PL_last_lop_op == OP_SORT &&
8005                 PL_tokenbuf[0] == '$' &&
8006                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8007                 && !PL_tokenbuf[2])
8008             {
8009                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8010                      d < PL_bufend && *d != '\n';
8011                      d++)
8012                 {
8013                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8014                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8015                               PL_tokenbuf);
8016                     }
8017                 }
8018             }
8019
8020             pl_yylval.opval = newOP(OP_PADANY, 0);
8021             pl_yylval.opval->op_targ = tmp;
8022             return PRIVATEREF;
8023         }
8024     }
8025
8026     /*
8027        Whine if they've said @foo in a doublequoted string,
8028        and @foo isn't a variable we can find in the symbol
8029        table.
8030     */
8031     if (ckWARN(WARN_AMBIGUOUS) &&
8032         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8033         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8034                                          SVt_PVAV);
8035         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8036                 /* DO NOT warn for @- and @+ */
8037                 && !( PL_tokenbuf[2] == '\0' &&
8038                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8039            )
8040         {
8041             /* Downgraded from fatal to warning 20000522 mjd */
8042             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8043                         "Possible unintended interpolation of %s in string",
8044                         PL_tokenbuf);
8045         }
8046     }
8047
8048     /* build ops for a bareword */
8049     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8050                                                       tokenbuf_len - 1));
8051     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8052     gv_fetchpvn_flags(
8053             PL_tokenbuf + 1, tokenbuf_len - 1,
8054             /* If the identifier refers to a stash, don't autovivify it.
8055              * Change 24660 had the side effect of causing symbol table
8056              * hashes to always be defined, even if they were freshly
8057              * created and the only reference in the entire program was
8058              * the single statement with the defined %foo::bar:: test.
8059              * It appears that all code in the wild doing this actually
8060              * wants to know whether sub-packages have been loaded, so
8061              * by avoiding auto-vivifying symbol tables, we ensure that
8062              * defined %foo::bar:: continues to be false, and the existing
8063              * tests still give the expected answers, even though what
8064              * they're actually testing has now changed subtly.
8065              */
8066             (*PL_tokenbuf == '%'
8067              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
8068              && d[-1] == ':'
8069              ? 0
8070              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
8071             ((PL_tokenbuf[0] == '$') ? SVt_PV
8072              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8073              : SVt_PVHV));
8074     return WORD;
8075 }
8076
8077 /*
8078  *  The following code was generated by perl_keyword.pl.
8079  */
8080
8081 I32
8082 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8083 {
8084     dVAR;
8085
8086     PERL_ARGS_ASSERT_KEYWORD;
8087
8088   switch (len)
8089   {
8090     case 1: /* 5 tokens of length 1 */
8091       switch (name[0])
8092       {
8093         case 'm':
8094           {                                       /* m          */
8095             return KEY_m;
8096           }
8097
8098         case 'q':
8099           {                                       /* q          */
8100             return KEY_q;
8101           }
8102
8103         case 's':
8104           {                                       /* s          */
8105             return KEY_s;
8106           }
8107
8108         case 'x':
8109           {                                       /* x          */
8110             return -KEY_x;
8111           }
8112
8113         case 'y':
8114           {                                       /* y          */
8115             return KEY_y;
8116           }
8117
8118         default:
8119           goto unknown;
8120       }
8121
8122     case 2: /* 18 tokens of length 2 */
8123       switch (name[0])
8124       {
8125         case 'd':
8126           if (name[1] == 'o')
8127           {                                       /* do         */
8128             return KEY_do;
8129           }
8130
8131           goto unknown;
8132
8133         case 'e':
8134           if (name[1] == 'q')
8135           {                                       /* eq         */
8136             return -KEY_eq;
8137           }
8138
8139           goto unknown;
8140
8141         case 'g':
8142           switch (name[1])
8143           {
8144             case 'e':
8145               {                                   /* ge         */
8146                 return -KEY_ge;
8147               }
8148
8149             case 't':
8150               {                                   /* gt         */
8151                 return -KEY_gt;
8152               }
8153
8154             default:
8155               goto unknown;
8156           }
8157
8158         case 'i':
8159           if (name[1] == 'f')
8160           {                                       /* if         */
8161             return KEY_if;
8162           }
8163
8164           goto unknown;
8165
8166         case 'l':
8167           switch (name[1])
8168           {
8169             case 'c':
8170               {                                   /* lc         */
8171                 return -KEY_lc;
8172               }
8173
8174             case 'e':
8175               {                                   /* le         */
8176                 return -KEY_le;
8177               }
8178
8179             case 't':
8180               {                                   /* lt         */
8181                 return -KEY_lt;
8182               }
8183
8184             default:
8185               goto unknown;
8186           }
8187
8188         case 'm':
8189           if (name[1] == 'y')
8190           {                                       /* my         */
8191             return KEY_my;
8192           }
8193
8194           goto unknown;
8195
8196         case 'n':
8197           switch (name[1])
8198           {
8199             case 'e':
8200               {                                   /* ne         */
8201                 return -KEY_ne;
8202               }
8203
8204             case 'o':
8205               {                                   /* no         */
8206                 return KEY_no;
8207               }
8208
8209             default:
8210               goto unknown;
8211           }
8212
8213         case 'o':
8214           if (name[1] == 'r')
8215           {                                       /* or         */
8216             return -KEY_or;
8217           }
8218
8219           goto unknown;
8220
8221         case 'q':
8222           switch (name[1])
8223           {
8224             case 'q':
8225               {                                   /* qq         */
8226                 return KEY_qq;
8227               }
8228
8229             case 'r':
8230               {                                   /* qr         */
8231                 return KEY_qr;
8232               }
8233
8234             case 'w':
8235               {                                   /* qw         */
8236                 return KEY_qw;
8237               }
8238
8239             case 'x':
8240               {                                   /* qx         */
8241                 return KEY_qx;
8242               }
8243
8244             default:
8245               goto unknown;
8246           }
8247
8248         case 't':
8249           if (name[1] == 'r')
8250           {                                       /* tr         */
8251             return KEY_tr;
8252           }
8253
8254           goto unknown;
8255
8256         case 'u':
8257           if (name[1] == 'c')
8258           {                                       /* uc         */
8259             return -KEY_uc;
8260           }
8261
8262           goto unknown;
8263
8264         default:
8265           goto unknown;
8266       }
8267
8268     case 3: /* 29 tokens of length 3 */
8269       switch (name[0])
8270       {
8271         case 'E':
8272           if (name[1] == 'N' &&
8273               name[2] == 'D')
8274           {                                       /* END        */
8275             return KEY_END;
8276           }
8277
8278           goto unknown;
8279
8280         case 'a':
8281           switch (name[1])
8282           {
8283             case 'b':
8284               if (name[2] == 's')
8285               {                                   /* abs        */
8286                 return -KEY_abs;
8287               }
8288
8289               goto unknown;
8290
8291             case 'n':
8292               if (name[2] == 'd')
8293               {                                   /* and        */
8294                 return -KEY_and;
8295               }
8296
8297               goto unknown;
8298
8299             default:
8300               goto unknown;
8301           }
8302
8303         case 'c':
8304           switch (name[1])
8305           {
8306             case 'h':
8307               if (name[2] == 'r')
8308               {                                   /* chr        */
8309                 return -KEY_chr;
8310               }
8311
8312               goto unknown;
8313
8314             case 'm':
8315               if (name[2] == 'p')
8316               {                                   /* cmp        */
8317                 return -KEY_cmp;
8318               }
8319
8320               goto unknown;
8321
8322             case 'o':
8323               if (name[2] == 's')
8324               {                                   /* cos        */
8325                 return -KEY_cos;
8326               }
8327
8328               goto unknown;
8329
8330             default:
8331               goto unknown;
8332           }
8333
8334         case 'd':
8335           if (name[1] == 'i' &&
8336               name[2] == 'e')
8337           {                                       /* die        */
8338             return -KEY_die;
8339           }
8340
8341           goto unknown;
8342
8343         case 'e':
8344           switch (name[1])
8345           {
8346             case 'o':
8347               if (name[2] == 'f')
8348               {                                   /* eof        */
8349                 return -KEY_eof;
8350               }
8351
8352               goto unknown;
8353
8354             case 'x':
8355               if (name[2] == 'p')
8356               {                                   /* exp        */
8357                 return -KEY_exp;
8358               }
8359
8360               goto unknown;
8361
8362             default:
8363               goto unknown;
8364           }
8365
8366         case 'f':
8367           if (name[1] == 'o' &&
8368               name[2] == 'r')
8369           {                                       /* for        */
8370             return KEY_for;
8371           }
8372
8373           goto unknown;
8374
8375         case 'h':
8376           if (name[1] == 'e' &&
8377               name[2] == 'x')
8378           {                                       /* hex        */
8379             return -KEY_hex;
8380           }
8381
8382           goto unknown;
8383
8384         case 'i':
8385           if (name[1] == 'n' &&
8386               name[2] == 't')
8387           {                                       /* int        */
8388             return -KEY_int;
8389           }
8390
8391           goto unknown;
8392
8393         case 'l':
8394           if (name[1] == 'o' &&
8395               name[2] == 'g')
8396           {                                       /* log        */
8397             return -KEY_log;
8398           }
8399
8400           goto unknown;
8401
8402         case 'm':
8403           if (name[1] == 'a' &&
8404               name[2] == 'p')
8405           {                                       /* map        */
8406             return KEY_map;
8407           }
8408
8409           goto unknown;
8410
8411         case 'n':
8412           if (name[1] == 'o' &&
8413               name[2] == 't')
8414           {                                       /* not        */
8415             return -KEY_not;
8416           }
8417
8418           goto unknown;
8419
8420         case 'o':
8421           switch (name[1])
8422           {
8423             case 'c':
8424               if (name[2] == 't')
8425               {                                   /* oct        */
8426                 return -KEY_oct;
8427               }
8428
8429               goto unknown;
8430
8431             case 'r':
8432               if (name[2] == 'd')
8433               {                                   /* ord        */
8434                 return -KEY_ord;
8435               }
8436
8437               goto unknown;
8438
8439             case 'u':
8440               if (name[2] == 'r')
8441               {                                   /* our        */
8442                 return KEY_our;
8443               }
8444
8445               goto unknown;
8446
8447             default:
8448               goto unknown;
8449           }
8450
8451         case 'p':
8452           if (name[1] == 'o')
8453           {
8454             switch (name[2])
8455             {
8456               case 'p':
8457                 {                                 /* pop        */
8458                   return -KEY_pop;
8459                 }
8460
8461               case 's':
8462                 {                                 /* pos        */
8463                   return KEY_pos;
8464                 }
8465
8466               default:
8467                 goto unknown;
8468             }
8469           }
8470
8471           goto unknown;
8472
8473         case 'r':
8474           if (name[1] == 'e' &&
8475               name[2] == 'f')
8476           {                                       /* ref        */
8477             return -KEY_ref;
8478           }
8479
8480           goto unknown;
8481
8482         case 's':
8483           switch (name[1])
8484           {
8485             case 'a':
8486               if (name[2] == 'y')
8487               {                                   /* say        */
8488                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8489               }
8490
8491               goto unknown;
8492
8493             case 'i':
8494               if (name[2] == 'n')
8495               {                                   /* sin        */
8496                 return -KEY_sin;
8497               }
8498
8499               goto unknown;
8500
8501             case 'u':
8502               if (name[2] == 'b')
8503               {                                   /* sub        */
8504                 return KEY_sub;
8505               }
8506
8507               goto unknown;
8508
8509             default:
8510               goto unknown;
8511           }
8512
8513         case 't':
8514           if (name[1] == 'i' &&
8515               name[2] == 'e')
8516           {                                       /* tie        */
8517             return KEY_tie;
8518           }
8519
8520           goto unknown;
8521
8522         case 'u':
8523           if (name[1] == 's' &&
8524               name[2] == 'e')
8525           {                                       /* use        */
8526             return KEY_use;
8527           }
8528
8529           goto unknown;
8530
8531         case 'v':
8532           if (name[1] == 'e' &&
8533               name[2] == 'c')
8534           {                                       /* vec        */
8535             return -KEY_vec;
8536           }
8537
8538           goto unknown;
8539
8540         case 'x':
8541           if (name[1] == 'o' &&
8542               name[2] == 'r')
8543           {                                       /* xor        */
8544             return -KEY_xor;
8545           }
8546
8547           goto unknown;
8548
8549         default:
8550           goto unknown;
8551       }
8552
8553     case 4: /* 41 tokens of length 4 */
8554       switch (name[0])
8555       {
8556         case 'C':
8557           if (name[1] == 'O' &&
8558               name[2] == 'R' &&
8559               name[3] == 'E')
8560           {                                       /* CORE       */
8561             return -KEY_CORE;
8562           }
8563
8564           goto unknown;
8565
8566         case 'I':
8567           if (name[1] == 'N' &&
8568               name[2] == 'I' &&
8569               name[3] == 'T')
8570           {                                       /* INIT       */
8571             return KEY_INIT;
8572           }
8573
8574           goto unknown;
8575
8576         case 'b':
8577           if (name[1] == 'i' &&
8578               name[2] == 'n' &&
8579               name[3] == 'd')
8580           {                                       /* bind       */
8581             return -KEY_bind;
8582           }
8583
8584           goto unknown;
8585
8586         case 'c':
8587           if (name[1] == 'h' &&
8588               name[2] == 'o' &&
8589               name[3] == 'p')
8590           {                                       /* chop       */
8591             return -KEY_chop;
8592           }
8593
8594           goto unknown;
8595
8596         case 'd':
8597           if (name[1] == 'u' &&
8598               name[2] == 'm' &&
8599               name[3] == 'p')
8600           {                                       /* dump       */
8601             return -KEY_dump;
8602           }
8603
8604           goto unknown;
8605
8606         case 'e':
8607           switch (name[1])
8608           {
8609             case 'a':
8610               if (name[2] == 'c' &&
8611                   name[3] == 'h')
8612               {                                   /* each       */
8613                 return -KEY_each;
8614               }
8615
8616               goto unknown;
8617
8618             case 'l':
8619               if (name[2] == 's' &&
8620                   name[3] == 'e')
8621               {                                   /* else       */
8622                 return KEY_else;
8623               }
8624
8625               goto unknown;
8626
8627             case 'v':
8628               if (name[2] == 'a' &&
8629                   name[3] == 'l')
8630               {                                   /* eval       */
8631                 return KEY_eval;
8632               }
8633
8634               goto unknown;
8635
8636             case 'x':
8637               switch (name[2])
8638               {
8639                 case 'e':
8640                   if (name[3] == 'c')
8641                   {                               /* exec       */
8642                     return -KEY_exec;
8643                   }
8644
8645                   goto unknown;
8646
8647                 case 'i':
8648                   if (name[3] == 't')
8649                   {                               /* exit       */
8650                     return -KEY_exit;
8651                   }
8652
8653                   goto unknown;
8654
8655                 default:
8656                   goto unknown;
8657               }
8658
8659             default:
8660               goto unknown;
8661           }
8662
8663         case 'f':
8664           if (name[1] == 'o' &&
8665               name[2] == 'r' &&
8666               name[3] == 'k')
8667           {                                       /* fork       */
8668             return -KEY_fork;
8669           }
8670
8671           goto unknown;
8672
8673         case 'g':
8674           switch (name[1])
8675           {
8676             case 'e':
8677               if (name[2] == 't' &&
8678                   name[3] == 'c')
8679               {                                   /* getc       */
8680                 return -KEY_getc;
8681               }
8682
8683               goto unknown;
8684
8685             case 'l':
8686               if (name[2] == 'o' &&
8687                   name[3] == 'b')
8688               {                                   /* glob       */
8689                 return KEY_glob;
8690               }
8691
8692               goto unknown;
8693
8694             case 'o':
8695               if (name[2] == 't' &&
8696                   name[3] == 'o')
8697               {                                   /* goto       */
8698                 return KEY_goto;
8699               }
8700
8701               goto unknown;
8702
8703             case 'r':
8704               if (name[2] == 'e' &&
8705                   name[3] == 'p')
8706               {                                   /* grep       */
8707                 return KEY_grep;
8708               }
8709
8710               goto unknown;
8711
8712             default:
8713               goto unknown;
8714           }
8715
8716         case 'j':
8717           if (name[1] == 'o' &&
8718               name[2] == 'i' &&
8719               name[3] == 'n')
8720           {                                       /* join       */
8721             return -KEY_join;
8722           }
8723
8724           goto unknown;
8725
8726         case 'k':
8727           switch (name[1])
8728           {
8729             case 'e':
8730               if (name[2] == 'y' &&
8731                   name[3] == 's')
8732               {                                   /* keys       */
8733                 return -KEY_keys;
8734               }
8735
8736               goto unknown;
8737
8738             case 'i':
8739               if (name[2] == 'l' &&
8740                   name[3] == 'l')
8741               {                                   /* kill       */
8742                 return -KEY_kill;
8743               }
8744
8745               goto unknown;
8746
8747             default:
8748               goto unknown;
8749           }
8750
8751         case 'l':
8752           switch (name[1])
8753           {
8754             case 'a':
8755               if (name[2] == 's' &&
8756                   name[3] == 't')
8757               {                                   /* last       */
8758                 return KEY_last;
8759               }
8760
8761               goto unknown;
8762
8763             case 'i':
8764               if (name[2] == 'n' &&
8765                   name[3] == 'k')
8766               {                                   /* link       */
8767                 return -KEY_link;
8768               }
8769
8770               goto unknown;
8771
8772             case 'o':
8773               if (name[2] == 'c' &&
8774                   name[3] == 'k')
8775               {                                   /* lock       */
8776                 return -KEY_lock;
8777               }
8778
8779               goto unknown;
8780
8781             default:
8782               goto unknown;
8783           }
8784
8785         case 'n':
8786           if (name[1] == 'e' &&
8787               name[2] == 'x' &&
8788               name[3] == 't')
8789           {                                       /* next       */
8790             return KEY_next;
8791           }
8792
8793           goto unknown;
8794
8795         case 'o':
8796           if (name[1] == 'p' &&
8797               name[2] == 'e' &&
8798               name[3] == 'n')
8799           {                                       /* open       */
8800             return -KEY_open;
8801           }
8802
8803           goto unknown;
8804
8805         case 'p':
8806           switch (name[1])
8807           {
8808             case 'a':
8809               if (name[2] == 'c' &&
8810                   name[3] == 'k')
8811               {                                   /* pack       */
8812                 return -KEY_pack;
8813               }
8814
8815               goto unknown;
8816
8817             case 'i':
8818               if (name[2] == 'p' &&
8819                   name[3] == 'e')
8820               {                                   /* pipe       */
8821                 return -KEY_pipe;
8822               }
8823
8824               goto unknown;
8825
8826             case 'u':
8827               if (name[2] == 's' &&
8828                   name[3] == 'h')
8829               {                                   /* push       */
8830                 return -KEY_push;
8831               }
8832
8833               goto unknown;
8834
8835             default:
8836               goto unknown;
8837           }
8838
8839         case 'r':
8840           switch (name[1])
8841           {
8842             case 'a':
8843               if (name[2] == 'n' &&
8844                   name[3] == 'd')
8845               {                                   /* rand       */
8846                 return -KEY_rand;
8847               }
8848
8849               goto unknown;
8850
8851             case 'e':
8852               switch (name[2])
8853               {
8854                 case 'a':
8855                   if (name[3] == 'd')
8856                   {                               /* read       */
8857                     return -KEY_read;
8858                   }
8859
8860                   goto unknown;
8861
8862                 case 'c':
8863                   if (name[3] == 'v')
8864                   {                               /* recv       */
8865                     return -KEY_recv;
8866                   }
8867
8868                   goto unknown;
8869
8870                 case 'd':
8871                   if (name[3] == 'o')
8872                   {                               /* redo       */
8873                     return KEY_redo;
8874                   }
8875
8876                   goto unknown;
8877
8878                 default:
8879                   goto unknown;
8880               }
8881
8882             default:
8883               goto unknown;
8884           }
8885
8886         case 's':
8887           switch (name[1])
8888           {
8889             case 'e':
8890               switch (name[2])
8891               {
8892                 case 'e':
8893                   if (name[3] == 'k')
8894                   {                               /* seek       */
8895                     return -KEY_seek;
8896                   }
8897
8898                   goto unknown;
8899
8900                 case 'n':
8901                   if (name[3] == 'd')
8902                   {                               /* send       */
8903                     return -KEY_send;
8904                   }
8905
8906                   goto unknown;
8907
8908                 default:
8909                   goto unknown;
8910               }
8911
8912             case 'o':
8913               if (name[2] == 'r' &&
8914                   name[3] == 't')
8915               {                                   /* sort       */
8916                 return KEY_sort;
8917               }
8918
8919               goto unknown;
8920
8921             case 'q':
8922               if (name[2] == 'r' &&
8923                   name[3] == 't')
8924               {                                   /* sqrt       */
8925                 return -KEY_sqrt;
8926               }
8927
8928               goto unknown;
8929
8930             case 't':
8931               if (name[2] == 'a' &&
8932                   name[3] == 't')
8933               {                                   /* stat       */
8934                 return -KEY_stat;
8935               }
8936
8937               goto unknown;
8938
8939             default:
8940               goto unknown;
8941           }
8942
8943         case 't':
8944           switch (name[1])
8945           {
8946             case 'e':
8947               if (name[2] == 'l' &&
8948                   name[3] == 'l')
8949               {                                   /* tell       */
8950                 return -KEY_tell;
8951               }
8952
8953               goto unknown;
8954
8955             case 'i':
8956               switch (name[2])
8957               {
8958                 case 'e':
8959                   if (name[3] == 'd')
8960                   {                               /* tied       */
8961                     return KEY_tied;
8962                   }
8963
8964                   goto unknown;
8965
8966                 case 'm':
8967                   if (name[3] == 'e')
8968                   {                               /* time       */
8969                     return -KEY_time;
8970                   }
8971
8972                   goto unknown;
8973
8974                 default:
8975                   goto unknown;
8976               }
8977
8978             default:
8979               goto unknown;
8980           }
8981
8982         case 'w':
8983           switch (name[1])
8984           {
8985             case 'a':
8986               switch (name[2])
8987               {
8988                 case 'i':
8989                   if (name[3] == 't')
8990                   {                               /* wait       */
8991                     return -KEY_wait;
8992                   }
8993
8994                   goto unknown;
8995
8996                 case 'r':
8997                   if (name[3] == 'n')
8998                   {                               /* warn       */
8999                     return -KEY_warn;
9000                   }
9001
9002                   goto unknown;
9003
9004                 default:
9005                   goto unknown;
9006               }
9007
9008             case 'h':
9009               if (name[2] == 'e' &&
9010                   name[3] == 'n')
9011               {                                   /* when       */
9012                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9013               }
9014
9015               goto unknown;
9016
9017             default:
9018               goto unknown;
9019           }
9020
9021         default:
9022           goto unknown;
9023       }
9024
9025     case 5: /* 39 tokens of length 5 */
9026       switch (name[0])
9027       {
9028         case 'B':
9029           if (name[1] == 'E' &&
9030               name[2] == 'G' &&
9031               name[3] == 'I' &&
9032               name[4] == 'N')
9033           {                                       /* BEGIN      */
9034             return KEY_BEGIN;
9035           }
9036
9037           goto unknown;
9038
9039         case 'C':
9040           if (name[1] == 'H' &&
9041               name[2] == 'E' &&
9042               name[3] == 'C' &&
9043               name[4] == 'K')
9044           {                                       /* CHECK      */
9045             return KEY_CHECK;
9046           }
9047
9048           goto unknown;
9049
9050         case 'a':
9051           switch (name[1])
9052           {
9053             case 'l':
9054               if (name[2] == 'a' &&
9055                   name[3] == 'r' &&
9056                   name[4] == 'm')
9057               {                                   /* alarm      */
9058                 return -KEY_alarm;
9059               }
9060
9061               goto unknown;
9062
9063             case 't':
9064               if (name[2] == 'a' &&
9065                   name[3] == 'n' &&
9066                   name[4] == '2')
9067               {                                   /* atan2      */
9068                 return -KEY_atan2;
9069               }
9070
9071               goto unknown;
9072
9073             default:
9074               goto unknown;
9075           }
9076
9077         case 'b':
9078           switch (name[1])
9079           {
9080             case 'l':
9081               if (name[2] == 'e' &&
9082                   name[3] == 's' &&
9083                   name[4] == 's')
9084               {                                   /* bless      */
9085                 return -KEY_bless;
9086               }
9087
9088               goto unknown;
9089
9090             case 'r':
9091               if (name[2] == 'e' &&
9092                   name[3] == 'a' &&
9093                   name[4] == 'k')
9094               {                                   /* break      */
9095                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9096               }
9097
9098               goto unknown;
9099
9100             default:
9101               goto unknown;
9102           }
9103
9104         case 'c':
9105           switch (name[1])
9106           {
9107             case 'h':
9108               switch (name[2])
9109               {
9110                 case 'd':
9111                   if (name[3] == 'i' &&
9112                       name[4] == 'r')
9113                   {                               /* chdir      */
9114                     return -KEY_chdir;
9115                   }
9116
9117                   goto unknown;
9118
9119                 case 'm':
9120                   if (name[3] == 'o' &&
9121                       name[4] == 'd')
9122                   {                               /* chmod      */
9123                     return -KEY_chmod;
9124                   }
9125
9126                   goto unknown;
9127
9128                 case 'o':
9129                   switch (name[3])
9130                   {
9131                     case 'm':
9132                       if (name[4] == 'p')
9133                       {                           /* chomp      */
9134                         return -KEY_chomp;
9135                       }
9136
9137                       goto unknown;
9138
9139                     case 'w':
9140                       if (name[4] == 'n')
9141                       {                           /* chown      */
9142                         return -KEY_chown;
9143                       }
9144
9145                       goto unknown;
9146
9147                     default:
9148                       goto unknown;
9149                   }
9150
9151                 default:
9152                   goto unknown;
9153               }
9154
9155             case 'l':
9156               if (name[2] == 'o' &&
9157                   name[3] == 's' &&
9158                   name[4] == 'e')
9159               {                                   /* close      */
9160                 return -KEY_close;
9161               }
9162
9163               goto unknown;
9164
9165             case 'r':
9166               if (name[2] == 'y' &&
9167                   name[3] == 'p' &&
9168                   name[4] == 't')
9169               {                                   /* crypt      */
9170                 return -KEY_crypt;
9171               }
9172
9173               goto unknown;
9174
9175             default:
9176               goto unknown;
9177           }
9178
9179         case 'e':
9180           if (name[1] == 'l' &&
9181               name[2] == 's' &&
9182               name[3] == 'i' &&
9183               name[4] == 'f')
9184           {                                       /* elsif      */
9185             return KEY_elsif;
9186           }
9187
9188           goto unknown;
9189
9190         case 'f':
9191           switch (name[1])
9192           {
9193             case 'c':
9194               if (name[2] == 'n' &&
9195                   name[3] == 't' &&
9196                   name[4] == 'l')
9197               {                                   /* fcntl      */
9198                 return -KEY_fcntl;
9199               }
9200
9201               goto unknown;
9202
9203             case 'l':
9204               if (name[2] == 'o' &&
9205                   name[3] == 'c' &&
9206                   name[4] == 'k')
9207               {                                   /* flock      */
9208                 return -KEY_flock;
9209               }
9210
9211               goto unknown;
9212
9213             default:
9214               goto unknown;
9215           }
9216
9217         case 'g':
9218           if (name[1] == 'i' &&
9219               name[2] == 'v' &&
9220               name[3] == 'e' &&
9221               name[4] == 'n')
9222           {                                       /* given      */
9223             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9224           }
9225
9226           goto unknown;
9227
9228         case 'i':
9229           switch (name[1])
9230           {
9231             case 'n':
9232               if (name[2] == 'd' &&
9233                   name[3] == 'e' &&
9234                   name[4] == 'x')
9235               {                                   /* index      */
9236                 return -KEY_index;
9237               }
9238
9239               goto unknown;
9240
9241             case 'o':
9242               if (name[2] == 'c' &&
9243                   name[3] == 't' &&
9244                   name[4] == 'l')
9245               {                                   /* ioctl      */
9246                 return -KEY_ioctl;
9247               }
9248
9249               goto unknown;
9250
9251             default:
9252               goto unknown;
9253           }
9254
9255         case 'l':
9256           switch (name[1])
9257           {
9258             case 'o':
9259               if (name[2] == 'c' &&
9260                   name[3] == 'a' &&
9261                   name[4] == 'l')
9262               {                                   /* local      */
9263                 return KEY_local;
9264               }
9265
9266               goto unknown;
9267
9268             case 's':
9269               if (name[2] == 't' &&
9270                   name[3] == 'a' &&
9271                   name[4] == 't')
9272               {                                   /* lstat      */
9273                 return -KEY_lstat;
9274               }
9275
9276               goto unknown;
9277
9278             default:
9279               goto unknown;
9280           }
9281
9282         case 'm':
9283           if (name[1] == 'k' &&
9284               name[2] == 'd' &&
9285               name[3] == 'i' &&
9286               name[4] == 'r')
9287           {                                       /* mkdir      */
9288             return -KEY_mkdir;
9289           }
9290
9291           goto unknown;
9292
9293         case 'p':
9294           if (name[1] == 'r' &&
9295               name[2] == 'i' &&
9296               name[3] == 'n' &&
9297               name[4] == 't')
9298           {                                       /* print      */
9299             return KEY_print;
9300           }
9301
9302           goto unknown;
9303
9304         case 'r':
9305           switch (name[1])
9306           {
9307             case 'e':
9308               if (name[2] == 's' &&
9309                   name[3] == 'e' &&
9310                   name[4] == 't')
9311               {                                   /* reset      */
9312                 return -KEY_reset;
9313               }
9314
9315               goto unknown;
9316
9317             case 'm':
9318               if (name[2] == 'd' &&
9319                   name[3] == 'i' &&
9320                   name[4] == 'r')
9321               {                                   /* rmdir      */
9322                 return -KEY_rmdir;
9323               }
9324
9325               goto unknown;
9326
9327             default:
9328               goto unknown;
9329           }
9330
9331         case 's':
9332           switch (name[1])
9333           {
9334             case 'e':
9335               if (name[2] == 'm' &&
9336                   name[3] == 'o' &&
9337                   name[4] == 'p')
9338               {                                   /* semop      */
9339                 return -KEY_semop;
9340               }
9341
9342               goto unknown;
9343
9344             case 'h':
9345               if (name[2] == 'i' &&
9346                   name[3] == 'f' &&
9347                   name[4] == 't')
9348               {                                   /* shift      */
9349                 return -KEY_shift;
9350               }
9351
9352               goto unknown;
9353
9354             case 'l':
9355               if (name[2] == 'e' &&
9356                   name[3] == 'e' &&
9357                   name[4] == 'p')
9358               {                                   /* sleep      */
9359                 return -KEY_sleep;
9360               }
9361
9362               goto unknown;
9363
9364             case 'p':
9365               if (name[2] == 'l' &&
9366                   name[3] == 'i' &&
9367                   name[4] == 't')
9368               {                                   /* split      */
9369                 return KEY_split;
9370               }
9371
9372               goto unknown;
9373
9374             case 'r':
9375               if (name[2] == 'a' &&
9376                   name[3] == 'n' &&
9377                   name[4] == 'd')
9378               {                                   /* srand      */
9379                 return -KEY_srand;
9380               }
9381
9382               goto unknown;
9383
9384             case 't':
9385               switch (name[2])
9386               {
9387                 case 'a':
9388                   if (name[3] == 't' &&
9389                       name[4] == 'e')
9390                   {                               /* state      */
9391                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9392                   }
9393
9394                   goto unknown;
9395
9396                 case 'u':
9397                   if (name[3] == 'd' &&
9398                       name[4] == 'y')
9399                   {                               /* study      */
9400                     return KEY_study;
9401                   }
9402
9403                   goto unknown;
9404
9405                 default:
9406                   goto unknown;
9407               }
9408
9409             default:
9410               goto unknown;
9411           }
9412
9413         case 't':
9414           if (name[1] == 'i' &&
9415               name[2] == 'm' &&
9416               name[3] == 'e' &&
9417               name[4] == 's')
9418           {                                       /* times      */
9419             return -KEY_times;
9420           }
9421
9422           goto unknown;
9423
9424         case 'u':
9425           switch (name[1])
9426           {
9427             case 'm':
9428               if (name[2] == 'a' &&
9429                   name[3] == 's' &&
9430                   name[4] == 'k')
9431               {                                   /* umask      */
9432                 return -KEY_umask;
9433               }
9434
9435               goto unknown;
9436
9437             case 'n':
9438               switch (name[2])
9439               {
9440                 case 'd':
9441                   if (name[3] == 'e' &&
9442                       name[4] == 'f')
9443                   {                               /* undef      */
9444                     return KEY_undef;
9445                   }
9446
9447                   goto unknown;
9448
9449                 case 't':
9450                   if (name[3] == 'i')
9451                   {
9452                     switch (name[4])
9453                     {
9454                       case 'e':
9455                         {                         /* untie      */
9456                           return KEY_untie;
9457                         }
9458
9459                       case 'l':
9460                         {                         /* until      */
9461                           return KEY_until;
9462                         }
9463
9464                       default:
9465                         goto unknown;
9466                     }
9467                   }
9468
9469                   goto unknown;
9470
9471                 default:
9472                   goto unknown;
9473               }
9474
9475             case 't':
9476               if (name[2] == 'i' &&
9477                   name[3] == 'm' &&
9478                   name[4] == 'e')
9479               {                                   /* utime      */
9480                 return -KEY_utime;
9481               }
9482
9483               goto unknown;
9484
9485             default:
9486               goto unknown;
9487           }
9488
9489         case 'w':
9490           switch (name[1])
9491           {
9492             case 'h':
9493               if (name[2] == 'i' &&
9494                   name[3] == 'l' &&
9495                   name[4] == 'e')
9496               {                                   /* while      */
9497                 return KEY_while;
9498               }
9499
9500               goto unknown;
9501
9502             case 'r':
9503               if (name[2] == 'i' &&
9504                   name[3] == 't' &&
9505                   name[4] == 'e')
9506               {                                   /* write      */
9507                 return -KEY_write;
9508               }
9509
9510               goto unknown;
9511
9512             default:
9513               goto unknown;
9514           }
9515
9516         default:
9517           goto unknown;
9518       }
9519
9520     case 6: /* 33 tokens of length 6 */
9521       switch (name[0])
9522       {
9523         case 'a':
9524           if (name[1] == 'c' &&
9525               name[2] == 'c' &&
9526               name[3] == 'e' &&
9527               name[4] == 'p' &&
9528               name[5] == 't')
9529           {                                       /* accept     */
9530             return -KEY_accept;
9531           }
9532
9533           goto unknown;
9534
9535         case 'c':
9536           switch (name[1])
9537           {
9538             case 'a':
9539               if (name[2] == 'l' &&
9540                   name[3] == 'l' &&
9541                   name[4] == 'e' &&
9542                   name[5] == 'r')
9543               {                                   /* caller     */
9544                 return -KEY_caller;
9545               }
9546
9547               goto unknown;
9548
9549             case 'h':
9550               if (name[2] == 'r' &&
9551                   name[3] == 'o' &&
9552                   name[4] == 'o' &&
9553                   name[5] == 't')
9554               {                                   /* chroot     */
9555                 return -KEY_chroot;
9556               }
9557
9558               goto unknown;
9559
9560             default:
9561               goto unknown;
9562           }
9563
9564         case 'd':
9565           if (name[1] == 'e' &&
9566               name[2] == 'l' &&
9567               name[3] == 'e' &&
9568               name[4] == 't' &&
9569               name[5] == 'e')
9570           {                                       /* delete     */
9571             return KEY_delete;
9572           }
9573
9574           goto unknown;
9575
9576         case 'e':
9577           switch (name[1])
9578           {
9579             case 'l':
9580               if (name[2] == 's' &&
9581                   name[3] == 'e' &&
9582                   name[4] == 'i' &&
9583                   name[5] == 'f')
9584               {                                   /* elseif     */
9585                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9586               }
9587
9588               goto unknown;
9589
9590             case 'x':
9591               if (name[2] == 'i' &&
9592                   name[3] == 's' &&
9593                   name[4] == 't' &&
9594                   name[5] == 's')
9595               {                                   /* exists     */
9596                 return KEY_exists;
9597               }
9598
9599               goto unknown;
9600
9601             default:
9602               goto unknown;
9603           }
9604
9605         case 'f':
9606           switch (name[1])
9607           {
9608             case 'i':
9609               if (name[2] == 'l' &&
9610                   name[3] == 'e' &&
9611                   name[4] == 'n' &&
9612                   name[5] == 'o')
9613               {                                   /* fileno     */
9614                 return -KEY_fileno;
9615               }
9616
9617               goto unknown;
9618
9619             case 'o':
9620               if (name[2] == 'r' &&
9621                   name[3] == 'm' &&
9622                   name[4] == 'a' &&
9623                   name[5] == 't')
9624               {                                   /* format     */
9625                 return KEY_format;
9626               }
9627
9628               goto unknown;
9629
9630             default:
9631               goto unknown;
9632           }
9633
9634         case 'g':
9635           if (name[1] == 'm' &&
9636               name[2] == 't' &&
9637               name[3] == 'i' &&
9638               name[4] == 'm' &&
9639               name[5] == 'e')
9640           {                                       /* gmtime     */
9641             return -KEY_gmtime;
9642           }
9643
9644           goto unknown;
9645
9646         case 'l':
9647           switch (name[1])
9648           {
9649             case 'e':
9650               if (name[2] == 'n' &&
9651                   name[3] == 'g' &&
9652                   name[4] == 't' &&
9653                   name[5] == 'h')
9654               {                                   /* length     */
9655                 return -KEY_length;
9656               }
9657
9658               goto unknown;
9659
9660             case 'i':
9661               if (name[2] == 's' &&
9662                   name[3] == 't' &&
9663                   name[4] == 'e' &&
9664                   name[5] == 'n')
9665               {                                   /* listen     */
9666                 return -KEY_listen;
9667               }
9668
9669               goto unknown;
9670
9671             default:
9672               goto unknown;
9673           }
9674
9675         case 'm':
9676           if (name[1] == 's' &&
9677               name[2] == 'g')
9678           {
9679             switch (name[3])
9680             {
9681               case 'c':
9682                 if (name[4] == 't' &&
9683                     name[5] == 'l')
9684                 {                                 /* msgctl     */
9685                   return -KEY_msgctl;
9686                 }
9687
9688                 goto unknown;
9689
9690               case 'g':
9691                 if (name[4] == 'e' &&
9692                     name[5] == 't')
9693                 {                                 /* msgget     */
9694                   return -KEY_msgget;
9695                 }
9696
9697                 goto unknown;
9698
9699               case 'r':
9700                 if (name[4] == 'c' &&
9701                     name[5] == 'v')
9702                 {                                 /* msgrcv     */
9703                   return -KEY_msgrcv;
9704                 }
9705
9706                 goto unknown;
9707
9708               case 's':
9709                 if (name[4] == 'n' &&
9710                     name[5] == 'd')
9711                 {                                 /* msgsnd     */
9712                   return -KEY_msgsnd;
9713                 }
9714
9715                 goto unknown;
9716
9717               default:
9718                 goto unknown;
9719             }
9720           }
9721
9722           goto unknown;
9723
9724         case 'p':
9725           if (name[1] == 'r' &&
9726               name[2] == 'i' &&
9727               name[3] == 'n' &&
9728               name[4] == 't' &&
9729               name[5] == 'f')
9730           {                                       /* printf     */
9731             return KEY_printf;
9732           }
9733
9734           goto unknown;
9735
9736         case 'r':
9737           switch (name[1])
9738           {
9739             case 'e':
9740               switch (name[2])
9741               {
9742                 case 'n':
9743                   if (name[3] == 'a' &&
9744                       name[4] == 'm' &&
9745                       name[5] == 'e')
9746                   {                               /* rename     */
9747                     return -KEY_rename;
9748                   }
9749
9750                   goto unknown;
9751
9752                 case 't':
9753                   if (name[3] == 'u' &&
9754                       name[4] == 'r' &&
9755                       name[5] == 'n')
9756                   {                               /* return     */
9757                     return KEY_return;
9758                   }
9759
9760                   goto unknown;
9761
9762                 default:
9763                   goto unknown;
9764               }
9765
9766             case 'i':
9767               if (name[2] == 'n' &&
9768                   name[3] == 'd' &&
9769                   name[4] == 'e' &&
9770                   name[5] == 'x')
9771               {                                   /* rindex     */
9772                 return -KEY_rindex;
9773               }
9774
9775               goto unknown;
9776
9777             default:
9778               goto unknown;
9779           }
9780
9781         case 's':
9782           switch (name[1])
9783           {
9784             case 'c':
9785               if (name[2] == 'a' &&
9786                   name[3] == 'l' &&
9787                   name[4] == 'a' &&
9788                   name[5] == 'r')
9789               {                                   /* scalar     */
9790                 return KEY_scalar;
9791               }
9792
9793               goto unknown;
9794
9795             case 'e':
9796               switch (name[2])
9797               {
9798                 case 'l':
9799                   if (name[3] == 'e' &&
9800                       name[4] == 'c' &&
9801                       name[5] == 't')
9802                   {                               /* select     */
9803                     return -KEY_select;
9804                   }
9805
9806                   goto unknown;
9807
9808                 case 'm':
9809                   switch (name[3])
9810                   {
9811                     case 'c':
9812                       if (name[4] == 't' &&
9813                           name[5] == 'l')
9814                       {                           /* semctl     */
9815                         return -KEY_semctl;
9816                       }
9817
9818                       goto unknown;
9819
9820                     case 'g':
9821                       if (name[4] == 'e' &&
9822                           name[5] == 't')
9823                       {                           /* semget     */
9824                         return -KEY_semget;
9825                       }
9826
9827                       goto unknown;
9828
9829                     default:
9830                       goto unknown;
9831                   }
9832
9833                 default:
9834                   goto unknown;
9835               }
9836
9837             case 'h':
9838               if (name[2] == 'm')
9839               {
9840                 switch (name[3])
9841                 {
9842                   case 'c':
9843                     if (name[4] == 't' &&
9844                         name[5] == 'l')
9845                     {                             /* shmctl     */
9846                       return -KEY_shmctl;
9847                     }
9848
9849                     goto unknown;
9850
9851                   case 'g':
9852                     if (name[4] == 'e' &&
9853                         name[5] == 't')
9854                     {                             /* shmget     */
9855                       return -KEY_shmget;
9856                     }
9857
9858                     goto unknown;
9859
9860                   default:
9861                     goto unknown;
9862                 }
9863               }
9864
9865               goto unknown;
9866
9867             case 'o':
9868               if (name[2] == 'c' &&
9869                   name[3] == 'k' &&
9870                   name[4] == 'e' &&
9871                   name[5] == 't')
9872               {                                   /* socket     */
9873                 return -KEY_socket;
9874               }
9875
9876               goto unknown;
9877
9878             case 'p':
9879               if (name[2] == 'l' &&
9880                   name[3] == 'i' &&
9881                   name[4] == 'c' &&
9882                   name[5] == 'e')
9883               {                                   /* splice     */
9884                 return -KEY_splice;
9885               }
9886
9887               goto unknown;
9888
9889             case 'u':
9890               if (name[2] == 'b' &&
9891                   name[3] == 's' &&
9892                   name[4] == 't' &&
9893                   name[5] == 'r')
9894               {                                   /* substr     */
9895                 return -KEY_substr;
9896               }
9897
9898               goto unknown;
9899
9900             case 'y':
9901               if (name[2] == 's' &&
9902                   name[3] == 't' &&
9903                   name[4] == 'e' &&
9904                   name[5] == 'm')
9905               {                                   /* system     */
9906                 return -KEY_system;
9907               }
9908
9909               goto unknown;
9910
9911             default:
9912               goto unknown;
9913           }
9914
9915         case 'u':
9916           if (name[1] == 'n')
9917           {
9918             switch (name[2])
9919             {
9920               case 'l':
9921                 switch (name[3])
9922                 {
9923                   case 'e':
9924                     if (name[4] == 's' &&
9925                         name[5] == 's')
9926                     {                             /* unless     */
9927                       return KEY_unless;
9928                     }
9929
9930                     goto unknown;
9931
9932                   case 'i':
9933                     if (name[4] == 'n' &&
9934                         name[5] == 'k')
9935                     {                             /* unlink     */
9936                       return -KEY_unlink;
9937                     }
9938
9939                     goto unknown;
9940
9941                   default:
9942                     goto unknown;
9943                 }
9944
9945               case 'p':
9946                 if (name[3] == 'a' &&
9947                     name[4] == 'c' &&
9948                     name[5] == 'k')
9949                 {                                 /* unpack     */
9950                   return -KEY_unpack;
9951                 }
9952
9953                 goto unknown;
9954
9955               default:
9956                 goto unknown;
9957             }
9958           }
9959
9960           goto unknown;
9961
9962         case 'v':
9963           if (name[1] == 'a' &&
9964               name[2] == 'l' &&
9965               name[3] == 'u' &&
9966               name[4] == 'e' &&
9967               name[5] == 's')
9968           {                                       /* values     */
9969             return -KEY_values;
9970           }
9971
9972           goto unknown;
9973
9974         default:
9975           goto unknown;
9976       }
9977
9978     case 7: /* 29 tokens of length 7 */
9979       switch (name[0])
9980       {
9981         case 'D':
9982           if (name[1] == 'E' &&
9983               name[2] == 'S' &&
9984               name[3] == 'T' &&
9985               name[4] == 'R' &&
9986               name[5] == 'O' &&
9987               name[6] == 'Y')
9988           {                                       /* DESTROY    */
9989             return KEY_DESTROY;
9990           }
9991
9992           goto unknown;
9993
9994         case '_':
9995           if (name[1] == '_' &&
9996               name[2] == 'E' &&
9997               name[3] == 'N' &&
9998               name[4] == 'D' &&
9999               name[5] == '_' &&
10000               name[6] == '_')
10001           {                                       /* __END__    */
10002             return KEY___END__;
10003           }
10004
10005           goto unknown;
10006
10007         case 'b':
10008           if (name[1] == 'i' &&
10009               name[2] == 'n' &&
10010               name[3] == 'm' &&
10011               name[4] == 'o' &&
10012               name[5] == 'd' &&
10013               name[6] == 'e')
10014           {                                       /* binmode    */
10015             return -KEY_binmode;
10016           }
10017
10018           goto unknown;
10019
10020         case 'c':
10021           if (name[1] == 'o' &&
10022               name[2] == 'n' &&
10023               name[3] == 'n' &&
10024               name[4] == 'e' &&
10025               name[5] == 'c' &&
10026               name[6] == 't')
10027           {                                       /* connect    */
10028             return -KEY_connect;
10029           }
10030
10031           goto unknown;
10032
10033         case 'd':
10034           switch (name[1])
10035           {
10036             case 'b':
10037               if (name[2] == 'm' &&
10038                   name[3] == 'o' &&
10039                   name[4] == 'p' &&
10040                   name[5] == 'e' &&
10041                   name[6] == 'n')
10042               {                                   /* dbmopen    */
10043                 return -KEY_dbmopen;
10044               }
10045
10046               goto unknown;
10047
10048             case 'e':
10049               if (name[2] == 'f')
10050               {
10051                 switch (name[3])
10052                 {
10053                   case 'a':
10054                     if (name[4] == 'u' &&
10055                         name[5] == 'l' &&
10056                         name[6] == 't')
10057                     {                             /* default    */
10058                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10059                     }
10060
10061                     goto unknown;
10062
10063                   case 'i':
10064                     if (name[4] == 'n' &&
10065                         name[5] == 'e' &&
10066                         name[6] == 'd')
10067                     {                             /* defined    */
10068                       return KEY_defined;
10069                     }
10070
10071                     goto unknown;
10072
10073                   default:
10074                     goto unknown;
10075                 }
10076               }
10077
10078               goto unknown;
10079
10080             default:
10081               goto unknown;
10082           }
10083
10084         case 'f':
10085           if (name[1] == 'o' &&
10086               name[2] == 'r' &&
10087               name[3] == 'e' &&
10088               name[4] == 'a' &&
10089               name[5] == 'c' &&
10090               name[6] == 'h')
10091           {                                       /* foreach    */
10092             return KEY_foreach;
10093           }
10094
10095           goto unknown;
10096
10097         case 'g':
10098           if (name[1] == 'e' &&
10099               name[2] == 't' &&
10100               name[3] == 'p')
10101           {
10102             switch (name[4])
10103             {
10104               case 'g':
10105                 if (name[5] == 'r' &&
10106                     name[6] == 'p')
10107                 {                                 /* getpgrp    */
10108                   return -KEY_getpgrp;
10109                 }
10110
10111                 goto unknown;
10112
10113               case 'p':
10114                 if (name[5] == 'i' &&
10115                     name[6] == 'd')
10116                 {                                 /* getppid    */
10117                   return -KEY_getppid;
10118                 }
10119
10120                 goto unknown;
10121
10122               default:
10123                 goto unknown;
10124             }
10125           }
10126
10127           goto unknown;
10128
10129         case 'l':
10130           if (name[1] == 'c' &&
10131               name[2] == 'f' &&
10132               name[3] == 'i' &&
10133               name[4] == 'r' &&
10134               name[5] == 's' &&
10135               name[6] == 't')
10136           {                                       /* lcfirst    */
10137             return -KEY_lcfirst;
10138           }
10139
10140           goto unknown;
10141
10142         case 'o':
10143           if (name[1] == 'p' &&
10144               name[2] == 'e' &&
10145               name[3] == 'n' &&
10146               name[4] == 'd' &&
10147               name[5] == 'i' &&
10148               name[6] == 'r')
10149           {                                       /* opendir    */
10150             return -KEY_opendir;
10151           }
10152
10153           goto unknown;
10154
10155         case 'p':
10156           if (name[1] == 'a' &&
10157               name[2] == 'c' &&
10158               name[3] == 'k' &&
10159               name[4] == 'a' &&
10160               name[5] == 'g' &&
10161               name[6] == 'e')
10162           {                                       /* package    */
10163             return KEY_package;
10164           }
10165
10166           goto unknown;
10167
10168         case 'r':
10169           if (name[1] == 'e')
10170           {
10171             switch (name[2])
10172             {
10173               case 'a':
10174                 if (name[3] == 'd' &&
10175                     name[4] == 'd' &&
10176                     name[5] == 'i' &&
10177                     name[6] == 'r')
10178                 {                                 /* readdir    */
10179                   return -KEY_readdir;
10180                 }
10181
10182                 goto unknown;
10183
10184               case 'q':
10185                 if (name[3] == 'u' &&
10186                     name[4] == 'i' &&
10187                     name[5] == 'r' &&
10188                     name[6] == 'e')
10189                 {                                 /* require    */
10190                   return KEY_require;
10191                 }
10192
10193                 goto unknown;
10194
10195               case 'v':
10196                 if (name[3] == 'e' &&
10197                     name[4] == 'r' &&
10198                     name[5] == 's' &&
10199                     name[6] == 'e')
10200                 {                                 /* reverse    */
10201                   return -KEY_reverse;
10202                 }
10203
10204                 goto unknown;
10205
10206               default:
10207                 goto unknown;
10208             }
10209           }
10210
10211           goto unknown;
10212
10213         case 's':
10214           switch (name[1])
10215           {
10216             case 'e':
10217               switch (name[2])
10218               {
10219                 case 'e':
10220                   if (name[3] == 'k' &&
10221                       name[4] == 'd' &&
10222                       name[5] == 'i' &&
10223                       name[6] == 'r')
10224                   {                               /* seekdir    */
10225                     return -KEY_seekdir;
10226                   }
10227
10228                   goto unknown;
10229
10230                 case 't':
10231                   if (name[3] == 'p' &&
10232                       name[4] == 'g' &&
10233                       name[5] == 'r' &&
10234                       name[6] == 'p')
10235                   {                               /* setpgrp    */
10236                     return -KEY_setpgrp;
10237                   }
10238
10239                   goto unknown;
10240
10241                 default:
10242                   goto unknown;
10243               }
10244
10245             case 'h':
10246               if (name[2] == 'm' &&
10247                   name[3] == 'r' &&
10248                   name[4] == 'e' &&
10249                   name[5] == 'a' &&
10250                   name[6] == 'd')
10251               {                                   /* shmread    */
10252                 return -KEY_shmread;
10253               }
10254
10255               goto unknown;
10256
10257             case 'p':
10258               if (name[2] == 'r' &&
10259                   name[3] == 'i' &&
10260                   name[4] == 'n' &&
10261                   name[5] == 't' &&
10262                   name[6] == 'f')
10263               {                                   /* sprintf    */
10264                 return -KEY_sprintf;
10265               }
10266
10267               goto unknown;
10268
10269             case 'y':
10270               switch (name[2])
10271               {
10272                 case 'm':
10273                   if (name[3] == 'l' &&
10274                       name[4] == 'i' &&
10275                       name[5] == 'n' &&
10276                       name[6] == 'k')
10277                   {                               /* symlink    */
10278                     return -KEY_symlink;
10279                   }
10280
10281                   goto unknown;
10282
10283                 case 's':
10284                   switch (name[3])
10285                   {
10286                     case 'c':
10287                       if (name[4] == 'a' &&
10288                           name[5] == 'l' &&
10289                           name[6] == 'l')
10290                       {                           /* syscall    */
10291                         return -KEY_syscall;
10292                       }
10293
10294                       goto unknown;
10295
10296                     case 'o':
10297                       if (name[4] == 'p' &&
10298                           name[5] == 'e' &&
10299                           name[6] == 'n')
10300                       {                           /* sysopen    */
10301                         return -KEY_sysopen;
10302                       }
10303
10304                       goto unknown;
10305
10306                     case 'r':
10307                       if (name[4] == 'e' &&
10308                           name[5] == 'a' &&
10309                           name[6] == 'd')
10310                       {                           /* sysread    */
10311                         return -KEY_sysread;
10312                       }
10313
10314                       goto unknown;
10315
10316                     case 's':
10317                       if (name[4] == 'e' &&
10318                           name[5] == 'e' &&
10319                           name[6] == 'k')
10320                       {                           /* sysseek    */
10321                         return -KEY_sysseek;
10322                       }
10323
10324                       goto unknown;
10325
10326                     default:
10327                       goto unknown;
10328                   }
10329
10330                 default:
10331                   goto unknown;
10332               }
10333
10334             default:
10335               goto unknown;
10336           }
10337
10338         case 't':
10339           if (name[1] == 'e' &&
10340               name[2] == 'l' &&
10341               name[3] == 'l' &&
10342               name[4] == 'd' &&
10343               name[5] == 'i' &&
10344               name[6] == 'r')
10345           {                                       /* telldir    */
10346             return -KEY_telldir;
10347           }
10348
10349           goto unknown;
10350
10351         case 'u':
10352           switch (name[1])
10353           {
10354             case 'c':
10355               if (name[2] == 'f' &&
10356                   name[3] == 'i' &&
10357                   name[4] == 'r' &&
10358                   name[5] == 's' &&
10359                   name[6] == 't')
10360               {                                   /* ucfirst    */
10361                 return -KEY_ucfirst;
10362               }
10363
10364               goto unknown;
10365
10366             case 'n':
10367               if (name[2] == 's' &&
10368                   name[3] == 'h' &&
10369                   name[4] == 'i' &&
10370                   name[5] == 'f' &&
10371                   name[6] == 't')
10372               {                                   /* unshift    */
10373                 return -KEY_unshift;
10374               }
10375
10376               goto unknown;
10377
10378             default:
10379               goto unknown;
10380           }
10381
10382         case 'w':
10383           if (name[1] == 'a' &&
10384               name[2] == 'i' &&
10385               name[3] == 't' &&
10386               name[4] == 'p' &&
10387               name[5] == 'i' &&
10388               name[6] == 'd')
10389           {                                       /* waitpid    */
10390             return -KEY_waitpid;
10391           }
10392
10393           goto unknown;
10394
10395         default:
10396           goto unknown;
10397       }
10398
10399     case 8: /* 26 tokens of length 8 */
10400       switch (name[0])
10401       {
10402         case 'A':
10403           if (name[1] == 'U' &&
10404               name[2] == 'T' &&
10405               name[3] == 'O' &&
10406               name[4] == 'L' &&
10407               name[5] == 'O' &&
10408               name[6] == 'A' &&
10409               name[7] == 'D')
10410           {                                       /* AUTOLOAD   */
10411             return KEY_AUTOLOAD;
10412           }
10413
10414           goto unknown;
10415
10416         case '_':
10417           if (name[1] == '_')
10418           {
10419             switch (name[2])
10420             {
10421               case 'D':
10422                 if (name[3] == 'A' &&
10423                     name[4] == 'T' &&
10424                     name[5] == 'A' &&
10425                     name[6] == '_' &&
10426                     name[7] == '_')
10427                 {                                 /* __DATA__   */
10428                   return KEY___DATA__;
10429                 }
10430
10431                 goto unknown;
10432
10433               case 'F':
10434                 if (name[3] == 'I' &&
10435                     name[4] == 'L' &&
10436                     name[5] == 'E' &&
10437                     name[6] == '_' &&
10438                     name[7] == '_')
10439                 {                                 /* __FILE__   */
10440                   return -KEY___FILE__;
10441                 }
10442
10443                 goto unknown;
10444
10445               case 'L':
10446                 if (name[3] == 'I' &&
10447                     name[4] == 'N' &&
10448                     name[5] == 'E' &&
10449                     name[6] == '_' &&
10450                     name[7] == '_')
10451                 {                                 /* __LINE__   */
10452                   return -KEY___LINE__;
10453                 }
10454
10455                 goto unknown;
10456
10457               default:
10458                 goto unknown;
10459             }
10460           }
10461
10462           goto unknown;
10463
10464         case 'c':
10465           switch (name[1])
10466           {
10467             case 'l':
10468               if (name[2] == 'o' &&
10469                   name[3] == 's' &&
10470                   name[4] == 'e' &&
10471                   name[5] == 'd' &&
10472                   name[6] == 'i' &&
10473                   name[7] == 'r')
10474               {                                   /* closedir   */
10475                 return -KEY_closedir;
10476               }
10477
10478               goto unknown;
10479
10480             case 'o':
10481               if (name[2] == 'n' &&
10482                   name[3] == 't' &&
10483                   name[4] == 'i' &&
10484                   name[5] == 'n' &&
10485                   name[6] == 'u' &&
10486                   name[7] == 'e')
10487               {                                   /* continue   */
10488                 return -KEY_continue;
10489               }
10490
10491               goto unknown;
10492
10493             default:
10494               goto unknown;
10495           }
10496
10497         case 'd':
10498           if (name[1] == 'b' &&
10499               name[2] == 'm' &&
10500               name[3] == 'c' &&
10501               name[4] == 'l' &&
10502               name[5] == 'o' &&
10503               name[6] == 's' &&
10504               name[7] == 'e')
10505           {                                       /* dbmclose   */
10506             return -KEY_dbmclose;
10507           }
10508
10509           goto unknown;
10510
10511         case 'e':
10512           if (name[1] == 'n' &&
10513               name[2] == 'd')
10514           {
10515             switch (name[3])
10516             {
10517               case 'g':
10518                 if (name[4] == 'r' &&
10519                     name[5] == 'e' &&
10520                     name[6] == 'n' &&
10521                     name[7] == 't')
10522                 {                                 /* endgrent   */
10523                   return -KEY_endgrent;
10524                 }
10525
10526                 goto unknown;
10527
10528               case 'p':
10529                 if (name[4] == 'w' &&
10530                     name[5] == 'e' &&
10531                     name[6] == 'n' &&
10532                     name[7] == 't')
10533                 {                                 /* endpwent   */
10534                   return -KEY_endpwent;
10535                 }
10536
10537                 goto unknown;
10538
10539               default:
10540                 goto unknown;
10541             }
10542           }
10543
10544           goto unknown;
10545
10546         case 'f':
10547           if (name[1] == 'o' &&
10548               name[2] == 'r' &&
10549               name[3] == 'm' &&
10550               name[4] == 'l' &&
10551               name[5] == 'i' &&
10552               name[6] == 'n' &&
10553               name[7] == 'e')
10554           {                                       /* formline   */
10555             return -KEY_formline;
10556           }
10557
10558           goto unknown;
10559
10560         case 'g':
10561           if (name[1] == 'e' &&
10562               name[2] == 't')
10563           {
10564             switch (name[3])
10565             {
10566               case 'g':
10567                 if (name[4] == 'r')
10568                 {
10569                   switch (name[5])
10570                   {
10571                     case 'e':
10572                       if (name[6] == 'n' &&
10573                           name[7] == 't')
10574                       {                           /* getgrent   */
10575                         return -KEY_getgrent;
10576                       }
10577
10578                       goto unknown;
10579
10580                     case 'g':
10581                       if (name[6] == 'i' &&
10582                           name[7] == 'd')
10583                       {                           /* getgrgid   */
10584                         return -KEY_getgrgid;
10585                       }
10586
10587                       goto unknown;
10588
10589                     case 'n':
10590                       if (name[6] == 'a' &&
10591                           name[7] == 'm')
10592                       {                           /* getgrnam   */
10593                         return -KEY_getgrnam;
10594                       }
10595
10596                       goto unknown;
10597
10598                     default:
10599                       goto unknown;
10600                   }
10601                 }
10602
10603                 goto unknown;
10604
10605               case 'l':
10606                 if (name[4] == 'o' &&
10607                     name[5] == 'g' &&
10608                     name[6] == 'i' &&
10609                     name[7] == 'n')
10610                 {                                 /* getlogin   */
10611                   return -KEY_getlogin;
10612                 }
10613
10614                 goto unknown;
10615
10616               case 'p':
10617                 if (name[4] == 'w')
10618                 {
10619                   switch (name[5])
10620                   {
10621                     case 'e':
10622                       if (name[6] == 'n' &&
10623                           name[7] == 't')
10624                       {                           /* getpwent   */
10625                         return -KEY_getpwent;
10626                       }
10627
10628                       goto unknown;
10629
10630                     case 'n':
10631                       if (name[6] == 'a' &&
10632                           name[7] == 'm')
10633                       {                           /* getpwnam   */
10634                         return -KEY_getpwnam;
10635                       }
10636
10637                       goto unknown;
10638
10639                     case 'u':
10640                       if (name[6] == 'i' &&
10641                           name[7] == 'd')
10642                       {                           /* getpwuid   */
10643                         return -KEY_getpwuid;
10644                       }
10645
10646                       goto unknown;
10647
10648                     default:
10649                       goto unknown;
10650                   }
10651                 }
10652
10653                 goto unknown;
10654
10655               default:
10656                 goto unknown;
10657             }
10658           }
10659
10660           goto unknown;
10661
10662         case 'r':
10663           if (name[1] == 'e' &&
10664               name[2] == 'a' &&
10665               name[3] == 'd')
10666           {
10667             switch (name[4])
10668             {
10669               case 'l':
10670                 if (name[5] == 'i' &&
10671                     name[6] == 'n')
10672                 {
10673                   switch (name[7])
10674                   {
10675                     case 'e':
10676                       {                           /* readline   */
10677                         return -KEY_readline;
10678                       }
10679
10680                     case 'k':
10681                       {                           /* readlink   */
10682                         return -KEY_readlink;
10683                       }
10684
10685                     default:
10686                       goto unknown;
10687                   }
10688                 }
10689
10690                 goto unknown;
10691
10692               case 'p':
10693                 if (name[5] == 'i' &&
10694                     name[6] == 'p' &&
10695                     name[7] == 'e')
10696                 {                                 /* readpipe   */
10697                   return -KEY_readpipe;
10698                 }
10699
10700                 goto unknown;
10701
10702               default:
10703                 goto unknown;
10704             }
10705           }
10706
10707           goto unknown;
10708
10709         case 's':
10710           switch (name[1])
10711           {
10712             case 'e':
10713               if (name[2] == 't')
10714               {
10715                 switch (name[3])
10716                 {
10717                   case 'g':
10718                     if (name[4] == 'r' &&
10719                         name[5] == 'e' &&
10720                         name[6] == 'n' &&
10721                         name[7] == 't')
10722                     {                             /* setgrent   */
10723                       return -KEY_setgrent;
10724                     }
10725
10726                     goto unknown;
10727
10728                   case 'p':
10729                     if (name[4] == 'w' &&
10730                         name[5] == 'e' &&
10731                         name[6] == 'n' &&
10732                         name[7] == 't')
10733                     {                             /* setpwent   */
10734                       return -KEY_setpwent;
10735                     }
10736
10737                     goto unknown;
10738
10739                   default:
10740                     goto unknown;
10741                 }
10742               }
10743
10744               goto unknown;
10745
10746             case 'h':
10747               switch (name[2])
10748               {
10749                 case 'm':
10750                   if (name[3] == 'w' &&
10751                       name[4] == 'r' &&
10752                       name[5] == 'i' &&
10753                       name[6] == 't' &&
10754                       name[7] == 'e')
10755                   {                               /* shmwrite   */
10756                     return -KEY_shmwrite;
10757                   }
10758
10759                   goto unknown;
10760
10761                 case 'u':
10762                   if (name[3] == 't' &&
10763                       name[4] == 'd' &&
10764                       name[5] == 'o' &&
10765                       name[6] == 'w' &&
10766                       name[7] == 'n')
10767                   {                               /* shutdown   */
10768                     return -KEY_shutdown;
10769                   }
10770
10771                   goto unknown;
10772
10773                 default:
10774                   goto unknown;
10775               }
10776
10777             case 'y':
10778               if (name[2] == 's' &&
10779                   name[3] == 'w' &&
10780                   name[4] == 'r' &&
10781                   name[5] == 'i' &&
10782                   name[6] == 't' &&
10783                   name[7] == 'e')
10784               {                                   /* syswrite   */
10785                 return -KEY_syswrite;
10786               }
10787
10788               goto unknown;
10789
10790             default:
10791               goto unknown;
10792           }
10793
10794         case 't':
10795           if (name[1] == 'r' &&
10796               name[2] == 'u' &&
10797               name[3] == 'n' &&
10798               name[4] == 'c' &&
10799               name[5] == 'a' &&
10800               name[6] == 't' &&
10801               name[7] == 'e')
10802           {                                       /* truncate   */
10803             return -KEY_truncate;
10804           }
10805
10806           goto unknown;
10807
10808         default:
10809           goto unknown;
10810       }
10811
10812     case 9: /* 9 tokens of length 9 */
10813       switch (name[0])
10814       {
10815         case 'U':
10816           if (name[1] == 'N' &&
10817               name[2] == 'I' &&
10818               name[3] == 'T' &&
10819               name[4] == 'C' &&
10820               name[5] == 'H' &&
10821               name[6] == 'E' &&
10822               name[7] == 'C' &&
10823               name[8] == 'K')
10824           {                                       /* UNITCHECK  */
10825             return KEY_UNITCHECK;
10826           }
10827
10828           goto unknown;
10829
10830         case 'e':
10831           if (name[1] == 'n' &&
10832               name[2] == 'd' &&
10833               name[3] == 'n' &&
10834               name[4] == 'e' &&
10835               name[5] == 't' &&
10836               name[6] == 'e' &&
10837               name[7] == 'n' &&
10838               name[8] == 't')
10839           {                                       /* endnetent  */
10840             return -KEY_endnetent;
10841           }
10842
10843           goto unknown;
10844
10845         case 'g':
10846           if (name[1] == 'e' &&
10847               name[2] == 't' &&
10848               name[3] == 'n' &&
10849               name[4] == 'e' &&
10850               name[5] == 't' &&
10851               name[6] == 'e' &&
10852               name[7] == 'n' &&
10853               name[8] == 't')
10854           {                                       /* getnetent  */
10855             return -KEY_getnetent;
10856           }
10857
10858           goto unknown;
10859
10860         case 'l':
10861           if (name[1] == 'o' &&
10862               name[2] == 'c' &&
10863               name[3] == 'a' &&
10864               name[4] == 'l' &&
10865               name[5] == 't' &&
10866               name[6] == 'i' &&
10867               name[7] == 'm' &&
10868               name[8] == 'e')
10869           {                                       /* localtime  */
10870             return -KEY_localtime;
10871           }
10872
10873           goto unknown;
10874
10875         case 'p':
10876           if (name[1] == 'r' &&
10877               name[2] == 'o' &&
10878               name[3] == 't' &&
10879               name[4] == 'o' &&
10880               name[5] == 't' &&
10881               name[6] == 'y' &&
10882               name[7] == 'p' &&
10883               name[8] == 'e')
10884           {                                       /* prototype  */
10885             return KEY_prototype;
10886           }
10887
10888           goto unknown;
10889
10890         case 'q':
10891           if (name[1] == 'u' &&
10892               name[2] == 'o' &&
10893               name[3] == 't' &&
10894               name[4] == 'e' &&
10895               name[5] == 'm' &&
10896               name[6] == 'e' &&
10897               name[7] == 't' &&
10898               name[8] == 'a')
10899           {                                       /* quotemeta  */
10900             return -KEY_quotemeta;
10901           }
10902
10903           goto unknown;
10904
10905         case 'r':
10906           if (name[1] == 'e' &&
10907               name[2] == 'w' &&
10908               name[3] == 'i' &&
10909               name[4] == 'n' &&
10910               name[5] == 'd' &&
10911               name[6] == 'd' &&
10912               name[7] == 'i' &&
10913               name[8] == 'r')
10914           {                                       /* rewinddir  */
10915             return -KEY_rewinddir;
10916           }
10917
10918           goto unknown;
10919
10920         case 's':
10921           if (name[1] == 'e' &&
10922               name[2] == 't' &&
10923               name[3] == 'n' &&
10924               name[4] == 'e' &&
10925               name[5] == 't' &&
10926               name[6] == 'e' &&
10927               name[7] == 'n' &&
10928               name[8] == 't')
10929           {                                       /* setnetent  */
10930             return -KEY_setnetent;
10931           }
10932
10933           goto unknown;
10934
10935         case 'w':
10936           if (name[1] == 'a' &&
10937               name[2] == 'n' &&
10938               name[3] == 't' &&
10939               name[4] == 'a' &&
10940               name[5] == 'r' &&
10941               name[6] == 'r' &&
10942               name[7] == 'a' &&
10943               name[8] == 'y')
10944           {                                       /* wantarray  */
10945             return -KEY_wantarray;
10946           }
10947
10948           goto unknown;
10949
10950         default:
10951           goto unknown;
10952       }
10953
10954     case 10: /* 9 tokens of length 10 */
10955       switch (name[0])
10956       {
10957         case 'e':
10958           if (name[1] == 'n' &&
10959               name[2] == 'd')
10960           {
10961             switch (name[3])
10962             {
10963               case 'h':
10964                 if (name[4] == 'o' &&
10965                     name[5] == 's' &&
10966                     name[6] == 't' &&
10967                     name[7] == 'e' &&
10968                     name[8] == 'n' &&
10969                     name[9] == 't')
10970                 {                                 /* endhostent */
10971                   return -KEY_endhostent;
10972                 }
10973
10974                 goto unknown;
10975
10976               case 's':
10977                 if (name[4] == 'e' &&
10978                     name[5] == 'r' &&
10979                     name[6] == 'v' &&
10980                     name[7] == 'e' &&
10981                     name[8] == 'n' &&
10982                     name[9] == 't')
10983                 {                                 /* endservent */
10984                   return -KEY_endservent;
10985                 }
10986
10987                 goto unknown;
10988
10989               default:
10990                 goto unknown;
10991             }
10992           }
10993
10994           goto unknown;
10995
10996         case 'g':
10997           if (name[1] == 'e' &&
10998               name[2] == 't')
10999           {
11000             switch (name[3])
11001             {
11002               case 'h':
11003                 if (name[4] == 'o' &&
11004                     name[5] == 's' &&
11005                     name[6] == 't' &&
11006                     name[7] == 'e' &&
11007                     name[8] == 'n' &&
11008                     name[9] == 't')
11009                 {                                 /* gethostent */
11010                   return -KEY_gethostent;
11011                 }
11012
11013                 goto unknown;
11014
11015               case 's':
11016                 switch (name[4])
11017                 {
11018                   case 'e':
11019                     if (name[5] == 'r' &&
11020                         name[6] == 'v' &&
11021                         name[7] == 'e' &&
11022                         name[8] == 'n' &&
11023                         name[9] == 't')
11024                     {                             /* getservent */
11025                       return -KEY_getservent;
11026                     }
11027
11028                     goto unknown;
11029
11030                   case 'o':
11031                     if (name[5] == 'c' &&
11032                         name[6] == 'k' &&
11033                         name[7] == 'o' &&
11034                         name[8] == 'p' &&
11035                         name[9] == 't')
11036                     {                             /* getsockopt */
11037                       return -KEY_getsockopt;
11038                     }
11039
11040                     goto unknown;
11041
11042                   default:
11043                     goto unknown;
11044                 }
11045
11046               default:
11047                 goto unknown;
11048             }
11049           }
11050
11051           goto unknown;
11052
11053         case 's':
11054           switch (name[1])
11055           {
11056             case 'e':
11057               if (name[2] == 't')
11058               {
11059                 switch (name[3])
11060                 {
11061                   case 'h':
11062                     if (name[4] == 'o' &&
11063                         name[5] == 's' &&
11064                         name[6] == 't' &&
11065                         name[7] == 'e' &&
11066                         name[8] == 'n' &&
11067                         name[9] == 't')
11068                     {                             /* sethostent */
11069                       return -KEY_sethostent;
11070                     }
11071
11072                     goto unknown;
11073
11074                   case 's':
11075                     switch (name[4])
11076                     {
11077                       case 'e':
11078                         if (name[5] == 'r' &&
11079                             name[6] == 'v' &&
11080                             name[7] == 'e' &&
11081                             name[8] == 'n' &&
11082                             name[9] == 't')
11083                         {                         /* setservent */
11084                           return -KEY_setservent;
11085                         }
11086
11087                         goto unknown;
11088
11089                       case 'o':
11090                         if (name[5] == 'c' &&
11091                             name[6] == 'k' &&
11092                             name[7] == 'o' &&
11093                             name[8] == 'p' &&
11094                             name[9] == 't')
11095                         {                         /* setsockopt */
11096                           return -KEY_setsockopt;
11097                         }
11098
11099                         goto unknown;
11100
11101                       default:
11102                         goto unknown;
11103                     }
11104
11105                   default:
11106                     goto unknown;
11107                 }
11108               }
11109
11110               goto unknown;
11111
11112             case 'o':
11113               if (name[2] == 'c' &&
11114                   name[3] == 'k' &&
11115                   name[4] == 'e' &&
11116                   name[5] == 't' &&
11117                   name[6] == 'p' &&
11118                   name[7] == 'a' &&
11119                   name[8] == 'i' &&
11120                   name[9] == 'r')
11121               {                                   /* socketpair */
11122                 return -KEY_socketpair;
11123               }
11124
11125               goto unknown;
11126
11127             default:
11128               goto unknown;
11129           }
11130
11131         default:
11132           goto unknown;
11133       }
11134
11135     case 11: /* 8 tokens of length 11 */
11136       switch (name[0])
11137       {
11138         case '_':
11139           if (name[1] == '_' &&
11140               name[2] == 'P' &&
11141               name[3] == 'A' &&
11142               name[4] == 'C' &&
11143               name[5] == 'K' &&
11144               name[6] == 'A' &&
11145               name[7] == 'G' &&
11146               name[8] == 'E' &&
11147               name[9] == '_' &&
11148               name[10] == '_')
11149           {                                       /* __PACKAGE__ */
11150             return -KEY___PACKAGE__;
11151           }
11152
11153           goto unknown;
11154
11155         case 'e':
11156           if (name[1] == 'n' &&
11157               name[2] == 'd' &&
11158               name[3] == 'p' &&
11159               name[4] == 'r' &&
11160               name[5] == 'o' &&
11161               name[6] == 't' &&
11162               name[7] == 'o' &&
11163               name[8] == 'e' &&
11164               name[9] == 'n' &&
11165               name[10] == 't')
11166           {                                       /* endprotoent */
11167             return -KEY_endprotoent;
11168           }
11169
11170           goto unknown;
11171
11172         case 'g':
11173           if (name[1] == 'e' &&
11174               name[2] == 't')
11175           {
11176             switch (name[3])
11177             {
11178               case 'p':
11179                 switch (name[4])
11180                 {
11181                   case 'e':
11182                     if (name[5] == 'e' &&
11183                         name[6] == 'r' &&
11184                         name[7] == 'n' &&
11185                         name[8] == 'a' &&
11186                         name[9] == 'm' &&
11187                         name[10] == 'e')
11188                     {                             /* getpeername */
11189                       return -KEY_getpeername;
11190                     }
11191
11192                     goto unknown;
11193
11194                   case 'r':
11195                     switch (name[5])
11196                     {
11197                       case 'i':
11198                         if (name[6] == 'o' &&
11199                             name[7] == 'r' &&
11200                             name[8] == 'i' &&
11201                             name[9] == 't' &&
11202                             name[10] == 'y')
11203                         {                         /* getpriority */
11204                           return -KEY_getpriority;
11205                         }
11206
11207                         goto unknown;
11208
11209                       case 'o':
11210                         if (name[6] == 't' &&
11211                             name[7] == 'o' &&
11212                             name[8] == 'e' &&
11213                             name[9] == 'n' &&
11214                             name[10] == 't')
11215                         {                         /* getprotoent */
11216                           return -KEY_getprotoent;
11217                         }
11218
11219                         goto unknown;
11220
11221                       default:
11222                         goto unknown;
11223                     }
11224
11225                   default:
11226                     goto unknown;
11227                 }
11228
11229               case 's':
11230                 if (name[4] == 'o' &&
11231                     name[5] == 'c' &&
11232                     name[6] == 'k' &&
11233                     name[7] == 'n' &&
11234                     name[8] == 'a' &&
11235                     name[9] == 'm' &&
11236                     name[10] == 'e')
11237                 {                                 /* getsockname */
11238                   return -KEY_getsockname;
11239                 }
11240
11241                 goto unknown;
11242
11243               default:
11244                 goto unknown;
11245             }
11246           }
11247
11248           goto unknown;
11249
11250         case 's':
11251           if (name[1] == 'e' &&
11252               name[2] == 't' &&
11253               name[3] == 'p' &&
11254               name[4] == 'r')
11255           {
11256             switch (name[5])
11257             {
11258               case 'i':
11259                 if (name[6] == 'o' &&
11260                     name[7] == 'r' &&
11261                     name[8] == 'i' &&
11262                     name[9] == 't' &&
11263                     name[10] == 'y')
11264                 {                                 /* setpriority */
11265                   return -KEY_setpriority;
11266                 }
11267
11268                 goto unknown;
11269
11270               case 'o':
11271                 if (name[6] == 't' &&
11272                     name[7] == 'o' &&
11273                     name[8] == 'e' &&
11274                     name[9] == 'n' &&
11275                     name[10] == 't')
11276                 {                                 /* setprotoent */
11277                   return -KEY_setprotoent;
11278                 }
11279
11280                 goto unknown;
11281
11282               default:
11283                 goto unknown;
11284             }
11285           }
11286
11287           goto unknown;
11288
11289         default:
11290           goto unknown;
11291       }
11292
11293     case 12: /* 2 tokens of length 12 */
11294       if (name[0] == 'g' &&
11295           name[1] == 'e' &&
11296           name[2] == 't' &&
11297           name[3] == 'n' &&
11298           name[4] == 'e' &&
11299           name[5] == 't' &&
11300           name[6] == 'b' &&
11301           name[7] == 'y')
11302       {
11303         switch (name[8])
11304         {
11305           case 'a':
11306             if (name[9] == 'd' &&
11307                 name[10] == 'd' &&
11308                 name[11] == 'r')
11309             {                                     /* getnetbyaddr */
11310               return -KEY_getnetbyaddr;
11311             }
11312
11313             goto unknown;
11314
11315           case 'n':
11316             if (name[9] == 'a' &&
11317                 name[10] == 'm' &&
11318                 name[11] == 'e')
11319             {                                     /* getnetbyname */
11320               return -KEY_getnetbyname;
11321             }
11322
11323             goto unknown;
11324
11325           default:
11326             goto unknown;
11327         }
11328       }
11329
11330       goto unknown;
11331
11332     case 13: /* 4 tokens of length 13 */
11333       if (name[0] == 'g' &&
11334           name[1] == 'e' &&
11335           name[2] == 't')
11336       {
11337         switch (name[3])
11338         {
11339           case 'h':
11340             if (name[4] == 'o' &&
11341                 name[5] == 's' &&
11342                 name[6] == 't' &&
11343                 name[7] == 'b' &&
11344                 name[8] == 'y')
11345             {
11346               switch (name[9])
11347               {
11348                 case 'a':
11349                   if (name[10] == 'd' &&
11350                       name[11] == 'd' &&
11351                       name[12] == 'r')
11352                   {                               /* gethostbyaddr */
11353                     return -KEY_gethostbyaddr;
11354                   }
11355
11356                   goto unknown;
11357
11358                 case 'n':
11359                   if (name[10] == 'a' &&
11360                       name[11] == 'm' &&
11361                       name[12] == 'e')
11362                   {                               /* gethostbyname */
11363                     return -KEY_gethostbyname;
11364                   }
11365
11366                   goto unknown;
11367
11368                 default:
11369                   goto unknown;
11370               }
11371             }
11372
11373             goto unknown;
11374
11375           case 's':
11376             if (name[4] == 'e' &&
11377                 name[5] == 'r' &&
11378                 name[6] == 'v' &&
11379                 name[7] == 'b' &&
11380                 name[8] == 'y')
11381             {
11382               switch (name[9])
11383               {
11384                 case 'n':
11385                   if (name[10] == 'a' &&
11386                       name[11] == 'm' &&
11387                       name[12] == 'e')
11388                   {                               /* getservbyname */
11389                     return -KEY_getservbyname;
11390                   }
11391
11392                   goto unknown;
11393
11394                 case 'p':
11395                   if (name[10] == 'o' &&
11396                       name[11] == 'r' &&
11397                       name[12] == 't')
11398                   {                               /* getservbyport */
11399                     return -KEY_getservbyport;
11400                   }
11401
11402                   goto unknown;
11403
11404                 default:
11405                   goto unknown;
11406               }
11407             }
11408
11409             goto unknown;
11410
11411           default:
11412             goto unknown;
11413         }
11414       }
11415
11416       goto unknown;
11417
11418     case 14: /* 1 tokens of length 14 */
11419       if (name[0] == 'g' &&
11420           name[1] == 'e' &&
11421           name[2] == 't' &&
11422           name[3] == 'p' &&
11423           name[4] == 'r' &&
11424           name[5] == 'o' &&
11425           name[6] == 't' &&
11426           name[7] == 'o' &&
11427           name[8] == 'b' &&
11428           name[9] == 'y' &&
11429           name[10] == 'n' &&
11430           name[11] == 'a' &&
11431           name[12] == 'm' &&
11432           name[13] == 'e')
11433       {                                           /* getprotobyname */
11434         return -KEY_getprotobyname;
11435       }
11436
11437       goto unknown;
11438
11439     case 16: /* 1 tokens of length 16 */
11440       if (name[0] == 'g' &&
11441           name[1] == 'e' &&
11442           name[2] == 't' &&
11443           name[3] == 'p' &&
11444           name[4] == 'r' &&
11445           name[5] == 'o' &&
11446           name[6] == 't' &&
11447           name[7] == 'o' &&
11448           name[8] == 'b' &&
11449           name[9] == 'y' &&
11450           name[10] == 'n' &&
11451           name[11] == 'u' &&
11452           name[12] == 'm' &&
11453           name[13] == 'b' &&
11454           name[14] == 'e' &&
11455           name[15] == 'r')
11456       {                                           /* getprotobynumber */
11457         return -KEY_getprotobynumber;
11458       }
11459
11460       goto unknown;
11461
11462     default:
11463       goto unknown;
11464   }
11465
11466 unknown:
11467   return 0;
11468 }
11469
11470 STATIC void
11471 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11472 {
11473     dVAR;
11474
11475     PERL_ARGS_ASSERT_CHECKCOMMA;
11476
11477     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11478         if (ckWARN(WARN_SYNTAX)) {
11479             int level = 1;
11480             const char *w;
11481             for (w = s+2; *w && level; w++) {
11482                 if (*w == '(')
11483                     ++level;
11484                 else if (*w == ')')
11485                     --level;
11486             }
11487             while (isSPACE(*w))
11488                 ++w;
11489             /* the list of chars below is for end of statements or
11490              * block / parens, boolean operators (&&, ||, //) and branch
11491              * constructs (or, and, if, until, unless, while, err, for).
11492              * Not a very solid hack... */
11493             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11494                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11495                             "%s (...) interpreted as function",name);
11496         }
11497     }
11498     while (s < PL_bufend && isSPACE(*s))
11499         s++;
11500     if (*s == '(')
11501         s++;
11502     while (s < PL_bufend && isSPACE(*s))
11503         s++;
11504     if (isIDFIRST_lazy_if(s,UTF)) {
11505         const char * const w = s++;
11506         while (isALNUM_lazy_if(s,UTF))
11507             s++;
11508         while (s < PL_bufend && isSPACE(*s))
11509             s++;
11510         if (*s == ',') {
11511             GV* gv;
11512             if (keyword(w, s - w, 0))
11513                 return;
11514
11515             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11516             if (gv && GvCVu(gv))
11517                 return;
11518             Perl_croak(aTHX_ "No comma allowed after %s", what);
11519         }
11520     }
11521 }
11522
11523 /* Either returns sv, or mortalizes sv and returns a new SV*.
11524    Best used as sv=new_constant(..., sv, ...).
11525    If s, pv are NULL, calls subroutine with one argument,
11526    and type is used with error messages only. */
11527
11528 STATIC SV *
11529 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11530                SV *sv, SV *pv, const char *type, STRLEN typelen)
11531 {
11532     dVAR; dSP;
11533     HV * const table = GvHV(PL_hintgv);          /* ^H */
11534     SV *res;
11535     SV **cvp;
11536     SV *cv, *typesv;
11537     const char *why1 = "", *why2 = "", *why3 = "";
11538
11539     PERL_ARGS_ASSERT_NEW_CONSTANT;
11540
11541     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11542         SV *msg;
11543
11544         why2 = (const char *)
11545             (strEQ(key,"charnames")
11546              ? "(possibly a missing \"use charnames ...\")"
11547              : "");
11548         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11549                             (type ? type: "undef"), why2);
11550
11551         /* This is convoluted and evil ("goto considered harmful")
11552          * but I do not understand the intricacies of all the different
11553          * failure modes of %^H in here.  The goal here is to make
11554          * the most probable error message user-friendly. --jhi */
11555
11556         goto msgdone;
11557
11558     report:
11559         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11560                             (type ? type: "undef"), why1, why2, why3);
11561     msgdone:
11562         yyerror(SvPVX_const(msg));
11563         SvREFCNT_dec(msg);
11564         return sv;
11565     }
11566
11567     /* charnames doesn't work well if there have been errors found */
11568     if (PL_error_count > 0 && strEQ(key,"charnames"))
11569         return &PL_sv_undef;
11570
11571     cvp = hv_fetch(table, key, keylen, FALSE);
11572     if (!cvp || !SvOK(*cvp)) {
11573         why1 = "$^H{";
11574         why2 = key;
11575         why3 = "} is not defined";
11576         goto report;
11577     }
11578     sv_2mortal(sv);                     /* Parent created it permanently */
11579     cv = *cvp;
11580     if (!pv && s)
11581         pv = newSVpvn_flags(s, len, SVs_TEMP);
11582     if (type && pv)
11583         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11584     else
11585         typesv = &PL_sv_undef;
11586
11587     PUSHSTACKi(PERLSI_OVERLOAD);
11588     ENTER ;
11589     SAVETMPS;
11590
11591     PUSHMARK(SP) ;
11592     EXTEND(sp, 3);
11593     if (pv)
11594         PUSHs(pv);
11595     PUSHs(sv);
11596     if (pv)
11597         PUSHs(typesv);
11598     PUTBACK;
11599     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11600
11601     SPAGAIN ;
11602
11603     /* Check the eval first */
11604     if (!PL_in_eval && SvTRUE(ERRSV)) {
11605         sv_catpvs(ERRSV, "Propagated");
11606         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11607         (void)POPs;
11608         res = SvREFCNT_inc_simple(sv);
11609     }
11610     else {
11611         res = POPs;
11612         SvREFCNT_inc_simple_void(res);
11613     }
11614
11615     PUTBACK ;
11616     FREETMPS ;
11617     LEAVE ;
11618     POPSTACK;
11619
11620     if (!SvOK(res)) {
11621         why1 = "Call to &{$^H{";
11622         why2 = key;
11623         why3 = "}} did not return a defined value";
11624         sv = res;
11625         goto report;
11626     }
11627
11628     return res;
11629 }
11630
11631 /* Returns a NUL terminated string, with the length of the string written to
11632    *slp
11633    */
11634 STATIC char *
11635 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11636 {
11637     dVAR;
11638     register char *d = dest;
11639     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11640
11641     PERL_ARGS_ASSERT_SCAN_WORD;
11642
11643     for (;;) {
11644         if (d >= e)
11645             Perl_croak(aTHX_ ident_too_long);
11646         if (isALNUM(*s))        /* UTF handled below */
11647             *d++ = *s++;
11648         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11649             *d++ = ':';
11650             *d++ = ':';
11651             s++;
11652         }
11653         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11654             *d++ = *s++;
11655             *d++ = *s++;
11656         }
11657         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11658             char *t = s + UTF8SKIP(s);
11659             size_t len;
11660             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11661                 t += UTF8SKIP(t);
11662             len = t - s;
11663             if (d + len > e)
11664                 Perl_croak(aTHX_ ident_too_long);
11665             Copy(s, d, len, char);
11666             d += len;
11667             s = t;
11668         }
11669         else {
11670             *d = '\0';
11671             *slp = d - dest;
11672             return s;
11673         }
11674     }
11675 }
11676
11677 STATIC char *
11678 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11679 {
11680     dVAR;
11681     char *bracket = NULL;
11682     char funny = *s++;
11683     register char *d = dest;
11684     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11685
11686     PERL_ARGS_ASSERT_SCAN_IDENT;
11687
11688     if (isSPACE(*s))
11689         s = PEEKSPACE(s);
11690     if (isDIGIT(*s)) {
11691         while (isDIGIT(*s)) {
11692             if (d >= e)
11693                 Perl_croak(aTHX_ ident_too_long);
11694             *d++ = *s++;
11695         }
11696     }
11697     else {
11698         for (;;) {
11699             if (d >= e)
11700                 Perl_croak(aTHX_ ident_too_long);
11701             if (isALNUM(*s))    /* UTF handled below */
11702                 *d++ = *s++;
11703             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11704                 *d++ = ':';
11705                 *d++ = ':';
11706                 s++;
11707             }
11708             else if (*s == ':' && s[1] == ':') {
11709                 *d++ = *s++;
11710                 *d++ = *s++;
11711             }
11712             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11713                 char *t = s + UTF8SKIP(s);
11714                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11715                     t += UTF8SKIP(t);
11716                 if (d + (t - s) > e)
11717                     Perl_croak(aTHX_ ident_too_long);
11718                 Copy(s, d, t - s, char);
11719                 d += t - s;
11720                 s = t;
11721             }
11722             else
11723                 break;
11724         }
11725     }
11726     *d = '\0';
11727     d = dest;
11728     if (*d) {
11729         if (PL_lex_state != LEX_NORMAL)
11730             PL_lex_state = LEX_INTERPENDMAYBE;
11731         return s;
11732     }
11733     if (*s == '$' && s[1] &&
11734         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11735     {
11736         return s;
11737     }
11738     if (*s == '{') {
11739         bracket = s;
11740         s++;
11741     }
11742     else if (ck_uni)
11743         check_uni();
11744     if (s < send)
11745         *d = *s++;
11746     d[1] = '\0';
11747     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11748         *d = toCTRL(*s);
11749         s++;
11750     }
11751     if (bracket) {
11752         if (isSPACE(s[-1])) {
11753             while (s < send) {
11754                 const char ch = *s++;
11755                 if (!SPACE_OR_TAB(ch)) {
11756                     *d = ch;
11757                     break;
11758                 }
11759             }
11760         }
11761         if (isIDFIRST_lazy_if(d,UTF)) {
11762             d++;
11763             if (UTF) {
11764                 char *end = s;
11765                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11766                     end += UTF8SKIP(end);
11767                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11768                         end += UTF8SKIP(end);
11769                 }
11770                 Copy(s, d, end - s, char);
11771                 d += end - s;
11772                 s = end;
11773             }
11774             else {
11775                 while ((isALNUM(*s) || *s == ':') && d < e)
11776                     *d++ = *s++;
11777                 if (d >= e)
11778                     Perl_croak(aTHX_ ident_too_long);
11779             }
11780             *d = '\0';
11781             while (s < send && SPACE_OR_TAB(*s))
11782                 s++;
11783             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11784                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11785                     const char * const brack =
11786                         (const char *)
11787                         ((*s == '[') ? "[...]" : "{...}");
11788                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11789                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11790                         funny, dest, brack, funny, dest, brack);
11791                 }
11792                 bracket++;
11793                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11794                 return s;
11795             }
11796         }
11797         /* Handle extended ${^Foo} variables
11798          * 1999-02-27 mjd-perl-patch@plover.com */
11799         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11800                  && isALNUM(*s))
11801         {
11802             d++;
11803             while (isALNUM(*s) && d < e) {
11804                 *d++ = *s++;
11805             }
11806             if (d >= e)
11807                 Perl_croak(aTHX_ ident_too_long);
11808             *d = '\0';
11809         }
11810         if (*s == '}') {
11811             s++;
11812             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11813                 PL_lex_state = LEX_INTERPEND;
11814                 PL_expect = XREF;
11815             }
11816             if (PL_lex_state == LEX_NORMAL) {
11817                 if (ckWARN(WARN_AMBIGUOUS) &&
11818                     (keyword(dest, d - dest, 0)
11819                      || get_cvn_flags(dest, d - dest, 0)))
11820                 {
11821                     if (funny == '#')
11822                         funny = '@';
11823                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11824                         "Ambiguous use of %c{%s} resolved to %c%s",
11825                         funny, dest, funny, dest);
11826                 }
11827             }
11828         }
11829         else {
11830             s = bracket;                /* let the parser handle it */
11831             *dest = '\0';
11832         }
11833     }
11834     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11835         PL_lex_state = LEX_INTERPEND;
11836     return s;
11837 }
11838
11839 static U32
11840 S_pmflag(U32 pmfl, const char ch) {
11841     switch (ch) {
11842         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11843     case GLOBAL_PAT_MOD:    pmfl |= PMf_GLOBAL; break;
11844     case CONTINUE_PAT_MOD:  pmfl |= PMf_CONTINUE; break;
11845     case ONCE_PAT_MOD:      pmfl |= PMf_KEEP; break;
11846     case KEEPCOPY_PAT_MOD:  pmfl |= PMf_KEEPCOPY; break;
11847     }
11848     return pmfl;
11849 }
11850
11851 STATIC char *
11852 S_scan_pat(pTHX_ char *start, I32 type)
11853 {
11854     dVAR;
11855     PMOP *pm;
11856     char *s = scan_str(start,!!PL_madskills,FALSE);
11857     const char * const valid_flags =
11858         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11859 #ifdef PERL_MAD
11860     char *modstart;
11861 #endif
11862
11863     PERL_ARGS_ASSERT_SCAN_PAT;
11864
11865     if (!s) {
11866         const char * const delimiter = skipspace(start);
11867         Perl_croak(aTHX_
11868                    (const char *)
11869                    (*delimiter == '?'
11870                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11871                     : "Search pattern not terminated" ));
11872     }
11873
11874     pm = (PMOP*)newPMOP(type, 0);
11875     if (PL_multi_open == '?') {
11876         /* This is the only point in the code that sets PMf_ONCE:  */
11877         pm->op_pmflags |= PMf_ONCE;
11878
11879         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11880            allows us to restrict the list needed by reset to just the ??
11881            matches.  */
11882         assert(type != OP_TRANS);
11883         if (PL_curstash) {
11884             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11885             U32 elements;
11886             if (!mg) {
11887                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11888                                  0);
11889             }
11890             elements = mg->mg_len / sizeof(PMOP**);
11891             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11892             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11893             mg->mg_len = elements * sizeof(PMOP**);
11894             PmopSTASH_set(pm,PL_curstash);
11895         }
11896     }
11897 #ifdef PERL_MAD
11898     modstart = s;
11899 #endif
11900     while (*s && strchr(valid_flags, *s))
11901         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11902 #ifdef PERL_MAD
11903     if (PL_madskills && modstart != s) {
11904         SV* tmptoken = newSVpvn(modstart, s - modstart);
11905         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11906     }
11907 #endif
11908     /* issue a warning if /c is specified,but /g is not */
11909     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11910     {
11911         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11912                        "Use of /c modifier is meaningless without /g" );
11913     }
11914
11915     PL_lex_op = (OP*)pm;
11916     pl_yylval.ival = OP_MATCH;
11917     return s;
11918 }
11919
11920 STATIC char *
11921 S_scan_subst(pTHX_ char *start)
11922 {
11923     dVAR;
11924     register char *s;
11925     register PMOP *pm;
11926     I32 first_start;
11927     I32 es = 0;
11928 #ifdef PERL_MAD
11929     char *modstart;
11930 #endif
11931
11932     PERL_ARGS_ASSERT_SCAN_SUBST;
11933
11934     pl_yylval.ival = OP_NULL;
11935
11936     s = scan_str(start,!!PL_madskills,FALSE);
11937
11938     if (!s)
11939         Perl_croak(aTHX_ "Substitution pattern not terminated");
11940
11941     if (s[-1] == PL_multi_open)
11942         s--;
11943 #ifdef PERL_MAD
11944     if (PL_madskills) {
11945         CURMAD('q', PL_thisopen);
11946         CURMAD('_', PL_thiswhite);
11947         CURMAD('E', PL_thisstuff);
11948         CURMAD('Q', PL_thisclose);
11949         PL_realtokenstart = s - SvPVX(PL_linestr);
11950     }
11951 #endif
11952
11953     first_start = PL_multi_start;
11954     s = scan_str(s,!!PL_madskills,FALSE);
11955     if (!s) {
11956         if (PL_lex_stuff) {
11957             SvREFCNT_dec(PL_lex_stuff);
11958             PL_lex_stuff = NULL;
11959         }
11960         Perl_croak(aTHX_ "Substitution replacement not terminated");
11961     }
11962     PL_multi_start = first_start;       /* so whole substitution is taken together */
11963
11964     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11965
11966 #ifdef PERL_MAD
11967     if (PL_madskills) {
11968         CURMAD('z', PL_thisopen);
11969         CURMAD('R', PL_thisstuff);
11970         CURMAD('Z', PL_thisclose);
11971     }
11972     modstart = s;
11973 #endif
11974
11975     while (*s) {
11976         if (*s == EXEC_PAT_MOD) {
11977             s++;
11978             es++;
11979         }
11980         else if (strchr(S_PAT_MODS, *s))
11981             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11982         else
11983             break;
11984     }
11985
11986 #ifdef PERL_MAD
11987     if (PL_madskills) {
11988         if (modstart != s)
11989             curmad('m', newSVpvn(modstart, s - modstart));
11990         append_madprops(PL_thismad, (OP*)pm, 0);
11991         PL_thismad = 0;
11992     }
11993 #endif
11994     if ((pm->op_pmflags & PMf_CONTINUE)) {
11995         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11996     }
11997
11998     if (es) {
11999         SV * const repl = newSVpvs("");
12000
12001         PL_sublex_info.super_bufptr = s;
12002         PL_sublex_info.super_bufend = PL_bufend;
12003         PL_multi_end = 0;
12004         pm->op_pmflags |= PMf_EVAL;
12005         while (es-- > 0) {
12006             if (es)
12007                 sv_catpvs(repl, "eval ");
12008             else
12009                 sv_catpvs(repl, "do ");
12010         }
12011         sv_catpvs(repl, "{");
12012         sv_catsv(repl, PL_lex_repl);
12013         if (strchr(SvPVX(PL_lex_repl), '#'))
12014             sv_catpvs(repl, "\n");
12015         sv_catpvs(repl, "}");
12016         SvEVALED_on(repl);
12017         SvREFCNT_dec(PL_lex_repl);
12018         PL_lex_repl = repl;
12019     }
12020
12021     PL_lex_op = (OP*)pm;
12022     pl_yylval.ival = OP_SUBST;
12023     return s;
12024 }
12025
12026 STATIC char *
12027 S_scan_trans(pTHX_ char *start)
12028 {
12029     dVAR;
12030     register char* s;
12031     OP *o;
12032     short *tbl;
12033     U8 squash;
12034     U8 del;
12035     U8 complement;
12036 #ifdef PERL_MAD
12037     char *modstart;
12038 #endif
12039
12040     PERL_ARGS_ASSERT_SCAN_TRANS;
12041
12042     pl_yylval.ival = OP_NULL;
12043
12044     s = scan_str(start,!!PL_madskills,FALSE);
12045     if (!s)
12046         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12047
12048     if (s[-1] == PL_multi_open)
12049         s--;
12050 #ifdef PERL_MAD
12051     if (PL_madskills) {
12052         CURMAD('q', PL_thisopen);
12053         CURMAD('_', PL_thiswhite);
12054         CURMAD('E', PL_thisstuff);
12055         CURMAD('Q', PL_thisclose);
12056         PL_realtokenstart = s - SvPVX(PL_linestr);
12057     }
12058 #endif
12059
12060     s = scan_str(s,!!PL_madskills,FALSE);
12061     if (!s) {
12062         if (PL_lex_stuff) {
12063             SvREFCNT_dec(PL_lex_stuff);
12064             PL_lex_stuff = NULL;
12065         }
12066         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12067     }
12068     if (PL_madskills) {
12069         CURMAD('z', PL_thisopen);
12070         CURMAD('R', PL_thisstuff);
12071         CURMAD('Z', PL_thisclose);
12072     }
12073
12074     complement = del = squash = 0;
12075 #ifdef PERL_MAD
12076     modstart = s;
12077 #endif
12078     while (1) {
12079         switch (*s) {
12080         case 'c':
12081             complement = OPpTRANS_COMPLEMENT;
12082             break;
12083         case 'd':
12084             del = OPpTRANS_DELETE;
12085             break;
12086         case 's':
12087             squash = OPpTRANS_SQUASH;
12088             break;
12089         default:
12090             goto no_more;
12091         }
12092         s++;
12093     }
12094   no_more:
12095
12096     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12097     o = newPVOP(OP_TRANS, 0, (char*)tbl);
12098     o->op_private &= ~OPpTRANS_ALL;
12099     o->op_private |= del|squash|complement|
12100       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12101       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12102
12103     PL_lex_op = o;
12104     pl_yylval.ival = OP_TRANS;
12105
12106 #ifdef PERL_MAD
12107     if (PL_madskills) {
12108         if (modstart != s)
12109             curmad('m', newSVpvn(modstart, s - modstart));
12110         append_madprops(PL_thismad, o, 0);
12111         PL_thismad = 0;
12112     }
12113 #endif
12114
12115     return s;
12116 }
12117
12118 STATIC char *
12119 S_scan_heredoc(pTHX_ register char *s)
12120 {
12121     dVAR;
12122     SV *herewas;
12123     I32 op_type = OP_SCALAR;
12124     I32 len;
12125     SV *tmpstr;
12126     char term;
12127     const char *found_newline;
12128     register char *d;
12129     register char *e;
12130     char *peek;
12131     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12132 #ifdef PERL_MAD
12133     I32 stuffstart = s - SvPVX(PL_linestr);
12134     char *tstart;
12135
12136     PL_realtokenstart = -1;
12137 #endif
12138
12139     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12140
12141     s += 2;
12142     d = PL_tokenbuf;
12143     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12144     if (!outer)
12145         *d++ = '\n';
12146     peek = s;
12147     while (SPACE_OR_TAB(*peek))
12148         peek++;
12149     if (*peek == '`' || *peek == '\'' || *peek =='"') {
12150         s = peek;
12151         term = *s++;
12152         s = delimcpy(d, e, s, PL_bufend, term, &len);
12153         d += len;
12154         if (s < PL_bufend)
12155             s++;
12156     }
12157     else {
12158         if (*s == '\\')
12159             s++, term = '\'';
12160         else
12161             term = '"';
12162         if (!isALNUM_lazy_if(s,UTF))
12163             deprecate("bare << to mean <<\"\"");
12164         for (; isALNUM_lazy_if(s,UTF); s++) {
12165             if (d < e)
12166                 *d++ = *s;
12167         }
12168     }
12169     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12170         Perl_croak(aTHX_ "Delimiter for here document is too long");
12171     *d++ = '\n';
12172     *d = '\0';
12173     len = d - PL_tokenbuf;
12174
12175 #ifdef PERL_MAD
12176     if (PL_madskills) {
12177         tstart = PL_tokenbuf + !outer;
12178         PL_thisclose = newSVpvn(tstart, len - !outer);
12179         tstart = SvPVX(PL_linestr) + stuffstart;
12180         PL_thisopen = newSVpvn(tstart, s - tstart);
12181         stuffstart = s - SvPVX(PL_linestr);
12182     }
12183 #endif
12184 #ifndef PERL_STRICT_CR
12185     d = strchr(s, '\r');
12186     if (d) {
12187         char * const olds = s;
12188         s = d;
12189         while (s < PL_bufend) {
12190             if (*s == '\r') {
12191                 *d++ = '\n';
12192                 if (*++s == '\n')
12193                     s++;
12194             }
12195             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
12196                 *d++ = *s++;
12197                 s++;
12198             }
12199             else
12200                 *d++ = *s++;
12201         }
12202         *d = '\0';
12203         PL_bufend = d;
12204         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12205         s = olds;
12206     }
12207 #endif
12208 #ifdef PERL_MAD
12209     found_newline = 0;
12210 #endif
12211     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12212         herewas = newSVpvn(s,PL_bufend-s);
12213     }
12214     else {
12215 #ifdef PERL_MAD
12216         herewas = newSVpvn(s-1,found_newline-s+1);
12217 #else
12218         s--;
12219         herewas = newSVpvn(s,found_newline-s);
12220 #endif
12221     }
12222 #ifdef PERL_MAD
12223     if (PL_madskills) {
12224         tstart = SvPVX(PL_linestr) + stuffstart;
12225         if (PL_thisstuff)
12226             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12227         else
12228             PL_thisstuff = newSVpvn(tstart, s - tstart);
12229     }
12230 #endif
12231     s += SvCUR(herewas);
12232
12233 #ifdef PERL_MAD
12234     stuffstart = s - SvPVX(PL_linestr);
12235
12236     if (found_newline)
12237         s--;
12238 #endif
12239
12240     tmpstr = newSV_type(SVt_PVIV);
12241     SvGROW(tmpstr, 80);
12242     if (term == '\'') {
12243         op_type = OP_CONST;
12244         SvIV_set(tmpstr, -1);
12245     }
12246     else if (term == '`') {
12247         op_type = OP_BACKTICK;
12248         SvIV_set(tmpstr, '\\');
12249     }
12250
12251     CLINE;
12252     PL_multi_start = CopLINE(PL_curcop);
12253     PL_multi_open = PL_multi_close = '<';
12254     term = *PL_tokenbuf;
12255     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12256         char * const bufptr = PL_sublex_info.super_bufptr;
12257         char * const bufend = PL_sublex_info.super_bufend;
12258         char * const olds = s - SvCUR(herewas);
12259         s = strchr(bufptr, '\n');
12260         if (!s)
12261             s = bufend;
12262         d = s;
12263         while (s < bufend &&
12264           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12265             if (*s++ == '\n')
12266                 CopLINE_inc(PL_curcop);
12267         }
12268         if (s >= bufend) {
12269             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12270             missingterm(PL_tokenbuf);
12271         }
12272         sv_setpvn(herewas,bufptr,d-bufptr+1);
12273         sv_setpvn(tmpstr,d+1,s-d);
12274         s += len - 1;
12275         sv_catpvn(herewas,s,bufend-s);
12276         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12277
12278         s = olds;
12279         goto retval;
12280     }
12281     else if (!outer) {
12282         d = s;
12283         while (s < PL_bufend &&
12284           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12285             if (*s++ == '\n')
12286                 CopLINE_inc(PL_curcop);
12287         }
12288         if (s >= PL_bufend) {
12289             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12290             missingterm(PL_tokenbuf);
12291         }
12292         sv_setpvn(tmpstr,d+1,s-d);
12293 #ifdef PERL_MAD
12294         if (PL_madskills) {
12295             if (PL_thisstuff)
12296                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12297             else
12298                 PL_thisstuff = newSVpvn(d + 1, s - d);
12299             stuffstart = s - SvPVX(PL_linestr);
12300         }
12301 #endif
12302         s += len - 1;
12303         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12304
12305         sv_catpvn(herewas,s,PL_bufend-s);
12306         sv_setsv(PL_linestr,herewas);
12307         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12308         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12309         PL_last_lop = PL_last_uni = NULL;
12310     }
12311     else
12312         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
12313     while (s >= PL_bufend) {    /* multiple line string? */
12314 #ifdef PERL_MAD
12315         if (PL_madskills) {
12316             tstart = SvPVX(PL_linestr) + stuffstart;
12317             if (PL_thisstuff)
12318                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12319             else
12320                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12321         }
12322 #endif
12323         PL_bufptr = s;
12324         CopLINE_inc(PL_curcop);
12325         if (!outer || !lex_next_chunk(0)) {
12326             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12327             missingterm(PL_tokenbuf);
12328         }
12329         CopLINE_dec(PL_curcop);
12330         s = PL_bufptr;
12331 #ifdef PERL_MAD
12332         stuffstart = s - SvPVX(PL_linestr);
12333 #endif
12334         CopLINE_inc(PL_curcop);
12335         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12336         PL_last_lop = PL_last_uni = NULL;
12337 #ifndef PERL_STRICT_CR
12338         if (PL_bufend - PL_linestart >= 2) {
12339             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12340                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12341             {
12342                 PL_bufend[-2] = '\n';
12343                 PL_bufend--;
12344                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12345             }
12346             else if (PL_bufend[-1] == '\r')
12347                 PL_bufend[-1] = '\n';
12348         }
12349         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12350             PL_bufend[-1] = '\n';
12351 #endif
12352         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12353             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12354             *(SvPVX(PL_linestr) + off ) = ' ';
12355             sv_catsv(PL_linestr,herewas);
12356             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12357             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12358         }
12359         else {
12360             s = PL_bufend;
12361             sv_catsv(tmpstr,PL_linestr);
12362         }
12363     }
12364     s++;
12365 retval:
12366     PL_multi_end = CopLINE(PL_curcop);
12367     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12368         SvPV_shrink_to_cur(tmpstr);
12369     }
12370     SvREFCNT_dec(herewas);
12371     if (!IN_BYTES) {
12372         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12373             SvUTF8_on(tmpstr);
12374         else if (PL_encoding)
12375             sv_recode_to_utf8(tmpstr, PL_encoding);
12376     }
12377     PL_lex_stuff = tmpstr;
12378     pl_yylval.ival = op_type;
12379     return s;
12380 }
12381
12382 /* scan_inputsymbol
12383    takes: current position in input buffer
12384    returns: new position in input buffer
12385    side-effects: pl_yylval and lex_op are set.
12386
12387    This code handles:
12388
12389    <>           read from ARGV
12390    <FH>         read from filehandle
12391    <pkg::FH>    read from package qualified filehandle
12392    <pkg'FH>     read from package qualified filehandle
12393    <$fh>        read from filehandle in $fh
12394    <*.h>        filename glob
12395
12396 */
12397
12398 STATIC char *
12399 S_scan_inputsymbol(pTHX_ char *start)
12400 {
12401     dVAR;
12402     register char *s = start;           /* current position in buffer */
12403     char *end;
12404     I32 len;
12405     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12406     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12407
12408     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12409
12410     end = strchr(s, '\n');
12411     if (!end)
12412         end = PL_bufend;
12413     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12414
12415     /* die if we didn't have space for the contents of the <>,
12416        or if it didn't end, or if we see a newline
12417     */
12418
12419     if (len >= (I32)sizeof PL_tokenbuf)
12420         Perl_croak(aTHX_ "Excessively long <> operator");
12421     if (s >= end)
12422         Perl_croak(aTHX_ "Unterminated <> operator");
12423
12424     s++;
12425
12426     /* check for <$fh>
12427        Remember, only scalar variables are interpreted as filehandles by
12428        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12429        treated as a glob() call.
12430        This code makes use of the fact that except for the $ at the front,
12431        a scalar variable and a filehandle look the same.
12432     */
12433     if (*d == '$' && d[1]) d++;
12434
12435     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12436     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12437         d++;
12438
12439     /* If we've tried to read what we allow filehandles to look like, and
12440        there's still text left, then it must be a glob() and not a getline.
12441        Use scan_str to pull out the stuff between the <> and treat it
12442        as nothing more than a string.
12443     */
12444
12445     if (d - PL_tokenbuf != len) {
12446         pl_yylval.ival = OP_GLOB;
12447         s = scan_str(start,!!PL_madskills,FALSE);
12448         if (!s)
12449            Perl_croak(aTHX_ "Glob not terminated");
12450         return s;
12451     }
12452     else {
12453         bool readline_overriden = FALSE;
12454         GV *gv_readline;
12455         GV **gvp;
12456         /* we're in a filehandle read situation */
12457         d = PL_tokenbuf;
12458
12459         /* turn <> into <ARGV> */
12460         if (!len)
12461             Copy("ARGV",d,5,char);
12462
12463         /* Check whether readline() is overriden */
12464         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12465         if ((gv_readline
12466                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12467                 ||
12468                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12469                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12470                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12471             readline_overriden = TRUE;
12472
12473         /* if <$fh>, create the ops to turn the variable into a
12474            filehandle
12475         */
12476         if (*d == '$') {
12477             /* try to find it in the pad for this block, otherwise find
12478                add symbol table ops
12479             */
12480             const PADOFFSET tmp = pad_findmy(d, len, 0);
12481             if (tmp != NOT_IN_PAD) {
12482                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12483                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12484                     HEK * const stashname = HvNAME_HEK(stash);
12485                     SV * const sym = sv_2mortal(newSVhek(stashname));
12486                     sv_catpvs(sym, "::");
12487                     sv_catpv(sym, d+1);
12488                     d = SvPVX(sym);
12489                     goto intro_sym;
12490                 }
12491                 else {
12492                     OP * const o = newOP(OP_PADSV, 0);
12493                     o->op_targ = tmp;
12494                     PL_lex_op = readline_overriden
12495                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12496                                 append_elem(OP_LIST, o,
12497                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12498                         : (OP*)newUNOP(OP_READLINE, 0, o);
12499                 }
12500             }
12501             else {
12502                 GV *gv;
12503                 ++d;
12504 intro_sym:
12505                 gv = gv_fetchpv(d,
12506                                 (PL_in_eval
12507                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12508                                  : GV_ADDMULTI),
12509                                 SVt_PV);
12510                 PL_lex_op = readline_overriden
12511                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12512                             append_elem(OP_LIST,
12513                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12514                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12515                     : (OP*)newUNOP(OP_READLINE, 0,
12516                             newUNOP(OP_RV2SV, 0,
12517                                 newGVOP(OP_GV, 0, gv)));
12518             }
12519             if (!readline_overriden)
12520                 PL_lex_op->op_flags |= OPf_SPECIAL;
12521             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12522             pl_yylval.ival = OP_NULL;
12523         }
12524
12525         /* If it's none of the above, it must be a literal filehandle
12526            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12527         else {
12528             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12529             PL_lex_op = readline_overriden
12530                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12531                         append_elem(OP_LIST,
12532                             newGVOP(OP_GV, 0, gv),
12533                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12534                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12535             pl_yylval.ival = OP_NULL;
12536         }
12537     }
12538
12539     return s;
12540 }
12541
12542
12543 /* scan_str
12544    takes: start position in buffer
12545           keep_quoted preserve \ on the embedded delimiter(s)
12546           keep_delims preserve the delimiters around the string
12547    returns: position to continue reading from buffer
12548    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12549         updates the read buffer.
12550
12551    This subroutine pulls a string out of the input.  It is called for:
12552         q               single quotes           q(literal text)
12553         '               single quotes           'literal text'
12554         qq              double quotes           qq(interpolate $here please)
12555         "               double quotes           "interpolate $here please"
12556         qx              backticks               qx(/bin/ls -l)
12557         `               backticks               `/bin/ls -l`
12558         qw              quote words             @EXPORT_OK = qw( func() $spam )
12559         m//             regexp match            m/this/
12560         s///            regexp substitute       s/this/that/
12561         tr///           string transliterate    tr/this/that/
12562         y///            string transliterate    y/this/that/
12563         ($*@)           sub prototypes          sub foo ($)
12564         (stuff)         sub attr parameters     sub foo : attr(stuff)
12565         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12566
12567    In most of these cases (all but <>, patterns and transliterate)
12568    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12569    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12570    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12571    calls scan_str().
12572
12573    It skips whitespace before the string starts, and treats the first
12574    character as the delimiter.  If the delimiter is one of ([{< then
12575    the corresponding "close" character )]}> is used as the closing
12576    delimiter.  It allows quoting of delimiters, and if the string has
12577    balanced delimiters ([{<>}]) it allows nesting.
12578
12579    On success, the SV with the resulting string is put into lex_stuff or,
12580    if that is already non-NULL, into lex_repl. The second case occurs only
12581    when parsing the RHS of the special constructs s/// and tr/// (y///).
12582    For convenience, the terminating delimiter character is stuffed into
12583    SvIVX of the SV.
12584 */
12585
12586 STATIC char *
12587 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12588 {
12589     dVAR;
12590     SV *sv;                             /* scalar value: string */
12591     const char *tmps;                   /* temp string, used for delimiter matching */
12592     register char *s = start;           /* current position in the buffer */
12593     register char term;                 /* terminating character */
12594     register char *to;                  /* current position in the sv's data */
12595     I32 brackets = 1;                   /* bracket nesting level */
12596     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12597     I32 termcode;                       /* terminating char. code */
12598     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12599     STRLEN termlen;                     /* length of terminating string */
12600     int last_off = 0;                   /* last position for nesting bracket */
12601 #ifdef PERL_MAD
12602     int stuffstart;
12603     char *tstart;
12604 #endif
12605
12606     PERL_ARGS_ASSERT_SCAN_STR;
12607
12608     /* skip space before the delimiter */
12609     if (isSPACE(*s)) {
12610         s = PEEKSPACE(s);
12611     }
12612
12613 #ifdef PERL_MAD
12614     if (PL_realtokenstart >= 0) {
12615         stuffstart = PL_realtokenstart;
12616         PL_realtokenstart = -1;
12617     }
12618     else
12619         stuffstart = start - SvPVX(PL_linestr);
12620 #endif
12621     /* mark where we are, in case we need to report errors */
12622     CLINE;
12623
12624     /* after skipping whitespace, the next character is the terminator */
12625     term = *s;
12626     if (!UTF) {
12627         termcode = termstr[0] = term;
12628         termlen = 1;
12629     }
12630     else {
12631         termcode = utf8_to_uvchr((U8*)s, &termlen);
12632         Copy(s, termstr, termlen, U8);
12633         if (!UTF8_IS_INVARIANT(term))
12634             has_utf8 = TRUE;
12635     }
12636
12637     /* mark where we are */
12638     PL_multi_start = CopLINE(PL_curcop);
12639     PL_multi_open = term;
12640
12641     /* find corresponding closing delimiter */
12642     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12643         termcode = termstr[0] = term = tmps[5];
12644
12645     PL_multi_close = term;
12646
12647     /* create a new SV to hold the contents.  79 is the SV's initial length.
12648        What a random number. */
12649     sv = newSV_type(SVt_PVIV);
12650     SvGROW(sv, 80);
12651     SvIV_set(sv, termcode);
12652     (void)SvPOK_only(sv);               /* validate pointer */
12653
12654     /* move past delimiter and try to read a complete string */
12655     if (keep_delims)
12656         sv_catpvn(sv, s, termlen);
12657     s += termlen;
12658 #ifdef PERL_MAD
12659     tstart = SvPVX(PL_linestr) + stuffstart;
12660     if (!PL_thisopen && !keep_delims) {
12661         PL_thisopen = newSVpvn(tstart, s - tstart);
12662         stuffstart = s - SvPVX(PL_linestr);
12663     }
12664 #endif
12665     for (;;) {
12666         if (PL_encoding && !UTF) {
12667             bool cont = TRUE;
12668
12669             while (cont) {
12670                 int offset = s - SvPVX_const(PL_linestr);
12671                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12672                                            &offset, (char*)termstr, termlen);
12673                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12674                 char * const svlast = SvEND(sv) - 1;
12675
12676                 for (; s < ns; s++) {
12677                     if (*s == '\n' && !PL_rsfp)
12678                         CopLINE_inc(PL_curcop);
12679                 }
12680                 if (!found)
12681                     goto read_more_line;
12682                 else {
12683                     /* handle quoted delimiters */
12684                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12685                         const char *t;
12686                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12687                             t--;
12688                         if ((svlast-1 - t) % 2) {
12689                             if (!keep_quoted) {
12690                                 *(svlast-1) = term;
12691                                 *svlast = '\0';
12692                                 SvCUR_set(sv, SvCUR(sv) - 1);
12693                             }
12694                             continue;
12695                         }
12696                     }
12697                     if (PL_multi_open == PL_multi_close) {
12698                         cont = FALSE;
12699                     }
12700                     else {
12701                         const char *t;
12702                         char *w;
12703                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12704                             /* At here, all closes are "was quoted" one,
12705                                so we don't check PL_multi_close. */
12706                             if (*t == '\\') {
12707                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12708                                     t++;
12709                                 else
12710                                     *w++ = *t++;
12711                             }
12712                             else if (*t == PL_multi_open)
12713                                 brackets++;
12714
12715                             *w = *t;
12716                         }
12717                         if (w < t) {
12718                             *w++ = term;
12719                             *w = '\0';
12720                             SvCUR_set(sv, w - SvPVX_const(sv));
12721                         }
12722                         last_off = w - SvPVX(sv);
12723                         if (--brackets <= 0)
12724                             cont = FALSE;
12725                     }
12726                 }
12727             }
12728             if (!keep_delims) {
12729                 SvCUR_set(sv, SvCUR(sv) - 1);
12730                 *SvEND(sv) = '\0';
12731             }
12732             break;
12733         }
12734
12735         /* extend sv if need be */
12736         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12737         /* set 'to' to the next character in the sv's string */
12738         to = SvPVX(sv)+SvCUR(sv);
12739
12740         /* if open delimiter is the close delimiter read unbridle */
12741         if (PL_multi_open == PL_multi_close) {
12742             for (; s < PL_bufend; s++,to++) {
12743                 /* embedded newlines increment the current line number */
12744                 if (*s == '\n' && !PL_rsfp)
12745                     CopLINE_inc(PL_curcop);
12746                 /* handle quoted delimiters */
12747                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12748                     if (!keep_quoted && s[1] == term)
12749                         s++;
12750                 /* any other quotes are simply copied straight through */
12751                     else
12752                         *to++ = *s++;
12753                 }
12754                 /* terminate when run out of buffer (the for() condition), or
12755                    have found the terminator */
12756                 else if (*s == term) {
12757                     if (termlen == 1)
12758                         break;
12759                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12760                         break;
12761                 }
12762                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12763                     has_utf8 = TRUE;
12764                 *to = *s;
12765             }
12766         }
12767
12768         /* if the terminator isn't the same as the start character (e.g.,
12769            matched brackets), we have to allow more in the quoting, and
12770            be prepared for nested brackets.
12771         */
12772         else {
12773             /* read until we run out of string, or we find the terminator */
12774             for (; s < PL_bufend; s++,to++) {
12775                 /* embedded newlines increment the line count */
12776                 if (*s == '\n' && !PL_rsfp)
12777                     CopLINE_inc(PL_curcop);
12778                 /* backslashes can escape the open or closing characters */
12779                 if (*s == '\\' && s+1 < PL_bufend) {
12780                     if (!keep_quoted &&
12781                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12782                         s++;
12783                     else
12784                         *to++ = *s++;
12785                 }
12786                 /* allow nested opens and closes */
12787                 else if (*s == PL_multi_close && --brackets <= 0)
12788                     break;
12789                 else if (*s == PL_multi_open)
12790                     brackets++;
12791                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12792                     has_utf8 = TRUE;
12793                 *to = *s;
12794             }
12795         }
12796         /* terminate the copied string and update the sv's end-of-string */
12797         *to = '\0';
12798         SvCUR_set(sv, to - SvPVX_const(sv));
12799
12800         /*
12801          * this next chunk reads more into the buffer if we're not done yet
12802          */
12803
12804         if (s < PL_bufend)
12805             break;              /* handle case where we are done yet :-) */
12806
12807 #ifndef PERL_STRICT_CR
12808         if (to - SvPVX_const(sv) >= 2) {
12809             if ((to[-2] == '\r' && to[-1] == '\n') ||
12810                 (to[-2] == '\n' && to[-1] == '\r'))
12811             {
12812                 to[-2] = '\n';
12813                 to--;
12814                 SvCUR_set(sv, to - SvPVX_const(sv));
12815             }
12816             else if (to[-1] == '\r')
12817                 to[-1] = '\n';
12818         }
12819         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12820             to[-1] = '\n';
12821 #endif
12822
12823      read_more_line:
12824         /* if we're out of file, or a read fails, bail and reset the current
12825            line marker so we can report where the unterminated string began
12826         */
12827 #ifdef PERL_MAD
12828         if (PL_madskills) {
12829             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12830             if (PL_thisstuff)
12831                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12832             else
12833                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12834         }
12835 #endif
12836         CopLINE_inc(PL_curcop);
12837         PL_bufptr = PL_bufend;
12838         if (!lex_next_chunk(0)) {
12839             sv_free(sv);
12840             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12841             return NULL;
12842         }
12843         s = PL_bufptr;
12844 #ifdef PERL_MAD
12845         stuffstart = 0;
12846 #endif
12847     }
12848
12849     /* at this point, we have successfully read the delimited string */
12850
12851     if (!PL_encoding || UTF) {
12852 #ifdef PERL_MAD
12853         if (PL_madskills) {
12854             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12855             const int len = s - tstart;
12856             if (PL_thisstuff)
12857                 sv_catpvn(PL_thisstuff, tstart, len);
12858             else
12859                 PL_thisstuff = newSVpvn(tstart, len);
12860             if (!PL_thisclose && !keep_delims)
12861                 PL_thisclose = newSVpvn(s,termlen);
12862         }
12863 #endif
12864
12865         if (keep_delims)
12866             sv_catpvn(sv, s, termlen);
12867         s += termlen;
12868     }
12869 #ifdef PERL_MAD
12870     else {
12871         if (PL_madskills) {
12872             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12873             const int len = s - tstart - termlen;
12874             if (PL_thisstuff)
12875                 sv_catpvn(PL_thisstuff, tstart, len);
12876             else
12877                 PL_thisstuff = newSVpvn(tstart, len);
12878             if (!PL_thisclose && !keep_delims)
12879                 PL_thisclose = newSVpvn(s - termlen,termlen);
12880         }
12881     }
12882 #endif
12883     if (has_utf8 || PL_encoding)
12884         SvUTF8_on(sv);
12885
12886     PL_multi_end = CopLINE(PL_curcop);
12887
12888     /* if we allocated too much space, give some back */
12889     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12890         SvLEN_set(sv, SvCUR(sv) + 1);
12891         SvPV_renew(sv, SvLEN(sv));
12892     }
12893
12894     /* decide whether this is the first or second quoted string we've read
12895        for this op
12896     */
12897
12898     if (PL_lex_stuff)
12899         PL_lex_repl = sv;
12900     else
12901         PL_lex_stuff = sv;
12902     return s;
12903 }
12904
12905 /*
12906   scan_num
12907   takes: pointer to position in buffer
12908   returns: pointer to new position in buffer
12909   side-effects: builds ops for the constant in pl_yylval.op
12910
12911   Read a number in any of the formats that Perl accepts:
12912
12913   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12914   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12915   0b[01](_?[01])*
12916   0[0-7](_?[0-7])*
12917   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12918
12919   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12920   thing it reads.
12921
12922   If it reads a number without a decimal point or an exponent, it will
12923   try converting the number to an integer and see if it can do so
12924   without loss of precision.
12925 */
12926
12927 char *
12928 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12929 {
12930     dVAR;
12931     register const char *s = start;     /* current position in buffer */
12932     register char *d;                   /* destination in temp buffer */
12933     register char *e;                   /* end of temp buffer */
12934     NV nv;                              /* number read, as a double */
12935     SV *sv = NULL;                      /* place to put the converted number */
12936     bool floatit;                       /* boolean: int or float? */
12937     const char *lastub = NULL;          /* position of last underbar */
12938     static char const number_too_long[] = "Number too long";
12939
12940     PERL_ARGS_ASSERT_SCAN_NUM;
12941
12942     /* We use the first character to decide what type of number this is */
12943
12944     switch (*s) {
12945     default:
12946       Perl_croak(aTHX_ "panic: scan_num");
12947
12948     /* if it starts with a 0, it could be an octal number, a decimal in
12949        0.13 disguise, or a hexadecimal number, or a binary number. */
12950     case '0':
12951         {
12952           /* variables:
12953              u          holds the "number so far"
12954              shift      the power of 2 of the base
12955                         (hex == 4, octal == 3, binary == 1)
12956              overflowed was the number more than we can hold?
12957
12958              Shift is used when we add a digit.  It also serves as an "are
12959              we in octal/hex/binary?" indicator to disallow hex characters
12960              when in octal mode.
12961            */
12962             NV n = 0.0;
12963             UV u = 0;
12964             I32 shift;
12965             bool overflowed = FALSE;
12966             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12967             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12968             static const char* const bases[5] =
12969               { "", "binary", "", "octal", "hexadecimal" };
12970             static const char* const Bases[5] =
12971               { "", "Binary", "", "Octal", "Hexadecimal" };
12972             static const char* const maxima[5] =
12973               { "",
12974                 "0b11111111111111111111111111111111",
12975                 "",
12976                 "037777777777",
12977                 "0xffffffff" };
12978             const char *base, *Base, *max;
12979
12980             /* check for hex */
12981             if (s[1] == 'x') {
12982                 shift = 4;
12983                 s += 2;
12984                 just_zero = FALSE;
12985             } else if (s[1] == 'b') {
12986                 shift = 1;
12987                 s += 2;
12988                 just_zero = FALSE;
12989             }
12990             /* check for a decimal in disguise */
12991             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12992                 goto decimal;
12993             /* so it must be octal */
12994             else {
12995                 shift = 3;
12996                 s++;
12997             }
12998
12999             if (*s == '_') {
13000                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13001                                "Misplaced _ in number");
13002                lastub = s++;
13003             }
13004
13005             base = bases[shift];
13006             Base = Bases[shift];
13007             max  = maxima[shift];
13008
13009             /* read the rest of the number */
13010             for (;;) {
13011                 /* x is used in the overflow test,
13012                    b is the digit we're adding on. */
13013                 UV x, b;
13014
13015                 switch (*s) {
13016
13017                 case '.':
13018                     /* Dot here is historically concat, not a radix point.
13019                        Deprecate that; it's confusing, and gets in the way of
13020                        hex(ish) fractions... but '..' is OK. */
13021                     if (s[1] != '.') {
13022                         Perl_ck_warner_d(aTHX_
13023                             packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
13024                             "Dot after %s literal is deprecated concatenation",
13025                             base);
13026                     }
13027                     /* FALL THROUGH */
13028
13029                 /* if we don't mention it, we're done */
13030                 default:
13031                     goto out;
13032
13033                 /* _ are ignored -- but warned about if consecutive */
13034                 case '_':
13035                     if (lastub && s == lastub + 1)
13036                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13037                                        "Misplaced _ in number");
13038                     lastub = s++;
13039                     break;
13040
13041                 /* 8 and 9 are not octal */
13042                 case '8': case '9':
13043                     if (shift == 3)
13044                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13045                     /* FALL THROUGH */
13046
13047                 /* octal digits */
13048                 case '2': case '3': case '4':
13049                 case '5': case '6': case '7':
13050                     if (shift == 1)
13051                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13052                     /* FALL THROUGH */
13053
13054                 case '0': case '1':
13055                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13056                     goto digit;
13057
13058                 /* hex digits */
13059                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13060                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13061                     /* make sure they said 0x */
13062                     if (shift != 4)
13063                         goto out;
13064                     b = (*s++ & 7) + 9;
13065
13066                     /* Prepare to put the digit we have onto the end
13067                        of the number so far.  We check for overflows.
13068                     */
13069
13070                   digit:
13071                     just_zero = FALSE;
13072                     if (!overflowed) {
13073                         x = u << shift; /* make room for the digit */
13074
13075                         if ((x >> shift) != u
13076                             && !(PL_hints & HINT_NEW_BINARY)) {
13077                             overflowed = TRUE;
13078                             n = (NV) u;
13079                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13080                                              "Integer overflow in %s number",
13081                                              base);
13082                         } else
13083                             u = x | b;          /* add the digit to the end */
13084                     }
13085                     if (overflowed) {
13086                         n *= nvshift[shift];
13087                         /* If an NV has not enough bits in its
13088                          * mantissa to represent an UV this summing of
13089                          * small low-order numbers is a waste of time
13090                          * (because the NV cannot preserve the
13091                          * low-order bits anyway): we could just
13092                          * remember when did we overflow and in the
13093                          * end just multiply n by the right
13094                          * amount. */
13095                         n += (NV) b;
13096                     }
13097                     break;
13098                 }
13099             }
13100
13101           /* if we get here, we had success: make a scalar value from
13102              the number.
13103           */
13104           out:
13105
13106             /* final misplaced underbar check */
13107             if (s[-1] == '_') {
13108                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13109             }
13110
13111             sv = newSV(0);
13112             if (overflowed) {
13113                 if (n > 4294967295.0)
13114                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13115                                    "%s number > %s non-portable",
13116                                    Base, max);
13117                 sv_setnv(sv, n);
13118             }
13119             else {
13120 #if UVSIZE > 4
13121                 if (u > 0xffffffff)
13122                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13123                                    "%s number > %s non-portable",
13124                                    Base, max);
13125 #endif
13126                 sv_setuv(sv, u);
13127             }
13128             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13129                 sv = new_constant(start, s - start, "integer",
13130                                   sv, NULL, NULL, 0);
13131             else if (PL_hints & HINT_NEW_BINARY)
13132                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13133         }
13134         break;
13135
13136     /*
13137       handle decimal numbers.
13138       we're also sent here when we read a 0 as the first digit
13139     */
13140     case '1': case '2': case '3': case '4': case '5':
13141     case '6': case '7': case '8': case '9': case '.':
13142       decimal:
13143         d = PL_tokenbuf;
13144         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13145         floatit = FALSE;
13146
13147         /* read next group of digits and _ and copy into d */
13148         while (isDIGIT(*s) || *s == '_') {
13149             /* skip underscores, checking for misplaced ones
13150                if -w is on
13151             */
13152             if (*s == '_') {
13153                 if (lastub && s == lastub + 1)
13154                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13155                                    "Misplaced _ in number");
13156                 lastub = s++;
13157             }
13158             else {
13159                 /* check for end of fixed-length buffer */
13160                 if (d >= e)
13161                     Perl_croak(aTHX_ number_too_long);
13162                 /* if we're ok, copy the character */
13163                 *d++ = *s++;
13164             }
13165         }
13166
13167         /* final misplaced underbar check */
13168         if (lastub && s == lastub + 1) {
13169             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13170         }
13171
13172         /* read a decimal portion if there is one.  avoid
13173            3..5 being interpreted as the number 3. followed
13174            by .5
13175         */
13176         if (*s == '.' && s[1] != '.') {
13177             floatit = TRUE;
13178             *d++ = *s++;
13179
13180             if (*s == '_') {
13181                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13182                                "Misplaced _ in number");
13183                 lastub = s;
13184             }
13185
13186             /* copy, ignoring underbars, until we run out of digits.
13187             */
13188             for (; isDIGIT(*s) || *s == '_'; s++) {
13189                 /* fixed length buffer check */
13190                 if (d >= e)
13191                     Perl_croak(aTHX_ number_too_long);
13192                 if (*s == '_') {
13193                    if (lastub && s == lastub + 1)
13194                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13195                                       "Misplaced _ in number");
13196                    lastub = s;
13197                 }
13198                 else
13199                     *d++ = *s;
13200             }
13201             /* fractional part ending in underbar? */
13202             if (s[-1] == '_') {
13203                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13204                                "Misplaced _ in number");
13205             }
13206             if (*s == '.' && isDIGIT(s[1])) {
13207                 /* oops, it's really a v-string, but without the "v" */
13208                 s = start;
13209                 goto vstring;
13210             }
13211         }
13212
13213         /* read exponent part, if present */
13214         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13215             floatit = TRUE;
13216             s++;
13217
13218             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13219             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13220
13221             /* stray preinitial _ */
13222             if (*s == '_') {
13223                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13224                                "Misplaced _ in number");
13225                 lastub = s++;
13226             }
13227
13228             /* allow positive or negative exponent */
13229             if (*s == '+' || *s == '-')
13230                 *d++ = *s++;
13231
13232             /* stray initial _ */
13233             if (*s == '_') {
13234                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13235                                "Misplaced _ in number");
13236                 lastub = s++;
13237             }
13238
13239             /* read digits of exponent */
13240             while (isDIGIT(*s) || *s == '_') {
13241                 if (isDIGIT(*s)) {
13242                     if (d >= e)
13243                         Perl_croak(aTHX_ number_too_long);
13244                     *d++ = *s++;
13245                 }
13246                 else {
13247                    if (((lastub && s == lastub + 1) ||
13248                         (!isDIGIT(s[1]) && s[1] != '_')))
13249                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13250                                       "Misplaced _ in number");
13251                    lastub = s++;
13252                 }
13253             }
13254         }
13255
13256
13257         /* make an sv from the string */
13258         sv = newSV(0);
13259
13260         /*
13261            We try to do an integer conversion first if no characters
13262            indicating "float" have been found.
13263          */
13264
13265         if (!floatit) {
13266             UV uv;
13267             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13268
13269             if (flags == IS_NUMBER_IN_UV) {
13270               if (uv <= IV_MAX)
13271                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
13272               else
13273                 sv_setuv(sv, uv);
13274             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13275               if (uv <= (UV) IV_MIN)
13276                 sv_setiv(sv, -(IV)uv);
13277               else
13278                 floatit = TRUE;
13279             } else
13280               floatit = TRUE;
13281         }
13282         if (floatit) {
13283             /* terminate the string */
13284             *d = '\0';
13285             nv = Atof(PL_tokenbuf);
13286             sv_setnv(sv, nv);
13287         }
13288
13289         if ( floatit
13290              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13291             const char *const key = floatit ? "float" : "integer";
13292             const STRLEN keylen = floatit ? 5 : 7;
13293             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13294                                 key, keylen, sv, NULL, NULL, 0);
13295         }
13296         break;
13297
13298     /* if it starts with a v, it could be a v-string */
13299     case 'v':
13300 vstring:
13301                 sv = newSV(5); /* preallocate storage space */
13302                 s = scan_vstring(s, PL_bufend, sv);
13303         break;
13304     }
13305
13306     /* make the op for the constant and return */
13307
13308     if (sv)
13309         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13310     else
13311         lvalp->opval = NULL;
13312
13313     return (char *)s;
13314 }
13315
13316 STATIC char *
13317 S_scan_formline(pTHX_ register char *s)
13318 {
13319     dVAR;
13320     register char *eol;
13321     register char *t;
13322     SV * const stuff = newSVpvs("");
13323     bool needargs = FALSE;
13324     bool eofmt = FALSE;
13325 #ifdef PERL_MAD
13326     char *tokenstart = s;
13327     SV* savewhite = NULL;
13328
13329     if (PL_madskills) {
13330         savewhite = PL_thiswhite;
13331         PL_thiswhite = 0;
13332     }
13333 #endif
13334
13335     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13336
13337     while (!needargs) {
13338         if (*s == '.') {
13339             t = s+1;
13340 #ifdef PERL_STRICT_CR
13341             while (SPACE_OR_TAB(*t))
13342                 t++;
13343 #else
13344             while (SPACE_OR_TAB(*t) || *t == '\r')
13345                 t++;
13346 #endif
13347             if (*t == '\n' || t == PL_bufend) {
13348                 eofmt = TRUE;
13349                 break;
13350             }
13351         }
13352         if (PL_in_eval && !PL_rsfp) {
13353             eol = (char *) memchr(s,'\n',PL_bufend-s);
13354             if (!eol++)
13355                 eol = PL_bufend;
13356         }
13357         else
13358             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13359         if (*s != '#') {
13360             for (t = s; t < eol; t++) {
13361                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13362                     needargs = FALSE;
13363                     goto enough;        /* ~~ must be first line in formline */
13364                 }
13365                 if (*t == '@' || *t == '^')
13366                     needargs = TRUE;
13367             }
13368             if (eol > s) {
13369                 sv_catpvn(stuff, s, eol-s);
13370 #ifndef PERL_STRICT_CR
13371                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13372                     char *end = SvPVX(stuff) + SvCUR(stuff);
13373                     end[-2] = '\n';
13374                     end[-1] = '\0';
13375                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13376                 }
13377 #endif
13378             }
13379             else
13380               break;
13381         }
13382         s = (char*)eol;
13383         if (PL_rsfp) {
13384             bool got_some;
13385 #ifdef PERL_MAD
13386             if (PL_madskills) {
13387                 if (PL_thistoken)
13388                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13389                 else
13390                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13391             }
13392 #endif
13393             PL_bufptr = PL_bufend;
13394             CopLINE_inc(PL_curcop);
13395             got_some = lex_next_chunk(0);
13396             CopLINE_dec(PL_curcop);
13397             s = PL_bufptr;
13398 #ifdef PERL_MAD
13399             tokenstart = PL_bufptr;
13400 #endif
13401             if (!got_some)
13402                 break;
13403         }
13404         incline(s);
13405     }
13406   enough:
13407     if (SvCUR(stuff)) {
13408         PL_expect = XTERM;
13409         if (needargs) {
13410             PL_lex_state = LEX_NORMAL;
13411             start_force(PL_curforce);
13412             NEXTVAL_NEXTTOKE.ival = 0;
13413             force_next(',');
13414         }
13415         else
13416             PL_lex_state = LEX_FORMLINE;
13417         if (!IN_BYTES) {
13418             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13419                 SvUTF8_on(stuff);
13420             else if (PL_encoding)
13421                 sv_recode_to_utf8(stuff, PL_encoding);
13422         }
13423         start_force(PL_curforce);
13424         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13425         force_next(THING);
13426         start_force(PL_curforce);
13427         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13428         force_next(LSTOP);
13429     }
13430     else {
13431         SvREFCNT_dec(stuff);
13432         if (eofmt)
13433             PL_lex_formbrack = 0;
13434         PL_bufptr = s;
13435     }
13436 #ifdef PERL_MAD
13437     if (PL_madskills) {
13438         if (PL_thistoken)
13439             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13440         else
13441             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13442         PL_thiswhite = savewhite;
13443     }
13444 #endif
13445     return s;
13446 }
13447
13448 I32
13449 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13450 {
13451     dVAR;
13452     const I32 oldsavestack_ix = PL_savestack_ix;
13453     CV* const outsidecv = PL_compcv;
13454
13455     if (PL_compcv) {
13456         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13457     }
13458     SAVEI32(PL_subline);
13459     save_item(PL_subname);
13460     SAVESPTR(PL_compcv);
13461
13462     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13463     CvFLAGS(PL_compcv) |= flags;
13464
13465     PL_subline = CopLINE(PL_curcop);
13466     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13467     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13468     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13469
13470     return oldsavestack_ix;
13471 }
13472
13473 #ifdef __SC__
13474 #pragma segment Perl_yylex
13475 #endif
13476 static int
13477 S_yywarn(pTHX_ const char *const s)
13478 {
13479     dVAR;
13480
13481     PERL_ARGS_ASSERT_YYWARN;
13482
13483     PL_in_eval |= EVAL_WARNONLY;
13484     yyerror(s);
13485     PL_in_eval &= ~EVAL_WARNONLY;
13486     return 0;
13487 }
13488
13489 int
13490 Perl_yyerror(pTHX_ const char *const s)
13491 {
13492     dVAR;
13493     const char *where = NULL;
13494     const char *context = NULL;
13495     int contlen = -1;
13496     SV *msg;
13497     int yychar  = PL_parser->yychar;
13498
13499     PERL_ARGS_ASSERT_YYERROR;
13500
13501     if (!yychar || (yychar == ';' && !PL_rsfp))
13502         where = "at EOF";
13503     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13504       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13505       PL_oldbufptr != PL_bufptr) {
13506         /*
13507                 Only for NetWare:
13508                 The code below is removed for NetWare because it abends/crashes on NetWare
13509                 when the script has error such as not having the closing quotes like:
13510                     if ($var eq "value)
13511                 Checking of white spaces is anyway done in NetWare code.
13512         */
13513 #ifndef NETWARE
13514         while (isSPACE(*PL_oldoldbufptr))
13515             PL_oldoldbufptr++;
13516 #endif
13517         context = PL_oldoldbufptr;
13518         contlen = PL_bufptr - PL_oldoldbufptr;
13519     }
13520     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13521       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13522         /*
13523                 Only for NetWare:
13524                 The code below is removed for NetWare because it abends/crashes on NetWare
13525                 when the script has error such as not having the closing quotes like:
13526                     if ($var eq "value)
13527                 Checking of white spaces is anyway done in NetWare code.
13528         */
13529 #ifndef NETWARE
13530         while (isSPACE(*PL_oldbufptr))
13531             PL_oldbufptr++;
13532 #endif
13533         context = PL_oldbufptr;
13534         contlen = PL_bufptr - PL_oldbufptr;
13535     }
13536     else if (yychar > 255)
13537         where = "next token ???";
13538     else if (yychar == -2) { /* YYEMPTY */
13539         if (PL_lex_state == LEX_NORMAL ||
13540            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13541             where = "at end of line";
13542         else if (PL_lex_inpat)
13543             where = "within pattern";
13544         else
13545             where = "within string";
13546     }
13547     else {
13548         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13549         if (yychar < 32)
13550             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13551         else if (isPRINT_LC(yychar)) {
13552             const char string = yychar;
13553             sv_catpvn(where_sv, &string, 1);
13554         }
13555         else
13556             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13557         where = SvPVX_const(where_sv);
13558     }
13559     msg = sv_2mortal(newSVpv(s, 0));
13560     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13561         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13562     if (context)
13563         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13564     else
13565         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13566     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13567         Perl_sv_catpvf(aTHX_ msg,
13568         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13569                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13570         PL_multi_end = 0;
13571     }
13572     if (PL_in_eval & EVAL_WARNONLY) {
13573         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13574     }
13575     else
13576         qerror(msg);
13577     if (PL_error_count >= 10) {
13578         if (PL_in_eval && SvCUR(ERRSV))
13579             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13580                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13581         else
13582             Perl_croak(aTHX_ "%s has too many errors.\n",
13583             OutCopFILE(PL_curcop));
13584     }
13585     PL_in_my = 0;
13586     PL_in_my_stash = NULL;
13587     return 0;
13588 }
13589 #ifdef __SC__
13590 #pragma segment Main
13591 #endif
13592
13593 STATIC char*
13594 S_swallow_bom(pTHX_ U8 *s)
13595 {
13596     dVAR;
13597     const STRLEN slen = SvCUR(PL_linestr);
13598
13599     PERL_ARGS_ASSERT_SWALLOW_BOM;
13600
13601     switch (s[0]) {
13602     case 0xFF:
13603         if (s[1] == 0xFE) {
13604             /* UTF-16 little-endian? (or UTF-32LE?) */
13605             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13606                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13607 #ifndef PERL_NO_UTF16_FILTER
13608             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13609             s += 2;
13610             if (PL_bufend > (char*)s) {
13611                 s = add_utf16_textfilter(s, TRUE);
13612             }
13613 #else
13614             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13615 #endif
13616         }
13617         break;
13618     case 0xFE:
13619         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13620 #ifndef PERL_NO_UTF16_FILTER
13621             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13622             s += 2;
13623             if (PL_bufend > (char *)s) {
13624                 s = add_utf16_textfilter(s, FALSE);
13625             }
13626 #else
13627             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13628 #endif
13629         }
13630         break;
13631     case 0xEF:
13632         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13633             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13634             s += 3;                      /* UTF-8 */
13635         }
13636         break;
13637     case 0:
13638         if (slen > 3) {
13639              if (s[1] == 0) {
13640                   if (s[2] == 0xFE && s[3] == 0xFF) {
13641                        /* UTF-32 big-endian */
13642                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13643                   }
13644              }
13645              else if (s[2] == 0 && s[3] != 0) {
13646                   /* Leading bytes
13647                    * 00 xx 00 xx
13648                    * are a good indicator of UTF-16BE. */
13649 #ifndef PERL_NO_UTF16_FILTER
13650                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13651                   s = add_utf16_textfilter(s, FALSE);
13652 #else
13653                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13654 #endif
13655              }
13656         }
13657 #ifdef EBCDIC
13658     case 0xDD:
13659         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13660             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13661             s += 4;                      /* UTF-8 */
13662         }
13663         break;
13664 #endif
13665
13666     default:
13667          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13668                   /* Leading bytes
13669                    * xx 00 xx 00
13670                    * are a good indicator of UTF-16LE. */
13671 #ifndef PERL_NO_UTF16_FILTER
13672               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13673               s = add_utf16_textfilter(s, TRUE);
13674 #else
13675               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13676 #endif
13677          }
13678     }
13679     return (char*)s;
13680 }
13681
13682
13683 #ifndef PERL_NO_UTF16_FILTER
13684 static I32
13685 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13686 {
13687     dVAR;
13688     SV *const filter = FILTER_DATA(idx);
13689     /* We re-use this each time round, throwing the contents away before we
13690        return.  */
13691     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13692     SV *const utf8_buffer = filter;
13693     IV status = IoPAGE(filter);
13694     const bool reverse = cBOOL(IoLINES(filter));
13695     I32 retval;
13696
13697     /* As we're automatically added, at the lowest level, and hence only called
13698        from this file, we can be sure that we're not called in block mode. Hence
13699        don't bother writing code to deal with block mode.  */
13700     if (maxlen) {
13701         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13702     }
13703     if (status < 0) {
13704         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13705     }
13706     DEBUG_P(PerlIO_printf(Perl_debug_log,
13707                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13708                           FPTR2DPTR(void *, S_utf16_textfilter),
13709                           reverse ? 'l' : 'b', idx, maxlen, status,
13710                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13711
13712     while (1) {
13713         STRLEN chars;
13714         STRLEN have;
13715         I32 newlen;
13716         U8 *end;
13717         /* First, look in our buffer of existing UTF-8 data:  */
13718         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13719
13720         if (nl) {
13721             ++nl;
13722         } else if (status == 0) {
13723             /* EOF */
13724             IoPAGE(filter) = 0;
13725             nl = SvEND(utf8_buffer);
13726         }
13727         if (nl) {
13728             STRLEN got = nl - SvPVX(utf8_buffer);
13729             /* Did we have anything to append?  */
13730             retval = got != 0;
13731             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13732             /* Everything else in this code works just fine if SVp_POK isn't
13733                set.  This, however, needs it, and we need it to work, else
13734                we loop infinitely because the buffer is never consumed.  */
13735             sv_chop(utf8_buffer, nl);
13736             break;
13737         }
13738
13739         /* OK, not a complete line there, so need to read some more UTF-16.
13740            Read an extra octect if the buffer currently has an odd number. */
13741         while (1) {
13742             if (status <= 0)
13743                 break;
13744             if (SvCUR(utf16_buffer) >= 2) {
13745                 /* Location of the high octet of the last complete code point.
13746                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13747                    *coupled* with all the benefits of partial reads and
13748                    endianness.  */
13749                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13750                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13751
13752                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13753                     break;
13754                 }
13755
13756                 /* We have the first half of a surrogate. Read more.  */
13757                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13758             }
13759
13760             status = FILTER_READ(idx + 1, utf16_buffer,
13761                                  160 + (SvCUR(utf16_buffer) & 1));
13762             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13763             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13764             if (status < 0) {
13765                 /* Error */
13766                 IoPAGE(filter) = status;
13767                 return status;
13768             }
13769         }
13770
13771         chars = SvCUR(utf16_buffer) >> 1;
13772         have = SvCUR(utf8_buffer);
13773         SvGROW(utf8_buffer, have + chars * 3 + 1);
13774
13775         if (reverse) {
13776             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13777                                          (U8*)SvPVX_const(utf8_buffer) + have,
13778                                          chars * 2, &newlen);
13779         } else {
13780             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13781                                 (U8*)SvPVX_const(utf8_buffer) + have,
13782                                 chars * 2, &newlen);
13783         }
13784         SvCUR_set(utf8_buffer, have + newlen);
13785         *end = '\0';
13786
13787         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13788            it's private to us, and utf16_to_utf8{,reversed} take a
13789            (pointer,length) pair, rather than a NUL-terminated string.  */
13790         if(SvCUR(utf16_buffer) & 1) {
13791             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13792             SvCUR_set(utf16_buffer, 1);
13793         } else {
13794             SvCUR_set(utf16_buffer, 0);
13795         }
13796     }
13797     DEBUG_P(PerlIO_printf(Perl_debug_log,
13798                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13799                           status,
13800                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13801     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13802     return retval;
13803 }
13804
13805 static U8 *
13806 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13807 {
13808     SV *filter = filter_add(S_utf16_textfilter, NULL);
13809
13810     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13811     sv_setpvs(filter, "");
13812     IoLINES(filter) = reversed;
13813     IoPAGE(filter) = 1; /* Not EOF */
13814
13815     /* Sadly, we have to return a valid pointer, come what may, so we have to
13816        ignore any error return from this.  */
13817     SvCUR_set(PL_linestr, 0);
13818     if (FILTER_READ(0, PL_linestr, 0)) {
13819         SvUTF8_on(PL_linestr);
13820     } else {
13821         SvUTF8_on(PL_linestr);
13822     }
13823     PL_bufend = SvEND(PL_linestr);
13824     return (U8*)SvPVX(PL_linestr);
13825 }
13826 #endif
13827
13828 /*
13829 Returns a pointer to the next character after the parsed
13830 vstring, as well as updating the passed in sv.
13831
13832 Function must be called like
13833
13834         sv = newSV(5);
13835         s = scan_vstring(s,e,sv);
13836
13837 where s and e are the start and end of the string.
13838 The sv should already be large enough to store the vstring
13839 passed in, for performance reasons.
13840
13841 */
13842
13843 char *
13844 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13845 {
13846     dVAR;
13847     const char *pos = s;
13848     const char *start = s;
13849
13850     PERL_ARGS_ASSERT_SCAN_VSTRING;
13851
13852     if (*pos == 'v') pos++;  /* get past 'v' */
13853     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13854         pos++;
13855     if ( *pos != '.') {
13856         /* this may not be a v-string if followed by => */
13857         const char *next = pos;
13858         while (next < e && isSPACE(*next))
13859             ++next;
13860         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13861             /* return string not v-string */
13862             sv_setpvn(sv,(char *)s,pos-s);
13863             return (char *)pos;
13864         }
13865     }
13866
13867     if (!isALPHA(*pos)) {
13868         U8 tmpbuf[UTF8_MAXBYTES+1];
13869
13870         if (*s == 'v')
13871             s++;  /* get past 'v' */
13872
13873         sv_setpvs(sv, "");
13874
13875         for (;;) {
13876             /* this is atoi() that tolerates underscores */
13877             U8 *tmpend;
13878             UV rev = 0;
13879             const char *end = pos;
13880             UV mult = 1;
13881             while (--end >= s) {
13882                 if (*end != '_') {
13883                     const UV orev = rev;
13884                     rev += (*end - '0') * mult;
13885                     mult *= 10;
13886                     if (orev > rev)
13887                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13888                                          "Integer overflow in decimal number");
13889                 }
13890             }
13891 #ifdef EBCDIC
13892             if (rev > 0x7FFFFFFF)
13893                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13894 #endif
13895             /* Append native character for the rev point */
13896             tmpend = uvchr_to_utf8(tmpbuf, rev);
13897             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13898             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13899                  SvUTF8_on(sv);
13900             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13901                  s = ++pos;
13902             else {
13903                  s = pos;
13904                  break;
13905             }
13906             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13907                  pos++;
13908         }
13909         SvPOK_on(sv);
13910         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13911         SvRMAGICAL_on(sv);
13912     }
13913     return (char *)s;
13914 }
13915
13916 int
13917 Perl_keyword_plugin_standard(pTHX_
13918         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13919 {
13920     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13921     PERL_UNUSED_CONTEXT;
13922     PERL_UNUSED_ARG(keyword_ptr);
13923     PERL_UNUSED_ARG(keyword_len);
13924     PERL_UNUSED_ARG(op_ptr);
13925     return KEYWORD_PLUGIN_DECLINE;
13926 }
13927
13928 /*
13929  * Local variables:
13930  * c-indentation-style: bsd
13931  * c-basic-offset: 4
13932  * indent-tabs-mode: t
13933  * End:
13934  *
13935  * ex: set ts=8 sts=4 sw=4 noet:
13936  */