toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26
  27 This is the lower layer of the Perl parser, managing characters and tokens.
  28
  29 =for apidoc AmU|yy_parser *|PL_parser
  30
  31 Pointer to a structure encapsulating the state of the parsing operation
  32 currently in progress.  The pointer can be locally changed to perform
  33 a nested parse without interfering with the state of an outer parse.
  34 Individual members of C<PL_parser> have their own documentation.
  35
  36 =cut
  37 */
  38
  39 #include "EXTERN.h"
  40 #define PERL_IN_TOKE_C
  41 #include "perl.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g)     \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
     /* new_constant() forwards to S_new_constant(), supplying the interpreter
      * context (aTHX_) and passing argument c through STR_WITH_LEN(). */
  45
     /* Shorthand for the yylval member of the current parser. */
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* YYINITDEPTH -- initial size of the parser's stacks.  */
     /* Perl_lex_start() allocates this many yy_stack_frame entries. */
  49 #define YYINITDEPTH 200
  50
  51 /* XXX temporary backwards compatibility */
     /* Each PL_* name below is an alias for the like-named member of the
      * current parser object, *PL_parser. */
  52 #define PL_lex_brackets         (PL_parser->lex_brackets)
  53 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  54 #define PL_lex_casemods         (PL_parser->lex_casemods)
  55 #define PL_lex_casestack        (PL_parser->lex_casestack)
  56 #define PL_lex_defer            (PL_parser->lex_defer)
  57 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  58 #define PL_lex_expect           (PL_parser->lex_expect)
  59 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  60 #define PL_lex_inpat            (PL_parser->lex_inpat)
  61 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  62 #define PL_lex_op               (PL_parser->lex_op)
  63 #define PL_lex_repl             (PL_parser->lex_repl)
  64 #define PL_lex_starts           (PL_parser->lex_starts)
  65 #define PL_lex_stuff            (PL_parser->lex_stuff)
  66 #define PL_multi_start          (PL_parser->multi_start)
  67 #define PL_multi_open           (PL_parser->multi_open)
  68 #define PL_multi_close          (PL_parser->multi_close)
  69 #define PL_pending_ident        (PL_parser->pending_ident)
  70 #define PL_preambled            (PL_parser->preambled)
  71 #define PL_sublex_info          (PL_parser->sublex_info)
  72 #define PL_linestr              (PL_parser->linestr)
  73 #define PL_expect               (PL_parser->expect)
  74 #define PL_copline              (PL_parser->copline)
  75 #define PL_bufptr               (PL_parser->bufptr)
  76 #define PL_oldbufptr            (PL_parser->oldbufptr)
  77 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  78 #define PL_linestart            (PL_parser->linestart)
  79 #define PL_bufend               (PL_parser->bufend)
  80 #define PL_last_uni             (PL_parser->last_uni)
  81 #define PL_last_lop             (PL_parser->last_lop)
  82 #define PL_last_lop_op          (PL_parser->last_lop_op)
  83 #define PL_lex_state            (PL_parser->lex_state)
  84 #define PL_rsfp                 (PL_parser->rsfp)
  85 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  86 #define PL_in_my                (PL_parser->in_my)
  87 #define PL_in_my_stash          (PL_parser->in_my_stash)
  88 #define PL_tokenbuf             (PL_parser->tokenbuf)
  89 #define PL_multi_end            (PL_parser->multi_end)
  90 #define PL_error_count          (PL_parser->error_count)
  91
  92 #ifdef PERL_MAD
     /* Aliases for parser members used only in PERL_MAD builds.  Here
      * PL_nexttoke is indexed as an array (see NEXTVAL_NEXTTOKE). */
  93 #  define PL_endwhite           (PL_parser->endwhite)
  94 #  define PL_faketokens         (PL_parser->faketokens)
  95 #  define PL_lasttoke           (PL_parser->lasttoke)
  96 #  define PL_nextwhite          (PL_parser->nextwhite)
  97 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  98 #  define PL_skipwhite          (PL_parser->skipwhite)
  99 #  define PL_thisclose          (PL_parser->thisclose)
 100 #  define PL_thismad            (PL_parser->thismad)
 101 #  define PL_thisopen           (PL_parser->thisopen)
 102 #  define PL_thisstuff          (PL_parser->thisstuff)
 103 #  define PL_thistoken          (PL_parser->thistoken)
 104 #  define PL_thiswhite          (PL_parser->thiswhite)
 106 #  define PL_nexttoke           (PL_parser->nexttoke)
 107 #  define PL_curforce           (PL_parser->curforce)
 108 #else
     /* Without PERL_MAD, PL_nexttoke is used as an index into PL_nextval
      * (see NEXTVAL_NEXTTOKE). */
 109 #  define PL_nexttoke           (PL_parser->nexttoke)
 110 #  define PL_nexttype           (PL_parser->nexttype)
 111 #  define PL_nextval            (PL_parser->nextval)
 112 #endif
 113
 114 /* This can't be done with embed.fnc, because struct yy_parser contains a
 115    member named pending_ident, which clashes with the generated #define  */
 116 static int
 117 S_pending_ident(pTHX);
 118
     /* Shared message text.  NOTE(review): presumably used by the
      * identifier-scanning code elsewhere in this file -- confirm. */
 119 static const char ident_too_long[] = "Identifier too long";
 120
 121 #ifdef PERL_MAD
     /* CURMAD(slot,sv): when PL_madskills is set, pass sv to curmad() under
      * the given slot and then zero the caller's sv variable.  Expands to
      * nothing without PERL_MAD.
      * NOTE(review): the MAD form is a bare if-block, not do { } while (0);
      * check any use directly inside an if/else.
      *
      * NEXTVAL_NEXTTOKE: the value slot of the pending-token entry at the
      * current index (PL_curforce under PERL_MAD, PL_nexttoke otherwise). */
 122 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 123 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 124 #else
 125 #  define CURMAD(slot,sv)
 126 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 127 #endif
 128
     /* XFAKEBRACK is the single bit 0x80; XENUMMASK (0x7f) selects the bits
      * below it.  NOTE(review): presumably a flag stored alongside an
      * expectation value in the bracket stack -- confirm against the users. */
 129 #define XFAKEBRACK 128
 130 #define XENUMMASK 127
 131
 132 #ifdef USE_UTF8_SCRIPTS
     /* UTF: with USE_UTF8_SCRIPTS it is simply !IN_BYTES; otherwise it is
      * true when PL_linestr exists and DO_UTF8() holds for it, or when the
      * HINT_UTF8 bit is set in PL_hints. */
 133 #   define UTF (!IN_BYTES)
 134 #else
 135 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 136 #endif
 137
 138 /* The maximum number of characters preceding the unrecognized one to display */
 139 #define UNRECOGNIZED_PRECEDE_COUNT 10
 140
 141 /* In variables named $^X, these are the legal values for X.
 142  * 1999-02-27 mjd-perl-patch@plover.com */
     /* NOTE(review): strchr() also matches the terminating NUL of its first
      * argument, so isCONTROLVAR('\0') is true -- confirm callers never
      * pass a NUL here. */
 143 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 144
     /* True for a space or a horizontal tab only; c is evaluated twice. */
 145 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 146
 147 /* LEX_* are values for PL_lex_state, the state of the lexer.
 148  * They are arranged oddly so that the guard on the switch statement
 149  * can get by with a single comparison (if the compiler is smart enough).
 150  */
 151
 152 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 153
 154 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 155 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 156 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 157 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 158 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 159
 160                                    /* at end of code, eg "$x" followed by:  */
 161 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 162 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 163
 164 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 165                                         string or after \E, $foo, etc       */
 166 #define LEX_INTERPCONST          2 /* NOT USED */
 167 #define LEX_FORMLINE             1 /* expecting a format line               */
 168 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 169
 170
 171 #ifdef DEBUGGING
     /* Printable names for the LEX_* states, indexed by state value
      * (LEX_KNOWNEXT == 0 ... LEX_NORMAL == 10).  Must be kept in the same
      * order as the #defines above; there is no entry for LEX_NOTPARSING. */
 172 static const char* const lex_state_names[] = {
 173     "KNOWNEXT",
 174     "FORMLINE",
 175     "INTERPCONST",
 176     "INTERPCONCAT",
 177     "INTERPENDMAYBE",
 178     "INTERPEND",
 179     "INTERPSTART",
 180     "INTERPPUSH",
 181     "INTERPCASEMOD",
 182     "INTERPNORMAL",
 183     "NORMAL"
 184 };
 185 #endif
 186
 187 #ifdef ff_next
     /* Drop any ff_next macro that an earlier header may have defined.
      * NOTE(review): presumably to avoid a clash with an identifier used
      * later in this file -- confirm. */
 188 #undef ff_next
 189 #endif
 190
 191 #include "keywords.h"
 192
 193 /* CLINE is a macro that ensures PL_copline has a sane value */
     /* It lowers PL_copline to the current cop's line number whenever that
      * is smaller, and evaluates to the resulting PL_copline. */
 194
 195 #ifdef CLINE
 196 #undef CLINE
 197 #endif
 198 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 199
 200 #ifdef PERL_MAD
     /* Whitespace-skipping front ends.  Under PERL_MAD they map onto
      * skipspace0/1/2; SKIPSPACE2 passes the address of tsv and PEEKSPACE
      * passes 0 in its place.  Without PERL_MAD all four are plain
      * skipspace(s) and SKIPSPACE2's tsv argument is ignored. */
 201 #  define SKIPSPACE0(s) skipspace0(s)
 202 #  define SKIPSPACE1(s) skipspace1(s)
 203 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 204 #  define PEEKSPACE(s) skipspace2(s,0)
 205 #else
 206 #  define SKIPSPACE0(s) skipspace(s)
 207 #  define SKIPSPACE1(s) skipspace(s)
 208 #  define SKIPSPACE2(s,tsv) skipspace(s)
 209 #  define PEEKSPACE(s) skipspace(s)
 210 #endif
 211
 212 /*
 213  * Convenience functions to return different tokens and prime the
 214  * lexer for the next token.  They all take an argument.
 215  *
 216  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 217  * OPERATOR     : generic operator
 218  * AOPERATOR    : assignment operator
 219  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 220  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 221  * PREREF       : *EXPR where EXPR is not a simple identifier
 222  * TERM         : expression term
 223  * LOOPX        : loop exiting command (goto, last, dump, etc)
 224  * FTST         : file test operator
 225  * FUN0         : zero-argument function
 226  * FUN1         : not used, except for not, which isn't a UNIOP
 227  * BOop         : bitwise or or xor
 228  * BAop         : bitwise and
 229  * SHop         : shift operator
 230  * PWop         : power operator
 231  * PMop         : pattern-matching operator
 232  * Aop          : addition-level operator
 233  * Mop          : multiplication-level operator
 234  * Eop          : equality-testing operator
 235  * Rop          : relational operator <= != gt
 236  *
 237  * Also see LOP and lop() below.
 238  */
 239
 240 #ifdef DEBUGGING /* Serve -DT. */
     /* REPORT(retval): under DEBUGGING the token is passed through
      * tokereport(), which may print it and returns it unchanged; otherwise
      * REPORT is just its argument. */
 241 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 242 #else
 243 #   define REPORT(retval) (retval)
 244 #endif
 245
     /* Every macro below expands to a complete return statement and stores
      * the local variable s into PL_bufptr, so each can only be used inside
      * a function that keeps its scan position in a variable named s.
      * Those taking f also store it in pl_yylval.ival.  The ones wrapped in
      * ao() let a following '=' turn the token into ASSIGNOP. */
 246 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 247 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 248 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 249 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 250 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 251 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 252 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 253 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 254 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 255 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 256 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 257 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 258 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 259 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 260 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 261 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 262 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 263 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 264 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 265 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 266
 267 /* This bit of chicanery makes a unary function followed by
 268  * a parenthesis into a function with one argument, highest precedence.
 269  * The UNIDOR macro is for unary functions that can be followed by the //
 270  * operator (such as C<shift // 0>).
      *
      * UNI2(f,x) records f in pl_yylval.ival and PL_last_lop_op, sets
      * PL_expect to x, saves s in PL_bufptr and PL_oldbufptr in PL_last_uni,
      * then returns FUNC1 if the next non-space character is '(' and UNIOP
      * otherwise.  Like the return macros above it relies on a local s.
 271  */
 272 #define UNI2(f,x) { \
 273         pl_yylval.ival = f; \
 274         PL_expect = x; \
 275         PL_bufptr = s; \
 276         PL_last_uni = PL_oldbufptr; \
 277         PL_last_lop_op = f; \
 278         if (*s == '(') \
 279             return REPORT( (int)FUNC1 ); \
 280         s = PEEKSPACE(s); \
 281         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 282         }
 283 #define UNI(f)    UNI2(f,XTERM)
 284 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 285
     /* UNIBRACK(f): same decision as UNI2, but PL_expect and PL_last_lop_op
      * are left untouched. */
 286 #define UNIBRACK(f) { \
 287         pl_yylval.ival = f; \
 288         PL_bufptr = s; \
 289         PL_last_uni = PL_oldbufptr; \
 290         if (*s == '(') \
 291             return REPORT( (int)FUNC1 ); \
 292         s = PEEKSPACE(s); \
 293         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 294         }
 295
 296 /* grandfather return to old style */
     /* Returns LSTOP directly, without going through REPORT(). */
 297 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 298
 299 #ifdef DEBUGGING
 300
 301 /* how to interpret the pl_yylval associated with the token */
     /* (consumed by the switch in S_tokereport()) */
 302 enum token_type {
 303     TOKENTYPE_NONE,  /* nothing extra is printed */
 304     TOKENTYPE_IVAL,  /* pl_yylval.ival is printed as an integer */
 305     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 306     TOKENTYPE_PVAL,  /* pl_yylval.pval is printed as a string */
 307     TOKENTYPE_OPVAL, /* pl_yylval.opval is an op pointer, possibly NULL */
 308     TOKENTYPE_GVVAL  /* handled like TOKENTYPE_NONE: nothing is printed */
 309 };
 310
 311 static struct debug_tokens {
 312     const int token;            /* token number to match */
 313     enum token_type type;       /* which pl_yylval member goes with it */
 314     const char *name;           /* printable token name */
 315 } const debug_tokens[] =
     /* Lookup table for S_tokereport(), which scans it linearly; the entry
      * whose token is 0 terminates the table and must stay last. */
 316 {
 317     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 318     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 319     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 320     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 321     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 322     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 323     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 324     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 325     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 326     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 327     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 328     { DO,               TOKENTYPE_NONE,         "DO" },
 329     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 330     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 331     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 332     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 333     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 334     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 335     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 336     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 337     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 338     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 339     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 340     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 341     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 342     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 343     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 344     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 345     { IF,               TOKENTYPE_IVAL,         "IF" },
 346     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 347     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 348     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 349     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 350     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 351     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 352     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 353     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 354     { MY,               TOKENTYPE_IVAL,         "MY" },
 355     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 356     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 357     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 358     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 359     { OROR,             TOKENTYPE_NONE,         "OROR" },
 360     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 361     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 362     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 363     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 364     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 365     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 366     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 367     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 368     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 369     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 370     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 371     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 372     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 373     { SUB,              TOKENTYPE_NONE,         "SUB" },
 374     { THING,            TOKENTYPE_OPVAL,        "THING" },
 375     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 376     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 377     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 378     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 379     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 380     { USE,              TOKENTYPE_IVAL,         "USE" },
 381     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 382     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 383     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 384     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 385     { 0,                TOKENTYPE_NONE,         NULL }
 386 };
 387
 388 /* dump the returned token in rv, plus any optional arg in pl_yylval
      *
      * When DEBUG_T_TEST is true, builds a line of the form
      * "<== NAME(value)" and prints it to Perl_debug_log.  The token is
      * shown by its name from debug_tokens[] if it has one, as a quoted
      * character if its number lies in the range '!' .. '~', as "EOF" if it
      * is zero, and as "?? <number>" otherwise.  How *lvalp is shown depends
      * on the token's enum token_type.
      *
      * rv     token number about to be returned
      * lvalp  value that accompanies the token
      *
      * Always returns rv, so a call can wrap a return value.
      */
 389
 390 STATIC int
 391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 392 {
 393     dVAR;
 394
 395     PERL_ARGS_ASSERT_TOKEREPORT;
 396
 397     if (DEBUG_T_TEST) {
 398         const char *name = NULL;
 399         enum token_type type = TOKENTYPE_NONE;
 400         const struct debug_tokens *p;
 401         SV* const report = newSVpvs("<== ");
 402
 403         for (p = debug_tokens; p->token; p++) {
 404             if (p->token == (int)rv) {
 405                 name = p->name;
 406                 type = p->type;
 407                 break;
 408             }
 409         }
 410         if (name)
 411             Perl_sv_catpv(aTHX_ report, name);
             /* test the whole token number, not just its low byte, and
              * include '~' itself, which is a printable character too */
 412         else if (rv > ' ' && rv <= '~')
 413             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 414         else if (!rv)
 415             sv_catpvs(report, "EOF");
 416         else
 417             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 418         switch (type) {
 419         case TOKENTYPE_NONE:
 420         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 421             break;
 422         case TOKENTYPE_IVAL:
 423             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 424             break;
 425         case TOKENTYPE_OPNUM:
 426             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 427                                     PL_op_name[lvalp->ival]);
 428             break;
 429         case TOKENTYPE_PVAL:
 430             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 431             break;
 432         case TOKENTYPE_OPVAL:
 433             if (lvalp->opval) {
 434                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 435                                     PL_op_name[lvalp->opval->op_type]);
 436                 if (lvalp->opval->op_type == OP_CONST) {
 437                     Perl_sv_catpvf(aTHX_ report, " %s",
 438                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 439                 }
 440             }
 441             else
 442                 sv_catpvs(report, "(opval=null)");
 443             break;
 444         }
 445         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
             /* the report SV is ours alone: release it, as S_printbuf()
              * does with its scratch SV, instead of leaking one per token */
 446         SvREFCNT_dec(report);
 447     }
 448     return (int)rv;
 449 }
 450
 451
 452 /* print the buffer with suitable escapes
      *
      * fmt  printf-style format; it receives the escaped text as its only
      *      argument
      * s    NUL-terminated text to display
      *
      * The text is converted with pv_display() into a scratch SV, written
      * to Perl_debug_log, and the scratch SV is then released.
      */
 453
 454 STATIC void
 455 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 456 {
 457     SV * const scratch = newSVpvs("");
 458     const char *escaped;
         /* declarations end here */
 459     PERL_ARGS_ASSERT_PRINTBUF;
 460
 461     escaped = pv_display(scratch, s, strlen(s), 0, 60);
 462     PerlIO_printf(Perl_debug_log, fmt, escaped);
 463     SvREFCNT_dec(scratch);
 464 }
 464
 465 #endif
 466
 467 static int
 468 S_deprecate_commaless_var_list(pTHX) {
         /* Sets PL_expect to XTERM, issues the "comma-less variable list"
          * deprecation and returns a ',' token (via REPORT), so the caller
          * can hand the parser the comma that was left out. */
 469     PL_expect = XTERM;
 470     deprecate("comma-less variable list");
 471     return REPORT(','); /* grandfather non-comma-format format */
 472 }
 473
 474 /*
 475  * S_ao
 476  *
 477  * Assignment-operator check.  If the character at PL_bufptr is '=', it
 478  * is consumed and the token becomes ASSIGNOP; for ANDAND, OROR and DORDOR
 479  * (that is &&=, ||= and //=) pl_yylval.ival is set to OP_ANDASSIGN,
      * OP_ORASSIGN or OP_DORASSIGN respectively.  For any other token
      * pl_yylval.ival is left as the caller set it.  Without a following
      * '=' the token is returned unchanged.
      */
 480
 481 STATIC int
 482 S_ao(pTHX_ int toketype)
 483 {
 484     dVAR;
 485     if (*PL_bufptr != '=')
 486         return toketype;
 487
 488     PL_bufptr++;
 489     switch (toketype) {
 490     case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
 491     case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
 492     case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
 493     default:     break;  /* keep the ival the caller stored */
 494     }
 495     return ASSIGNOP;
 496 }
 497
 498 /*
 499  * S_no_op
 500  * Called when Perl expects an operator and finds something else.
 501  * It always warns "<what> found where operator expected".  Then, if
 502  * the WARN_SYNTAX category is on, it may add one hint:
 503  *  - "(Missing semicolon on previous line?)" when PL_oldbufptr is at
 504  *    the start of the line;
 505  *  - "(Do you need to predeclare ...?)" when PL_oldoldbufptr starts an
 506  *    identifier that is followed by whitespace before PL_bufptr, as in
 507  *    "sub bar; foo bar $x", where the compiler doesn't know if foo is
 508  *    a method call or a function;
 509  *  - "(Missing operator before ...?)" when PL_oldoldbufptr is NULL or
      *    does not start an identifier; the quoted text runs from the
      *    original PL_bufptr up to s.
      *
      * what  text describing the unexpected thing
      * s     if non-NULL, used as PL_bufptr while the warnings are issued
      *
      * PL_bufptr is restored before returning.
      */
 510
 511 STATIC void
 512 S_no_op(pTHX_ const char *const what, char *s)
 513 {
 514     dVAR;
 515     char * const saved_bufptr = PL_bufptr;
 516     const bool at_line_start = (PL_linestart == PL_oldbufptr);
 517
 518     PERL_ARGS_ASSERT_NO_OP;
 519
 520     if (s)
 521         PL_bufptr = s;
 522     else
 523         s = saved_bufptr;
 524     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 525     if (ckWARN_d(WARN_SYNTAX)) {
 526         if (at_line_start)
 527             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 528                     "\t(Missing semicolon on previous line?)\n");
 529         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 530             const char *t = PL_oldoldbufptr;
                 /* step over the identifier, package separators included */
 531             while (isALNUM_lazy_if(t,UTF) || *t == ':')
 532                 t++;
 533             if (t < PL_bufptr && isSPACE(*t))
 534                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 535                         "\t(Do you need to predeclare %.*s?)\n",
 536                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 537         }
 538         else {
 539             assert(s >= saved_bufptr);
 540             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 541                     "\t(Missing operator before %.*s?)\n", (int)(s - saved_bufptr), saved_bufptr);
 542         }
 543     }
 544     PL_bufptr = saved_bufptr;
 545 }
 546
 547 /*
 548  * S_missingterm
 549  * Complain about missing quote/regexp/heredoc terminator.
 550  * If it's called with a string, that string is cut at its last newline
 551  * and reported as the terminator.  If it's called with NULL, the
 552  * delimiter in PL_multi_close is reported instead; when that delimiter
 553  * is a control character, it's shown as a two-char sequence like ^C.
      * The terminator is quoted with '"', or with '\'' if it contains '"'.
 554  * This is fatal.
      */
 555
 556 STATIC void
 557 S_missingterm(pTHX_ char *s)
 558 {
 559     dVAR;
 560     char delim[3];
 561     char quote;
         /* choose the text that names the missing terminator */
 562     if (!s) {
 563         char *d = delim;
 564         if (isCNTRL(PL_multi_close)) {
 565             *d++ = '^';
 566             *d++ = (char)toCTRL(PL_multi_close);
 567         }
 568         else
 569             *d++ = (char)PL_multi_close;
 570         *d = '\0';
 571         s = delim;
 572     }
 573     else {
 574         char * const last_nl = strrchr(s, '\n');
 575         if (last_nl)
 576             *last_nl = '\0';
 577     }
 578     quote = strchr(s, '"') ? '\'' : '"';
 579     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",quote,s,quote);
 580 }
 581
 582 #define FEATURE_IS_ENABLED(name)                                        \
 583         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 584             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
     /* FEATURE_IS_ENABLED(name): false straight away unless the
      * HINT_LOCALIZE_HH bit is set in PL_hints; otherwise the answer comes
      * from S_feature_is_enabled().  name goes through STR_WITH_LEN(). */
 585 /* The longest string we pass in.  */
     /* S_feature_is_enabled() sizes its key buffer from this and asserts
      * that no longer name is passed, so it must be raised if a longer
      * feature name is ever used. */
 586 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
 587
 588 /*
 589  * S_feature_is_enabled
 590  * Report whether the feature called name (namelen bytes, at most
      * MAX_FEATURE_LEN) is switched on, by looking for the key
      * "feature_" followed by name in the hash GvHV(PL_hintgv).
      * Returns false when that hash does not exist.
 591  */
 592 STATIC bool
 593 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 594 {
 595     dVAR;
 596     static const char prefix[] = "feature_";
 597     const STRLEN prefix_len = sizeof(prefix) - 1;
 598     char key[sizeof(prefix) - 1 + MAX_FEATURE_LEN];
 599     HV * const hints = GvHV(PL_hintgv);
         /* the key is length-counted, so it needs no terminating NUL */
 600     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 601     assert(namelen <= MAX_FEATURE_LEN);
 602     memcpy(key, prefix, prefix_len);
 603     memcpy(key + prefix_len, name, namelen);
 604     return (hints != NULL && hv_exists(hints, key, prefix_len + namelen));
 605 }
 606
 607 /*
 608  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 609  * utf16-to-utf8-reversed.
 610  */
 611
 612 #ifdef PERL_CR_FILTER
     /*
      * strip_return
      * Turn every CR-LF pair in sv's string buffer into a bare LF, in place,
      * and shorten SvCUR(sv) by the number of CRs removed.  A CR that is not
      * followed by LF is kept.  The buffer is left NUL-terminated at its
      * new length.
      */
 613 static void
 614 strip_return(SV *sv)
 615 {
         /* s is written through (via d), so take the mutable buffer pointer
          * rather than casting away const later */
 616     register char *s = SvPVX(sv);
 617     register const char * const e = s + SvCUR(sv);
 618
 619     PERL_ARGS_ASSERT_STRIP_RETURN;
 620     /* outer loop optimized to do nothing if there are no CR-LFs */
 621     while (s < e) {
 622         if (*s++ == '\r' && *s == '\n') {
 623             /* hit a CR-LF, need to copy the rest */
 624             register char *d = s - 1;
 625             *d++ = *s++;
 626             while (s < e) {
 627                 if (*s == '\r' && s[1] == '\n')
 628                     s++;
 629                 *d++ = *s++;
 630             }
 631             *d = '\0';  /* d is the new end; s - d bytes were dropped */
 632             SvCUR(sv) -= s - d;
 633             return;
 634         }
 635     }
 636 }
 637
 638 STATIC I32
 639 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 640 {
         /* Reads through FILTER_READ(idx+1, ...) and, when something was
          * read with maxlen == 0, strips CR-LF pairs from sv.  Returns
          * FILTER_READ's result unchanged. */
 641     const I32 nread = FILTER_READ(idx + 1, sv, maxlen);
 642     if (maxlen == 0 && nread > 0)
 643         strip_return(sv);
 644     return nread;
 645 }
 646 #endif
 647
 648
 649
 650 /*
 651  * Perl_lex_start
 652  *
 653  * Create a parser object and initialise its parser and lexer fields
 654  *
 655  * rsfp       is the opened file handle to read from (if any),
 656  *
 657  * line       holds any initial content already read from the file (or in
 658  *            the case of no file, such as an eval, the whole contents);
 659  *
 660  * new_filter indicates that this is a new file and it shouldn't inherit
 661  *            the filters from the current parser (ie require).
      *
      * The new parser becomes PL_parser, with the previous one kept in its
      * old_parser member; SAVEPARSER arranges for it to be freed on scope
      * exit.
 662  */
 663
 664 void
 665 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 666 {
 667     dVAR;
 668     const char *s = NULL;
 669     STRLEN len;
 670     yy_parser *parser, *oparser;
 671
 672     /* create and initialise a parser */
 673
         /* zero-filled allocation; remember the outer parser in oparser */
 674     Newxz(parser, 1, yy_parser);
 675     parser->old_parser = oparser = PL_parser;
 676     PL_parser = parser;
 677
 678     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 679     parser->ps = parser->stack;
 680     parser->stack_size = YYINITDEPTH;
 681
 682     parser->stack->state = 0;
 683     parser->yyerrstatus = 0;
 684     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 685
 686     /* on scope exit, free this parser and restore any outer one */
 687     SAVEPARSER(parser);
 688     parser->saved_curcop = PL_curcop;
 689
 690     /* initialise lexer state */
 691
 692 #ifdef PERL_MAD
 693     parser->curforce = -1;
 694 #else
 695     parser->nexttoke = 0;
 696 #endif
         /* a nested parse starts from the outer parser's error count */
 697     parser->error_count = oparser ? oparser->error_count : 0;
 698     parser->copline = NOLINE;
 699     parser->lex_state = LEX_NORMAL;
 700     parser->expect = XSTATE;
 701     parser->rsfp = rsfp;
         /* fresh filter list for a new file or an outermost parser;
          * otherwise share the outer parser's list (extra reference) */
 702     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 703                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 704
 705     Newx(parser->lex_brackstack, 120, char);
 706     Newx(parser->lex_casestack, 12, char);
 707     *parser->lex_casestack = '\0';
 708
 709     if (line) {
 710         s = SvPV_const(line, len);
 711     } else {
 712         len = 0;
 713     }
 714
         /* Choose the buffer to lex from:
          *  - no text at all: a new buffer holding just "\n;"
          *  - line is read-only or doesn't end in ';': a copy of it, with
          *    "\n;" appended in the latter case
          *  - otherwise: line itself, with an extra reference taken */
 715     if (!len) {
 716         parser->linestr = newSVpvs("\n;");
 717     } else if (SvREADONLY(line) || s[len-1] != ';') {
 718         parser->linestr = newSVsv(line);
 719         if (s[len-1] != ';')
 720             sv_catpvs(parser->linestr, "\n;");
 721     } else {
 722         SvTEMP_off(line);
 723         SvREFCNT_inc_simple_void_NN(line);
 724         parser->linestr = line;
 725     }
         /* all position pointers start at the beginning of the buffer */
 726     parser->oldoldbufptr =
 727         parser->oldbufptr =
 728         parser->bufptr =
 729         parser->linestart = SvPVX(parser->linestr);
 730     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 731     parser->last_lop = parser->last_uni = NULL;
 732 }
 733
 734
 735 /* delete a parser object
      *
      * Restores PL_curcop from the parser, drops its references to linestr
      * and rsfp_filters, and frees its stacks and the parser itself, leaving
      * PL_parser pointing at the outer parser.  The file handle is only
      * cleared of errors if it is stdin; otherwise it is closed unless the
      * outer parser is reading from the same handle.
      */
 736
 737 void
 738 Perl_parser_free(pTHX_  const yy_parser *parser)
 739 {
 740     PerlIO *fh;
 741     yy_parser *outer;
         /* parser is only dereferenced once the argument check has run */
 742     PERL_ARGS_ASSERT_PARSER_FREE;
 743     fh = parser->rsfp;
 744     outer = parser->old_parser;
         /* release what the parser holds, in the established order */
 745     PL_curcop = parser->saved_curcop;
 746     SvREFCNT_dec(parser->linestr);
 747
 748     if (fh == PerlIO_stdin())
 749         PerlIO_clearerr(fh);
 750     else if (fh && !(outer && outer->rsfp == fh))
 751         PerlIO_close(fh);
 752     SvREFCNT_dec(parser->rsfp_filters);
 753     Safefree(parser->stack);
 754     Safefree(parser->lex_brackstack);
 755     Safefree(parser->lex_casestack);
 756     PL_parser = outer;
 757     Safefree(parser);
     }
 758
 759
 760 /*
 761  * Perl_lex_end
 762  * Finalizer for lexing operations.  Must be called when the parser is
 763  * done with the lexer.
      * In this version all it does is reset PL_doextract to FALSE.
 764  */
 765
 766 void
 767 Perl_lex_end(pTHX)
 768 {
 769     dVAR;
 770     PL_doextract = FALSE;
 771 }
 772
 773 /*
 774 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
 775
 776 Buffer scalar containing the chunk currently under consideration of the
 777 text currently being lexed.  This is always a plain string scalar (for
 778 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 779 normal scalar means; instead refer to the buffer directly by the pointer
 780 variables described below.
 781
 782 The lexer maintains various C<char*> pointers to things in the
 783 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 784 reallocated, all of these pointers must be updated.  Don't attempt to
 785 do this manually, but rather use L</lex_grow_linestr> if you need to
 786 reallocate the buffer.
 787
 788 The content of the text chunk in the buffer is commonly exactly one
 789 complete line of input, up to and including a newline terminator,
 790 but there are situations where it is otherwise.  The octets of the
 791 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 792 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 793 flag on this scalar, which may disagree with it.
 794
 795 For direct examination of the buffer, the variable
 796 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 797 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 798 of these pointers is usually preferable to examination of the scalar
 799 through normal scalar means.
 800
 801 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
 802
 803 Direct pointer to the end of the chunk of text currently being lexed, the
 804 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 805 + SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
 806 always located at the end of the buffer, and does not count as part of
 807 the buffer's contents.
 808
 809 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
 810
 811 Points to the current position of lexing inside the lexer buffer.
 812 Characters around this point may be freely examined, within
 813 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 814 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 815 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 816
 817 Lexing code (whether in the Perl core or not) moves this pointer past
 818 the characters that it consumes.  It is also expected to perform some
 819 bookkeeping whenever a newline character is consumed.  This movement
 820 can be more conveniently performed by the function L</lex_read_to>,
 821 which handles newlines appropriately.
 822
 823 Interpretation of the buffer's octets can be abstracted out by
 824 using the slightly higher-level functions L</lex_peek_unichar> and
 825 L</lex_read_unichar>.
 826
 827 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
 828
 829 Points to the start of the current line inside the lexer buffer.
 830 This is useful for indicating at which column an error occurred, and
 831 not much else.  This must be updated by any lexing code that consumes
 832 a newline; the function L</lex_read_to> handles this detail.
 833
 834 =cut
 835 */
 836
 837 /*
 838 =for apidoc Amx|bool|lex_bufutf8
 839
 840 Indicates whether the octets in the lexer buffer
 841 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 842 of Unicode characters.  If not, they should be interpreted as Latin-1
 843 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 844
 845 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 846 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 847 encoding.
 848
 849 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 850 is significant, but not the whole story regarding the input character
 851 encoding.  Normally, when a file is being read, the scalar contains octets
 852 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 853 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 854 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 855 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 856 is in effect.  This logic may change in the future; use this function
 857 instead of implementing the logic yourself.
 858
 859 =cut
 860 */
 861
 862 bool
 863 Perl_lex_bufutf8(pTHX)
 864 {
         /* UTF is a macro defined outside this function; its value is
          * converted to bool here exactly as a direct return would. */
 865     const bool octets_are_utf8 = UTF;
         return octets_are_utf8;
 866 }
 867
 868 /*
 869 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
 870
 871 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 872 at least I<len> octets (including terminating NUL).  Returns a
 873 pointer to the reallocated buffer.  This is necessary before making
 874 any direct modification of the buffer that would increase its length.
 875 L</lex_stuff_pvn> provides a more convenient way to insert text into
 876 the buffer.
 877
 878 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 879 this function updates all of the lexer's variables that point directly
 880 into the buffer.
 881
 882 =cut
 883 */
 884
 885 char *
 886 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 887 {
 888     SV * const sv = PL_parser->linestr;
 889     char *base = SvPVX(sv);
 890     STRLEN end_off, ptr_off, old_off, oldold_off, line_off;
 891     STRLEN uni_off = 0, lop_off = 0;
         /* Large enough already: the buffer stays put, nothing to rebase. */
 892     if (SvLEN(sv) >= len)
 893         return base;
         /* Capture each lexer pointer as an offset from the current buffer. */
 894     end_off = PL_parser->bufend - base;
 895     ptr_off = PL_parser->bufptr - base;
 896     old_off = PL_parser->oldbufptr - base;
 897     oldold_off = PL_parser->oldoldbufptr - base;
 898     line_off = PL_parser->linestart - base;
 899     if (PL_parser->last_uni)
 900         uni_off = PL_parser->last_uni - base;
 901     if (PL_parser->last_lop)
 902         lop_off = PL_parser->last_lop - base;
         /* Grow, then point everything at the buffer sv_grow returned. */
 903     base = sv_grow(sv, len);
 904     PL_parser->bufend = base + end_off;
 905     PL_parser->bufptr = base + ptr_off;
 906     PL_parser->oldbufptr = base + old_off;
 907     PL_parser->oldoldbufptr = base + oldold_off;
 908     PL_parser->linestart = base + line_off;
         /* last_uni and last_lop may be NULL; if so they stay NULL. */
 909     if (PL_parser->last_uni)
 910         PL_parser->last_uni = base + uni_off;
 911     if (PL_parser->last_lop)
 912         PL_parser->last_lop = base + lop_off;
 913     return base;
 914 }
 915
 916 /*
 917 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
 918
 919 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
 920 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
 921 reallocating the buffer if necessary.  This means that lexing code that
 922 runs later will see the characters as if they had appeared in the input.
 923 It is not recommended to do this as part of normal parsing, and most
 924 uses of this facility run the risk of the inserted characters being
 925 interpreted in an unintended manner.
 926
 927 The string to be inserted is represented by I<len> octets starting
 928 at I<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
 929 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
 930 The characters are recoded for the lexer buffer, according to how the
 931 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
 932 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
 933 function is more convenient.
 934
 935 =cut
 936 */
 937
 938 void
 939 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
 940 {
 941     dVAR;
 942     char *bufptr;
 943     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
         /* LEX_STUFF_UTF8 is the only flag this function accepts. */
 944     if (flags & ~(LEX_STUFF_UTF8))
 945         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
         /* Four cases, keyed on the buffer's encoding (UTF) and the input's
          * encoding (LEX_STUFF_UTF8).  When the two agree, or when the input
          * has no octets that need recoding, control reaches plain_copy.
          * In every case only the local bufptr is advanced while writing;
          * PL_parser->bufptr is not changed, so the inserted text begins at
          * the current lexing point. */
 946     if (UTF) {
 947         if (flags & LEX_STUFF_UTF8) {
 948             goto plain_copy;
 949         } else {
                 /* Latin-1 input into a UTF-8 buffer: each octet with the
                  * high bit set is written as two octets, so the insertion
                  * occupies len+highhalf octets. */
 950             STRLEN highhalf = 0;
 951             const char *p, *e = pv+len;
 952             for (p = pv; p != e; p++)
 953                 highhalf += !!(((U8)*p) & 0x80);
 954             if (!highhalf)
 955                 goto plain_copy;
 956             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
                 /* Read bufptr only after growing: lex_grow_linestr may have
                  * moved the buffer and rebased the parser's pointers. */
 957             bufptr = PL_parser->bufptr;
                 /* Slide the unlexed tail, plus the NUL at bufend (the +1),
                  * upward to open a gap at the lexing point. */
 958             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
 959             SvCUR_set(PL_parser->linestr,
 960                 SvCUR(PL_parser->linestr) + len+highhalf);
 961             PL_parser->bufend += len+highhalf;
 962             for (p = pv; p != e; p++) {
 963                 U8 c = (U8)*p;
 964                 if (c & 0x80) {
                         /* two-octet form: 110xxxxx 10xxxxxx */
 965                     *bufptr++ = (char)(0xc0 | (c >> 6));
 966                     *bufptr++ = (char)(0x80 | (c & 0x3f));
 967                 } else {
 968                     *bufptr++ = (char)c;
 969                 }
 970             }
 971         }
 972     } else {
 973         if (flags & LEX_STUFF_UTF8) {
                 /* UTF-8 input into a Latin-1 buffer.  First pass validates
                  * the input and counts the two-octet sequences (highhalf),
                  * each of which shrinks to one octet on insertion. */
 974             STRLEN highhalf = 0;
 975             const char *p, *e = pv+len;
 976             for (p = pv; p != e; p++) {
 977                 U8 c = (U8)*p;
 978                 if (c >= 0xc4) {
                         /* lead octet too large for the result to fit in
                          * a single Latin-1 octet */
 979                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
 980                                 "non-Latin-1 character into Latin-1 input");
 981                 } else if (c >= 0xc2 && p+1 != e &&
 982                             (((U8)p[1]) & 0xc0) == 0x80) {
                         /* lead octet followed by a continuation octet:
                          * step over the continuation as well */
 983                     p++;
 984                     highhalf++;
 985                 } else if (c >= 0x80) {
 986                     /* malformed UTF-8 */
                         /* The decoder is called with PL_warnhook set to
                          * PERL_WARNHOOK_FATAL for the duration of the
                          * call; presumably this makes its malformation
                          * warning fatal -- TODO confirm. */
 987                     ENTER;
 988                     SAVESPTR(PL_warnhook);
 989                     PL_warnhook = PERL_WARNHOOK_FATAL;
 990                     utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
 991                     LEAVE;
 992                 }
 993             }
 994             if (!highhalf)
 995                 goto plain_copy;
 996             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
                 /* As above: reload bufptr after the possible reallocation,
                  * then open a gap of len-highhalf octets. */
 997             bufptr = PL_parser->bufptr;
 998             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
 999             SvCUR_set(PL_parser->linestr,
1000                 SvCUR(PL_parser->linestr) + len-highhalf);
1001             PL_parser->bufend += len-highhalf;
1002             for (p = pv; p != e; p++) {
1003                 U8 c = (U8)*p;
1004                 if (c & 0x80) {
                         /* rebuild one octet from the low two bits of the
                          * lead octet and the six payload bits of p[1] */
1005                     *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006                     p++;
1007                 } else {
1008                     *bufptr++ = (char)c;
1009                 }
1010             }
1011         } else {
                 /* Encodings agree (also reached by goto from the branches
                  * above): insert the octets unchanged. */
1012             plain_copy:
1013             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1014             bufptr = PL_parser->bufptr;
1015             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1016             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1017             PL_parser->bufend += len;
1018             Copy(pv, bufptr, len, char);
1019         }
1020     }
1021 }
1022
1023 /*
1024 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1025
1026 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1027 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1028 reallocating the buffer if necessary.  This means that lexing code that
1029 runs later will see the characters as if they had appeared in the input.
1030 It is not recommended to do this as part of normal parsing, and most
1031 uses of this facility run the risk of the inserted characters being
1032 interpreted in an unintended manner.
1033
1034 The string to be inserted is the string value of I<sv>.  The characters
1035 are recoded for the lexer buffer, according to how the buffer is currently
1036 being interpreted (L</lex_bufutf8>).  If a string to be interpreted is
1037 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1038 need to construct a scalar.
1039
1040 =cut
1041 */
1042
1043 void
1044 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1045 {
1046     STRLEN nbytes;
1047     const char *text;
1048     PERL_ARGS_ASSERT_LEX_STUFF_SV;
         /* No caller-supplied flags are accepted. */
1049     if (flags != 0)
1050         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1051     text = SvPV(sv, nbytes);
         /* flags is zero past the check above, so the scalar's own UTF-8
          * flag is the only thing that selects LEX_STUFF_UTF8. */
1052     lex_stuff_pvn(text, nbytes, SvUTF8(sv) ? LEX_STUFF_UTF8 : 0);
1053 }
1054
1055 /*
1056 =for apidoc Amx|void|lex_unstuff|char *ptr
1057
1058 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1059 I<ptr>.  Text following I<ptr> will be moved, and the buffer shortened.
1060 This hides the discarded text from any lexing code that runs later,
1061 as if the text had never appeared.
1062
1063 This is not the normal way to consume lexed text.  For that, use
1064 L</lex_read_to>.
1065
1066 =cut
1067 */
1068
1069 void
1070 Perl_lex_unstuff(pTHX_ char *ptr)
1071 {
1072     char *here, *limit;
1073     STRLEN gap;
1074     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1075     here = PL_parser->bufptr;
         /* ptr must lie in [bufptr, bufend]; ptr == bufptr discards nothing. */
1076     if (here > ptr)
1077         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1078     if (here == ptr)
1079         return;
1080     limit = PL_parser->bufend;
1081     if (limit < ptr)
1082         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1083     gap = ptr - here;
         /* Close the gap; the extra octet is the NUL sitting at bufend. */
1084     Move(ptr, here, limit - ptr + 1, char);
1085     PL_parser->bufend = limit - gap;
1086     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - gap);
1087 }
1088
1089 /*
1090 =for apidoc Amx|void|lex_read_to|char *ptr
1091
1092 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1093 to I<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1094 performing the correct bookkeeping whenever a newline character is passed.
1095 This is the normal way to consume lexed text.
1096
1097 Interpretation of the buffer's octets can be abstracted out by
1098 using the slightly higher-level functions L</lex_peek_unichar> and
1099 L</lex_read_unichar>.
1100
1101 =cut
1102 */
1103
1104 void
1105 Perl_lex_read_to(pTHX_ char *ptr)
1106 {
1107     char *cursor;
1108     PERL_ARGS_ASSERT_LEX_READ_TO;
1109     cursor = PL_parser->bufptr;
         /* The target must lie within [bufptr, bufend]. */
1110     if (!(cursor <= ptr && ptr <= PL_parser->bufend))
1111         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
         /* For each newline passed, bump the line count and restart
          * linestart just after it. */
1112     while (cursor != ptr)
1113         if (*cursor++ == '\n') {
1114             CopLINE_inc(PL_curcop);
1115             PL_parser->linestart = cursor;
1116         }
1117     PL_parser->bufptr = ptr;
1118 }
1119
1120 /*
1121 =for apidoc Amx|void|lex_discard_to|char *ptr
1122
1123 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1124 up to I<ptr>.  The remaining content of the buffer will be moved, and
1125 all pointers into the buffer updated appropriately.  I<ptr> must not
1126 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1127 it is not permitted to discard text that has yet to be lexed.
1128
1129 Normally it is not necessary to do this directly, because it suffices to
1130 use the implicit discarding behaviour of L</lex_next_chunk> and things
1131 based on it.  However, if a token stretches across multiple lines,
1132 and the lexing code has kept multiple lines of text in the buffer for
1133 that purpose, then after completion of the token it would be wise to
1134 explicitly discard the now-unneeded earlier lines, to avoid future
1135 multi-line tokens growing the buffer without bound.
1136
1137 =cut
1138 */
1139
1140 void
1141 Perl_lex_discard_to(pTHX_ char *ptr)
1142 {
1143     char *buf;
1144     STRLEN discard_len;
1145     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1146     buf = SvPVX(PL_parser->linestr);
         /* ptr must lie in [start of buffer, bufptr]; ptr at the very start
          * of the buffer means there is nothing to discard. */
1147     if (ptr < buf)
1148         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1149     if (ptr == buf)
1150         return;
1151     if (ptr > PL_parser->bufptr)
1152         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1153     discard_len = ptr - buf;
         /* Pointers into the region about to vanish: the old-position
          * pointers are clamped to ptr, while last_uni and last_lop are
          * dropped (set to NULL). */
1154     if (PL_parser->oldbufptr < ptr)
1155         PL_parser->oldbufptr = ptr;
1156     if (PL_parser->oldoldbufptr < ptr)
1157         PL_parser->oldoldbufptr = ptr;
1158     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1159         PL_parser->last_uni = NULL;
1160     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1161         PL_parser->last_lop = NULL;
         /* Shift the kept text, and the NUL at bufend (the +1), down to the
          * start of the buffer, then shorten the scalar to match. */
1162     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1163     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
         /* Rebase the surviving pointers by the amount removed.
          * NOTE(review): PL_parser->linestart is neither clamped nor
          * rebased in this function -- confirm whether that is intended. */
1164     PL_parser->bufend -= discard_len;
1165     PL_parser->bufptr -= discard_len;
1166     PL_parser->oldbufptr -= discard_len;
1167     PL_parser->oldoldbufptr -= discard_len;
1168     if (PL_parser->last_uni)
1169         PL_parser->last_uni -= discard_len;
1170     if (PL_parser->last_lop)
1171         PL_parser->last_lop -= discard_len;
1172 }
1173
1174 /*
1175 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1176
1177 Reads in the next chunk of text to be lexed, appending it to
1178 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1179 looked to the end of the current chunk and wants to know more.  It is
1180 usual, but not necessary, for lexing to have consumed the entirety of
1181 the current chunk at this time.
1182
1183 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1184 chunk (i.e., the current chunk has been entirely consumed), normally the
1185 current chunk will be discarded at the same time that the new chunk is
1186 read in.  If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1187 will not be discarded.  If the current chunk has not been entirely
1188 consumed, then it will not be discarded regardless of the flag.
1189
1190 Returns true if some new text was added to the buffer, or false if the
1191 buffer has reached the end of the input text.
1192
1193 =cut
1194 */
1195
1196 #define LEX_FAKE_EOF 0x80000000
1197
1198 bool
1199 Perl_lex_next_chunk(pTHX_ U32 flags)
1200 {
1201     SV *linestr;
1202     char *buf;
1203     STRLEN old_bufend_pos, new_bufend_pos;
1204     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1205     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1206     bool got_some_for_debugger = 0;
1207     bool got_some;
1208     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1209         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1210     linestr = PL_parser->linestr;
1211     buf = SvPVX(linestr);
1212     if (!(flags & LEX_KEEP_PREVIOUS) &&
1213             PL_parser->bufptr == PL_parser->bufend) {
1214         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1215         linestart_pos = 0;
1216         if (PL_parser->last_uni != PL_parser->bufend)
1217             PL_parser->last_uni = NULL;
1218         if (PL_parser->last_lop != PL_parser->bufend)
1219             PL_parser->last_lop = NULL;
1220         last_uni_pos = last_lop_pos = 0;
1221         *buf = 0;
1222         SvCUR(linestr) = 0;
1223     } else {
1224         old_bufend_pos = PL_parser->bufend - buf;
1225         bufptr_pos = PL_parser->bufptr - buf;
1226         oldbufptr_pos = PL_parser->oldbufptr - buf;
1227         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1228         linestart_pos = PL_parser->linestart - buf;
1229         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1230         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1231     }
1232     if (flags & LEX_FAKE_EOF) {
1233         goto eof;
1234     } else if (!PL_parser->rsfp) {
1235         got_some = 0;
1236     } else if (filter_gets(linestr, old_bufend_pos)) {
1237         got_some = 1;
1238         got_some_for_debugger = 1;
1239     } else {
1240         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1241             sv_setpvs(linestr, "");
1242         eof:
1243         /* End of real input.  Close filehandle (unless it was STDIN),
1244          * then add implicit termination.
1245          */
1246         if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1247             PerlIO_clearerr(PL_parser->rsfp);
1248         else if (PL_parser->rsfp)
1249             (void)PerlIO_close(PL_parser->rsfp);
1250         PL_parser->rsfp = NULL;
1251         PL_doextract = FALSE;
1252 #ifdef PERL_MAD
1253         if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1254             PL_faketokens = 1;
1255 #endif
1256         if (!PL_in_eval && PL_minus_p) {
1257             sv_catpvs(linestr,
1258                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1259             PL_minus_n = PL_minus_p = 0;
1260         } else if (!PL_in_eval && PL_minus_n) {
1261             sv_catpvs(linestr, /*{*/";}");
1262             PL_minus_n = 0;
1263         } else
1264             sv_catpvs(linestr, ";");
1265         got_some = 1;
1266     }
1267     buf = SvPVX(linestr);
1268     new_bufend_pos = SvCUR(linestr);
1269     PL_parser->bufend = buf + new_bufend_pos;
1270     PL_parser->bufptr = buf + bufptr_pos;
1271     PL_parser->oldbufptr = buf + oldbufptr_pos;
1272     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1273     PL_parser->linestart = buf + linestart_pos;
1274     if (PL_parser->last_uni)
1275         PL_parser->last_uni = buf + last_uni_pos;
1276     if (PL_parser->last_lop)
1277         PL_parser->last_lop = buf + last_lop_pos;
1278     if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1279             PL_curstash != PL_debstash) {
1280         /* debugger active and we're not compiling the debugger code,
1281          * so store the line into the debugger's array of lines
1282          */
1283         update_debugger_info(NULL, buf+old_bufend_pos,
1284             new_bufend_pos-old_bufend_pos);
1285     }
1286     return got_some;
1287 }
1288
1289 /*
1290 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1291
1292 Looks ahead one (Unicode) character in the text currently being lexed.
1293 Returns the codepoint (unsigned integer value) of the next character,
1294 or -1 if lexing has reached the end of the input text.  To consume the
1295 peeked character, use L</lex_read_unichar>.
1296
1297 If the next character is in (or extends into) the next chunk of input
1298 text, the next chunk will be read in.  Normally the current chunk will be
1299 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1300 then the current chunk will not be discarded.
1301
1302 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1303 is encountered, an exception is generated.
1304
1305 =cut
1306 */
1307
1308 I32
1309 Perl_lex_peek_unichar(pTHX_ U32 flags)
1310 {
1311     dVAR;
1312     char *s, *bufend;
1313     if (flags & ~(LEX_KEEP_PREVIOUS))
1314         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1315     s = PL_parser->bufptr;
1316     bufend = PL_parser->bufend;
1317     if (UTF) {
1318         U8 head;
1319         I32 unichar;
1320         STRLEN len, retlen;
             /* Buffer exhausted: fetch another chunk, then reload the
              * pointers, which lex_next_chunk may have changed. */
1321         if (s == bufend) {
1322             if (!lex_next_chunk(flags))
1323                 return -1;
1324             s = PL_parser->bufptr;
1325             bufend = PL_parser->bufend;
1326         }
1327         head = (U8)*s;
             /* High bit clear: a single-octet character, returned as is. */
1328         if (!(head & 0x80))
1329             return head;
             /* Bits 0x80 and 0x40 both set: take the expected sequence
              * length from PL_utf8skip and keep appending chunks (always
              * with LEX_KEEP_PREVIOUS, so s stays valid as an offset)
              * until that many octets are available or input runs out. */
1330         if (head & 0x40) {
1331             len = PL_utf8skip[head];
1332             while ((STRLEN)(bufend-s) < len) {
1333                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1334                     break;
1335                 s = PL_parser->bufptr;
1336                 bufend = PL_parser->bufend;
1337             }
1338         }
             /* First decode quietly; a retlen of (STRLEN)-1 is treated as
              * a malformed sequence. */
1339         unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1340         if (retlen == (STRLEN)-1) {
1341             /* malformed UTF-8 */
                 /* Decode again with PL_warnhook set to
                  * PERL_WARNHOOK_FATAL; presumably this is what raises
                  * the exception promised in the POD -- TODO confirm. */
1342             ENTER;
1343             SAVESPTR(PL_warnhook);
1344             PL_warnhook = PERL_WARNHOOK_FATAL;
1345             utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1346             LEAVE;
1347         }
1348         return unichar;
1349     } else {
             /* Non-UTF-8 buffer: each octet is one character. */
1350         if (s == bufend) {
1351             if (!lex_next_chunk(flags))
1352                 return -1;
1353             s = PL_parser->bufptr;
1354         }
1355         return (U8)*s;
1356     }
1357 }
1358
1359 /*
1360 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1361
1362 Reads the next (Unicode) character in the text currently being lexed.
1363 Returns the codepoint (unsigned integer value) of the character read,
1364 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1365 if lexing has reached the end of the input text.  To non-destructively
1366 examine the next character, use L</lex_peek_unichar> instead.
1367
1368 If the next character is in (or extends into) the next chunk of input
1369 text, the next chunk will be read in.  Normally the current chunk will be
1370 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1371 then the current chunk will not be discarded.
1372
1373 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1374 is encountered, an exception is generated.
1375
1376 =cut
1377 */
1378
1379 I32
1380 Perl_lex_read_unichar(pTHX_ U32 flags)
1381 {
1382     I32 c;
1383     if (flags & ~(LEX_KEEP_PREVIOUS))
1384         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
         /* Peeking also pulls in the next chunk when needed, so bufptr is
          * only read after this call. */
1385     c = lex_peek_unichar(flags);
1386     if (c != -1) {
1387         if (c == '\n')
1388             CopLINE_inc(PL_curcop);
             /* Step over exactly the character that was peeked.  Only a
              * UTF-8 buffer has multi-octet characters; when the buffer is
              * not UTF-8, lex_peek_unichar returned a single octet, and
              * using UTF8SKIP on an octet >= 0xC0 would skip too far
              * (possibly beyond bufend). */
             if (UTF)
1389             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
             else
                 PL_parser->bufptr++;
1390     }
1391     return c;
1392 }
1393
1394 /*
1395 =for apidoc Amx|void|lex_read_space|U32 flags
1396
1397 Reads optional spaces, in Perl style, in the text currently being
1398 lexed.  The spaces may include ordinary whitespace characters and
1399 Perl-style comments.  C<#line> directives are processed if encountered.
1400 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1401 at a non-space character (or the end of the input text).
1402
1403 If spaces extend into the next chunk of input text, the next chunk will
1404 be read in.  Normally the current chunk will be discarded at the same
1405 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1406 chunk will not be discarded.
1407
1408 =cut
1409 */
1410
     /* Extra flag accepted by lex_read_space alongside LEX_KEEP_PREVIOUS:
      * stop at the end of the current buffer instead of reading more. */
1411 #define LEX_NO_NEXT_CHUNK 0x80000000
1412
1413 void
1414 Perl_lex_read_space(pTHX_ U32 flags)
1415 {
1416     char *s, *bufend;
         /* Set when a newline is the last octet of the buffer: the
          * incline() call for it is postponed until more text is read. */
1417     bool need_incline = 0;
1418     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1419         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1420 #ifdef PERL_MAD
         /* Start a fresh PL_skipwhite; it collects the skipped text only
          * when PL_madskills is set. */
1421     if (PL_skipwhite) {
1422         sv_free(PL_skipwhite);
1423         PL_skipwhite = NULL;
1424     }
1425     if (PL_madskills)
1426         PL_skipwhite = newSVpvs("");
1427 #endif /* PERL_MAD */
1428     s = PL_parser->bufptr;
1429     bufend = PL_parser->bufend;
1430     while (1) {
1431         char c = *s;
1432         if (c == '#') {
                 /* Comment: advance to the newline, or to the NUL at
                  * bufend; the next iteration deals with either. */
1433             do {
1434                 c = *++s;
1435             } while (!(c == '\n' || (c == 0 && s == bufend)));
1436         } else if (c == '\n') {
1437             s++;
1438             PL_parser->linestart = s;
1439             if (s == bufend)
1440                 need_incline = 1;
1441             else
1442                 incline(s);
1443         } else if (isSPACE(c)) {
1444             s++;
1445         } else if (c == 0 && s == bufend) {
                 /* End of the buffer (a NUL elsewhere falls through to
                  * the final else and ends the scan). */
1446             bool got_more;
1447 #ifdef PERL_MAD
1448             if (PL_madskills)
1449                 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1450 #endif /* PERL_MAD */
1451             if (flags & LEX_NO_NEXT_CHUNK)
1452                 break;
                 /* Commit progress before reading, then reload s and
                  * bufend afterwards since the buffer may have changed.
                  * The line count is raised by one only for the duration
                  * of the read -- NOTE(review): presumably so the new
                  * chunk is attributed to the upcoming line; confirm. */
1453             PL_parser->bufptr = s;
1454             CopLINE_inc(PL_curcop);
1455             got_more = lex_next_chunk(flags);
1456             CopLINE_dec(PL_curcop);
1457             s = PL_parser->bufptr;
1458             bufend = PL_parser->bufend;
1459             if (!got_more)
1460                 break;
                 /* Perform the postponed incline(), but only while an
                  * input handle is still set. */
1461             if (need_incline && PL_parser->rsfp) {
1462                 incline(s);
1463                 need_incline = 0;
1464             }
1465         } else {
1466             break;
1467         }
1468     }
1469 #ifdef PERL_MAD
1470     if (PL_madskills)
1471         sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1472 #endif /* PERL_MAD */
1473     PL_parser->bufptr = s;
1474 }
1475
1476 /*
1477  * S_incline
1478  * This subroutine has nothing to do with tilting, whether at windmills
1479  * or pinball tables.  Its name is short for "increment line".  It
1480  * increments the current line number in CopLINE(PL_curcop) and checks
1481  * to see whether the line starts with a comment of the form
1482  *    # line 500 "foo.pm"
1483  * If so, it sets the current line number and file to the values in the comment.
1484  */
1485
1486 STATIC void
1487 S_incline(pTHX_ const char *s)
1488 {
1489     dVAR;
1490     const char *t;
1491     const char *n;
1492     const char *e;
1493
1494     PERL_ARGS_ASSERT_INCLINE;
1495
1496     CopLINE_inc(PL_curcop);
1497     if (*s++ != '#')
1498         return;
1499     while (SPACE_OR_TAB(*s))
1500         s++;
1501     if (strnEQ(s, "line", 4))
1502         s += 4;
1503     else
1504         return;
1505     if (SPACE_OR_TAB(*s))
1506         s++;
1507     else
1508         return;
1509     while (SPACE_OR_TAB(*s))
1510         s++;
1511     if (!isDIGIT(*s))
1512         return;
1513
1514     n = s;
1515     while (isDIGIT(*s))
1516         s++;
1517     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1518         return;
1519     while (SPACE_OR_TAB(*s))
1520         s++;
1521     if (*s == '"' && (t = strchr(s+1, '"'))) {
1522         s++;
1523         e = t + 1;
1524     }
1525     else {
1526         t = s;
1527         while (!isSPACE(*t))
1528             t++;
1529         e = t;
1530     }
1531     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1532         e++;
1533     if (*e != '\n' && *e != '\0')
1534         return;         /* false alarm */
1535
1536     if (t - s > 0) {
1537         const STRLEN len = t - s;
1538 #ifndef USE_ITHREADS
1539         SV *const temp_sv = CopFILESV(PL_curcop);
1540         const char *cf;
1541         STRLEN tmplen;
1542
1543         if (temp_sv) {
1544             cf = SvPVX(temp_sv);
1545             tmplen = SvCUR(temp_sv);
1546         } else {
1547             cf = NULL;
1548             tmplen = 0;
1549         }
1550
1551         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1552             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1553              * to *{"::_<newfilename"} */
1554             /* However, the long form of evals is only turned on by the
1555                debugger - usually they're "(eval %lu)" */
1556             char smallbuf[128];
1557             char *tmpbuf;
1558             GV **gvp;
1559             STRLEN tmplen2 = len;
1560             if (tmplen + 2 <= sizeof smallbuf)
1561                 tmpbuf = smallbuf;
1562             else
1563                 Newx(tmpbuf, tmplen + 2, char);
1564             tmpbuf[0] = '_';
1565             tmpbuf[1] = '<';
1566             memcpy(tmpbuf + 2, cf, tmplen);
1567             tmplen += 2;
1568             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1569             if (gvp) {
1570                 char *tmpbuf2;
1571                 GV *gv2;
1572
1573                 if (tmplen2 + 2 <= sizeof smallbuf)
1574                     tmpbuf2 = smallbuf;
1575                 else
1576                     Newx(tmpbuf2, tmplen2 + 2, char);
1577
1578                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1579                     /* Either they malloc'd it, or we malloc'd it,
1580                        so no prefix is present in ours.  */
1581                     tmpbuf2[0] = '_';
1582                     tmpbuf2[1] = '<';
1583                 }
1584
1585                 memcpy(tmpbuf2 + 2, s, tmplen2);
1586                 tmplen2 += 2;
1587
1588                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1589                 if (!isGV(gv2)) {
1590                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1591                     /* adjust ${"::_<newfilename"} to store the new file name */
1592                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1593                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1594                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1595                 }
1596
1597                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1598             }
1599             if (tmpbuf != smallbuf) Safefree(tmpbuf);
1600         }
1601 #endif
1602         CopFILE_free(PL_curcop);
1603         CopFILE_setn(PL_curcop, s, len);
1604     }
1605     CopLINE_set(PL_curcop, atoi(n)-1);
1606 }
1607
1608 #ifdef PERL_MAD
1609 /* Skip whitespace that precedes PL_thistoken.  When PL_madskills is set,
1610  * pending PL_skipwhite text is appended to PL_thiswhite as well. */
1611 STATIC char *
1612 S_skipspace0(pTHX_ register char *s)
1613 {
1614     PERL_ARGS_ASSERT_SKIPSPACE0;
1615
1616     s = skipspace(s);
1617     if (PL_madskills) {
1618         if (PL_skipwhite) {
1619             if (!PL_thiswhite)
1620                 PL_thiswhite = newSVpvs("");
1621             sv_catsv(PL_thiswhite, PL_skipwhite);
1622             sv_free(PL_skipwhite);
1623             PL_skipwhite = NULL;
1624         }
             /* Record where the real token starts, as a buffer offset. */
1625         PL_realtokenstart = s - SvPVX(PL_linestr);
1626     }
1627     return s;
1628 }
1629
1630 /* skip space after PL_thistoken */
1631
1632 STATIC char *
1633 S_skipspace1(pTHX_ register char *s)
1634 {
1635     const char *start = s;
1636     I32 startoff = start - SvPVX(PL_linestr);
1637
1638     PERL_ARGS_ASSERT_SKIPSPACE1;
1639
1640     s = skipspace(s);
1641     if (!PL_madskills)
1642         return s;
1643     start = SvPVX(PL_linestr) + startoff;
1644     if (!PL_thistoken && PL_realtokenstart >= 0) {
1645         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1646         PL_thistoken = newSVpvn(tstart, start - tstart);
1647     }
1648     PL_realtokenstart = -1;
1649     if (PL_skipwhite) {
1650         if (!PL_nextwhite)
1651             PL_nextwhite = newSVpvs("");
1652         sv_catsv(PL_nextwhite, PL_skipwhite);
1653         sv_free(PL_skipwhite);
1654         PL_skipwhite = 0;
1655     }
1656     return s;
1657 }
1658
1659 STATIC char *
1660 S_skipspace2(pTHX_ register char *s, SV **svp)
1661 {
1662     char *start;
1663     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1664     const I32 startoff = s - SvPVX(PL_linestr);
1665
1666     PERL_ARGS_ASSERT_SKIPSPACE2;
1667
1668     s = skipspace(s);
1669     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1670     if (!PL_madskills || !svp)
1671         return s;
1672     start = SvPVX(PL_linestr) + startoff;
1673     if (!PL_thistoken && PL_realtokenstart >= 0) {
1674         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1675         PL_thistoken = newSVpvn(tstart, start - tstart);
1676         PL_realtokenstart = -1;
1677     }
1678     if (PL_skipwhite) {
1679         if (!*svp)
1680             *svp = newSVpvs("");
1681         sv_setsv(*svp, PL_skipwhite);
1682         sv_free(PL_skipwhite);
1683         PL_skipwhite = 0;
1684     }
1685
1686     return s;
1687 }
1688 #endif
1689
1690 STATIC void
1691 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1692 {
1693     AV *av = CopFILEAVx(PL_curcop);
1694     if (av) {
1695         SV * const sv = newSV_type(SVt_PVMG);
1696         if (orig_sv)
1697             sv_setsv(sv, orig_sv);
1698         else
1699             sv_setpvn(sv, buf, len);
1700         (void)SvIOK_on(sv);
1701         SvIV_set(sv, 0);
1702         av_store(av, (I32)CopLINE(PL_curcop), sv);
1703     }
1704 }
1705
1706 /*
1707  * S_skipspace
1708  * Called to gobble the appropriate amount and type of whitespace.
1709  * Skips comments as well.
1710  */
1711
1712 STATIC char *
1713 S_skipspace(pTHX_ register char *s)
1714 {
1715 #ifdef PERL_MAD
1716     char *start = s;
1717 #endif /* PERL_MAD */
1718     PERL_ARGS_ASSERT_SKIPSPACE;
1719 #ifdef PERL_MAD
1720     if (PL_skipwhite) {
1721         sv_free(PL_skipwhite);
1722         PL_skipwhite = NULL;
1723     }
1724 #endif /* PERL_MAD */
1725     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1726         while (s < PL_bufend && SPACE_OR_TAB(*s))
1727             s++;
1728     } else {
1729         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1730         PL_bufptr = s;
1731         lex_read_space(LEX_KEEP_PREVIOUS |
1732                 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1733                     LEX_NO_NEXT_CHUNK : 0));
1734         s = PL_bufptr;
1735         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1736         if (PL_linestart > PL_bufptr)
1737             PL_bufptr = PL_linestart;
1738         return s;
1739     }
1740 #ifdef PERL_MAD
1741     if (PL_madskills)
1742         PL_skipwhite = newSVpvn(start, s-start);
1743 #endif /* PERL_MAD */
1744     return s;
1745 }
1746
1747 /*
1748  * S_check_uni
1749  * Check the unary operators to ensure there's no ambiguity in how they're
1750  * used.  An ambiguous piece of code would be:
1751  *     rand + 5
1752  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1753  * the +5 is its argument.
1754  */
1755
1756 STATIC void
1757 S_check_uni(pTHX)
1758 {
1759     dVAR;
1760     const char *s;
1761     const char *t;
1762
1763     if (PL_oldoldbufptr != PL_last_uni)
1764         return;
1765     while (isSPACE(*PL_last_uni))
1766         PL_last_uni++;
1767     s = PL_last_uni;
1768     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1769         s++;
1770     if ((t = strchr(s, '(')) && t < PL_bufptr)
1771         return;
1772
1773     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1774                      "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1775                      (int)(s - PL_last_uni), PL_last_uni);
1776 }
1777
/*
 * LOP : macro to build a list operator.  The work is done by the
 * subroutine S_lop(); LOP(f,x) expands to a return of lop(f,x,s), so it
 * leaves the enclosing function and needs a variable named s in scope.
 */
1782
1783 #define LOP(f,x) return lop(f,x,s)
1784
1785 /*
1786  * S_lop
1787  * Build a list operator (or something that might be one).  The rules:
1788  *  - if we have a next token, then it's a list operator [why?]
1789  *  - if the next thing is an opening paren, then it's a function
1790  *  - else it's a list operator
1791  */
1792
1793 STATIC I32
1794 S_lop(pTHX_ I32 f, int x, char *s)
1795 {
1796     dVAR;
1797
1798     PERL_ARGS_ASSERT_LOP;
1799
1800     pl_yylval.ival = f;
1801     CLINE;
1802     PL_expect = x;
1803     PL_bufptr = s;
1804     PL_last_lop = PL_oldbufptr;
1805     PL_last_lop_op = (OPCODE)f;
1806 #ifdef PERL_MAD
1807     if (PL_lasttoke)
1808         return REPORT(LSTOP);
1809 #else
1810     if (PL_nexttoke)
1811         return REPORT(LSTOP);
1812 #endif
1813     if (*s == '(')
1814         return REPORT(FUNC);
1815     s = PEEKSPACE(s);
1816     if (*s == '(')
1817         return REPORT(FUNC);
1818     else
1819         return REPORT(LSTOP);
1820 }
1821
1822 #ifdef PERL_MAD
1823  /*
1824  * S_start_force
1825  * Sets up for an eventual force_next().  start_force(0) basically does
1826  * an unshift, while start_force(-1) does a push.  yylex removes items
1827  * on the "pop" end.
1828  */
1829
1830 STATIC void
1831 S_start_force(pTHX_ int where)
1832 {
1833     int i;
1834
1835     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1836         where = PL_lasttoke;
1837     assert(PL_curforce < 0 || PL_curforce == where);
1838     if (PL_curforce != where) {
1839         for (i = PL_lasttoke; i > where; --i) {
1840             PL_nexttoke[i] = PL_nexttoke[i-1];
1841         }
1842         PL_lasttoke++;
1843     }
1844     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1845         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1846     PL_curforce = where;
1847     if (PL_nextwhite) {
1848         if (PL_madskills)
1849             curmad('^', newSVpvs(""));
1850         CURMAD('_', PL_nextwhite);
1851     }
1852 }
1853
1854 STATIC void
1855 S_curmad(pTHX_ char slot, SV *sv)
1856 {
1857     MADPROP **where;
1858
1859     if (!sv)
1860         return;
1861     if (PL_curforce < 0)
1862         where = &PL_thismad;
1863     else
1864         where = &PL_nexttoke[PL_curforce].next_mad;
1865
1866     if (PL_faketokens)
1867         sv_setpvs(sv, "");
1868     else {
1869         if (!IN_BYTES) {
1870             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1871                 SvUTF8_on(sv);
1872             else if (PL_encoding) {
1873                 sv_recode_to_utf8(sv, PL_encoding);
1874             }
1875         }
1876     }
1877
1878     /* keep a slot open for the head of the list? */
1879     if (slot != '_' && *where && (*where)->mad_key == '^') {
1880         (*where)->mad_key = slot;
1881         sv_free(MUTABLE_SV(((*where)->mad_val)));
1882         (*where)->mad_val = (void*)sv;
1883     }
1884     else
1885         addmad(newMADsv(slot, sv), where, 0);
1886 }
1887 #else
1888 #  define start_force(where)    NOOP
1889 #  define curmad(slot, sv)      NOOP
1890 #endif
1891
1892 /*
1893  * S_force_next
1894  * When the lexer realizes it knows the next token (for instance,
1895  * it is reordering tokens for the parser) then it can call S_force_next
1896  * to know what token to return the next time the lexer is called.  Caller
1897  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1898  * and possibly PL_expect to ensure the lexer handles the token correctly.
1899  */
1900
1901 STATIC void
1902 S_force_next(pTHX_ I32 type)
1903 {
1904     dVAR;
1905 #ifdef DEBUGGING
1906     if (DEBUG_T_TEST) {
1907         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1908         tokereport(type, &NEXTVAL_NEXTTOKE);
1909     }
1910 #endif
1911 #ifdef PERL_MAD
1912     if (PL_curforce < 0)
1913         start_force(PL_lasttoke);
1914     PL_nexttoke[PL_curforce].next_type = type;
1915     if (PL_lex_state != LEX_KNOWNEXT)
1916         PL_lex_defer = PL_lex_state;
1917     PL_lex_state = LEX_KNOWNEXT;
1918     PL_lex_expect = PL_expect;
1919     PL_curforce = -1;
1920 #else
1921     PL_nexttype[PL_nexttoke] = type;
1922     PL_nexttoke++;
1923     if (PL_lex_state != LEX_KNOWNEXT) {
1924         PL_lex_defer = PL_lex_state;
1925         PL_lex_expect = PL_expect;
1926         PL_lex_state = LEX_KNOWNEXT;
1927     }
1928 #endif
1929 }
1930
1931 STATIC SV *
1932 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1933 {
1934     dVAR;
1935     SV * const sv = newSVpvn_utf8(start, len,
1936                                   !IN_BYTES
1937                                   && UTF
1938                                   && !is_ascii_string((const U8*)start, len)
1939                                   && is_utf8_string((const U8*)start, len));
1940     return sv;
1941 }
1942
1943 /*
1944  * S_force_word
1945  * When the lexer knows the next thing is a word (for instance, it has
1946  * just seen -> and it knows that the next char is a word char, then
1947  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1948  * lookahead.
1949  *
1950  * Arguments:
1951  *   char *start : buffer position (must be within PL_linestr)
1952  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1953  *   int check_keyword : if true, Perl checks to make sure the word isn't
1954  *       a keyword (do this if the word is a label, e.g. goto FOO)
1955  *   int allow_pack : if true, : characters will also be allowed (require,
1956  *       use, etc. do this)
1957  *   int allow_initial_tick : used by the "sub" lexer only.
1958  */
1959
1960 STATIC char *
1961 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1962 {
1963     dVAR;
1964     register char *s;
1965     STRLEN len;
1966
1967     PERL_ARGS_ASSERT_FORCE_WORD;
1968
1969     start = SKIPSPACE1(start);
1970     s = start;
1971     if (isIDFIRST_lazy_if(s,UTF) ||
1972         (allow_pack && *s == ':') ||
1973         (allow_initial_tick && *s == '\'') )
1974     {
1975         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1976         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1977             return start;
1978         start_force(PL_curforce);
1979         if (PL_madskills)
1980             curmad('X', newSVpvn(start,s-start));
1981         if (token == METHOD) {
1982             s = SKIPSPACE1(s);
1983             if (*s == '(')
1984                 PL_expect = XTERM;
1985             else {
1986                 PL_expect = XOPERATOR;
1987             }
1988         }
1989         if (PL_madskills)
1990             curmad('g', newSVpvs( "forced" ));
1991         NEXTVAL_NEXTTOKE.opval
1992             = (OP*)newSVOP(OP_CONST,0,
1993                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1994         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1995         force_next(token);
1996     }
1997     return s;
1998 }
1999
2000 /*
2001  * S_force_ident
2002  * Called when the lexer wants $foo *foo &foo etc, but the program
2003  * text only contains the "foo" portion.  The first argument is a pointer
2004  * to the "foo", and the second argument is the type symbol to prefix.
2005  * Forces the next token to be a "WORD".
2006  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2007  */
2008
2009 STATIC void
2010 S_force_ident(pTHX_ register const char *s, int kind)
2011 {
2012     dVAR;
2013
2014     PERL_ARGS_ASSERT_FORCE_IDENT;
2015
2016     if (*s) {
2017         const STRLEN len = strlen(s);
2018         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2019         start_force(PL_curforce);
2020         NEXTVAL_NEXTTOKE.opval = o;
2021         force_next(WORD);
2022         if (kind) {
2023             o->op_private = OPpCONST_ENTERED;
2024             /* XXX see note in pp_entereval() for why we forgo typo
2025                warnings if the symbol must be introduced in an eval.
2026                GSAR 96-10-12 */
2027             gv_fetchpvn_flags(s, len,
2028                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2029                               : GV_ADD,
2030                               kind == '$' ? SVt_PV :
2031                               kind == '@' ? SVt_PVAV :
2032                               kind == '%' ? SVt_PVHV :
2033                               SVt_PVGV
2034                               );
2035         }
2036     }
2037 }
2038
2039 NV
2040 Perl_str_to_version(pTHX_ SV *sv)
2041 {
2042     NV retval = 0.0;
2043     NV nshift = 1.0;
2044     STRLEN len;
2045     const char *start = SvPV_const(sv,len);
2046     const char * const end = start + len;
2047     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2048
2049     PERL_ARGS_ASSERT_STR_TO_VERSION;
2050
2051     while (start < end) {
2052         STRLEN skip;
2053         UV n;
2054         if (utf)
2055             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2056         else {
2057             n = *(U8*)start;
2058             skip = 1;
2059         }
2060         retval += ((NV)n)/nshift;
2061         start += skip;
2062         nshift *= 1000;
2063     }
2064     return retval;
2065 }
2066
2067 /*
2068  * S_force_version
2069  * Forces the next token to be a version number.
2070  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071  * and if "guessing" is TRUE, then no new token is created (and the caller
2072  * must use an alternative parsing method).
2073  */
2074
2075 STATIC char *
2076 S_force_version(pTHX_ char *s, int guessing)
2077 {
2078     dVAR;
2079     OP *version = NULL;
2080     char *d;
2081 #ifdef PERL_MAD
2082     I32 startoff = s - SvPVX(PL_linestr);
2083 #endif
2084
2085     PERL_ARGS_ASSERT_FORCE_VERSION;
2086
2087     s = SKIPSPACE1(s);
2088
2089     d = s;
2090     if (*d == 'v')
2091         d++;
2092     if (isDIGIT(*d)) {
2093         while (isDIGIT(*d) || *d == '_' || *d == '.')
2094             d++;
2095 #ifdef PERL_MAD
2096         if (PL_madskills) {
2097             start_force(PL_curforce);
2098             curmad('X', newSVpvn(s,d-s));
2099         }
2100 #endif
2101         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2102             SV *ver;
2103 #ifdef USE_LOCALE_NUMERIC
2104             char *loc = setlocale(LC_NUMERIC, "C");
2105 #endif
2106             s = scan_num(s, &pl_yylval);
2107 #ifdef USE_LOCALE_NUMERIC
2108             setlocale(LC_NUMERIC, loc);
2109 #endif
2110             version = pl_yylval.opval;
2111             ver = cSVOPx(version)->op_sv;
2112             if (SvPOK(ver) && !SvNIOK(ver)) {
2113                 SvUPGRADE(ver, SVt_PVNV);
2114                 SvNV_set(ver, str_to_version(ver));
2115                 SvNOK_on(ver);          /* hint that it is a version */
2116             }
2117         }
2118         else if (guessing) {
2119 #ifdef PERL_MAD
2120             if (PL_madskills) {
2121                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
2122                 PL_nextwhite = 0;
2123                 s = SvPVX(PL_linestr) + startoff;
2124             }
2125 #endif
2126             return s;
2127         }
2128     }
2129
2130 #ifdef PERL_MAD
2131     if (PL_madskills && !version) {
2132         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2133         PL_nextwhite = 0;
2134         s = SvPVX(PL_linestr) + startoff;
2135     }
2136 #endif
2137     /* NOTE: The parser sees the package name and the VERSION swapped */
2138     start_force(PL_curforce);
2139     NEXTVAL_NEXTTOKE.opval = version;
2140     force_next(WORD);
2141
2142     return s;
2143 }
2144
2145 /*
2146  * S_force_strict_version
2147  * Forces the next token to be a version number using strict syntax rules.
2148  */
2149
2150 STATIC char *
2151 S_force_strict_version(pTHX_ char *s)
2152 {
2153     dVAR;
2154     OP *version = NULL;
2155 #ifdef PERL_MAD
2156     I32 startoff = s - SvPVX(PL_linestr);
2157 #endif
2158     const char *errstr = NULL;
2159
2160     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2161
2162     while (isSPACE(*s)) /* leading whitespace */
2163         s++;
2164
2165     if (is_STRICT_VERSION(s,&errstr)) {
2166         SV *ver = newSV(0);
2167         s = (char *)scan_version(s, ver, 0);
2168         version = newSVOP(OP_CONST, 0, ver);
2169     }
2170     else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2171             (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2172     {
2173         PL_bufptr = s;
2174         if (errstr)
2175             yyerror(errstr); /* version required */
2176         return s;
2177     }
2178
2179 #ifdef PERL_MAD
2180     if (PL_madskills && !version) {
2181         sv_free(PL_nextwhite);  /* let next token collect whitespace */
2182         PL_nextwhite = 0;
2183         s = SvPVX(PL_linestr) + startoff;
2184     }
2185 #endif
2186     /* NOTE: The parser sees the package name and the VERSION swapped */
2187     start_force(PL_curforce);
2188     NEXTVAL_NEXTTOKE.opval = version;
2189     force_next(WORD);
2190
2191     return s;
2192 }
2193
2194 /*
2195  * S_tokeq
2196  * Tokenize a quoted string passed in as an SV.  It finds the next
2197  * chunk, up to end of string or a backslash.  It may make a new
2198  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
2199  * turns \\ into \.
2200  */
2201
2202 STATIC SV *
2203 S_tokeq(pTHX_ SV *sv)
2204 {
2205     dVAR;
2206     register char *s;
2207     register char *send;
2208     register char *d;
2209     STRLEN len = 0;
2210     SV *pv = sv;
2211
2212     PERL_ARGS_ASSERT_TOKEQ;
2213
2214     if (!SvLEN(sv))
2215         goto finish;
2216
2217     s = SvPV_force(sv, len);
2218     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2219         goto finish;
2220     send = s + len;
2221     while (s < send && *s != '\\')
2222         s++;
2223     if (s == send)
2224         goto finish;
2225     d = s;
2226     if ( PL_hints & HINT_NEW_STRING ) {
2227         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2228     }
2229     while (s < send) {
2230         if (*s == '\\') {
2231             if (s + 1 < send && (s[1] == '\\'))
2232                 s++;            /* all that, just for this */
2233         }
2234         *d++ = *s++;
2235     }
2236     *d = '\0';
2237     SvCUR_set(sv, d - SvPVX_const(sv));
2238   finish:
2239     if ( PL_hints & HINT_NEW_STRING )
2240        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2241     return sv;
2242 }
2243
2244 /*
2245  * Now come three functions related to double-quote context,
2246  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2247  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2248  * interact with PL_lex_state, and create fake ( ... ) argument lists
2249  * to handle functions and concatenation.
2250  * They assume that whoever calls them will be setting up a fake
2251  * join call, because each subthing puts a ',' after it.  This lets
2252  *   "lower \luPpEr"
2253  * become
2254  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2255  *
2256  * (I'm not sure whether the spurious commas at the end of lcfirst's
2257  * arguments and join's arguments are created or not).
2258  */
2259
2260 /*
2261  * S_sublex_start
2262  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2263  *
2264  * Pattern matching will set PL_lex_op to the pattern-matching op to
2265  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2266  *
2267  * OP_CONST and OP_READLINE are easy--just make the new op and return.
2268  *
2269  * Everything else becomes a FUNC.
2270  *
2271  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2272  * had an OP_CONST or OP_READLINE).  This just sets us up for a
2273  * call to S_sublex_push().
2274  */
2275
2276 STATIC I32
2277 S_sublex_start(pTHX)
2278 {
2279     dVAR;
2280     register const I32 op_type = pl_yylval.ival;
2281
2282     if (op_type == OP_NULL) {
2283         pl_yylval.opval = PL_lex_op;
2284         PL_lex_op = NULL;
2285         return THING;
2286     }
2287     if (op_type == OP_CONST || op_type == OP_READLINE) {
2288         SV *sv = tokeq(PL_lex_stuff);
2289
2290         if (SvTYPE(sv) == SVt_PVIV) {
2291             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2292             STRLEN len;
2293             const char * const p = SvPV_const(sv, len);
2294             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2295             SvREFCNT_dec(sv);
2296             sv = nsv;
2297         }
2298         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2299         PL_lex_stuff = NULL;
2300         /* Allow <FH> // "foo" */
2301         if (op_type == OP_READLINE)
2302             PL_expect = XTERMORDORDOR;
2303         return THING;
2304     }
2305     else if (op_type == OP_BACKTICK && PL_lex_op) {
2306         /* readpipe() vas overriden */
2307         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2308         pl_yylval.opval = PL_lex_op;
2309         PL_lex_op = NULL;
2310         PL_lex_stuff = NULL;
2311         return THING;
2312     }
2313
2314     PL_sublex_info.super_state = PL_lex_state;
2315     PL_sublex_info.sub_inwhat = (U16)op_type;
2316     PL_sublex_info.sub_op = PL_lex_op;
2317     PL_lex_state = LEX_INTERPPUSH;
2318
2319     PL_expect = XTERM;
2320     if (PL_lex_op) {
2321         pl_yylval.opval = PL_lex_op;
2322         PL_lex_op = NULL;
2323         return PMFUNC;
2324     }
2325     else
2326         return FUNC;
2327 }
2328
2329 /*
2330  * S_sublex_push
2331  * Create a new scope to save the lexing state.  The scope will be
2332  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2333  * to the uc, lc, etc. found before.
2334  * Sets PL_lex_state to LEX_INTERPCONCAT.
2335  */
2336
2337 STATIC I32
2338 S_sublex_push(pTHX)
2339 {
2340     dVAR;
2341     ENTER;
2342
2343     PL_lex_state = PL_sublex_info.super_state;
2344     SAVEBOOL(PL_lex_dojoin);
2345     SAVEI32(PL_lex_brackets);
2346     SAVEI32(PL_lex_casemods);
2347     SAVEI32(PL_lex_starts);
2348     SAVEI8(PL_lex_state);
2349     SAVEVPTR(PL_lex_inpat);
2350     SAVEI16(PL_lex_inwhat);
2351     SAVECOPLINE(PL_curcop);
2352     SAVEPPTR(PL_bufptr);
2353     SAVEPPTR(PL_bufend);
2354     SAVEPPTR(PL_oldbufptr);
2355     SAVEPPTR(PL_oldoldbufptr);
2356     SAVEPPTR(PL_last_lop);
2357     SAVEPPTR(PL_last_uni);
2358     SAVEPPTR(PL_linestart);
2359     SAVESPTR(PL_linestr);
2360     SAVEGENERICPV(PL_lex_brackstack);
2361     SAVEGENERICPV(PL_lex_casestack);
2362
2363     PL_linestr = PL_lex_stuff;
2364     PL_lex_stuff = NULL;
2365
2366     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2367         = SvPVX(PL_linestr);
2368     PL_bufend += SvCUR(PL_linestr);
2369     PL_last_lop = PL_last_uni = NULL;
2370     SAVEFREESV(PL_linestr);
2371
2372     PL_lex_dojoin = FALSE;
2373     PL_lex_brackets = 0;
2374     Newx(PL_lex_brackstack, 120, char);
2375     Newx(PL_lex_casestack, 12, char);
2376     PL_lex_casemods = 0;
2377     *PL_lex_casestack = '\0';
2378     PL_lex_starts = 0;
2379     PL_lex_state = LEX_INTERPCONCAT;
2380     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2381
2382     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2383     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2384         PL_lex_inpat = PL_sublex_info.sub_op;
2385     else
2386         PL_lex_inpat = NULL;
2387
2388     return '(';
2389 }
2390
2391 /*
2392  * S_sublex_done
2393  * Restores lexer state after a S_sublex_push.
2394  */
2395
2396 STATIC I32
2397 S_sublex_done(pTHX)
2398 {
2399     dVAR;
2400     if (!PL_lex_starts++) {
2401         SV * const sv = newSVpvs("");
2402         if (SvUTF8(PL_linestr))
2403             SvUTF8_on(sv);
2404         PL_expect = XOPERATOR;
2405         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2406         return THING;
2407     }
2408
2409     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2410         PL_lex_state = LEX_INTERPCASEMOD;
2411         return yylex();
2412     }
2413
2414     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2415     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2416         PL_linestr = PL_lex_repl;
2417         PL_lex_inpat = 0;
2418         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2419         PL_bufend += SvCUR(PL_linestr);
2420         PL_last_lop = PL_last_uni = NULL;
2421         SAVEFREESV(PL_linestr);
2422         PL_lex_dojoin = FALSE;
2423         PL_lex_brackets = 0;
2424         PL_lex_casemods = 0;
2425         *PL_lex_casestack = '\0';
2426         PL_lex_starts = 0;
2427         if (SvEVALED(PL_lex_repl)) {
2428             PL_lex_state = LEX_INTERPNORMAL;
2429             PL_lex_starts++;
2430             /*  we don't clear PL_lex_repl here, so that we can check later
2431                 whether this is an evalled subst; that means we rely on the
2432                 logic to ensure sublex_done() is called again only via the
2433                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2434         }
2435         else {
2436             PL_lex_state = LEX_INTERPCONCAT;
2437             PL_lex_repl = NULL;
2438         }
2439         return ',';
2440     }
2441     else {
2442 #ifdef PERL_MAD
2443         if (PL_madskills) {
2444             if (PL_thiswhite) {
2445                 if (!PL_endwhite)
2446                     PL_endwhite = newSVpvs("");
2447                 sv_catsv(PL_endwhite, PL_thiswhite);
2448                 PL_thiswhite = 0;
2449             }
2450             if (PL_thistoken)
2451                 sv_setpvs(PL_thistoken,"");
2452             else
2453                 PL_realtokenstart = -1;
2454         }
2455 #endif
2456         LEAVE;
2457         PL_bufend = SvPVX(PL_linestr);
2458         PL_bufend += SvCUR(PL_linestr);
2459         PL_expect = XOPERATOR;
2460         PL_sublex_info.sub_inwhat = 0;
2461         return ')';
2462     }
2463 }
2464
2465 /*
2466   scan_const
2467
2468   Extracts a pattern, double-quoted string, or transliteration.  This
2469   is terrifying code.
2470
2471   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2472   processing a pattern (PL_lex_inpat is true), a transliteration
2473   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2474
2475   Returns a pointer to the character scanned up to. If this is
2476   advanced from the start pointer supplied (i.e. if anything was
2477   successfully parsed), will leave an OP for the substring scanned
2478   in pl_yylval. Caller must intuit reason for not parsing further
2479   by looking at the next characters herself.
2480
2481   In patterns:
2482     backslashes:
2483       constants: \N{NAME} only
2484       case and quoting: \U \Q \E
2485     stops on @ and $, but not for $ as tail anchor
2486
2487   In transliterations:
2488     characters are VERY literal, except for - not at the start or end
2489     of the string, which indicates a range. If the range is in bytes,
2490     scan_const expands the range to the full set of intermediate
2491     characters. If the range is in utf8, the hyphen is replaced with
2492     a certain range mark which will be handled by pmtrans() in op.c.
2493
2494   In double-quoted strings:
2495     backslashes:
2496       double-quoted style: \r and \n
2497       constants: \x31, etc.
2498       deprecated backrefs: \1 (in substitution replacements)
2499       case and quoting: \U \Q \E
2500     stops on @ and $
2501
2502   scan_const does *not* construct ops to handle interpolated strings.
2503   It stops processing as soon as it finds an embedded $ or @ variable
2504   and leaves it to the caller to work out what's going on.
2505
2506   embedded arrays (whether in pattern or not) could be:
2507       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2508
2509   $ in double-quoted strings must be the symbol of an embedded scalar.
2510
2511   $ in pattern could be $foo or could be tail anchor.  Assumption:
2512   it's a tail anchor if $ is the last thing in the string, or if it's
2513   followed by one of "()| \r\n\t"
2514
2515   \1 (backreferences) are turned into $1
2516
2517   The structure of the code is
2518       while (there's a character to process) {
2519           handle transliteration ranges
2520           skip regexp comments /(?#comment)/ and codes /(?{code})/
2521           skip #-initiated comments in //x patterns
2522           check for embedded arrays
2523           check for embedded scalars
2524           if (backslash) {
2525               deprecate \1 in substitution replacements
2526               handle string-changing backslashes \l \U \Q \E, etc.
2527               switch (what was escaped) {
2528                   handle \- in a transliteration (becomes a literal -)
2529                   if a pattern and not \N{, go treat as regular character
2530                   handle \132 (octal characters)
2531                   handle \x15 and \x{1234} (hex characters)
2532                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2533                   handle \cV (control characters)
2534                   handle printf-style backslashes (\f, \r, \n, etc)
2535               } (end switch)
2536               continue
2537           } (end if backslash)
2538           handle regular character
2539     } (end while character to read)
2540
2541 */
2542
2543 STATIC char *
2544 S_scan_const(pTHX_ char *start)
2545 {
2546     dVAR;
2547     register char *send = PL_bufend;            /* end of the constant */
2548     SV *sv = newSV(send - start);               /* sv for the constant.  See
2549                                                    note below on sizing. */
2550     register char *s = start;                   /* start of the constant */
2551     register char *d = SvPVX(sv);               /* destination for copies */
2552     bool dorange = FALSE;                       /* are we in a translit range? */
2553     bool didrange = FALSE;                      /* did we just finish a range? */
2554     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
2555     I32  this_utf8 = UTF;                       /* Is the source string assumed
2556                                                    to be UTF8?  But, this can
2557                                                    show as true when the source
2558                                                    isn't utf8, as for example
2559                                                    when it is entirely composed
2560                                                    of hex constants */
2561
2562     /* Note on sizing:  The scanned constant is placed into sv, which is
2563      * initialized by newSV() assuming one byte of output for every byte of
2564      * input.  This routine expects newSV() to allocate an extra byte for a
2565      * trailing NUL, which this routine will append if it gets to the end of
2566      * the input.  There may be more bytes of input than output (eg., \N{LATIN
2567      * CAPITAL LETTER A}), or more output than input if the constant ends up
2568      * recoded to utf8, but each time a construct is found that might increase
2569      * the needed size, SvGROW() is called.  Its size parameter each time is
2570      * based on the best guess estimate at the time, namely the length used so
2571      * far, plus the length the current construct will occupy, plus room for
2572      * the trailing NUL, plus one byte for every input byte still unscanned */
2573
2574     UV uv;
2575 #ifdef EBCDIC
2576     UV literal_endpoint = 0;
2577     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2578 #endif
2579
2580     PERL_ARGS_ASSERT_SCAN_CONST;
2581
2582     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2583         /* If we are doing a trans and we know we want UTF8 set expectation */
2584         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2585         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2586     }
2587
2588
2589     while (s < send || dorange) {
2590
2591         /* get transliterations out of the way (they're most literal) */
2592         if (PL_lex_inwhat == OP_TRANS) {
2593             /* expand a range A-Z to the full set of characters.  AIE! */
2594             if (dorange) {
2595                 I32 i;                          /* current expanded character */
2596                 I32 min;                        /* first character in range */
2597                 I32 max;                        /* last character in range */
2598
2599 #ifdef EBCDIC
2600                 UV uvmax = 0;
2601 #endif
2602
2603                 if (has_utf8
2604 #ifdef EBCDIC
2605                     && !native_range
2606 #endif
2607                     ) {
2608                     char * const c = (char*)utf8_hop((U8*)d, -1);
2609                     char *e = d++;
2610                     while (e-- > c)
2611                         *(e + 1) = *e;
2612                     *c = (char)UTF_TO_NATIVE(0xff);
2613                     /* mark the range as done, and continue */
2614                     dorange = FALSE;
2615                     didrange = TRUE;
2616                     continue;
2617                 }
2618
2619                 i = d - SvPVX_const(sv);                /* remember current offset */
2620 #ifdef EBCDIC
2621                 SvGROW(sv,
2622                        SvLEN(sv) + (has_utf8 ?
2623                                     (512 - UTF_CONTINUATION_MARK +
2624                                      UNISKIP(0x100))
2625                                     : 256));
2626                 /* How many two-byte within 0..255: 128 in UTF-8,
2627                  * 96 in UTF-8-mod. */
2628 #else
2629                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2630 #endif
2631                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2632 #ifdef EBCDIC
2633                 if (has_utf8) {
2634                     int j;
2635                     for (j = 0; j <= 1; j++) {
2636                         char * const c = (char*)utf8_hop((U8*)d, -1);
2637                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2638                         if (j)
2639                             min = (U8)uv;
2640                         else if (uv < 256)
2641                             max = (U8)uv;
2642                         else {
2643                             max = (U8)0xff; /* only to \xff */
2644                             uvmax = uv; /* \x{100} to uvmax */
2645                         }
2646                         d = c; /* eat endpoint chars */
2647                      }
2648                 }
2649                else {
2650 #endif
2651                    d -= 2;              /* eat the first char and the - */
2652                    min = (U8)*d;        /* first char in range */
2653                    max = (U8)d[1];      /* last char in range  */
2654 #ifdef EBCDIC
2655                }
2656 #endif
2657
2658                 if (min > max) {
2659                     Perl_croak(aTHX_
2660                                "Invalid range \"%c-%c\" in transliteration operator",
2661                                (char)min, (char)max);
2662                 }
2663
2664 #ifdef EBCDIC
2665                 if (literal_endpoint == 2 &&
2666                     ((isLOWER(min) && isLOWER(max)) ||
2667                      (isUPPER(min) && isUPPER(max)))) {
2668                     if (isLOWER(min)) {
2669                         for (i = min; i <= max; i++)
2670                             if (isLOWER(i))
2671                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2672                     } else {
2673                         for (i = min; i <= max; i++)
2674                             if (isUPPER(i))
2675                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2676                     }
2677                 }
2678                 else
2679 #endif
2680                     for (i = min; i <= max; i++)
2681 #ifdef EBCDIC
2682                         if (has_utf8) {
2683                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2684                             if (UNI_IS_INVARIANT(ch))
2685                                 *d++ = (U8)i;
2686                             else {
2687                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2688                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2689                             }
2690                         }
2691                         else
2692 #endif
2693                             *d++ = (char)i;
2694
2695 #ifdef EBCDIC
2696                 if (uvmax) {
2697                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2698                     if (uvmax > 0x101)
2699                         *d++ = (char)UTF_TO_NATIVE(0xff);
2700                     if (uvmax > 0x100)
2701                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2702                 }
2703 #endif
2704
2705                 /* mark the range as done, and continue */
2706                 dorange = FALSE;
2707                 didrange = TRUE;
2708 #ifdef EBCDIC
2709                 literal_endpoint = 0;
2710 #endif
2711                 continue;
2712             }
2713
2714             /* range begins (ignore - as first or last char) */
2715             else if (*s == '-' && s+1 < send  && s != start) {
2716                 if (didrange) {
2717                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2718                 }
2719                 if (has_utf8
2720 #ifdef EBCDIC
2721                     && !native_range
2722 #endif
2723                     ) {
2724                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2725                     s++;
2726                     continue;
2727                 }
2728                 dorange = TRUE;
2729                 s++;
2730             }
2731             else {
2732                 didrange = FALSE;
2733 #ifdef EBCDIC
2734                 literal_endpoint = 0;
2735                 native_range = TRUE;
2736 #endif
2737             }
2738         }
2739
2740         /* if we get here, we're not doing a transliteration */
2741
2742         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2743            except for the last char, which will be done separately. */
2744         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2745             if (s[2] == '#') {
2746                 while (s+1 < send && *s != ')')
2747                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2748             }
2749             else if (s[2] == '{' /* This should match regcomp.c */
2750                     || (s[2] == '?' && s[3] == '{'))
2751             {
2752                 I32 count = 1;
2753                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2754                 char c;
2755
2756                 while (count && (c = *regparse)) {
2757                     if (c == '\\' && regparse[1])
2758                         regparse++;
2759                     else if (c == '{')
2760                         count++;
2761                     else if (c == '}')
2762                         count--;
2763                     regparse++;
2764                 }
2765                 if (*regparse != ')')
2766                     regparse--;         /* Leave one char for continuation. */
2767                 while (s < regparse)
2768                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2769             }
2770         }
2771
2772         /* likewise skip #-initiated comments in //x patterns */
2773         else if (*s == '#' && PL_lex_inpat &&
2774           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2775             while (s+1 < send && *s != '\n')
2776                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2777         }
2778
2779         /* check for embedded arrays
2780            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2781            */
2782         else if (*s == '@' && s[1]) {
2783             if (isALNUM_lazy_if(s+1,UTF))
2784                 break;
2785             if (strchr(":'{$", s[1]))
2786                 break;
2787             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2788                 break; /* in regexp, neither @+ nor @- are interpolated */
2789         }
2790
2791         /* check for embedded scalars.  only stop if we're sure it's a
2792            variable.
2793         */
2794         else if (*s == '$') {
2795             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2796                 break;
2797             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2798                 if (s[1] == '\\') {
2799                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2800                                    "Possible unintended interpolation of $\\ in regex");
2801                 }
2802                 break;          /* in regexp, $ might be tail anchor */
2803             }
2804         }
2805
2806         /* End of else if chain - OP_TRANS rejoin rest */
2807
2808         /* backslashes */
2809         if (*s == '\\' && s+1 < send) {
2810             char* e;    /* Can be used for ending '}', etc. */
2811
2812             s++;
2813
2814             /* deprecate \1 in strings and substitution replacements */
2815             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2816                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2817             {
2818                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2819                 *--s = '$';
2820                 break;
2821             }
2822
2823             /* string-change backslash escapes */
2824             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2825                 --s;
2826                 break;
2827             }
2828             /* In a pattern, process \N, but skip any other backslash escapes.
2829              * This is because we don't want to translate an escape sequence
2830              * into a meta symbol and have the regex compiler use the meta
2831              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
2832              * in spite of this, we do have to process \N here while the proper
2833              * charnames handler is in scope.  See bugs #56444 and #62056.
2834              * There is a complication because \N in a pattern may also stand
2835              * for 'match a non-nl', and not mean a charname, in which case its
2836              * processing should be deferred to the regex compiler.  To be a
2837              * charname it must be followed immediately by a '{', and not look
2838              * like \N followed by a curly quantifier, i.e., not something like
2839              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
2840              * quantifier */
2841             else if (PL_lex_inpat
2842                     && (*s != 'N'
2843                         || s[1] != '{'
2844                         || regcurly(s + 1)))
2845             {
2846                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2847                 goto default_action;
2848             }
2849
2850             switch (*s) {
2851
2852             /* quoted - in transliterations */
2853             case '-':
2854                 if (PL_lex_inwhat == OP_TRANS) {
2855                     *d++ = *s++;
2856                     continue;
2857                 }
2858                 /* FALL THROUGH */
2859             default:
2860                 {
2861                     if ((isALPHA(*s) || isDIGIT(*s)))
2862                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2863                                        "Unrecognized escape \\%c passed through",
2864                                        *s);
2865                     /* default action is to copy the quoted character */
2866                     goto default_action;
2867                 }
2868
2869             /* eg. \132 indicates the octal constant 0132 */
2870             case '0': case '1': case '2': case '3':
2871             case '4': case '5': case '6': case '7':
2872                 {
2873                     I32 flags = 0;
2874                     STRLEN len = 3;
2875                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2876                     s += len;
2877                 }
2878                 goto NUM_ESCAPE_INSERT;
2879
2880             /* eg. \x24 indicates the hex constant 0x24 */
2881             case 'x':
2882                 ++s;
2883                 if (*s == '{') {
2884                     char* const e = strchr(s, '}');
2885                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2886                       PERL_SCAN_DISALLOW_PREFIX;
2887                     STRLEN len;
2888
2889                     ++s;
2890                     if (!e) {
2891                         yyerror("Missing right brace on \\x{}");
2892                         continue;
2893                     }
2894                     len = e - s;
2895                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2896                     s = e + 1;
2897                 }
2898                 else {
2899                     {
2900                         STRLEN len = 2;
2901                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2902                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2903                         s += len;
2904                     }
2905                 }
2906
2907               NUM_ESCAPE_INSERT:
2908                 /* Insert oct or hex escaped character.  There will always be
2909                  * enough room in sv since such escapes will be longer than any
2910                  * UTF-8 sequence they can end up as, except if they force us
2911                  * to recode the rest of the string into utf8 */
2912
2913                 /* Here uv is the ordinal of the next character being added in
2914                  * unicode (converted from native). */
2915                 if (!UNI_IS_INVARIANT(uv)) {
2916                     if (!has_utf8 && uv > 255) {
2917                         /* Might need to recode whatever we have accumulated so
2918                          * far if it contains any chars variant in utf8 or
2919                          * utf-ebcdic. */
2920
2921                         SvCUR_set(sv, d - SvPVX_const(sv));
2922                         SvPOK_on(sv);
2923                         *d = '\0';
2924                         /* See Note on sizing above.  */
2925                         sv_utf8_upgrade_flags_grow(sv,
2926                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2927                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2928                         d = SvPVX(sv) + SvCUR(sv);
2929                         has_utf8 = TRUE;
2930                     }
2931
2932                     if (has_utf8) {
2933                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2934                         if (PL_lex_inwhat == OP_TRANS &&
2935                             PL_sublex_info.sub_op) {
2936                             PL_sublex_info.sub_op->op_private |=
2937                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2938                                              : OPpTRANS_TO_UTF);
2939                         }
2940 #ifdef EBCDIC
2941                         if (uv > 255 && !dorange)
2942                             native_range = FALSE;
2943 #endif
2944                     }
2945                     else {
2946                         *d++ = (char)uv;
2947                     }
2948                 }
2949                 else {
2950                     *d++ = (char) uv;
2951                 }
2952                 continue;
2953
2954             case 'N':
2955                 /* In a non-pattern \N must be a named character, like \N{LATIN
2956                  * SMALL LETTER A} or \N{U+0041}.  For patterns, it also can
2957                  * mean to match a non-newline.  For non-patterns, named
2958                  * characters are converted to their string equivalents. In
2959                  * patterns, named characters are not converted to their
2960                  * ultimate forms for the same reasons that other escapes
2961                  * aren't.  Instead, they are converted to the \N{U+...} form
2962                  * to get the value from the charnames that is in effect right
2963                  * now, while preserving the fact that it was a named character
2964                  * so that the regex compiler knows this */
2965
2966                 /* This section of code doesn't generally use the
2967                  * NATIVE_TO_NEED() macro to transform the input.  I (khw) did
2968                  * a close examination of this macro and determined it is a
2969                  * no-op except on utfebcdic variant characters.  Every
2970                  * character generated by this that would normally need to be
2971                  * enclosed by this macro is invariant, so the macro is not
2972                  * needed, and would complicate use of copy(). There are other
2973                  * parts of this file where the macro is used inconsistently,
2974                  * but are saved by it being a no-op */
2975
2976                 /* The structure of this section of code (besides checking for
2977                  * errors and upgrading to utf8) is:
2978                  *  Further disambiguate between the two meanings of \N, and if
2979                  *      not a charname, go process it elsewhere
2980                  *  If of form \N{U+...}, pass it through if a pattern;
2981                  *      otherwise convert to utf8
2982                  *  Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2983                  *  pattern; otherwise convert to utf8 */
2984
2985                 /* Here, s points to the 'N'; the test below is guaranteed to
2986                  * succeed if we are being called on a pattern as we already
2987                  * know from a test above that the next character is a '{'.
2988                  * On a non-pattern \N must mean 'named sequence', which
2989                  * requires braces */
2990                 s++;
2991                 if (*s != '{') {
2992                     yyerror("Missing braces on \\N{}");
2993                     continue;
2994                 }
2995                 s++;
2996
2997                 /* If there is no matching '}', it is an error. */
2998                 if (! (e = strchr(s, '}'))) {
2999                     if (! PL_lex_inpat) {
3000                         yyerror("Missing right brace on \\N{}");
3001                     } else {
3002                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3003                     }
3004                     continue;
3005                 }
3006
3007                 /* Here it looks like a named character */
3008
3009                 if (PL_lex_inpat) {
3010
3011                     /* XXX This block is temporary code.  \N{} implies that the
3012                      * pattern is to have Unicode semantics, and therefore
3013                      * currently has to be encoded in utf8.  By putting it in
3014                      * utf8 now, we save a whole pass in the regular expression
3015                      * compiler.  Once that code is changed so Unicode
3016                      * semantics doesn't necessarily have to be in utf8, this
3017                      * block should be removed */
3018                     if (!has_utf8) {
3019                         SvCUR_set(sv, d - SvPVX_const(sv));
3020                         SvPOK_on(sv);
3021                         *d = '\0';
3022                         /* See Note on sizing above.  */
3023                         sv_utf8_upgrade_flags_grow(sv,
3024                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3025                                         /* 5 = '\N{' + cur char + NUL */
3026                                         (STRLEN)(send - s) + 5);
3027                         d = SvPVX(sv) + SvCUR(sv);
3028                         has_utf8 = TRUE;
3029                     }
3030                 }
3031
3032                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3033                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3034                                 | PERL_SCAN_DISALLOW_PREFIX;
3035                     STRLEN len;
3036
3037                     /* For \N{U+...}, the '...' is a unicode value even on
3038                      * EBCDIC machines */
3039                     s += 2;         /* Skip to next char after the 'U+' */
3040                     len = e - s;
3041                     uv = grok_hex(s, &len, &flags, NULL);
3042                     if (len == 0 || len != (STRLEN)(e - s)) {
3043                         yyerror("Invalid hexadecimal number in \\N{U+...}");
3044                         s = e + 1;
3045                         continue;
3046                     }
3047
3048                     if (PL_lex_inpat) {
3049
3050                         /* Pass through to the regex compiler unchanged.  The
3051                          * reason we evaluated the number above is to make sure
3052                          * there wasn't a syntax error. */
3053                         s -= 5;     /* Include the '\N{U+' */
3054                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
3055                         d += e - s + 1;
3056                     }
3057                     else {  /* Not a pattern: convert the hex to string */
3058
3059                          /* If destination is not in utf8, unconditionally
3060                           * recode it to be so.  This is because \N{} implies
3061                           * Unicode semantics, and scalars have to be in utf8
3062                           * to guarantee those semantics */
3063                         if (! has_utf8) {
3064                             SvCUR_set(sv, d - SvPVX_const(sv));
3065                             SvPOK_on(sv);
3066                             *d = '\0';
3067                             /* See Note on sizing above.  */
3068                             sv_utf8_upgrade_flags_grow(
3069                                         sv,
3070                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3071                                         UNISKIP(uv) + (STRLEN)(send - e) + 1);
3072                             d = SvPVX(sv) + SvCUR(sv);
3073                             has_utf8 = TRUE;
3074                         }
3075
3076                         /* Add the string to the output */
3077                         if (UNI_IS_INVARIANT(uv)) {
3078                             *d++ = (char) uv;
3079                         }
3080                         else d = (char*)uvuni_to_utf8((U8*)d, uv);
3081                     }
3082                 }
3083                 else { /* Here is \N{NAME} but not \N{U+...}. */
3084
3085                     SV *res;            /* result from charnames */
3086                     const char *str;    /* the string in 'res' */
3087                     STRLEN len;         /* its length */
3088
3089                     /* Get the value for NAME */
3090                     res = newSVpvn(s, e - s);
3091                     res = new_constant( NULL, 0, "charnames",
3092                                         /* includes all of: \N{...} */
3093                                         res, NULL, s - 3, e - s + 4 );
3094
3095                     /* Most likely res will be in utf8 already since the
3096                      * standard charnames uses pack U, but a custom translator
3097                      * can leave it otherwise, so make sure.  XXX This can be
3098                      * revisited to not have charnames use utf8 for characters
3099                      * that don't need it when regexes don't have to be in utf8
3100                      * for Unicode semantics.  If doing so, remember EBCDIC */
3101                     sv_utf8_upgrade(res);
3102                     str = SvPV_const(res, len);
3103
3104                     /* Don't accept malformed input */
3105                     if (! is_utf8_string((U8 *) str, len)) {
3106                         yyerror("Malformed UTF-8 returned by \\N");
3107                     }
3108                     else if (PL_lex_inpat) {
3109
3110                         if (! len) { /* The name resolved to an empty string */
3111                             Copy("\\N{}", d, 4, char);
3112                             d += 4;
3113                         }
3114                         else {
3115                             /* In order to not lose information for the regex
3116                             * compiler, pass the result in the specially made
3117                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3118                             * the code points in hex of each character
3119                             * returned by charnames */
3120
3121                             const char *str_end = str + len;
3122                             STRLEN char_length;     /* cur char's byte length */
3123                             STRLEN output_length;   /* and the number of bytes
3124                                                        after this is translated
3125                                                        into hex digits */
3126                             const STRLEN off = d - SvPVX_const(sv);
3127
3128                             /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3129                              * max('U+', '.'); and 1 for NUL */
3130                             char hex_string[2 * UTF8_MAXBYTES + 5];
3131
3132                             /* Get the first character of the result. */
3133                             U32 uv = utf8n_to_uvuni((U8 *) str,
3134                                                     len,
3135                                                     &char_length,
3136                                                     UTF8_ALLOW_ANYUV);
3137
3138                             /* The call to is_utf8_string() above hopefully
3139                              * guarantees that there won't be an error.  But
3140                              * it's easy here to make sure.  The function just
3141                              * above warns and returns 0 if invalid utf8, but
3142                              * it can also return 0 if the input is validly a
3143                              * NUL. Disambiguate */
3144                             if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3145                                 uv = UNICODE_REPLACEMENT;
3146                             }
3147
3148                             /* Convert first code point to hex, including the
3149                              * boiler plate before it */
3150                             sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3151                             output_length = strlen(hex_string);
3152
3153                             /* Make sure there is enough space to hold it */
3154                             d = off + SvGROW(sv, off
3155                                                  + output_length
3156                                                  + (STRLEN)(send - e)
3157                                                  + 2);  /* '}' + NUL */
3158                             /* And output it */
3159                             Copy(hex_string, d, output_length, char);
3160                             d += output_length;
3161
3162                             /* For each subsequent character, append dot and
3163                              * its ordinal in hex */
3164                             while ((str += char_length) < str_end) {
3165                                 const STRLEN off = d - SvPVX_const(sv);
3166                                 U32 uv = utf8n_to_uvuni((U8 *) str,
3167                                                         str_end - str,
3168                                                         &char_length,
3169                                                         UTF8_ALLOW_ANYUV);
3170                                 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3171                                     uv = UNICODE_REPLACEMENT;
3172                                 }
3173
3174                                 sprintf(hex_string, ".%X", (unsigned int) uv);
3175                                 output_length = strlen(hex_string);
3176
3177                                 d = off + SvGROW(sv, off
3178                                                      + output_length
3179                                                      + (STRLEN)(send - e)
3180                                                      + 2);      /* '}' +  NUL */
3181                                 Copy(hex_string, d, output_length, char);
3182                                 d += output_length;
3183                             }
3184
3185                             *d++ = '}'; /* Done.  Add the trailing brace */
3186                         }
3187                     }
3188                     else { /* Here, not in a pattern.  Convert the name to a
3189                             * string. */
3190
3191                          /* If destination is not in utf8, unconditionally
3192                           * recode it to be so.  This is because \N{} implies
3193                           * Unicode semantics, and scalars have to be in utf8
3194                           * to guarantee those semantics */
3195                         if (! has_utf8) {
3196                             SvCUR_set(sv, d - SvPVX_const(sv));
3197                             SvPOK_on(sv);
3198                             *d = '\0';
3199                             /* See Note on sizing above.  */
3200                             sv_utf8_upgrade_flags_grow(sv,
3201                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3202                                                 len + (STRLEN)(send - s) + 1);
3203                             d = SvPVX(sv) + SvCUR(sv);
3204                             has_utf8 = TRUE;
3205                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3206
3207                             /* See Note on sizing above.  (NOTE: SvCUR() is not
3208                              * set correctly here). */
3209                             const STRLEN off = d - SvPVX_const(sv);
3210                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3211                         }
3212                         Copy(str, d, len, char);
3213                         d += len;
3214                     }
3215                     SvREFCNT_dec(res);
3216
3217                     /* Deprecate non-approved name syntax */
3218                     if (ckWARN_d(WARN_DEPRECATED)) {
3219                         bool problematic = FALSE;
3220                         char* i = s;
3221
3222                         /* For non-ut8 input, look to see that the first
3223                          * character is an alpha, then loop through the rest
3224                          * checking that each is a continuation */
3225                         if (! this_utf8) {
3226                             if (! isALPHAU(*i)) problematic = TRUE;
3227                             else for (i = s + 1; i < e; i++) {
3228                                 if (isCHARNAME_CONT(*i)) continue;
3229                                 problematic = TRUE;
3230                                 break;
3231                             }
3232                         }
3233                         else {
3234                             /* Similarly for utf8.  For invariants can check
3235                              * directly.  We accept anything above the latin1
3236                              * range because it is immaterial to Perl if it is
3237                              * correct or not, and is expensive to check.  But
3238                              * it is fairly easy in the latin1 range to convert
3239                              * the variants into a single character and check
3240                              * those */
3241                             if (UTF8_IS_INVARIANT(*i)) {
3242                                 if (! isALPHAU(*i)) problematic = TRUE;
3243                             } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3244                                 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3245                                                                             *(i+1)))))
3246                                 {
3247                                     problematic = TRUE;
3248                                 }
3249                             }
3250                             if (! problematic) for (i = s + UTF8SKIP(s);
3251                                                     i < e;
3252                                                     i+= UTF8SKIP(i))
3253                             {
3254                                 if (UTF8_IS_INVARIANT(*i)) {
3255                                     if (isCHARNAME_CONT(*i)) continue;
3256                                 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3257                                     continue;
3258                                 } else if (isCHARNAME_CONT(
3259                                             UNI_TO_NATIVE(
3260                                             UTF8_ACCUMULATE(*i, *(i+1)))))
3261                                 {
3262                                     continue;
3263                                 }
3264                                 problematic = TRUE;
3265                                 break;
3266                             }
3267                         }
3268                         if (problematic) {
3269                             /* The e-i passed to the final %.*s makes sure that
3270                              * should the trailing NUL be missing that this
3271                              * print won't run off the end of the string */
3272                             Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3273                                 "Deprecated character in \\N{...}; marked by <-- HERE  in \\N{%.*s<-- HERE %.*s", i - s + 1, s, e - i, i + 1);
3274                         }
3275                     }
3276                 } /* End \N{NAME} */
3277 #ifdef EBCDIC
3278                 if (!dorange)
3279                     native_range = FALSE; /* \N{} is defined to be Unicode */
3280 #endif
3281                 s = e + 1;  /* Point to just after the '}' */
3282                 continue;
3283
3284             /* \c is a control character */
3285             case 'c':
3286                 s++;
3287                 if (s < send) {
3288                     *d++ = grok_bslash_c(*s++, 1);
3289                 }
3290                 else {
3291                     yyerror("Missing control char name in \\c");
3292                 }
3293                 continue;
3294
3295             /* printf-style backslashes, formfeeds, newlines, etc */
3296             case 'b':
3297                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3298                 break;
3299             case 'n':
3300                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3301                 break;
3302             case 'r':
3303                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3304                 break;
3305             case 'f':
3306                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3307                 break;
3308             case 't':
3309                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3310                 break;
3311             case 'e':
3312                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3313                 break;
3314             case 'a':
3315                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3316                 break;
3317             } /* end switch */
3318
3319             s++;
3320             continue;
3321         } /* end if (backslash) */
3322 #ifdef EBCDIC
3323         else
3324             literal_endpoint++;
3325 #endif
3326
3327     default_action:
3328         /* If we started with encoded form, or already know we want it,
3329            then encode the next character */
3330         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3331             STRLEN len  = 1;
3332
3333
3334             /* One might think that it is wasted effort in the case of the
3335              * source being utf8 (this_utf8 == TRUE) to take the next character
3336              * in the source, convert it to an unsigned value, and then convert
3337              * it back again.  But the source has not been validated here.  The
3338              * routine that does the conversion checks for errors like
3339              * malformed utf8 */
3340
3341             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3342             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3343             if (!has_utf8) {
3344                 SvCUR_set(sv, d - SvPVX_const(sv));
3345                 SvPOK_on(sv);
3346                 *d = '\0';
3347                 /* See Note on sizing above.  */
3348                 sv_utf8_upgrade_flags_grow(sv,
3349                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3350                                         need + (STRLEN)(send - s) + 1);
3351                 d = SvPVX(sv) + SvCUR(sv);
3352                 has_utf8 = TRUE;
3353             } else if (need > len) {
3354                 /* encoded value larger than old, may need extra space (NOTE:
3355                  * SvCUR() is not set correctly here).   See Note on sizing
3356                  * above.  */
3357                 const STRLEN off = d - SvPVX_const(sv);
3358                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3359             }
3360             s += len;
3361
3362             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3363 #ifdef EBCDIC
3364             if (uv > 255 && !dorange)
3365                 native_range = FALSE;
3366 #endif
3367         }
3368         else {
3369             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3370         }
3371     } /* while loop to process each character */
3372
3373     /* terminate the string and set up the sv */
3374     *d = '\0';
3375     SvCUR_set(sv, d - SvPVX_const(sv));
3376     if (SvCUR(sv) >= SvLEN(sv))
3377         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3378
3379     SvPOK_on(sv);
3380     if (PL_encoding && !has_utf8) {
3381         sv_recode_to_utf8(sv, PL_encoding);
3382         if (SvUTF8(sv))
3383             has_utf8 = TRUE;
3384     }
3385     if (has_utf8) {
3386         SvUTF8_on(sv);
3387         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3388             PL_sublex_info.sub_op->op_private |=
3389                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3390         }
3391     }
3392
3393     /* shrink the sv if we allocated more than we used */
3394     if (SvCUR(sv) + 5 < SvLEN(sv)) {
3395         SvPV_shrink_to_cur(sv);
3396     }
3397
3398     /* return the substring (via pl_yylval) only if we parsed anything */
3399     if (s > PL_bufptr) {
3400         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3401             const char *const key = PL_lex_inpat ? "qr" : "q";
3402             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3403             const char *type;
3404             STRLEN typelen;
3405
3406             if (PL_lex_inwhat == OP_TRANS) {
3407                 type = "tr";
3408                 typelen = 2;
3409             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3410                 type = "s";
3411                 typelen = 1;
3412             } else  {
3413                 type = "qq";
3414                 typelen = 2;
3415             }
3416
3417             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3418                                 type, typelen);
3419         }
3420         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3421     } else
3422         SvREFCNT_dec(sv);
3423     return s;
3424 }
3425
3426 /* S_intuit_more
3427  * Returns TRUE if there's more to the expression (e.g., a subscript),
3428  * FALSE otherwise.
3429  *
3430  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3431  *
3432  * ->[ and ->{ return TRUE
3433  * { and [ outside a pattern are always subscripts, so return TRUE
3434  * if we're outside a pattern and it's not { or [, then return FALSE
3435  * if we're in a pattern and the first char is a {
3436  *   {4,5} (any digits around the comma) returns FALSE
3437  * if we're in a pattern and the first char is a [
3438  *   [] returns FALSE
3439  *   [SOMETHING] has a funky algorithm to decide whether it's a
3440  *      character class or not.  It has to deal with things like
3441  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3442  * anything else returns TRUE
3443  */
3444
3445 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3446
3447 STATIC int
3448 S_intuit_more(pTHX_ register char *s)
3449 {
3450     dVAR;
3451
3452     PERL_ARGS_ASSERT_INTUIT_MORE;
3453
3454     if (PL_lex_brackets)
3455         return TRUE;
3456     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3457         return TRUE;
3458     if (*s != '{' && *s != '[')
3459         return FALSE;
3460     if (!PL_lex_inpat)
3461         return TRUE;
3462
3463     /* In a pattern, so maybe we have {n,m}. */
3464     if (*s == '{') {
3465         s++;
3466         if (!isDIGIT(*s))
3467             return TRUE;
3468         while (isDIGIT(*s))
3469             s++;
3470         if (*s == ',')
3471             s++;
3472         while (isDIGIT(*s))
3473             s++;
3474         if (*s == '}')
3475             return FALSE;
3476         return TRUE;
3477
3478     }
3479
3480     /* On the other hand, maybe we have a character class */
3481
3482     s++;
3483     if (*s == ']' || *s == '^')
3484         return FALSE;
3485     else {
3486         /* this is terrifying, and it works */
3487         int weight = 2;         /* let's weigh the evidence */
3488         char seen[256];
3489         unsigned char un_char = 255, last_un_char;
3490         const char * const send = strchr(s,']');
3491         char tmpbuf[sizeof PL_tokenbuf * 4];
3492
3493         if (!send)              /* has to be an expression */
3494             return TRUE;
3495
3496         Zero(seen,256,char);
3497         if (*s == '$')
3498             weight -= 3;
3499         else if (isDIGIT(*s)) {
3500             if (s[1] != ']') {
3501                 if (isDIGIT(s[1]) && s[2] == ']')
3502                     weight -= 10;
3503             }
3504             else
3505                 weight -= 100;
3506         }
3507         for (; s < send; s++) {
3508             last_un_char = un_char;
3509             un_char = (unsigned char)*s;
3510             switch (*s) {
3511             case '@':
3512             case '&':
3513             case '$':
3514                 weight -= seen[un_char] * 10;
3515                 if (isALNUM_lazy_if(s+1,UTF)) {
3516                     int len;
3517                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3518                     len = (int)strlen(tmpbuf);
3519                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3520                         weight -= 100;
3521                     else
3522                         weight -= 10;
3523                 }
3524                 else if (*s == '$' && s[1] &&
3525                   strchr("[#!%*<>()-=",s[1])) {
3526                     if (/*{*/ strchr("])} =",s[2]))
3527                         weight -= 10;
3528                     else
3529                         weight -= 1;
3530                 }
3531                 break;
3532             case '\\':
3533                 un_char = 254;
3534                 if (s[1]) {
3535                     if (strchr("wds]",s[1]))
3536                         weight += 100;
3537                     else if (seen[(U8)'\''] || seen[(U8)'"'])
3538                         weight += 1;
3539                     else if (strchr("rnftbxcav",s[1]))
3540                         weight += 40;
3541                     else if (isDIGIT(s[1])) {
3542                         weight += 40;
3543                         while (s[1] && isDIGIT(s[1]))
3544                             s++;
3545                     }
3546                 }
3547                 else
3548                     weight += 100;
3549                 break;
3550             case '-':
3551                 if (s[1] == '\\')
3552                     weight += 50;
3553                 if (strchr("aA01! ",last_un_char))
3554                     weight += 30;
3555                 if (strchr("zZ79~",s[1]))
3556                     weight += 30;
3557                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3558                     weight -= 5;        /* cope with negative subscript */
3559                 break;
3560             default:
3561                 if (!isALNUM(last_un_char)
3562                     && !(last_un_char == '$' || last_un_char == '@'
3563                          || last_un_char == '&')
3564                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3565                     char *d = tmpbuf;
3566                     while (isALPHA(*s))
3567                         *d++ = *s++;
3568                     *d = '\0';
3569                     if (keyword(tmpbuf, d - tmpbuf, 0))
3570                         weight -= 150;
3571                 }
3572                 if (un_char == last_un_char + 1)
3573                     weight += 5;
3574                 weight -= seen[un_char];
3575                 break;
3576             }
3577             seen[un_char]++;
3578         }
3579         if (weight >= 0)        /* probably a character class */
3580             return FALSE;
3581     }
3582
3583     return TRUE;
3584 }
3585
3586 /*
3587  * S_intuit_method
3588  *
3589  * Does all the checking to disambiguate
3590  *   foo bar
3591  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
3592  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3593  *
3594  * First argument is the stuff after the first token, e.g. "bar".
3595  *
3596  * Not a method if bar is a filehandle.
3597  * Not a method if foo is a subroutine prototyped to take a filehandle.
3598  * Not a method if it's really "Foo $bar"
3599  * Method if it's "foo $bar"
3600  * Not a method if it's really "print foo $bar"
3601  * Method if it's really "foo package::" (interpreted as package->foo)
3602  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3603  * Not a method if bar is a filehandle or package, but is quoted with
3604  *   =>
3605  */
3606
3607 STATIC int
3608 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3609 {
3610     dVAR;
3611     char *s = start + (*start == '$');
3612     char tmpbuf[sizeof PL_tokenbuf];
3613     STRLEN len;
3614     GV* indirgv;
3615 #ifdef PERL_MAD
3616     int soff;
3617 #endif
3618
3619     PERL_ARGS_ASSERT_INTUIT_METHOD;
3620
3621     if (gv) {
3622         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3623             return 0;
3624         if (cv) {
3625             if (SvPOK(cv)) {
3626                 const char *proto = SvPVX_const(cv);
3627                 if (proto) {
3628                     if (*proto == ';')
3629                         proto++;
3630                     if (*proto == '*')
3631                         return 0;
3632                 }
3633             }
3634         } else
3635             gv = NULL;
3636     }
3637     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3638     /* start is the beginning of the possible filehandle/object,
3639      * and s is the end of it
3640      * tmpbuf is a copy of it
3641      */
3642
3643     if (*start == '$') {
3644         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3645                 isUPPER(*PL_tokenbuf))
3646             return 0;
3647 #ifdef PERL_MAD
3648         len = start - SvPVX(PL_linestr);
3649 #endif
3650         s = PEEKSPACE(s);
3651 #ifdef PERL_MAD
3652         start = SvPVX(PL_linestr) + len;
3653 #endif
3654         PL_bufptr = start;
3655         PL_expect = XREF;
3656         return *s == '(' ? FUNCMETH : METHOD;
3657     }
3658     if (!keyword(tmpbuf, len, 0)) {
3659         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3660             len -= 2;
3661             tmpbuf[len] = '\0';
3662 #ifdef PERL_MAD
3663             soff = s - SvPVX(PL_linestr);
3664 #endif
3665             goto bare_package;
3666         }
3667         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3668         if (indirgv && GvCVu(indirgv))
3669             return 0;
3670         /* filehandle or package name makes it a method */
3671         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3672 #ifdef PERL_MAD
3673             soff = s - SvPVX(PL_linestr);
3674 #endif
3675             s = PEEKSPACE(s);
3676             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3677                 return 0;       /* no assumptions -- "=>" quotes bearword */
3678       bare_package:
3679             start_force(PL_curforce);
3680             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3681                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3682             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3683             if (PL_madskills)
3684                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3685             PL_expect = XTERM;
3686             force_next(WORD);
3687             PL_bufptr = s;
3688 #ifdef PERL_MAD
3689             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3690 #endif
3691             return *s == '(' ? FUNCMETH : METHOD;
3692         }
3693     }
3694     return 0;
3695 }
3696
3697 /* Encoded script support. filter_add() effectively inserts a
3698  * 'pre-processing' function into the current source input stream.
3699  * Note that the filter function only applies to the current source file
3700  * (e.g., it will not affect files 'require'd or 'use'd by this one).
3701  *
3702  * The datasv parameter (which may be NULL) can be used to pass
3703  * private data to this instance of the filter. The filter function
3704  * can recover the SV using the FILTER_DATA macro and use it to
3705  * store private buffers and state information.
3706  *
3707  * The supplied datasv parameter is upgraded to a PVIO type
3708  * and the IoDIRP/IoANY field is used to store the function pointer,
3709  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3710  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3711  * private use must be set using malloc'd pointers.
3712  */
3713
3714 SV *
3715 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3716 {
3717     dVAR;
3718     if (!funcp)
3719         return NULL;
3720
3721     if (!PL_parser)
3722         return NULL;
3723
3724     if (!PL_rsfp_filters)
3725         PL_rsfp_filters = newAV();
3726     if (!datasv)
3727         datasv = newSV(0);
3728     SvUPGRADE(datasv, SVt_PVIO);
3729     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3730     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3731     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3732                           FPTR2DPTR(void *, IoANY(datasv)),
3733                           SvPV_nolen(datasv)));
3734     av_unshift(PL_rsfp_filters, 1);
3735     av_store(PL_rsfp_filters, 0, datasv) ;
3736     return(datasv);
3737 }
3738
3739
3740 /* Delete most recently added instance of this filter function. */
3741 void
3742 Perl_filter_del(pTHX_ filter_t funcp)
3743 {
3744     dVAR;
3745     SV *datasv;
3746
3747     PERL_ARGS_ASSERT_FILTER_DEL;
3748
3749 #ifdef DEBUGGING
3750     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3751                           FPTR2DPTR(void*, funcp)));
3752 #endif
3753     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3754         return;
3755     /* if filter is on top of stack (usual case) just pop it off */
3756     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3757     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3758         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
3759         IoANY(datasv) = (void *)NULL;
3760         sv_free(av_pop(PL_rsfp_filters));
3761
3762         return;
3763     }
3764     /* we need to search for the correct entry and clear it     */
3765     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3766 }
3767
3768
3769 /* Invoke the idxth filter function for the current rsfp.        */
3770 /* maxlen 0 = read one text line */
3771 I32
3772 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3773 {
3774     dVAR;
3775     filter_t funcp;
3776     SV *datasv = NULL;
3777     /* This API is bad. It should have been using unsigned int for maxlen.
3778        Not sure if we want to change the API, but if not we should sanity
3779        check the value here.  */
3780     const unsigned int correct_length
3781         = maxlen < 0 ?
3782 #ifdef PERL_MICRO
3783         0x7FFFFFFF
3784 #else
3785         INT_MAX
3786 #endif
3787         : maxlen;
3788
3789     PERL_ARGS_ASSERT_FILTER_READ;
3790
3791     if (!PL_parser || !PL_rsfp_filters)
3792         return -1;
3793     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
3794         /* Provide a default input filter to make life easy.    */
3795         /* Note that we append to the line. This is handy.      */
3796         DEBUG_P(PerlIO_printf(Perl_debug_log,
3797                               "filter_read %d: from rsfp\n", idx));
3798         if (correct_length) {
3799             /* Want a block */
3800             int len ;
3801             const int old_len = SvCUR(buf_sv);
3802
3803             /* ensure buf_sv is large enough */
3804             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3805             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3806                                    correct_length)) <= 0) {
3807                 if (PerlIO_error(PL_rsfp))
3808                     return -1;          /* error */
3809                 else
3810                     return 0 ;          /* end of file */
3811             }
3812             SvCUR_set(buf_sv, old_len + len) ;
3813             SvPVX(buf_sv)[old_len + len] = '\0';
3814         } else {
3815             /* Want a line */
3816             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3817                 if (PerlIO_error(PL_rsfp))
3818                     return -1;          /* error */
3819                 else
3820                     return 0 ;          /* end of file */
3821             }
3822         }
3823         return SvCUR(buf_sv);
3824     }
3825     /* Skip this filter slot if filter has been deleted */
3826     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3827         DEBUG_P(PerlIO_printf(Perl_debug_log,
3828                               "filter_read %d: skipped (filter deleted)\n",
3829                               idx));
3830         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3831     }
3832     /* Get function pointer hidden within datasv        */
3833     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3834     DEBUG_P(PerlIO_printf(Perl_debug_log,
3835                           "filter_read %d: via function %p (%s)\n",
3836                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
3837     /* Call function. The function is expected to       */
3838     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
3839     /* Return: <0:error, =0:eof, >0:not eof             */
3840     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3841 }
3842
3843 STATIC char *
3844 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3845 {
3846     dVAR;
3847
3848     PERL_ARGS_ASSERT_FILTER_GETS;
3849
3850 #ifdef PERL_CR_FILTER
3851     if (!PL_rsfp_filters) {
3852         filter_add(S_cr_textfilter,NULL);
3853     }
3854 #endif
3855     if (PL_rsfp_filters) {
3856         if (!append)
3857             SvCUR_set(sv, 0);   /* start with empty line        */
3858         if (FILTER_READ(0, sv, 0) > 0)
3859             return ( SvPVX(sv) ) ;
3860         else
3861             return NULL ;
3862     }
3863     else
3864         return (sv_gets(sv, PL_rsfp, append));
3865 }
3866
3867 STATIC HV *
3868 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3869 {
3870     dVAR;
3871     GV *gv;
3872
3873     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3874
3875     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3876         return PL_curstash;
3877
3878     if (len > 2 &&
3879         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3880         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3881     {
3882         return GvHV(gv);                        /* Foo:: */
3883     }
3884
3885     /* use constant CLASS => 'MyClass' */
3886     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3887     if (gv && GvCV(gv)) {
3888         SV * const sv = cv_const_sv(GvCV(gv));
3889         if (sv)
3890             pkgname = SvPV_const(sv, len);
3891     }
3892
3893     return gv_stashpvn(pkgname, len, 0);
3894 }
3895
3896 /*
3897  * S_readpipe_override
3898  * Check whether readpipe() is overriden, and generates the appropriate
3899  * optree, provided sublex_start() is called afterwards.
3900  */
3901 STATIC void
3902 S_readpipe_override(pTHX)
3903 {
3904     GV **gvp;
3905     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3906     pl_yylval.ival = OP_BACKTICK;
3907     if ((gv_readpipe
3908                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3909             ||
3910             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3911              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3912              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3913     {
3914         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3915             append_elem(OP_LIST,
3916                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3917                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3918     }
3919 }
3920
3921 #ifdef PERL_MAD
3922  /*
3923  * Perl_madlex
3924  * The intent of this yylex wrapper is to minimize the changes to the
3925  * tokener when we aren't interested in collecting madprops.  It remains
3926  * to be seen how successful this strategy will be...
3927  */
3928
3929 int
3930 Perl_madlex(pTHX)
3931 {
3932     int optype;
3933     char *s = PL_bufptr;
3934
3935     /* make sure PL_thiswhite is initialized */
3936     PL_thiswhite = 0;
3937     PL_thismad = 0;
3938
3939     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3940     if (PL_pending_ident)
3941         return S_pending_ident(aTHX);
3942
3943     /* previous token ate up our whitespace? */
3944     if (!PL_lasttoke && PL_nextwhite) {
3945         PL_thiswhite = PL_nextwhite;
3946         PL_nextwhite = 0;
3947     }
3948
3949     /* isolate the token, and figure out where it is without whitespace */
3950     PL_realtokenstart = -1;
3951     PL_thistoken = 0;
3952     optype = yylex();
3953     s = PL_bufptr;
3954     assert(PL_curforce < 0);
3955
3956     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3957         if (!PL_thistoken) {
3958             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3959                 PL_thistoken = newSVpvs("");
3960             else {
3961                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3962                 PL_thistoken = newSVpvn(tstart, s - tstart);
3963             }
3964         }
3965         if (PL_thismad) /* install head */
3966             CURMAD('X', PL_thistoken);
3967     }
3968
3969     /* last whitespace of a sublex? */
3970     if (optype == ')' && PL_endwhite) {
3971         CURMAD('X', PL_endwhite);
3972     }
3973
3974     if (!PL_thismad) {
3975
3976         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3977         if (!PL_thiswhite && !PL_endwhite && !optype) {
3978             sv_free(PL_thistoken);
3979             PL_thistoken = 0;
3980             return 0;
3981         }
3982
3983         /* put off final whitespace till peg */
3984         if (optype == ';' && !PL_rsfp) {
3985             PL_nextwhite = PL_thiswhite;
3986             PL_thiswhite = 0;
3987         }
3988         else if (PL_thisopen) {
3989             CURMAD('q', PL_thisopen);
3990             if (PL_thistoken)
3991                 sv_free(PL_thistoken);
3992             PL_thistoken = 0;
3993         }
3994         else {
3995             /* Store actual token text as madprop X */
3996             CURMAD('X', PL_thistoken);
3997         }
3998
3999         if (PL_thiswhite) {
4000             /* add preceding whitespace as madprop _ */
4001             CURMAD('_', PL_thiswhite);
4002         }
4003
4004         if (PL_thisstuff) {
4005             /* add quoted material as madprop = */
4006             CURMAD('=', PL_thisstuff);
4007         }
4008
4009         if (PL_thisclose) {
4010             /* add terminating quote as madprop Q */
4011             CURMAD('Q', PL_thisclose);
4012         }
4013     }
4014
4015     /* special processing based on optype */
4016
4017     switch (optype) {
4018
4019     /* opval doesn't need a TOKEN since it can already store mp */
4020     case WORD:
4021     case METHOD:
4022     case FUNCMETH:
4023     case THING:
4024     case PMFUNC:
4025     case PRIVATEREF:
4026     case FUNC0SUB:
4027     case UNIOPSUB:
4028     case LSTOPSUB:
4029         if (pl_yylval.opval)
4030             append_madprops(PL_thismad, pl_yylval.opval, 0);
4031         PL_thismad = 0;
4032         return optype;
4033
4034     /* fake EOF */
4035     case 0:
4036         optype = PEG;
4037         if (PL_endwhite) {
4038             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4039             PL_endwhite = 0;
4040         }
4041         break;
4042
4043     case ']':
4044     case '}':
4045         if (PL_faketokens)
4046             break;
4047         /* remember any fake bracket that lexer is about to discard */
4048         if (PL_lex_brackets == 1 &&
4049             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4050         {
4051             s = PL_bufptr;
4052             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4053                 s++;
4054             if (*s == '}') {
4055                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4056                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4057                 PL_thiswhite = 0;
4058                 PL_bufptr = s - 1;
4059                 break;  /* don't bother looking for trailing comment */
4060             }
4061             else
4062                 s = PL_bufptr;
4063         }
4064         if (optype == ']')
4065             break;
4066         /* FALLTHROUGH */
4067
4068     /* attach a trailing comment to its statement instead of next token */
4069     case ';':
4070         if (PL_faketokens)
4071             break;
4072         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4073             s = PL_bufptr;
4074             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4075                 s++;
4076             if (*s == '\n' || *s == '#') {
4077                 while (s < PL_bufend && *s != '\n')
4078                     s++;
4079                 if (s < PL_bufend)
4080                     s++;
4081                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4082                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4083                 PL_thiswhite = 0;
4084                 PL_bufptr = s;
4085             }
4086         }
4087         break;
4088
4089     /* pval */
4090     case LABEL:
4091         break;
4092
4093     /* ival */
4094     default:
4095         break;
4096
4097     }
4098
4099     /* Create new token struct.  Note: opvals return early above. */
4100     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4101     PL_thismad = 0;
4102     return optype;
4103 }
4104 #endif
4105
4106 /* Lex the operands of a "use" (is_use true) or "no" (is_use false) starting
4107  * at s; stores is_use in pl_yylval.ival and returns the updated position. */
4108 STATIC char *
4109 S_tokenize_use(pTHX_ int is_use, char *s) {
4110     dVAR;
4111     bool want_word = TRUE;      /* still need force_word + force_version? */
4112     PERL_ARGS_ASSERT_TOKENIZE_USE;
4113     if (PL_expect != XSTATE) {  /* reported via yyerror; lexing continues */
4114         const char * const keyword = is_use ? "use" : "no";
4115         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", keyword));
4116     }
4117     s = SKIPSPACE1(s);
4118     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { /* starts like a version */
4119         s = force_version(s, TRUE);
4120         if (*s != ';' && *s != '}')     /* skip space only when not at an end */
4121             s = SKIPSPACE1(s);
4122         want_word = (*s == 'v');
4123         if (*s == ';' || *s == '}') {   /* nothing follows: force a NULL WORD */
4124             start_force(PL_curforce);
4125             NEXTVAL_NEXTTOKE.opval = NULL;
4126             force_next(WORD);
4127         }
4128     }
4129     if (want_word) {
4130         s = force_word(s, WORD, FALSE, TRUE, FALSE);
4131         s = force_version(s, FALSE);
4132     }
4133     pl_yylval.ival = is_use;
4134     return s;
4135 }
4136 #ifdef DEBUGGING /* this table is compiled only when DEBUGGING is defined */
4137     static const char* const exp_name[] = /* names printed after "/X" by the DEBUG_T trace in Perl_yylex, indexed by PL_expect */
4138         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4139           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR" /* NOTE(review): order presumably mirrors the expectation enum -- confirm */
4140         };
4141 #endif
4142
4143 /*
4144   yylex
4145
4146   Works out what to call the token just pulled out of the input
4147   stream.  The yacc parser takes care of taking the ops we return and
4148   stitching them into a tree.
4149
4150   Returns:
4151     PRIVATEREF
4152
4153   Structure:
4154       if read an identifier
4155           if we're in a my declaration
4156               croak if they tried to say my($foo::bar)
4157               build the ops for a my() declaration
4158           if it's an access to a my() variable
4159               are we in a sort block?
4160                   croak if my($a); $a <=> $b
4161               build ops for access to a my() variable
4162           if in a dq string, and they've said @foo and we can't find @foo
4163               croak
4164           build ops for a bareword
4165       if we already built the token before, use it.
4166 */
4167
4168
4169 #ifdef __SC__
4170 #pragma segment Perl_yylex
4171 #endif
4172 int
4173 Perl_yylex(pTHX)
4174 {
4175     dVAR;
4176     register char *s = PL_bufptr;
4177     register char *d;
4178     STRLEN len;
4179     bool bof = FALSE;
4180     U32 fake_eof = 0;
4181
4182     /* orig_keyword, gvp, and gv are initialized here because
4183      * jump to the label just_a_word_zero can bypass their
4184      * initialization later. */
4185     I32 orig_keyword = 0;
4186     GV *gv = NULL;
4187     GV **gvp = NULL;
4188
4189     DEBUG_T( {
4190         SV* tmp = newSVpvs("");
4191         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4192             (IV)CopLINE(PL_curcop),
4193             lex_state_names[PL_lex_state],
4194             exp_name[PL_expect],
4195             pv_display(tmp, s, strlen(s), 0, 60));
4196         SvREFCNT_dec(tmp);
4197     } );
4198     /* check if there's an identifier for us to look at */
4199     if (PL_pending_ident)
4200         return REPORT(S_pending_ident(aTHX));
4201
4202     /* no identifier pending identification */
4203
4204     switch (PL_lex_state) {
4205 #ifdef COMMENTARY
4206     case LEX_NORMAL:            /* Some compilers will produce faster */
4207     case LEX_INTERPNORMAL:      /* code if we comment these out. */
4208         break;
4209 #endif
4210
4211     /* when we've already built the next token, just pull it out of the queue */
4212     case LEX_KNOWNEXT:
4213 #ifdef PERL_MAD
4214         PL_lasttoke--;
4215         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4216         if (PL_madskills) {
4217             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4218             PL_nexttoke[PL_lasttoke].next_mad = 0;
4219             if (PL_thismad && PL_thismad->mad_key == '_') {
4220                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4221                 PL_thismad->mad_val = 0;
4222                 mad_free(PL_thismad);
4223                 PL_thismad = 0;
4224             }
4225         }
4226         if (!PL_lasttoke) {
4227             PL_lex_state = PL_lex_defer;
4228             PL_expect = PL_lex_expect;
4229             PL_lex_defer = LEX_NORMAL;
4230             if (!PL_nexttoke[PL_lasttoke].next_type)
4231                 return yylex();
4232         }
4233 #else
4234         PL_nexttoke--;
4235         pl_yylval = PL_nextval[PL_nexttoke];
4236         if (!PL_nexttoke) {
4237             PL_lex_state = PL_lex_defer;
4238             PL_expect = PL_lex_expect;
4239             PL_lex_defer = LEX_NORMAL;
4240         }
4241 #endif
4242 #ifdef PERL_MAD
4243         /* FIXME - can these be merged?  */
4244         return(PL_nexttoke[PL_lasttoke].next_type);
4245 #else
4246         return REPORT(PL_nexttype[PL_nexttoke]);
4247 #endif
4248
4249     /* interpolated case modifiers like \L \U, including \Q and \E.
4250        when we get here, PL_bufptr is at the \
4251     */
4252     case LEX_INTERPCASEMOD:
4253 #ifdef DEBUGGING
4254         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4255             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4256 #endif
4257         /* handle \E or end of string */
4258         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4259             /* if at a \E */
4260             if (PL_lex_casemods) {
4261                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4262                 PL_lex_casestack[PL_lex_casemods] = '\0';
4263
4264                 if (PL_bufptr != PL_bufend
4265                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4266                     PL_bufptr += 2;
4267                     PL_lex_state = LEX_INTERPCONCAT;
4268 #ifdef PERL_MAD
4269                     if (PL_madskills)
4270                         PL_thistoken = newSVpvs("\\E");
4271 #endif
4272                 }
4273                 return REPORT(')');
4274             }
4275 #ifdef PERL_MAD
4276             while (PL_bufptr != PL_bufend &&
4277               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4278                 if (!PL_thiswhite)
4279                     PL_thiswhite = newSVpvs("");
4280                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4281                 PL_bufptr += 2;
4282             }
4283 #else
4284             if (PL_bufptr != PL_bufend)
4285                 PL_bufptr += 2;
4286 #endif
4287             PL_lex_state = LEX_INTERPCONCAT;
4288             return yylex();
4289         }
4290         else {
4291             DEBUG_T({ PerlIO_printf(Perl_debug_log,
4292               "### Saw case modifier\n"); });
4293             s = PL_bufptr + 1;
4294             if (s[1] == '\\' && s[2] == 'E') {
4295 #ifdef PERL_MAD
4296                 if (!PL_thiswhite)
4297                     PL_thiswhite = newSVpvs("");
4298                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4299 #endif
4300                 PL_bufptr = s + 3;
4301                 PL_lex_state = LEX_INTERPCONCAT;
4302                 return yylex();
4303             }
4304             else {
4305                 I32 tmp;
4306                 if (!PL_madskills) /* when just compiling don't need correct */
4307                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4308                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
4309                 if ((*s == 'L' || *s == 'U') &&
4310                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4311                     PL_lex_casestack[--PL_lex_casemods] = '\0';
4312                     return REPORT(')');
4313                 }
4314                 if (PL_lex_casemods > 10)
4315                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4316                 PL_lex_casestack[PL_lex_casemods++] = *s;
4317                 PL_lex_casestack[PL_lex_casemods] = '\0';
4318                 PL_lex_state = LEX_INTERPCONCAT;
4319                 start_force(PL_curforce);
4320                 NEXTVAL_NEXTTOKE.ival = 0;
4321                 force_next('(');
4322                 start_force(PL_curforce);
4323                 if (*s == 'l')
4324                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4325                 else if (*s == 'u')
4326                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4327                 else if (*s == 'L')
4328                     NEXTVAL_NEXTTOKE.ival = OP_LC;
4329                 else if (*s == 'U')
4330                     NEXTVAL_NEXTTOKE.ival = OP_UC;
4331                 else if (*s == 'Q')
4332                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4333                 else
4334                     Perl_croak(aTHX_ "panic: yylex");
4335                 if (PL_madskills) {
4336                     SV* const tmpsv = newSVpvs("\\ ");
4337                     /* replace the space with the character we want to escape
4338                      */
4339                     SvPVX(tmpsv)[1] = *s;
4340                     curmad('_', tmpsv);
4341                 }
4342                 PL_bufptr = s + 1;
4343             }
4344             force_next(FUNC);
4345             if (PL_lex_starts) {
4346                 s = PL_bufptr;
4347                 PL_lex_starts = 0;
4348 #ifdef PERL_MAD
4349                 if (PL_madskills) {
4350                     if (PL_thistoken)
4351                         sv_free(PL_thistoken);
4352                     PL_thistoken = newSVpvs("");
4353                 }
4354 #endif
4355                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4356                 if (PL_lex_casemods == 1 && PL_lex_inpat)
4357                     OPERATOR(',');
4358                 else
4359                     Aop(OP_CONCAT);
4360             }
4361             else
4362                 return yylex();
4363         }
4364
4365     case LEX_INTERPPUSH:
4366         return REPORT(sublex_push());
4367
4368     case LEX_INTERPSTART:
4369         if (PL_bufptr == PL_bufend)
4370             return REPORT(sublex_done());
4371         DEBUG_T({ PerlIO_printf(Perl_debug_log,
4372               "### Interpolated variable\n"); });
4373         PL_expect = XTERM;
4374         PL_lex_dojoin = (*PL_bufptr == '@');
4375         PL_lex_state = LEX_INTERPNORMAL;
4376         if (PL_lex_dojoin) {
4377             start_force(PL_curforce);
4378             NEXTVAL_NEXTTOKE.ival = 0;
4379             force_next(',');
4380             start_force(PL_curforce);
4381             force_ident("\"", '$');
4382             start_force(PL_curforce);
4383             NEXTVAL_NEXTTOKE.ival = 0;
4384             force_next('$');
4385             start_force(PL_curforce);
4386             NEXTVAL_NEXTTOKE.ival = 0;
4387             force_next('(');
4388             start_force(PL_curforce);
4389             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
4390             force_next(FUNC);
4391         }
4392         if (PL_lex_starts++) {
4393             s = PL_bufptr;
4394 #ifdef PERL_MAD
4395             if (PL_madskills) {
4396                 if (PL_thistoken)
4397                     sv_free(PL_thistoken);
4398                 PL_thistoken = newSVpvs("");
4399             }
4400 #endif
4401             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4402             if (!PL_lex_casemods && PL_lex_inpat)
4403                 OPERATOR(',');
4404             else
4405                 Aop(OP_CONCAT);
4406         }
4407         return yylex();
4408
4409     case LEX_INTERPENDMAYBE:
4410         if (intuit_more(PL_bufptr)) {
4411             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
4412             break;
4413         }
4414         /* FALL THROUGH */
4415
4416     case LEX_INTERPEND:
4417         if (PL_lex_dojoin) {
4418             PL_lex_dojoin = FALSE;
4419             PL_lex_state = LEX_INTERPCONCAT;
4420 #ifdef PERL_MAD
4421             if (PL_madskills) {
4422                 if (PL_thistoken)
4423                     sv_free(PL_thistoken);
4424                 PL_thistoken = newSVpvs("");
4425             }
4426 #endif
4427             return REPORT(')');
4428         }
4429         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4430             && SvEVALED(PL_lex_repl))
4431         {
4432             if (PL_bufptr != PL_bufend)
4433                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4434             PL_lex_repl = NULL;
4435         }
4436         /* FALLTHROUGH */
4437     case LEX_INTERPCONCAT:
4438 #ifdef DEBUGGING
4439         if (PL_lex_brackets)
4440             Perl_croak(aTHX_ "panic: INTERPCONCAT");
4441 #endif
4442         if (PL_bufptr == PL_bufend)
4443             return REPORT(sublex_done());
4444
4445         if (SvIVX(PL_linestr) == '\'') {
4446             SV *sv = newSVsv(PL_linestr);
4447             if (!PL_lex_inpat)
4448                 sv = tokeq(sv);
4449             else if ( PL_hints & HINT_NEW_RE )
4450                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4451             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4452             s = PL_bufend;
4453         }
4454         else {
4455             s = scan_const(PL_bufptr);
4456             if (*s == '\\')
4457                 PL_lex_state = LEX_INTERPCASEMOD;
4458             else
4459                 PL_lex_state = LEX_INTERPSTART;
4460         }
4461
4462         if (s != PL_bufptr) {
4463             start_force(PL_curforce);
4464             if (PL_madskills) {
4465                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4466             }
4467             NEXTVAL_NEXTTOKE = pl_yylval;
4468             PL_expect = XTERM;
4469             force_next(THING);
4470             if (PL_lex_starts++) {
4471 #ifdef PERL_MAD
4472                 if (PL_madskills) {
4473                     if (PL_thistoken)
4474                         sv_free(PL_thistoken);
4475                     PL_thistoken = newSVpvs("");
4476                 }
4477 #endif
4478                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4479                 if (!PL_lex_casemods && PL_lex_inpat)
4480                     OPERATOR(',');
4481                 else
4482                     Aop(OP_CONCAT);
4483             }
4484             else {
4485                 PL_bufptr = s;
4486                 return yylex();
4487             }
4488         }
4489
4490         return yylex();
4491     case LEX_FORMLINE:
4492         PL_lex_state = LEX_NORMAL;
4493         s = scan_formline(PL_bufptr);
4494         if (!PL_lex_formbrack)
4495             goto rightbracket;
4496         OPERATOR(';');
4497     }
4498
4499     s = PL_bufptr;
4500     PL_oldoldbufptr = PL_oldbufptr;
4501     PL_oldbufptr = s;
4502
4503   retry:
4504 #ifdef PERL_MAD
4505     if (PL_thistoken) {
4506         sv_free(PL_thistoken);
4507         PL_thistoken = 0;
4508     }
4509     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
4510 #endif
4511     switch (*s) {
4512     default:
4513         if (isIDFIRST_lazy_if(s,UTF))
4514             goto keylookup;
4515         {
4516         unsigned char c = *s;
4517         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4518         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4519             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4520         } else {
4521             d = PL_linestart;
4522         }
4523         *s = '\0';
4524         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4525     }
4526     case 4:
4527     case 26:
4528         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
4529     case 0:
4530 #ifdef PERL_MAD
4531         if (PL_madskills)
4532             PL_faketokens = 0;
4533 #endif
4534         if (!PL_rsfp) {
4535             PL_last_uni = 0;
4536             PL_last_lop = 0;
4537             if (PL_lex_brackets) {
4538                 yyerror((const char *)
4539                         (PL_lex_formbrack
4540                          ? "Format not terminated"
4541                          : "Missing right curly or square bracket"));
4542             }
4543             DEBUG_T( { PerlIO_printf(Perl_debug_log,
4544                         "### Tokener got EOF\n");
4545             } );
4546             TOKEN(0);
4547         }
4548         if (s++ < PL_bufend)
4549             goto retry;                 /* ignore stray nulls */
4550         PL_last_uni = 0;
4551         PL_last_lop = 0;
4552         if (!PL_in_eval && !PL_preambled) {
4553             PL_preambled = TRUE;
4554 #ifdef PERL_MAD
4555             if (PL_madskills)
4556                 PL_faketokens = 1;
4557 #endif
4558             if (PL_perldb) {
4559                 /* Generate a string of Perl code to load the debugger.
4560                  * If PERL5DB is set, it will return the contents of that,
4561                  * otherwise a compile-time require of perl5db.pl.  */
4562
4563                 const char * const pdb = PerlEnv_getenv("PERL5DB");
4564
4565                 if (pdb) {
4566                     sv_setpv(PL_linestr, pdb);
4567                     sv_catpvs(PL_linestr,";");
4568                 } else {
4569                     SETERRNO(0,SS_NORMAL);
4570                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4571                 }
4572             } else
4573                 sv_setpvs(PL_linestr,"");
4574             if (PL_preambleav) {
4575                 SV **svp = AvARRAY(PL_preambleav);
4576                 SV **const end = svp + AvFILLp(PL_preambleav);
4577                 while(svp <= end) {
4578                     sv_catsv(PL_linestr, *svp);
4579                     ++svp;
4580                     sv_catpvs(PL_linestr, ";");
4581                 }
4582                 sv_free(MUTABLE_SV(PL_preambleav));
4583                 PL_preambleav = NULL;
4584             }
4585             if (PL_minus_E)
4586                 sv_catpvs(PL_linestr,
4587                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4588             if (PL_minus_n || PL_minus_p) {
4589                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4590                 if (PL_minus_l)
4591                     sv_catpvs(PL_linestr,"chomp;");
4592                 if (PL_minus_a) {
4593                     if (PL_minus_F) {
4594                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4595                              || *PL_splitstr == '"')
4596                               && strchr(PL_splitstr + 1, *PL_splitstr))
4597                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4598                         else {
4599                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4600                                bytes can be used as quoting characters.  :-) */
4601                             const char *splits = PL_splitstr;
4602                             sv_catpvs(PL_linestr, "our @F=split(q\0");
4603                             do {
4604                                 /* Need to \ \s  */
4605                                 if (*splits == '\\')
4606                                     sv_catpvn(PL_linestr, splits, 1);
4607                                 sv_catpvn(PL_linestr, splits, 1);
4608                             } while (*splits++);
4609                             /* This loop will embed the trailing NUL of
4610                                PL_linestr as the last thing it does before
4611                                terminating.  */
4612                             sv_catpvs(PL_linestr, ");");
4613                         }
4614                     }
4615                     else
4616                         sv_catpvs(PL_linestr,"our @F=split(' ');");
4617                 }
4618             }
4619             sv_catpvs(PL_linestr, "\n");
4620             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4621             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4622             PL_last_lop = PL_last_uni = NULL;
4623             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4624                 update_debugger_info(PL_linestr, NULL, 0);
4625             goto retry;
4626         }
4627         do {
4628             fake_eof = 0;
4629             bof = PL_rsfp ? TRUE : FALSE;
4630             if (0) {
4631               fake_eof:
4632                 fake_eof = LEX_FAKE_EOF;
4633             }
4634             PL_bufptr = PL_bufend;
4635             CopLINE_inc(PL_curcop);
4636             if (!lex_next_chunk(fake_eof)) {
4637                 CopLINE_dec(PL_curcop);
4638                 s = PL_bufptr;
4639                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
4640             }
4641             CopLINE_dec(PL_curcop);
4642 #ifdef PERL_MAD
4643             if (!PL_rsfp)
4644                 PL_realtokenstart = -1;
4645 #endif
4646             s = PL_bufptr;
4647             /* If it looks like the start of a BOM or raw UTF-16,
4648              * check if it in fact is. */
4649             if (bof && PL_rsfp &&
4650                      (*s == 0 ||
4651                       *(U8*)s == 0xEF ||
4652                       *(U8*)s >= 0xFE ||
4653                       s[1] == 0)) {
4654                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4655                 if (bof) {
4656                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4657                     s = swallow_bom((U8*)s);
4658                 }
4659             }
4660             if (PL_doextract) {
4661                 /* Incest with pod. */
4662 #ifdef PERL_MAD
4663                 if (PL_madskills)
4664                     sv_catsv(PL_thiswhite, PL_linestr);
4665 #endif
4666                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4667                     sv_setpvs(PL_linestr, "");
4668                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4669                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4670                     PL_last_lop = PL_last_uni = NULL;
4671                     PL_doextract = FALSE;
4672                 }
4673             }
4674             if (PL_rsfp)
4675                 incline(s);
4676         } while (PL_doextract);
4677         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4678         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4679         PL_last_lop = PL_last_uni = NULL;
4680         if (CopLINE(PL_curcop) == 1) {
4681             while (s < PL_bufend && isSPACE(*s))
4682                 s++;
4683             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4684                 s++;
4685 #ifdef PERL_MAD
4686             if (PL_madskills)
4687                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4688 #endif
4689             d = NULL;
4690             if (!PL_in_eval) {
4691                 if (*s == '#' && *(s+1) == '!')
4692                     d = s + 2;
4693 #ifdef ALTERNATE_SHEBANG
4694                 else {
4695                     static char const as[] = ALTERNATE_SHEBANG;
4696                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4697                         d = s + (sizeof(as) - 1);
4698                 }
4699 #endif /* ALTERNATE_SHEBANG */
4700             }
4701             if (d) {
4702                 char *ipath;
4703                 char *ipathend;
4704
4705                 while (isSPACE(*d))
4706                     d++;
4707                 ipath = d;
4708                 while (*d && !isSPACE(*d))
4709                     d++;
4710                 ipathend = d;
4711
4712 #ifdef ARG_ZERO_IS_SCRIPT
4713                 if (ipathend > ipath) {
4714                     /*
4715                      * HP-UX (at least) sets argv[0] to the script name,
4716                      * which makes $^X incorrect.  And Digital UNIX and Linux,
4717                      * at least, set argv[0] to the basename of the Perl
4718                      * interpreter. So, having found "#!", we'll set it right.
4719                      */
4720                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4721                                                     SVt_PV)); /* $^X */
4722                     assert(SvPOK(x) || SvGMAGICAL(x));
4723                     if (sv_eq(x, CopFILESV(PL_curcop))) {
4724                         sv_setpvn(x, ipath, ipathend - ipath);
4725                         SvSETMAGIC(x);
4726                     }
4727                     else {
4728                         STRLEN blen;
4729                         STRLEN llen;
4730                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4731                         const char * const lstart = SvPV_const(x,llen);
4732                         if (llen < blen) {
4733                             bstart += blen - llen;
4734                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4735                                 sv_setpvn(x, ipath, ipathend - ipath);
4736                                 SvSETMAGIC(x);
4737                             }
4738                         }
4739                     }
4740                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
4741                 }
4742 #endif /* ARG_ZERO_IS_SCRIPT */
4743
4744                 /*
4745                  * Look for options.
4746                  */
4747                 d = instr(s,"perl -");
4748                 if (!d) {
4749                     d = instr(s,"perl");
4750 #if defined(DOSISH)
4751                     /* avoid getting into infinite loops when shebang
4752                      * line contains "Perl" rather than "perl" */
4753                     if (!d) {
4754                         for (d = ipathend-4; d >= ipath; --d) {
4755                             if ((*d == 'p' || *d == 'P')
4756                                 && !ibcmp(d, "perl", 4))
4757                             {
4758                                 break;
4759                             }
4760                         }
4761                         if (d < ipath)
4762                             d = NULL;
4763                     }
4764 #endif
4765                 }
4766 #ifdef ALTERNATE_SHEBANG
4767                 /*
4768                  * If the ALTERNATE_SHEBANG on this system starts with a
4769                  * character that can be part of a Perl expression, then if
4770                  * we see it but not "perl", we're probably looking at the
4771                  * start of Perl code, not a request to hand off to some
4772                  * other interpreter.  Similarly, if "perl" is there, but
4773                  * not in the first 'word' of the line, we assume the line
4774                  * contains the start of the Perl program.
4775                  */
4776                 if (d && *s != '#') {
4777                     const char *c = ipath;
4778                     while (*c && !strchr("; \t\r\n\f\v#", *c))
4779                         c++;
4780                     if (c < d)
4781                         d = NULL;       /* "perl" not in first word; ignore */
4782                     else
4783                         *s = '#';       /* Don't try to parse shebang line */
4784                 }
4785 #endif /* ALTERNATE_SHEBANG */
4786                 if (!d &&
4787                     *s == '#' &&
4788                     ipathend > ipath &&
4789                     !PL_minus_c &&
4790                     !instr(s,"indir") &&
4791                     instr(PL_origargv[0],"perl"))
4792                 {
4793                     dVAR;
4794                     char **newargv;
4795
4796                     *ipathend = '\0';
4797                     s = ipathend + 1;
4798                     while (s < PL_bufend && isSPACE(*s))
4799                         s++;
4800                     if (s < PL_bufend) {
4801                         Newx(newargv,PL_origargc+3,char*);
4802                         newargv[1] = s;
4803                         while (s < PL_bufend && !isSPACE(*s))
4804                             s++;
4805                         *s = '\0';
4806                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4807                     }
4808                     else
4809                         newargv = PL_origargv;
4810                     newargv[0] = ipath;
4811                     PERL_FPU_PRE_EXEC
4812                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4813                     PERL_FPU_POST_EXEC
4814                     Perl_croak(aTHX_ "Can't exec %s", ipath);
4815                 }
4816                 if (d) {
4817                     while (*d && !isSPACE(*d))
4818                         d++;
4819                     while (SPACE_OR_TAB(*d))
4820                         d++;
4821
4822                     if (*d++ == '-') {
4823                         const bool switches_done = PL_doswitches;
4824                         const U32 oldpdb = PL_perldb;
4825                         const bool oldn = PL_minus_n;
4826                         const bool oldp = PL_minus_p;
4827                         const char *d1 = d;
4828
4829                         do {
4830                             bool baduni = FALSE;
4831                             if (*d1 == 'C') {
4832                                 const char *d2 = d1 + 1;
4833                                 if (parse_unicode_opts((const char **)&d2)
4834                                     != PL_unicode)
4835                                     baduni = TRUE;
4836                             }
4837                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4838                                 const char * const m = d1;
4839                                 while (*d1 && !isSPACE(*d1))
4840                                     d1++;
4841                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4842                                       (int)(d1 - m), m);
4843                             }
4844                             d1 = moreswitches(d1);
4845                         } while (d1);
4846                         if (PL_doswitches && !switches_done) {
4847                             int argc = PL_origargc;
4848                             char **argv = PL_origargv;
4849                             do {
4850                                 argc--,argv++;
4851                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4852                             init_argv_symbols(argc,argv);
4853                         }
4854                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4855                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4856                               /* if we have already added "LINE: while (<>) {",
4857                                  we must not do it again */
4858                         {
4859                             sv_setpvs(PL_linestr, "");
4860                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4861                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4862                             PL_last_lop = PL_last_uni = NULL;
4863                             PL_preambled = FALSE;
4864                             if (PERLDB_LINE || PERLDB_SAVESRC)
4865                                 (void)gv_fetchfile(PL_origfilename);
4866                             goto retry;
4867                         }
4868                     }
4869                 }
4870             }
4871         }
4872         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4873             PL_bufptr = s;
4874             PL_lex_state = LEX_FORMLINE;
4875             return yylex();
4876         }
4877         goto retry;
4878     case '\r':    /* carriage return: without PERL_STRICT_CR there is no code here, so control runs into the whitespace case below */
4879 #ifdef PERL_STRICT_CR
4880         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4881         Perl_croak(aTHX_
4882       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4883 #endif
4884     case ' ': case '\t': case '\f': case 013:    /* blank, tab, form feed, vertical tab (octal 013) */
4885 #ifdef PERL_MAD
4886         PL_realtokenstart = -1;
4887         if (!PL_thiswhite)
4888             PL_thiswhite = newSVpvs("");
4889         sv_catpvn(PL_thiswhite, s, 1);    /* MAD builds: append this single whitespace byte to PL_thiswhite */
4890 #endif
4891         s++;    /* step over the one character ... */
4892         goto retry;    /* ... and jump back to the retry label (defined outside this excerpt) */
4893     case '#':    /* '#' and newline share this handler: text from s onward is discarded, then control goes back to retry */
4894     case '\n':
4895 #ifdef PERL_MAD
4896         PL_realtokenstart = -1;
4897         if (PL_madskills)
4898             PL_faketokens = 0;
4899 #endif
4900         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {    /* not in LEX_NORMAL, or PL_in_eval is set while PL_rsfp is null */
4901             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4902                 /* handle eval qq[#line 1 "foo"\n ...] */
4903                 CopLINE_dec(PL_curcop);
4904                 incline(s);
4905             }
4906             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4907                 s = SKIPSPACE0(s);
4908                 if (!PL_in_eval || PL_rsfp)
4909                     incline(s);
4910             }
4911             else {
4912                 d = s;
4913                 while (d < PL_bufend && *d != '\n')    /* advance d to the next newline or to the end of the buffer */
4914                     d++;
4915                 if (d < PL_bufend)
4916                     d++;    /* a newline was found: step past it */
4917                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4918                   Perl_croak(aTHX_ "panic: input overflow");    /* only reachable when s was already beyond PL_bufend; the loop never moves d past it */
4919 #ifdef PERL_MAD
4920                 if (PL_madskills)
4921                     PL_thiswhite = newSVpvn(s, d - s);
4922 #endif
4923                 s = d;    /* continue lexing after the skipped text */
4924                 incline(s);
4925             }
4926             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {    /* switch to LEX_FORMLINE and re-enter yylex() */
4927                 PL_bufptr = s;
4928                 PL_lex_state = LEX_FORMLINE;
4929                 return yylex();
4930             }
4931         }
4932         else {
4933 #ifdef PERL_MAD
4934             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4935                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {    /* "#!" on line 1 */
4936                     PL_faketokens = 0;
4937                     s = SKIPSPACE0(s);
4938                     TOKEN(PEG); /* make sure any #! line is accessible */
4939                 }
4940                 s = SKIPSPACE0(s);
4941             }
4942             else {
4943 /*              if (PL_madskills && PL_lex_formbrack) { */
4944                     d = s;
4945                     while (d < PL_bufend && *d != '\n')    /* same scan as in the first branch: find the newline ... */
4946                         d++;
4947                     if (d < PL_bufend)
4948                         d++;    /* ... and step past it */
4949                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4950                       Perl_croak(aTHX_ "panic: input overflow");
4951                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4952                         if (!PL_thiswhite)
4953                             PL_thiswhite = newSVpvs("");
4954                         if (CopLINE(PL_curcop) == 1) {
4955                             sv_setpvs(PL_thiswhite, "");    /* on line 1 any previously collected whitespace is reset first */
4956                             PL_faketokens = 0;
4957                         }
4958                         sv_catpvn(PL_thiswhite, s, d - s);    /* keep the skipped text in PL_thiswhite */
4959                     }
4960                     s = d;
4961 /*              }
4962                 *s = '\0';
4963                 PL_bufend = s; */
4964             }
4965 #else
4966             *s = '\0';    /* non-MAD build: write a NUL at s ... */
4967             PL_bufend = s;    /* ... and make s the end of the buffer, dropping everything after it */
4968 #endif
4969         }
4970         goto retry;
4971     case '-':    /* '-': file test (-e, -f, ...), "--", "->", subtraction, or unary minus */
4972         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {    /* '-' plus one letter, and the character after the letter is not isALNUM */
4973             I32 ftst = 0;    /* file-test opcode; stays 0 when the letter is not one of the tests below */
4974             char tmp;
4975
4976             s++;
4977             PL_bufptr = s;    /* PL_bufptr now points at the letter */
4978             tmp = *s++;
4979
4980             while (s < PL_bufend && SPACE_OR_TAB(*s))
4981                 s++;
4982
4983             if (strnEQ(s,"=>",2)) {    /* letter followed by "=>": force the letter as a WORD instead of a file test */
4984                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4985                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4986                 OPERATOR('-');          /* unary minus */
4987             }
4988             PL_last_uni = PL_oldbufptr;
4989             switch (tmp) {    /* map the letter to its file-test opcode */
4990             case 'r': ftst = OP_FTEREAD;        break;
4991             case 'w': ftst = OP_FTEWRITE;       break;
4992             case 'x': ftst = OP_FTEEXEC;        break;
4993             case 'o': ftst = OP_FTEOWNED;       break;
4994             case 'R': ftst = OP_FTRREAD;        break;
4995             case 'W': ftst = OP_FTRWRITE;       break;
4996             case 'X': ftst = OP_FTREXEC;        break;
4997             case 'O': ftst = OP_FTROWNED;       break;
4998             case 'e': ftst = OP_FTIS;           break;
4999             case 'z': ftst = OP_FTZERO;         break;
5000             case 's': ftst = OP_FTSIZE;         break;
5001             case 'f': ftst = OP_FTFILE;         break;
5002             case 'd': ftst = OP_FTDIR;          break;
5003             case 'l': ftst = OP_FTLINK;         break;
5004             case 'p': ftst = OP_FTPIPE;         break;
5005             case 'S': ftst = OP_FTSOCK;         break;
5006             case 'u': ftst = OP_FTSUID;         break;
5007             case 'g': ftst = OP_FTSGID;         break;
5008             case 'k': ftst = OP_FTSVTX;         break;
5009             case 'b': ftst = OP_FTBLK;          break;
5010             case 'c': ftst = OP_FTCHR;          break;
5011             case 't': ftst = OP_FTTTY;          break;
5012             case 'T': ftst = OP_FTTEXT;         break;
5013             case 'B': ftst = OP_FTBINARY;       break;
5014             case 'M': case 'A': case 'C':
5015                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);    /* the time-based tests also fetch the variable named "\024" (control-T) with GV_ADD */
5016                 switch (tmp) {
5017                 case 'M': ftst = OP_FTMTIME;    break;
5018                 case 'A': ftst = OP_FTATIME;    break;
5019                 case 'C': ftst = OP_FTCTIME;    break;
5020                 default:                        break;
5021                 }
5022                 break;
5023             default:
5024                 break;
5025             }
5026             if (ftst) {
5027                 PL_last_lop_op = (OPCODE)ftst;
5028                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5029                         "### Saw file test %c\n", (int)tmp);
5030                 } );
5031                 FTST(ftst);
5032             }
5033             else {
5034                 /* Assume it was a minus followed by a one-letter named
5035                  * subroutine call (or a -bareword), then. */
5036                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5037                         "### '-%c' looked like a file test but was not\n",
5038                         (int) tmp);
5039                 } );
5040                 s = --PL_bufptr;    /* back up to the '-' itself so the generic code below sees it */
5041             }
5042         }
5043         {
5044             const char tmp = *s++;
5045             if (*s == tmp) {    /* "--": POSTDEC in operator position, PREDEC otherwise */
5046                 s++;
5047                 if (PL_expect == XOPERATOR)
5048                     TERM(POSTDEC);
5049                 else
5050                     OPERATOR(PREDEC);
5051             }
5052             else if (*s == '>') {    /* "->" */
5053                 s++;
5054                 s = SKIPSPACE1(s);
5055                 if (isIDFIRST_lazy_if(s,UTF)) {    /* an identifier follows: force it as a METHOD word */
5056                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5057                     TOKEN(ARROW);
5058                 }
5059                 else if (*s == '$')
5060                     OPERATOR(ARROW);
5061                 else
5062                     TERM(ARROW);
5063             }
5064             if (PL_expect == XOPERATOR)    /* operator position: subtraction */
5065                 Aop(OP_SUBTRACT);
5066             else {
5067                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5068                     check_uni();
5069                 OPERATOR('-');          /* unary minus */
5070             }
5071         }
5072
5073     case '+':    /* '+': "++", addition, or unary plus */
5074         {
5075             const char tmp = *s++;
5076             if (*s == tmp) {    /* "++": POSTINC in operator position, PREINC otherwise */
5077                 s++;
5078                 if (PL_expect == XOPERATOR)
5079                     TERM(POSTINC);
5080                 else
5081                     OPERATOR(PREINC);
5082             }
5083             if (PL_expect == XOPERATOR)    /* operator position: addition */
5084                 Aop(OP_ADD);
5085             else {
5086                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5087                     check_uni();
5088                 OPERATOR('+');    /* not in operator position: unary plus */
5089             }
5090         }
5091
5092     case '*':    /* '*': a '*'-prefixed identifier when no operator is expected, else "**" or multiplication */
5093         if (PL_expect != XOPERATOR) {
5094             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);    /* identifier text goes into PL_tokenbuf */
5095             PL_expect = XOPERATOR;
5096             force_ident(PL_tokenbuf, '*');
5097             if (!*PL_tokenbuf)    /* scan_ident() left PL_tokenbuf empty */
5098                 PREREF('*');
5099             TERM('*');
5100         }
5101         s++;
5102         if (*s == '*') {    /* "**" */
5103             s++;
5104             PWop(OP_POW);
5105         }
5106         Mop(OP_MULTIPLY);
5107
5108     case '%':    /* '%': modulo in operator position, otherwise a '%'-prefixed identifier */
5109         if (PL_expect == XOPERATOR) {
5110             ++s;
5111             Mop(OP_MODULO);
5112         }
5113         PL_tokenbuf[0] = '%';    /* the '%' is kept as the first byte; the scanned name follows it */
5114         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5115                 sizeof PL_tokenbuf - 1, FALSE);
5116         if (!PL_tokenbuf[1]) {    /* nothing was stored after the '%' */
5117             PREREF('%');
5118         }
5119         PL_pending_ident = '%';
5120         TERM('%');
5121
5122     case '^':    /* '^': bitwise xor */
5123         s++;
5124         BOop(OP_BIT_XOR);
5125     case '[':    /* '[': count the opening bracket, then pass the character itself to OPERATOR() */
5126         PL_lex_brackets++;
5127         {
5128             const char tmp = *s++;
5129             OPERATOR(tmp);
5130         }
5131     case '~':    /* "~~" is OP_SMARTMATCH when PL_expect allows it; a lone '~' has no code of its own and runs into the case ',' body */
5132         if (s[1] == '~'
5133             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5134         {
5135             s += 2;
5136             Eop(OP_SMARTMATCH);
5137         }
5138     case ',':    /* ',' (and a lone '~'): the character itself is passed to OPERATOR() */
5139         {
5140             const char tmp = *s++;
5141             OPERATOR(tmp);
5142         }
5143     case ':':    /* ':': "::", an attribute list, or a plain ':' operator */
5144         if (s[1] == ':') {    /* "::" is handled at the just_a_word_zero_gv label (outside this excerpt) */
5145             len = 0;
5146             goto just_a_word_zero_gv;
5147         }
5148         s++;
5149         switch (PL_expect) {    /* attributes are only parsed in the three states below; any other state ends at OPERATOR(':') after the switch */
5150             OP *attrs;
5151 #ifdef PERL_MAD
5152             I32 stuffstart;
5153 #endif
5154         case XOPERATOR:
5155             if (!PL_in_my || PL_lex_state != LEX_NORMAL)    /* in operator position attributes are only tried with PL_in_my set and in LEX_NORMAL */
5156                 break;
5157             PL_bufptr = s;      /* update in case we back off */
5158             if (*s == '=') {
5159                 deprecate(":= for an empty attribute list");
5160             }
5161             goto grabattrs;
5162         case XATTRBLOCK:
5163             PL_expect = XBLOCK;
5164             goto grabattrs;
5165         case XATTRTERM:
5166             PL_expect = XTERMBLOCK;
5167          grabattrs:
5168 #ifdef PERL_MAD
5169             stuffstart = s - SvPVX(PL_linestr) - 1;
5170 #endif
5171             s = PEEKSPACE(s);
5172             attrs = NULL;
5173             while (isIDFIRST_lazy_if(s,UTF)) {    /* one iteration per attribute name */
5174                 I32 tmp;
5175                 SV *sv;
5176                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5177                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {    /* a lower-case word that is one of the keywords below ends the list */
5178                     if (tmp < 0) tmp = -tmp;    /* compare on magnitude: a negative keyword code is folded to positive */
5179                     switch (tmp) {
5180                     case KEY_or:
5181                     case KEY_and:
5182                     case KEY_for:
5183                     case KEY_foreach:
5184                     case KEY_unless:
5185                     case KEY_if:
5186                     case KEY_while:
5187                     case KEY_until:
5188                         goto got_attrs;
5189                     default:
5190                         break;
5191                     }
5192                 }
5193                 sv = newSVpvn(s, len);    /* sv holds the attribute name */
5194                 if (*d == '(') {    /* parenthesised parameter: scanned by scan_str(); the code below reads it from PL_lex_stuff */
5195                     d = scan_str(d,TRUE,TRUE);
5196                     if (!d) {
5197                         /* MUST advance bufptr here to avoid bogus
5198                            "at end of line" context messages from yyerror().
5199                          */
5200                         PL_bufptr = s + len;
5201                         yyerror("Unterminated attribute parameter in attribute list");
5202                         if (attrs)
5203                             op_free(attrs);
5204                         sv_free(sv);
5205                         return REPORT(0);       /* EOF indicator */
5206                     }
5207                 }
5208                 if (PL_lex_stuff) {    /* parameter text present: append it to the name and add the result to attrs */
5209                     sv_catsv(sv, PL_lex_stuff);
5210                     attrs = append_elem(OP_LIST, attrs,
5211                                         newSVOP(OP_CONST, 0, sv));
5212                     SvREFCNT_dec(PL_lex_stuff);
5213                     PL_lex_stuff = NULL;
5214                 }
5215                 else {
5216                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {    /* "unique" is not added to attrs: deprecated under 'our', fatal otherwise */
5217                         sv_free(sv);
5218                         if (PL_in_my == KEY_our) {
5219                             deprecate(":unique");
5220                         }
5221                         else
5222                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5223                     }
5224
5225                     /* NOTE: any CV attrs applied here need to be part of
5226                        the CVf_BUILTIN_ATTRS define in cv.h! */
5227                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {    /* sets the flag on PL_compcv directly */
5228                         sv_free(sv);
5229                         CvLVALUE_on(PL_compcv);
5230                     }
5231                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {    /* only issues a deprecation notice */
5232                         sv_free(sv);
5233                         deprecate(":locked");
5234                     }
5235                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {    /* sets the flag on PL_compcv directly */
5236                         sv_free(sv);
5237                         CvMETHOD_on(PL_compcv);
5238                     }
5239                     /* After we've set the flags, it could be argued that
5240                        we don't need to do the attributes.pm-based setting
5241                        process, and shouldn't bother appending recognized
5242                        flags.  To experiment with that, uncomment the
5243                        following "else".  (Note that's already been
5244                        uncommented.  That keeps the above-applied built-in
5245                        attributes from being intercepted (and possibly
5246                        rejected) by a package's attribute routines, but is
5247                        justified by the performance win for the common case
5248                        of applying only built-in attributes.) */
5249                     else
5250                         attrs = append_elem(OP_LIST, attrs,
5251                                             newSVOP(OP_CONST, 0,
5252                                                     sv));
5253                 }
5254                 s = PEEKSPACE(d);
5255                 if (*s == ':' && s[1] != ':')    /* a single ':' (not "::") separates attributes */
5256                     s = PEEKSPACE(s+1);
5257                 else if (s == d)
5258                     break;      /* require real whitespace or :'s */
5259                 /* XXX losing whitespace on sequential attributes here */
5260             }
5261             {
5262                 const char tmp    /* character allowed to follow the list: '=' in operator position, '{' otherwise */
5263                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5264                 if (*s != ';' && *s != '}' && *s != tmp
5265                     && (tmp != '=' || *s != ')')) {
5266                     const char q = ((*s == '\'') ? '"' : '\'');    /* quote character for the message, chosen so it differs from *s */
5267                     /* If here for an expression, and parsed no attrs, back
5268                        off. */
5269                     if (tmp == '=' && !attrs) {
5270                         s = PL_bufptr;    /* PL_bufptr was saved just after the ':' in the XOPERATOR case above */
5271                         break;
5272                     }
5273                     /* MUST advance bufptr here to avoid bogus "at end of line"
5274                        context messages from yyerror().
5275                     */
5276                     PL_bufptr = s;
5277                     yyerror( (const char *)
5278                              (*s
5279                               ? Perl_form(aTHX_ "Invalid separator character "
5280                                           "%c%c%c in attribute list", q, *s, q)
5281                               : "Unterminated attribute list" ) );
5282                     if (attrs)
5283                         op_free(attrs);
5284                     OPERATOR(':');
5285                 }
5286             }
5287         got_attrs:
5288             if (attrs) {    /* queue the collected attribute ops as a forced THING token */
5289                 start_force(PL_curforce);
5290                 NEXTVAL_NEXTTOKE.opval = attrs;
5291                 CURMAD('_', PL_nextwhite);
5292                 force_next(THING);
5293             }
5294 #ifdef PERL_MAD
5295             if (PL_madskills) {
5296                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5297                                      (s - SvPVX(PL_linestr)) - stuffstart);
5298             }
5299 #endif
5300             TOKEN(COLONATTR);
5301         }
5302         OPERATOR(':');
5303     case '(':    /* '(': passed to TOKEN() after skipping following whitespace */
5304         s++;
5305         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5306             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
5307         else
5308             PL_expect = XTERM;    /* otherwise a term is expected after the parenthesis */
5309         s = SKIPSPACE1(s);
5310         TOKEN('(');
5311     case ';':    /* ';': CLINE, then the character itself is passed to OPERATOR() */
5312         CLINE;
5313         {
5314             const char tmp = *s++;
5315             OPERATOR(tmp);
5316         }
5317     case ')':    /* ')': PREBLOCK when the next non-space character is '{', otherwise TERM */
5318         {
5319             const char tmp = *s++;
5320             s = SKIPSPACE1(s);
5321             if (*s == '{')
5322                 PREBLOCK(tmp);
5323             TERM(tmp);
5324         }
5325     case ']':    /* ']': uncount the bracket, and possibly end the subscript chain of an interpolated variable */
5326         s++;
5327         if (PL_lex_brackets <= 0)    /* no bracket is open: report the error and leave the count unchanged */
5328             yyerror("Unmatched right square bracket");
5329         else
5330             --PL_lex_brackets;
5331         if (PL_lex_state == LEX_INTERPNORMAL) {
5332             if (PL_lex_brackets == 0) {    /* the outermost bracket just closed */
5333                 if (*s == '-' && s[1] == '>')    /* "->" follows */
5334                     PL_lex_state = LEX_INTERPENDMAYBE;
5335                 else if (*s != '[' && *s != '{')    /* no further subscript follows */
5336                     PL_lex_state = LEX_INTERPEND;
5337             }
5338         }
5339         TERM(']');
5340     case '{':    /* '{': decide between an anonymous-hash opener (HASHBRACK) and a block opener ('{') */
5341       leftbracket:
5342         s++;
5343         if (PL_lex_brackets > 100) {    /* beyond depth 100 the bracket stack is resized to depth+10 entries */
5344             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5345         }
5346         switch (PL_expect) {    /* every branch pushes one entry onto PL_lex_brackstack; case '}' pops it into PL_expect */
5347         case XTERM:
5348             if (PL_lex_formbrack) {    /* PL_lex_formbrack set: un-consume the '{' and pass DO to PRETERMBLOCK() */
5349                 s--;
5350                 PRETERMBLOCK(DO);
5351             }
5352             if (PL_oldoldbufptr == PL_last_lop)
5353                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5354             else
5355                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5356             OPERATOR(HASHBRACK);
5357         case XOPERATOR:    /* look ahead for "{ word }" or "{ -word }" and force the word */
5358             while (s < PL_bufend && SPACE_OR_TAB(*s))
5359                 s++;
5360             d = s;
5361             PL_tokenbuf[0] = '\0';
5362             if (d < PL_bufend && *d == '-') {    /* remember a leading '-' in PL_tokenbuf[0] */
5363                 PL_tokenbuf[0] = '-';
5364                 d++;
5365                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5366                     d++;
5367             }
5368             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5369                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5370                               FALSE, &len);
5371                 while (d < PL_bufend && SPACE_OR_TAB(*d))
5372                     d++;
5373                 if (*d == '}') {    /* the word is the whole subscript */
5374                     const char minus = (PL_tokenbuf[0] == '-');    /* 1 when a leading '-' was seen, else 0 */
5375                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5376                     if (minus)
5377                         force_next('-');
5378                 }
5379             }
5380             /* FALL THROUGH */
5381         case XATTRBLOCK:
5382         case XBLOCK:
5383             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5384             PL_expect = XSTATE;
5385             break;
5386         case XATTRTERM:
5387         case XTERMBLOCK:
5388             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5389             PL_expect = XSTATE;
5390             break;
5391         default: {    /* no definite expectation: guess hash versus block from what follows */
5392                 const char *t;
5393                 if (PL_oldoldbufptr == PL_last_lop)
5394                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5395                 else
5396                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5397                 s = SKIPSPACE1(s);
5398                 if (*s == '}') {    /* empty braces */
5399                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5400                         PL_expect = XTERM;
5401                         /* This hack is to get the ${} in the message. */
5402                         PL_bufptr = s+1;
5403                         yyerror("syntax error");
5404                         break;
5405                     }
5406                     OPERATOR(HASHBRACK);
5407                 }
5408                 /* This hack serves to disambiguate a pair of curlies
5409                  * as being a block or an anon hash.  Normally, expectation
5410                  * determines that, but in cases where we're not in a
5411                  * position to expect anything in particular (like inside
5412                  * eval"") we have to resolve the ambiguity.  This code
5413                  * covers the case where the first term in the curlies is a
5414                  * quoted string.  Most other cases need to be explicitly
5415                  * disambiguated by prepending a "+" before the opening
5416                  * curly in order to force resolution as an anon hash.
5417                  *
5418                  * XXX should probably propagate the outer expectation
5419                  * into eval"" to rely less on this hack, but that could
5420                  * potentially break current behavior of eval"".
5421                  * GSAR 97-07-21
5422                  */
5423                 t = s;    /* t is a look-ahead cursor; s itself is not advanced by the scan below */
5424                 if (*s == '\'' || *s == '"' || *s == '`') {
5425                     /* common case: get past first string, handling escapes */
5426                     for (t++; t < PL_bufend && *t != *s;)
5427                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
5428                             t++;
5429                     t++;
5430                 }
5431                 else if (*s == 'q') {
5432                     if (++t < PL_bufend
5433                         && (!isALNUM(*t)
5434                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5435                                 && !isALNUM(*t))))    /* "q", "qq" or "qx" followed by a non-isALNUM character */
5436                     {
5437                         /* skip q//-like construct */
5438                         const char *tmps;
5439                         char open, close, term;
5440                         I32 brackets = 1;    /* nesting depth, used only when open and close differ */
5441
5442                         while (t < PL_bufend && isSPACE(*t))
5443                             t++;
5444                         /* check for q => */
5445                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5446                             OPERATOR(HASHBRACK);
5447                         }
5448                         term = *t;
5449                         open = term;
5450                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))    /* table lookup: five places on, an opening bracket maps to its closer */
5451                             term = tmps[5];
5452                         close = term;
5453                         if (open == close)    /* same delimiter at both ends: stop at the next unescaped one */
5454                             for (t++; t < PL_bufend; t++) {
5455                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5456                                     t++;
5457                                 else if (*t == open)
5458                                     break;
5459                             }
5460                         else {    /* bracket pair: track nesting until the matching closer */
5461                             for (t++; t < PL_bufend; t++) {
5462                                 if (*t == '\\' && t+1 < PL_bufend)
5463                                     t++;
5464                                 else if (*t == close && --brackets <= 0)
5465                                     break;
5466                                 else if (*t == open)
5467                                     brackets++;
5468                             }
5469                         }
5470                         t++;
5471                     }
5472                     else
5473                         /* skip plain q word */
5474                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5475                              t += UTF8SKIP(t);
5476                 }
5477                 else if (isALNUM_lazy_if(t,UTF)) {    /* plain word: skip it */
5478                     t += UTF8SKIP(t);
5479                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5480                          t += UTF8SKIP(t);
5481                 }
5482                 while (t < PL_bufend && isSPACE(*t))
5483                     t++;
5484                 /* if comma follows first term, call it an anon hash */
5485                 /* XXX it could be a comma expression with loop modifiers */
5486                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5487                                    || (*t == '=' && t[1] == '>')))
5488                     OPERATOR(HASHBRACK);
5489                 if (PL_expect == XREF)
5490                     PL_expect = XTERM;
5491                 else {
5492                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;    /* overwrite the entry pushed at the top of this branch */
5493                     PL_expect = XSTATE;
5494                 }
5495             }
5496             break;
5497         }
5498         pl_yylval.ival = CopLINE(PL_curcop);    /* reached via the break statements above: the token value is the current line number */
5499         if (isSPACE(*s) || *s == '#')
5500             PL_copline = NOLINE;   /* invalidate current command line number */
5501         TOKEN('{');
5502     case '}':    /* '}': pop the bracket stack into PL_expect, then queue '}' and pass ';' to TOKEN() */
5503       rightbracket:
5504         s++;
5505         if (PL_lex_brackets <= 0)    /* nothing to pop: report the error and leave PL_expect alone */
5506             yyerror("Unmatched right curly bracket");
5507         else
5508             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5509         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)    /* depth dropped below the recorded PL_lex_formbrack level */
5510             PL_lex_formbrack = 0;
5511         if (PL_lex_state == LEX_INTERPNORMAL) {
5512             if (PL_lex_brackets == 0) {    /* the outermost bracket just closed */
5513                 if (PL_expect & XFAKEBRACK) {    /* popped entry carries the XFAKEBRACK flag: strip it and lex again without a token for this '}' */
5514                     PL_expect &= XENUMMASK;
5515                     PL_lex_state = LEX_INTERPEND;
5516                     PL_bufptr = s;
5517 #if 0
5518                     if (PL_madskills) {
5519                         if (!PL_thiswhite)
5520                             PL_thiswhite = newSVpvs("");
5521                         sv_catpvs(PL_thiswhite,"}");
5522                     }
5523 #endif
5524                     return yylex();     /* ignore fake brackets */
5525                 }
5526                 if (*s == '-' && s[1] == '>')    /* "->" follows */
5527                     PL_lex_state = LEX_INTERPENDMAYBE;
5528                 else if (*s != '[' && *s != '{')    /* no further subscript follows */
5529                     PL_lex_state = LEX_INTERPEND;
5530             }
5531         }
5532         if (PL_expect & XFAKEBRACK) {    /* same flag check for the remaining cases */
5533             PL_expect &= XENUMMASK;
5534             PL_bufptr = s;
5535             return yylex();             /* ignore fake brackets */
5536         }
5537         start_force(PL_curforce);
5538         if (PL_madskills) {
5539             curmad('X', newSVpvn(s-1,1));    /* s-1 is the '}' just consumed */
5540             CURMAD('_', PL_thiswhite);
5541         }
5542         force_next('}');    /* the '}' itself is queued through force_next() ... */
5543 #ifdef PERL_MAD
5544         if (!PL_thistoken)
5545             PL_thistoken = newSVpvs("");
5546 #endif
5547         TOKEN(';');    /* ... while ';' is what is passed to TOKEN() here */
5548     case '&':    /* '&': "&&", bitwise and, or '&' introducing an identifier */
5549         s++;
5550         if (*s++ == '&')    /* "&&" */
5551             AOPERATOR(ANDAND);
5552         s--;    /* not "&&": undo the look-ahead increment */
5553         if (PL_expect == XOPERATOR) {
5554             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5555                 && isIDFIRST_lazy_if(s,UTF))    /* '&' at the start of a line, in operator position, followed by an identifier */
5556             {
5557                 CopLINE_dec(PL_curcop);    /* issue the warning with the line number lowered by one, then restore it */
5558                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5559                 CopLINE_inc(PL_curcop);
5560             }
5561             BAop(OP_BIT_AND);
5562         }
5563
5564         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);    /* scan again starting at the '&' itself */
5565         if (*PL_tokenbuf) {    /* a name was stored in PL_tokenbuf */
5566             PL_expect = XOPERATOR;
5567             force_ident(PL_tokenbuf, '&');
5568         }
5569         else
5570             PREREF('&');
5571         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5572         TERM('&');
5573
5574     case '|':
5575         s++;
5576         if (*s++ == '|')
5577             AOPERATOR(OROR);
5578         s--;
5579         BOop(OP_BIT_OR);
5580     case '=':
5581         s++;
5582         {
5583             const char tmp = *s++;
5584             if (tmp == '=')
5585                 Eop(OP_EQ);
5586             if (tmp == '>')
5587                 OPERATOR(',');
5588             if (tmp == '~')
5589                 PMop(OP_MATCH);
5590             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5591                 && strchr("+-*/%.^&|<",tmp))
5592                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5593                             "Reversed %c= operator",(int)tmp);
5594             s--;
5595             if (PL_expect == XSTATE && isALPHA(tmp) &&
5596                 (s == PL_linestart+1 || s[-2] == '\n') )
5597                 {
5598                     if (PL_in_eval && !PL_rsfp) {
5599                         d = PL_bufend;
5600                         while (s < d) {
5601                             if (*s++ == '\n') {
5602                                 incline(s);
5603                                 if (strnEQ(s,"=cut",4)) {
5604                                     s = strchr(s,'\n');
5605                                     if (s)
5606                                         s++;
5607                                     else
5608                                         s = d;
5609                                     incline(s);
5610                                     goto retry;
5611                                 }
5612                             }
5613                         }
5614                         goto retry;
5615                     }
5616 #ifdef PERL_MAD
5617                     if (PL_madskills) {
5618                         if (!PL_thiswhite)
5619                             PL_thiswhite = newSVpvs("");
5620                         sv_catpvn(PL_thiswhite, PL_linestart,
5621                                   PL_bufend - PL_linestart);
5622                     }
5623 #endif
5624                     s = PL_bufend;
5625                     PL_doextract = TRUE;
5626                     goto retry;
5627                 }
5628         }
5629         if (PL_lex_brackets < PL_lex_formbrack) {
5630             const char *t = s;
5631 #ifdef PERL_STRICT_CR
5632             while (SPACE_OR_TAB(*t))
5633 #else
5634             while (SPACE_OR_TAB(*t) || *t == '\r')
5635 #endif
5636                 t++;
5637             if (*t == '\n' || *t == '#') {
5638                 s--;
5639                 PL_expect = XBLOCK;
5640                 goto leftbracket;
5641             }
5642         }
5643         pl_yylval.ival = 0;
5644         OPERATOR(ASSIGNOP);
5645     case '!':
5646         s++;
5647         {
5648             const char tmp = *s++;
5649             if (tmp == '=') {
5650                 /* was this !=~ where !~ was meant?
5651                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5652
5653                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5654                     const char *t = s+1;
5655
5656                     while (t < PL_bufend && isSPACE(*t))
5657                         ++t;
5658
5659                     if (*t == '/' || *t == '?' ||
5660                         ((*t == 'm' || *t == 's' || *t == 'y')
5661                          && !isALNUM(t[1])) ||
5662                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5663                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5664                                     "!=~ should be !~");
5665                 }
5666                 Eop(OP_NE);
5667             }
5668             if (tmp == '~')
5669                 PMop(OP_NOT);
5670         }
5671         s--;
5672         OPERATOR('!');
5673     case '<':
5674         if (PL_expect != XOPERATOR) {
5675             if (s[1] != '<' && !strchr(s,'>'))
5676                 check_uni();
5677             if (s[1] == '<')
5678                 s = scan_heredoc(s);
5679             else
5680                 s = scan_inputsymbol(s);
5681             TERM(sublex_start());
5682         }
5683         s++;
5684         {
5685             char tmp = *s++;
5686             if (tmp == '<')
5687                 SHop(OP_LEFT_SHIFT);
5688             if (tmp == '=') {
5689                 tmp = *s++;
5690                 if (tmp == '>')
5691                     Eop(OP_NCMP);
5692                 s--;
5693                 Rop(OP_LE);
5694             }
5695         }
5696         s--;
5697         Rop(OP_LT);
5698     case '>':
5699         s++;
5700         {
5701             const char tmp = *s++;
5702             if (tmp == '>')
5703                 SHop(OP_RIGHT_SHIFT);
5704             else if (tmp == '=')
5705                 Rop(OP_GE);
5706         }
5707         s--;
5708         Rop(OP_GT);
5709
5710     case '$':
5711         CLINE;
5712
5713         if (PL_expect == XOPERATOR) {
5714             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5715                 return deprecate_commaless_var_list();
5716             }
5717         }
5718
5719         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5720             PL_tokenbuf[0] = '@';
5721             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5722                            sizeof PL_tokenbuf - 1, FALSE);
5723             if (PL_expect == XOPERATOR)
5724                 no_op("Array length", s);
5725             if (!PL_tokenbuf[1])
5726                 PREREF(DOLSHARP);
5727             PL_expect = XOPERATOR;
5728             PL_pending_ident = '#';
5729             TOKEN(DOLSHARP);
5730         }
5731
5732         PL_tokenbuf[0] = '$';
5733         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5734                        sizeof PL_tokenbuf - 1, FALSE);
5735         if (PL_expect == XOPERATOR)
5736             no_op("Scalar", s);
5737         if (!PL_tokenbuf[1]) {
5738             if (s == PL_bufend)
5739                 yyerror("Final $ should be \\$ or $name");
5740             PREREF('$');
5741         }
5742
5743         /* This kludge not intended to be bulletproof. */
5744         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5745             pl_yylval.opval = newSVOP(OP_CONST, 0,
5746                                    newSViv(CopARYBASE_get(&PL_compiling)));
5747             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5748             TERM(THING);
5749         }
5750
5751         d = s;
5752         {
5753             const char tmp = *s;
5754             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5755                 s = SKIPSPACE1(s);
5756
5757             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5758                 && intuit_more(s)) {
5759                 if (*s == '[') {
5760                     PL_tokenbuf[0] = '@';
5761                     if (ckWARN(WARN_SYNTAX)) {
5762                         char *t = s+1;
5763
5764                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5765                             t++;
5766                         if (*t++ == ',') {
5767                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5768                             while (t < PL_bufend && *t != ']')
5769                                 t++;
5770                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5771                                         "Multidimensional syntax %.*s not supported",
5772                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
5773                         }
5774                     }
5775                 }
5776                 else if (*s == '{') {
5777                     char *t;
5778                     PL_tokenbuf[0] = '%';
5779                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
5780                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5781                         {
5782                             char tmpbuf[sizeof PL_tokenbuf];
5783                             do {
5784                                 t++;
5785                             } while (isSPACE(*t));
5786                             if (isIDFIRST_lazy_if(t,UTF)) {
5787                                 STRLEN len;
5788                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5789                                               &len);
5790                                 while (isSPACE(*t))
5791                                     t++;
5792                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5793                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5794                                                 "You need to quote \"%s\"",
5795                                                 tmpbuf);
5796                             }
5797                         }
5798                 }
5799             }
5800
5801             PL_expect = XOPERATOR;
5802             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5803                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5804                 if (!islop || PL_last_lop_op == OP_GREPSTART)
5805                     PL_expect = XOPERATOR;
5806                 else if (strchr("$@\"'`q", *s))
5807                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
5808                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5809                     PL_expect = XTERM;          /* e.g. print $fh &sub */
5810                 else if (isIDFIRST_lazy_if(s,UTF)) {
5811                     char tmpbuf[sizeof PL_tokenbuf];
5812                     int t2;
5813                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5814                     if ((t2 = keyword(tmpbuf, len, 0))) {
5815                         /* binary operators exclude handle interpretations */
5816                         switch (t2) {
5817                         case -KEY_x:
5818                         case -KEY_eq:
5819                         case -KEY_ne:
5820                         case -KEY_gt:
5821                         case -KEY_lt:
5822                         case -KEY_ge:
5823                         case -KEY_le:
5824                         case -KEY_cmp:
5825                             break;
5826                         default:
5827                             PL_expect = XTERM;  /* e.g. print $fh length() */
5828                             break;
5829                         }
5830                     }
5831                     else {
5832                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5833                     }
5834                 }
5835                 else if (isDIGIT(*s))
5836                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5837                 else if (*s == '.' && isDIGIT(s[1]))
5838                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5839                 else if ((*s == '?' || *s == '-' || *s == '+')
5840                          && !isSPACE(s[1]) && s[1] != '=')
5841                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5842                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5843                          && s[1] != '/')
5844                     PL_expect = XTERM;          /* e.g. print $fh /.../
5845                                                    XXX except DORDOR operator
5846                                                 */
5847                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5848                          && s[2] != '=')
5849                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5850             }
5851         }
5852         PL_pending_ident = '$';
5853         TOKEN('$');
5854
5855     case '@':
5856         if (PL_expect == XOPERATOR)
5857             no_op("Array", s);
5858         PL_tokenbuf[0] = '@';
5859         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5860         if (!PL_tokenbuf[1]) {
5861             PREREF('@');
5862         }
5863         if (PL_lex_state == LEX_NORMAL)
5864             s = SKIPSPACE1(s);
5865         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5866             if (*s == '{')
5867                 PL_tokenbuf[0] = '%';
5868
5869             /* Warn about @ where they meant $. */
5870             if (*s == '[' || *s == '{') {
5871                 if (ckWARN(WARN_SYNTAX)) {
5872                     const char *t = s + 1;
5873                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5874                         t++;
5875                     if (*t == '}' || *t == ']') {
5876                         t++;
5877                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5878                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5879                             "Scalar value %.*s better written as $%.*s",
5880                             (int)(t-PL_bufptr), PL_bufptr,
5881                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5882                     }
5883                 }
5884             }
5885         }
5886         PL_pending_ident = '@';
5887         TERM('@');
5888
5889      case '/':                  /* may be division, defined-or, or pattern */
5890         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5891             s += 2;
5892             AOPERATOR(DORDOR);
5893         }
5894      case '?':                  /* may either be conditional or pattern */
5895         if (PL_expect == XOPERATOR) {
5896              char tmp = *s++;
5897              if(tmp == '?') {
5898                 OPERATOR('?');
5899              }
5900              else {
5901                  tmp = *s++;
5902                  if(tmp == '/') {
5903                      /* A // operator. */
5904                     AOPERATOR(DORDOR);
5905                  }
5906                  else {
5907                      s--;
5908                      Mop(OP_DIVIDE);
5909                  }
5910              }
5911          }
5912          else {
5913              /* Disable warning on "study /blah/" */
5914              if (PL_oldoldbufptr == PL_last_uni
5915               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5916                   || memNE(PL_last_uni, "study", 5)
5917                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5918               ))
5919                  check_uni();
5920              s = scan_pat(s,OP_MATCH);
5921              TERM(sublex_start());
5922          }
5923
5924     case '.':
5925         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5926 #ifdef PERL_STRICT_CR
5927             && s[1] == '\n'
5928 #else
5929             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5930 #endif
5931             && (s == PL_linestart || s[-1] == '\n') )
5932         {
5933             PL_lex_formbrack = 0;
5934             PL_expect = XSTATE;
5935             goto rightbracket;
5936         }
5937         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5938             s += 3;
5939             OPERATOR(YADAYADA);
5940         }
5941         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5942             char tmp = *s++;
5943             if (*s == tmp) {
5944                 s++;
5945                 if (*s == tmp) {
5946                     s++;
5947                     pl_yylval.ival = OPf_SPECIAL;
5948                 }
5949                 else
5950                     pl_yylval.ival = 0;
5951                 OPERATOR(DOTDOT);
5952             }
5953             Aop(OP_CONCAT);
5954         }
5955         /* FALL THROUGH */
5956     case '0': case '1': case '2': case '3': case '4':
5957     case '5': case '6': case '7': case '8': case '9':
5958         s = scan_num(s, &pl_yylval);
5959         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5960         if (PL_expect == XOPERATOR)
5961             no_op("Number",s);
5962         TERM(THING);
5963
5964     case '\'':
5965         s = scan_str(s,!!PL_madskills,FALSE);
5966         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5967         if (PL_expect == XOPERATOR) {
5968             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5969                 return deprecate_commaless_var_list();
5970             }
5971             else
5972                 no_op("String",s);
5973         }
5974         if (!s)
5975             missingterm(NULL);
5976         pl_yylval.ival = OP_CONST;
5977         TERM(sublex_start());
5978
5979     case '"':
5980         s = scan_str(s,!!PL_madskills,FALSE);
5981         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5982         if (PL_expect == XOPERATOR) {
5983             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5984                 return deprecate_commaless_var_list();
5985             }
5986             else
5987                 no_op("String",s);
5988         }
5989         if (!s)
5990             missingterm(NULL);
5991         pl_yylval.ival = OP_CONST;
5992         /* FIXME. I think that this can be const if char *d is replaced by
5993            more localised variables.  */
5994         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5995             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5996                 pl_yylval.ival = OP_STRINGIFY;
5997                 break;
5998             }
5999         }
6000         TERM(sublex_start());
6001
6002     case '`':
6003         s = scan_str(s,!!PL_madskills,FALSE);
6004         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6005         if (PL_expect == XOPERATOR)
6006             no_op("Backticks",s);
6007         if (!s)
6008             missingterm(NULL);
6009         readpipe_override();
6010         TERM(sublex_start());
6011
6012     case '\\':
6013         s++;
6014         if (PL_lex_inwhat && isDIGIT(*s))
6015             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6016                            *s, *s);
6017         if (PL_expect == XOPERATOR)
6018             no_op("Backslash",s);
6019         OPERATOR(REFGEN);
6020
6021     case 'v':
6022         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6023             char *start = s + 2;
6024             while (isDIGIT(*start) || *start == '_')
6025                 start++;
6026             if (*start == '.' && isDIGIT(start[1])) {
6027                 s = scan_num(s, &pl_yylval);
6028                 TERM(THING);
6029             }
6030             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6031             else if (!isALPHA(*start) && (PL_expect == XTERM
6032                         || PL_expect == XREF || PL_expect == XSTATE
6033                         || PL_expect == XTERMORDORDOR)) {
6034                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6035                 if (!gv) {
6036                     s = scan_num(s, &pl_yylval);
6037                     TERM(THING);
6038                 }
6039             }
6040         }
6041         goto keylookup;
6042     case 'x':
6043         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6044             s++;
6045             Mop(OP_REPEAT);
6046         }
6047         goto keylookup;
6048
6049     case '_':
6050     case 'a': case 'A':
6051     case 'b': case 'B':
6052     case 'c': case 'C':
6053     case 'd': case 'D':
6054     case 'e': case 'E':
6055     case 'f': case 'F':
6056     case 'g': case 'G':
6057     case 'h': case 'H':
6058     case 'i': case 'I':
6059     case 'j': case 'J':
6060     case 'k': case 'K':
6061     case 'l': case 'L':
6062     case 'm': case 'M':
6063     case 'n': case 'N':
6064     case 'o': case 'O':
6065     case 'p': case 'P':
6066     case 'q': case 'Q':
6067     case 'r': case 'R':
6068     case 's': case 'S':
6069     case 't': case 'T':
6070     case 'u': case 'U':
6071               case 'V':
6072     case 'w': case 'W':
6073               case 'X':
6074     case 'y': case 'Y':
6075     case 'z': case 'Z':
6076
6077       keylookup: {
6078         bool anydelim;
6079         I32 tmp;
6080
6081         orig_keyword = 0;
6082         gv = NULL;
6083         gvp = NULL;
6084
6085         PL_bufptr = s;
6086         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6087
6088         /* Some keywords can be followed by any delimiter, including ':' */
6089         anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6090                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6091                              (PL_tokenbuf[0] == 'q' &&
6092                               strchr("qwxr", PL_tokenbuf[1])))));
6093
6094         /* x::* is just a word, unless x is "CORE" */
6095         if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6096             goto just_a_word;
6097
6098         d = s;
6099         while (d < PL_bufend && isSPACE(*d))
6100                 d++;    /* no comments skipped here, or s### is misparsed */
6101
6102         /* Is this a word before a => operator? */
6103         if (*d == '=' && d[1] == '>') {
6104             CLINE;
6105             pl_yylval.opval
6106                 = (OP*)newSVOP(OP_CONST, 0,
6107                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6108             pl_yylval.opval->op_private = OPpCONST_BARE;
6109             TERM(WORD);
6110         }
6111
6112         /* Check for plugged-in keyword */
6113         {
6114             OP *o;
6115             int result;
6116             char *saved_bufptr = PL_bufptr;
6117             PL_bufptr = s;
6118             result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6119             s = PL_bufptr;
6120             if (result == KEYWORD_PLUGIN_DECLINE) {
6121                 /* not a plugged-in keyword */
6122                 PL_bufptr = saved_bufptr;
6123             } else if (result == KEYWORD_PLUGIN_STMT) {
6124                 pl_yylval.opval = o;
6125                 CLINE;
6126                 PL_expect = XSTATE;
6127                 return REPORT(PLUGSTMT);
6128             } else if (result == KEYWORD_PLUGIN_EXPR) {
6129                 pl_yylval.opval = o;
6130                 CLINE;
6131                 PL_expect = XOPERATOR;
6132                 return REPORT(PLUGEXPR);
6133             } else {
6134                 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6135                                         PL_tokenbuf);
6136             }
6137         }
6138
6139         /* Check for built-in keyword */
6140         tmp = keyword(PL_tokenbuf, len, 0);
6141
6142         /* Is this a label? */
6143         if (!anydelim && PL_expect == XSTATE
6144               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6145             s = d + 1;
6146             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6147             CLINE;
6148             TOKEN(LABEL);
6149         }
6150
6151         if (tmp < 0) {                  /* second-class keyword? */
6152             GV *ogv = NULL;     /* override (winner) */
6153             GV *hgv = NULL;     /* hidden (loser) */
6154             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6155                 CV *cv;
6156                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6157                     (cv = GvCVu(gv)))
6158                 {
6159                     if (GvIMPORTED_CV(gv))
6160                         ogv = gv;
6161                     else if (! CvMETHOD(cv))
6162                         hgv = gv;
6163                 }
6164                 if (!ogv &&
6165                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6166                     (gv = *gvp) && isGV_with_GP(gv) &&
6167                     GvCVu(gv) && GvIMPORTED_CV(gv))
6168                 {
6169                     ogv = gv;
6170                 }
6171             }
6172             if (ogv) {
6173                 orig_keyword = tmp;
6174                 tmp = 0;                /* overridden by import or by GLOBAL */
6175             }
6176             else if (gv && !gvp
6177                      && -tmp==KEY_lock  /* XXX generalizable kludge */
6178                      && GvCVu(gv))
6179             {
6180                 tmp = 0;                /* any sub overrides "weak" keyword */
6181             }
6182             else {                      /* no override */
6183                 tmp = -tmp;
6184                 if (tmp == KEY_dump) {
6185                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6186                                    "dump() better written as CORE::dump()");
6187                 }
6188                 gv = NULL;
6189                 gvp = 0;
6190                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
6191                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6192                                    "Ambiguous call resolved as CORE::%s(), %s",
6193                                    GvENAME(hgv), "qualify as such or use &");
6194             }
6195         }
6196
6197       reserved_word:
6198         switch (tmp) {
6199
6200         default:                        /* not a keyword */
6201             /* Trade off - by using this evil construction we can pull the
6202                variable gv into the block labelled keylookup. If not, then
6203                we have to give it function scope so that the goto from the
6204                earlier ':' case doesn't bypass the initialisation.  */
6205             if (0) {
6206             just_a_word_zero_gv:
6207                 gv = NULL;
6208                 gvp = NULL;
6209                 orig_keyword = 0;
6210             }
6211           just_a_word: {
6212                 SV *sv;
6213                 int pkgname = 0;
6214                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6215                 OP *rv2cv_op;
6216                 CV *cv;
6217 #ifdef PERL_MAD
6218                 SV *nextPL_nextwhite = 0;
6219 #endif
6220
6221
6222                 /* Get the rest if it looks like a package qualifier */
6223
6224                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6225                     STRLEN morelen;
6226                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6227                                   TRUE, &morelen);
6228                     if (!morelen)
6229                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6230                                 *s == '\'' ? "'" : "::");
6231                     len += morelen;
6232                     pkgname = 1;
6233                 }
6234
6235                 if (PL_expect == XOPERATOR) {
6236                     if (PL_bufptr == PL_linestart) {
6237                         CopLINE_dec(PL_curcop);
6238                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6239                         CopLINE_inc(PL_curcop);
6240                     }
6241                     else
6242                         no_op("Bareword",s);
6243                 }
6244
6245                 /* Look for a subroutine with this name in current package,
6246                    unless name is "Foo::", in which case Foo is a bareword
6247                    (and a package name). */
6248
6249                 if (len > 2 && !PL_madskills &&
6250                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6251                 {
6252                     if (ckWARN(WARN_BAREWORD)
6253                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6254                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6255                             "Bareword \"%s\" refers to nonexistent package",
6256                              PL_tokenbuf);
6257                     len -= 2;
6258                     PL_tokenbuf[len] = '\0';
6259                     gv = NULL;
6260                     gvp = 0;
6261                 }
6262                 else {
6263                     if (!gv) {
6264                         /* Mustn't actually add anything to a symbol table.
6265                            But also don't want to "initialise" any placeholder
6266                            constants that might already be there into full
6267                            blown PVGVs with attached PVCV.  */
6268                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6269                                                GV_NOADD_NOINIT, SVt_PVCV);
6270                     }
6271                     len = 0;
6272                 }
6273
6274                 /* if we saw a global override before, get the right name */
6275
6276                 if (gvp) {
6277                     sv = newSVpvs("CORE::GLOBAL::");
6278                     sv_catpv(sv,PL_tokenbuf);
6279                 }
6280                 else {
6281                     /* If len is 0, newSVpv does strlen(), which is correct.
6282                        If len is non-zero, then it will be the true length,
6283                        and so the scalar will be created correctly.  */
6284                     sv = newSVpv(PL_tokenbuf,len);
6285                 }
6286 #ifdef PERL_MAD
6287                 if (PL_madskills && !PL_thistoken) {
6288                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6289                     PL_thistoken = newSVpvn(start,s - start);
6290                     PL_realtokenstart = s - SvPVX(PL_linestr);
6291                 }
6292 #endif
6293
6294                 /* Presume this is going to be a bareword of some sort. */
6295
6296                 CLINE;
6297                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6298                 pl_yylval.opval->op_private = OPpCONST_BARE;
6299                 /* UTF-8 package name? */
6300                 if (UTF && !IN_BYTES &&
6301                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
6302                     SvUTF8_on(sv);
6303
6304                 /* And if "Foo::", then that's what it certainly is. */
6305
6306                 if (len)
6307                     goto safe_bareword;
6308
6309                 cv = NULL;
6310                 {
6311                     OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6312                     const_op->op_private = OPpCONST_BARE;
6313                     rv2cv_op = newCVREF(0, const_op);
6314                 }
6315                 if (rv2cv_op->op_type == OP_RV2CV &&
6316                         (rv2cv_op->op_flags & OPf_KIDS)) {
6317                     OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6318                     switch (rv_op->op_type) {
6319                         case OP_CONST: {
6320                             SV *sv = cSVOPx_sv(rv_op);
6321                             if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6322                                 cv = (CV*)SvRV(sv);
6323                         } break;
6324                         case OP_GV: {
6325                             GV *gv = cGVOPx_gv(rv_op);
6326                             CV *maybe_cv = GvCVu(gv);
6327                             if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6328                                 cv = maybe_cv;
6329                         } break;
6330                     }
6331                 }
6332
6333                 /* See if it's the indirect object for a list operator. */
6334
6335                 if (PL_oldoldbufptr &&
6336                     PL_oldoldbufptr < PL_bufptr &&
6337                     (PL_oldoldbufptr == PL_last_lop
6338                      || PL_oldoldbufptr == PL_last_uni) &&
6339                     /* NO SKIPSPACE BEFORE HERE! */
6340                     (PL_expect == XREF ||
6341                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6342                 {
6343                     bool immediate_paren = *s == '(';
6344
6345                     /* (Now we can afford to cross potential line boundary.) */
6346                     s = SKIPSPACE2(s,nextPL_nextwhite);
6347 #ifdef PERL_MAD
6348                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
6349 #endif
6350
6351                     /* Two barewords in a row may indicate method call. */
6352
6353                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6354                         (tmp = intuit_method(s, gv, cv))) {
6355                         op_free(rv2cv_op);
6356                         return REPORT(tmp);
6357                     }
6358
6359                     /* If not a declared subroutine, it's an indirect object. */
6360                     /* (But it's an indir obj regardless for sort.) */
6361                     /* Also, if "_" follows a filetest operator, it's a bareword */
6362
6363                     if (
6364                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6365                          (!cv &&
6366                         (PL_last_lop_op != OP_MAPSTART &&
6367                          PL_last_lop_op != OP_GREPSTART))))
6368                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6369                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6370                        )
6371                     {
6372                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6373                         goto bareword;
6374                     }
6375                 }
6376
6377                 PL_expect = XOPERATOR;
6378 #ifdef PERL_MAD
6379                 if (isSPACE(*s))
6380                     s = SKIPSPACE2(s,nextPL_nextwhite);
6381                 PL_nextwhite = nextPL_nextwhite;
6382 #else
6383                 s = skipspace(s);
6384 #endif
6385
6386                 /* Is this a word before a => operator? */
6387                 if (*s == '=' && s[1] == '>' && !pkgname) {
6388                     op_free(rv2cv_op);
6389                     CLINE;
6390                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6391                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6392                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6393                     TERM(WORD);
6394                 }
6395
6396                 /* If followed by a paren, it's certainly a subroutine. */
6397                 if (*s == '(') {
6398                     CLINE;
6399                     if (cv) {
6400                         d = s + 1;
6401                         while (SPACE_OR_TAB(*d))
6402                             d++;
6403                         if (*d == ')' && (sv = cv_const_sv(cv))) {
6404                             s = d + 1;
6405                             goto its_constant;
6406                         }
6407                     }
6408 #ifdef PERL_MAD
6409                     if (PL_madskills) {
6410                         PL_nextwhite = PL_thiswhite;
6411                         PL_thiswhite = 0;
6412                     }
6413                     start_force(PL_curforce);
6414 #endif
6415                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6416                     PL_expect = XOPERATOR;
6417 #ifdef PERL_MAD
6418                     if (PL_madskills) {
6419                         PL_nextwhite = nextPL_nextwhite;
6420                         curmad('X', PL_thistoken);
6421                         PL_thistoken = newSVpvs("");
6422                     }
6423 #endif
6424                     op_free(rv2cv_op);
6425                     force_next(WORD);
6426                     pl_yylval.ival = 0;
6427                     TOKEN('&');
6428                 }
6429
6430                 /* If followed by var or block, call it a method (unless sub) */
6431
6432                 if ((*s == '$' || *s == '{') && !cv) {
6433                     op_free(rv2cv_op);
6434                     PL_last_lop = PL_oldbufptr;
6435                     PL_last_lop_op = OP_METHOD;
6436                     PREBLOCK(METHOD);
6437                 }
6438
6439                 /* If followed by a bareword, see if it looks like indir obj. */
6440
6441                 if (!orig_keyword
6442                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6443                         && (tmp = intuit_method(s, gv, cv))) {
6444                     op_free(rv2cv_op);
6445                     return REPORT(tmp);
6446                 }
6447
6448                 /* Not a method, so call it a subroutine (if defined) */
6449
6450                 if (cv) {
6451                     if (lastchar == '-')
6452                         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6453                                          "Ambiguous use of -%s resolved as -&%s()",
6454                                          PL_tokenbuf, PL_tokenbuf);
6455                     /* Check for a constant sub */
6456                     if ((sv = cv_const_sv(cv))) {
6457                   its_constant:
6458                         op_free(rv2cv_op);
6459                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6460                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6461                         pl_yylval.opval->op_private = 0;
6462                         TOKEN(WORD);
6463                     }
6464
6465                     op_free(pl_yylval.opval);
6466                     pl_yylval.opval = rv2cv_op;
6467                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6468                     PL_last_lop = PL_oldbufptr;
6469                     PL_last_lop_op = OP_ENTERSUB;
6470                     /* Is there a prototype? */
6471                     if (
6472 #ifdef PERL_MAD
6473                         cv &&
6474 #endif
6475                         SvPOK(cv))
6476                     {
6477                         STRLEN protolen;
6478                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6479                         if (!protolen)
6480                             TERM(FUNC0SUB);
6481                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
6482                             OPERATOR(UNIOPSUB);
6483                         while (*proto == ';')
6484                             proto++;
6485                         if (*proto == '&' && *s == '{') {
6486                             if (PL_curstash)
6487                                 sv_setpvs(PL_subname, "__ANON__");
6488                             else
6489                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6490                             PREBLOCK(LSTOPSUB);
6491                         }
6492                     }
6493 #ifdef PERL_MAD
6494                     {
6495                         if (PL_madskills) {
6496                             PL_nextwhite = PL_thiswhite;
6497                             PL_thiswhite = 0;
6498                         }
6499                         start_force(PL_curforce);
6500                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6501                         PL_expect = XTERM;
6502                         if (PL_madskills) {
6503                             PL_nextwhite = nextPL_nextwhite;
6504                             curmad('X', PL_thistoken);
6505                             PL_thistoken = newSVpvs("");
6506                         }
6507                         force_next(WORD);
6508                         TOKEN(NOAMP);
6509                     }
6510                 }
6511
6512                 /* Guess harder when madskills require "best effort". */
6513                 if (PL_madskills && (!gv || !GvCVu(gv))) {
6514                     int probable_sub = 0;
6515                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
6516                         probable_sub = 1;
6517                     else if (isALPHA(*s)) {
6518                         char tmpbuf[1024];
6519                         STRLEN tmplen;
6520                         d = s;
6521                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6522                         if (!keyword(tmpbuf, tmplen, 0))
6523                             probable_sub = 1;
6524                         else {
6525                             while (d < PL_bufend && isSPACE(*d))
6526                                 d++;
6527                             if (*d == '=' && d[1] == '>')
6528                                 probable_sub = 1;
6529                         }
6530                     }
6531                     if (probable_sub) {
6532                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6533                         op_free(pl_yylval.opval);
6534                         pl_yylval.opval = rv2cv_op;
6535                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6536                         PL_last_lop = PL_oldbufptr;
6537                         PL_last_lop_op = OP_ENTERSUB;
6538                         PL_nextwhite = PL_thiswhite;
6539                         PL_thiswhite = 0;
6540                         start_force(PL_curforce);
6541                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6542                         PL_expect = XTERM;
6543                         PL_nextwhite = nextPL_nextwhite;
6544                         curmad('X', PL_thistoken);
6545                         PL_thistoken = newSVpvs("");
6546                         force_next(WORD);
6547                         TOKEN(NOAMP);
6548                     }
6549 #else
6550                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6551                     PL_expect = XTERM;
6552                     force_next(WORD);
6553                     TOKEN(NOAMP);
6554 #endif
6555                 }
6556
6557                 /* Call it a bare word */
6558
6559                 if (PL_hints & HINT_STRICT_SUBS)
6560                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
6561                 else {
6562                 bareword:
6563                     /* after "print" and similar functions (corresponding to
6564                      * "F? L" in opcode.pl), whatever wasn't already parsed as
6565                      * a filehandle should be subject to "strict subs".
6566                      * Likewise for the optional indirect-object argument to system
6567                      * or exec, which can't be a bareword */
6568                     if ((PL_last_lop_op == OP_PRINT
6569                             || PL_last_lop_op == OP_PRTF
6570                             || PL_last_lop_op == OP_SAY
6571                             || PL_last_lop_op == OP_SYSTEM
6572                             || PL_last_lop_op == OP_EXEC)
6573                             && (PL_hints & HINT_STRICT_SUBS))
6574                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
6575                     if (lastchar != '-') {
6576                         if (ckWARN(WARN_RESERVED)) {
6577                             d = PL_tokenbuf;
6578                             while (isLOWER(*d))
6579                                 d++;
6580                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6581                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6582                                        PL_tokenbuf);
6583                         }
6584                     }
6585                 }
6586                 op_free(rv2cv_op);
6587
6588             safe_bareword:
6589                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6590                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6591                                      "Operator or semicolon missing before %c%s",
6592                                      lastchar, PL_tokenbuf);
6593                     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6594                                      "Ambiguous use of %c resolved as operator %c",
6595                                      lastchar, lastchar);
6596                 }
6597                 TOKEN(WORD);
6598             }
6599
6600         case KEY___FILE__:
6601             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6602                                         newSVpv(CopFILE(PL_curcop),0));
6603             TERM(THING);
6604
6605         case KEY___LINE__:
6606             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6607                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6608             TERM(THING);
6609
6610         case KEY___PACKAGE__:
6611             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6612                                         (PL_curstash
6613                                          ? newSVhek(HvNAME_HEK(PL_curstash))
6614                                          : &PL_sv_undef));
6615             TERM(THING);
6616
6617         case KEY___DATA__:
6618         case KEY___END__: {
6619             GV *gv;
6620             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6621                 const char *pname = "main";
6622                 if (PL_tokenbuf[2] == 'D')
6623                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6624                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6625                                 SVt_PVIO);
6626                 GvMULTI_on(gv);
6627                 if (!GvIO(gv))
6628                     GvIOp(gv) = newIO();
6629                 IoIFP(GvIOp(gv)) = PL_rsfp;
6630 #if defined(HAS_FCNTL) && defined(F_SETFD)
6631                 {
6632                     const int fd = PerlIO_fileno(PL_rsfp);
6633                     fcntl(fd,F_SETFD,fd >= 3);
6634                 }
6635 #endif
6636                 /* Mark this internal pseudo-handle as clean */
6637                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6638                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6639                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6640                 else
6641                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6642 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6643                 /* if the script was opened in binmode, we need to revert
6644                  * it to text mode for compatibility; but only if it has CRs
6645                  * XXX this is a questionable hack at best. */
6646                 if (PL_bufend-PL_bufptr > 2
6647                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6648                 {
6649                     Off_t loc = 0;
6650                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6651                         loc = PerlIO_tell(PL_rsfp);
6652                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
6653                     }
6654 #ifdef NETWARE
6655                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6656 #else
6657                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6658 #endif  /* NETWARE */
6659 #ifdef PERLIO_IS_STDIO /* really? */
6660 #  if defined(__BORLANDC__)
6661                         /* XXX see note in do_binmode() */
6662                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6663 #  endif
6664 #endif
6665                         if (loc > 0)
6666                             PerlIO_seek(PL_rsfp, loc, 0);
6667                     }
6668                 }
6669 #endif
6670 #ifdef PERLIO_LAYERS
6671                 if (!IN_BYTES) {
6672                     if (UTF)
6673                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6674                     else if (PL_encoding) {
6675                         SV *name;
6676                         dSP;
6677                         ENTER;
6678                         SAVETMPS;
6679                         PUSHMARK(sp);
6680                         EXTEND(SP, 1);
6681                         XPUSHs(PL_encoding);
6682                         PUTBACK;
6683                         call_method("name", G_SCALAR);
6684                         SPAGAIN;
6685                         name = POPs;
6686                         PUTBACK;
6687                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6688                                             Perl_form(aTHX_ ":encoding(%"SVf")",
6689                                                       SVfARG(name)));
6690                         FREETMPS;
6691                         LEAVE;
6692                     }
6693                 }
6694 #endif
6695 #ifdef PERL_MAD
6696                 if (PL_madskills) {
6697                     if (PL_realtokenstart >= 0) {
6698                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6699                         if (!PL_endwhite)
6700                             PL_endwhite = newSVpvs("");
6701                         sv_catsv(PL_endwhite, PL_thiswhite);
6702                         PL_thiswhite = 0;
6703                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6704                         PL_realtokenstart = -1;
6705                     }
6706                     while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6707                            != NULL) ;
6708                 }
6709 #endif
6710                 PL_rsfp = NULL;
6711             }
6712             goto fake_eof;
6713         }
6714
6715         case KEY_AUTOLOAD:
6716         case KEY_DESTROY:
6717         case KEY_BEGIN:
6718         case KEY_UNITCHECK:
6719         case KEY_CHECK:
6720         case KEY_INIT:
6721         case KEY_END:
6722             if (PL_expect == XSTATE) {
6723                 s = PL_bufptr;
6724                 goto really_sub;
6725             }
6726             goto just_a_word;
6727
6728         case KEY_CORE:
6729             if (*s == ':' && s[1] == ':') {
6730                 s += 2;
6731                 d = s;
6732                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6733                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6734                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6735                 if (tmp < 0)
6736                     tmp = -tmp;
6737                 else if (tmp == KEY_require || tmp == KEY_do)
6738                     /* that's a way to remember we saw "CORE::" */
6739                     orig_keyword = tmp;
6740                 goto reserved_word;
6741             }
6742             goto just_a_word;
6743
6744         case KEY_abs:
6745             UNI(OP_ABS);
6746
6747         case KEY_alarm:
6748             UNI(OP_ALARM);
6749
6750         case KEY_accept:
6751             LOP(OP_ACCEPT,XTERM);
6752
6753         case KEY_and:
6754             OPERATOR(ANDOP);
6755
6756         case KEY_atan2:
6757             LOP(OP_ATAN2,XTERM);
6758
6759         case KEY_bind:
6760             LOP(OP_BIND,XTERM);
6761
6762         case KEY_binmode:
6763             LOP(OP_BINMODE,XTERM);
6764
6765         case KEY_bless:
6766             LOP(OP_BLESS,XTERM);
6767
6768         case KEY_break:
6769             FUN0(OP_BREAK);
6770
6771         case KEY_chop:
6772             UNI(OP_CHOP);
6773
6774         case KEY_continue:
6775             /* When the "switch" feature is enabled, continue has a dual
6776                life as a control operator. */
6777             {
6778                 if (!FEATURE_IS_ENABLED("switch"))
6779                     PREBLOCK(CONTINUE);
6780                 else {
6781                     /* We have to disambiguate the two senses of
6782                       "continue". If the next token is a '{' then
6783                       treat it as the start of a continue block;
6784                       otherwise treat it as a control operator.
6785                      */
6786                     s = skipspace(s);
6787                     if (*s == '{')
6788             PREBLOCK(CONTINUE);
6789                     else
6790                         FUN0(OP_CONTINUE);
6791                 }
6792             }
6793
6794         case KEY_chdir:
6795             /* may use HOME */
6796             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6797             UNI(OP_CHDIR);
6798
6799         case KEY_close:
6800             UNI(OP_CLOSE);
6801
6802         case KEY_closedir:
6803             UNI(OP_CLOSEDIR);
6804
6805         case KEY_cmp:
6806             Eop(OP_SCMP);
6807
6808         case KEY_caller:
6809             UNI(OP_CALLER);
6810
6811         case KEY_crypt:
6812 #ifdef FCRYPT
6813             if (!PL_cryptseen) {
6814                 PL_cryptseen = TRUE;
6815                 init_des();
6816             }
6817 #endif
6818             LOP(OP_CRYPT,XTERM);
6819
6820         case KEY_chmod:
6821             LOP(OP_CHMOD,XTERM);
6822
6823         case KEY_chown:
6824             LOP(OP_CHOWN,XTERM);
6825
6826         case KEY_connect:
6827             LOP(OP_CONNECT,XTERM);
6828
6829         case KEY_chr:
6830             UNI(OP_CHR);
6831
6832         case KEY_cos:
6833             UNI(OP_COS);
6834
6835         case KEY_chroot:
6836             UNI(OP_CHROOT);
6837
6838         case KEY_default:
6839             PREBLOCK(DEFAULT);
6840
6841         case KEY_do:
6842             s = SKIPSPACE1(s);
6843             if (*s == '{')
6844                 PRETERMBLOCK(DO);
6845             if (*s != '\'')
6846                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6847             if (orig_keyword == KEY_do) {
6848                 orig_keyword = 0;
6849                 pl_yylval.ival = 1;
6850             }
6851             else
6852                 pl_yylval.ival = 0;
6853             OPERATOR(DO);
6854
6855         case KEY_die:
6856             PL_hints |= HINT_BLOCK_SCOPE;
6857             LOP(OP_DIE,XTERM);
6858
6859         case KEY_defined:
6860             UNI(OP_DEFINED);
6861
6862         case KEY_delete:
6863             UNI(OP_DELETE);
6864
6865         case KEY_dbmopen:
6866             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6867             LOP(OP_DBMOPEN,XTERM);
6868
6869         case KEY_dbmclose:
6870             UNI(OP_DBMCLOSE);
6871
6872         case KEY_dump:
6873             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6874             LOOPX(OP_DUMP);
6875
6876         case KEY_else:
6877             PREBLOCK(ELSE);
6878
6879         case KEY_elsif:
6880             pl_yylval.ival = CopLINE(PL_curcop);
6881             OPERATOR(ELSIF);
6882
6883         case KEY_eq:
6884             Eop(OP_SEQ);
6885
6886         case KEY_exists:
6887             UNI(OP_EXISTS);
6888
6889         case KEY_exit:
6890             if (PL_madskills)
6891                 UNI(OP_INT);
6892             UNI(OP_EXIT);
6893
6894         case KEY_eval:
6895             s = SKIPSPACE1(s);
6896             if (*s == '{') { /* block eval */
6897                 PL_expect = XTERMBLOCK;
6898                 UNIBRACK(OP_ENTERTRY);
6899             }
6900             else { /* string eval */
6901                 PL_expect = XTERM;
6902                 UNIBRACK(OP_ENTEREVAL);
6903             }
6904
6905         case KEY_eof:
6906             UNI(OP_EOF);
6907
6908         case KEY_exp:
6909             UNI(OP_EXP);
6910
6911         case KEY_each:
6912             UNI(OP_EACH);
6913
6914         case KEY_exec:
6915             LOP(OP_EXEC,XREF);
6916
6917         case KEY_endhostent:
6918             FUN0(OP_EHOSTENT);
6919
6920         case KEY_endnetent:
6921             FUN0(OP_ENETENT);
6922
6923         case KEY_endservent:
6924             FUN0(OP_ESERVENT);
6925
6926         case KEY_endprotoent:
6927             FUN0(OP_EPROTOENT);
6928
6929         case KEY_endpwent:
6930             FUN0(OP_EPWENT);
6931
6932         case KEY_endgrent:
6933             FUN0(OP_EGRENT);
6934
6935         case KEY_for:
6936         case KEY_foreach:
6937             pl_yylval.ival = CopLINE(PL_curcop);
6938             s = SKIPSPACE1(s);
6939             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6940                 char *p = s;
6941 #ifdef PERL_MAD
6942                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6943 #endif
6944
6945                 if ((PL_bufend - p) >= 3 &&
6946                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6947                     p += 2;
6948                 else if ((PL_bufend - p) >= 4 &&
6949                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6950                     p += 3;
6951                 p = PEEKSPACE(p);
6952                 if (isIDFIRST_lazy_if(p,UTF)) {
6953                     p = scan_ident(p, PL_bufend,
6954                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6955                     p = PEEKSPACE(p);
6956                 }
6957                 if (*p != '$')
6958                     Perl_croak(aTHX_ "Missing $ on loop variable");
6959 #ifdef PERL_MAD
6960                 s = SvPVX(PL_linestr) + soff;
6961 #endif
6962             }
6963             OPERATOR(FOR);
6964
6965         case KEY_formline:
6966             LOP(OP_FORMLINE,XTERM);
6967
6968         case KEY_fork:
6969             FUN0(OP_FORK);
6970
6971         case KEY_fcntl:
6972             LOP(OP_FCNTL,XTERM);
6973
6974         case KEY_fileno:
6975             UNI(OP_FILENO);
6976
6977         case KEY_flock:
6978             LOP(OP_FLOCK,XTERM);
6979
6980         case KEY_gt:
6981             Rop(OP_SGT);
6982
6983         case KEY_ge:
6984             Rop(OP_SGE);
6985
6986         case KEY_grep:
6987             LOP(OP_GREPSTART, XREF);
6988
6989         case KEY_goto:
6990             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6991             LOOPX(OP_GOTO);
6992
6993         case KEY_gmtime:
6994             UNI(OP_GMTIME);
6995
6996         case KEY_getc:
6997             UNIDOR(OP_GETC);
6998
6999         case KEY_getppid:
7000             FUN0(OP_GETPPID);
7001
7002         case KEY_getpgrp:
7003             UNI(OP_GETPGRP);
7004
7005         case KEY_getpriority:
7006             LOP(OP_GETPRIORITY,XTERM);
7007
7008         case KEY_getprotobyname:
7009             UNI(OP_GPBYNAME);
7010
7011         case KEY_getprotobynumber:
7012             LOP(OP_GPBYNUMBER,XTERM);
7013
7014         case KEY_getprotoent:
7015             FUN0(OP_GPROTOENT);
7016
7017         case KEY_getpwent:
7018             FUN0(OP_GPWENT);
7019
7020         case KEY_getpwnam:
7021             UNI(OP_GPWNAM);
7022
7023         case KEY_getpwuid:
7024             UNI(OP_GPWUID);
7025
7026         case KEY_getpeername:
7027             UNI(OP_GETPEERNAME);
7028
7029         case KEY_gethostbyname:
7030             UNI(OP_GHBYNAME);
7031
7032         case KEY_gethostbyaddr:
7033             LOP(OP_GHBYADDR,XTERM);
7034
7035         case KEY_gethostent:
7036             FUN0(OP_GHOSTENT);
7037
7038         case KEY_getnetbyname:
7039             UNI(OP_GNBYNAME);
7040
7041         case KEY_getnetbyaddr:
7042             LOP(OP_GNBYADDR,XTERM);
7043
7044         case KEY_getnetent:
7045             FUN0(OP_GNETENT);
7046
7047         case KEY_getservbyname:
7048             LOP(OP_GSBYNAME,XTERM);
7049
7050         case KEY_getservbyport:
7051             LOP(OP_GSBYPORT,XTERM);
7052
7053         case KEY_getservent:
7054             FUN0(OP_GSERVENT);
7055
7056         case KEY_getsockname:
7057             UNI(OP_GETSOCKNAME);
7058
7059         case KEY_getsockopt:
7060             LOP(OP_GSOCKOPT,XTERM);
7061
7062         case KEY_getgrent:
7063             FUN0(OP_GGRENT);
7064
7065         case KEY_getgrnam:
7066             UNI(OP_GGRNAM);
7067
7068         case KEY_getgrgid:
7069             UNI(OP_GGRGID);
7070
7071         case KEY_getlogin:
7072             FUN0(OP_GETLOGIN);
7073
7074         case KEY_given:
7075             pl_yylval.ival = CopLINE(PL_curcop);
7076             OPERATOR(GIVEN);
7077
7078         case KEY_glob:
7079             LOP(OP_GLOB,XTERM);
7080
7081         case KEY_hex:
7082             UNI(OP_HEX);
7083
7084         case KEY_if:
7085             pl_yylval.ival = CopLINE(PL_curcop);
7086             OPERATOR(IF);
7087
7088         case KEY_index:
7089             LOP(OP_INDEX,XTERM);
7090
7091         case KEY_int:
7092             UNI(OP_INT);
7093
7094         case KEY_ioctl:
7095             LOP(OP_IOCTL,XTERM);
7096
7097         case KEY_join:
7098             LOP(OP_JOIN,XTERM);
7099
7100         case KEY_keys:
7101             UNI(OP_KEYS);
7102
7103         case KEY_kill:
7104             LOP(OP_KILL,XTERM);
7105
7106         case KEY_last:
7107             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7108             LOOPX(OP_LAST);
7109
7110         case KEY_lc:
7111             UNI(OP_LC);
7112
7113         case KEY_lcfirst:
7114             UNI(OP_LCFIRST);
7115
7116         case KEY_local:
7117             pl_yylval.ival = 0;
7118             OPERATOR(LOCAL);
7119
7120         case KEY_length:
7121             UNI(OP_LENGTH);
7122
7123         case KEY_lt:
7124             Rop(OP_SLT);
7125
7126         case KEY_le:
7127             Rop(OP_SLE);
7128
7129         case KEY_localtime:
7130             UNI(OP_LOCALTIME);
7131
7132         case KEY_log:
7133             UNI(OP_LOG);
7134
7135         case KEY_link:
7136             LOP(OP_LINK,XTERM);
7137
7138         case KEY_listen:
7139             LOP(OP_LISTEN,XTERM);
7140
7141         case KEY_lock:
7142             UNI(OP_LOCK);
7143
7144         case KEY_lstat:
7145             UNI(OP_LSTAT);
7146
7147         case KEY_m:
7148             s = scan_pat(s,OP_MATCH);
7149             TERM(sublex_start());
7150
7151         case KEY_map:
7152             LOP(OP_MAPSTART, XREF);
7153
7154         case KEY_mkdir:
7155             LOP(OP_MKDIR,XTERM);
7156
7157         case KEY_msgctl:
7158             LOP(OP_MSGCTL,XTERM);
7159
7160         case KEY_msgget:
7161             LOP(OP_MSGGET,XTERM);
7162
7163         case KEY_msgrcv:
7164             LOP(OP_MSGRCV,XTERM);
7165
7166         case KEY_msgsnd:
7167             LOP(OP_MSGSND,XTERM);
7168
7169         case KEY_our:
7170         case KEY_my:
7171         case KEY_state:
7172             PL_in_my = (U16)tmp;
7173             s = SKIPSPACE1(s);
7174             if (isIDFIRST_lazy_if(s,UTF)) {
7175 #ifdef PERL_MAD
7176                 char* start = s;
7177 #endif
7178                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7179                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7180                     goto really_sub;
7181                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7182                 if (!PL_in_my_stash) {
7183                     char tmpbuf[1024];
7184                     PL_bufptr = s;
7185                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7186                     yyerror(tmpbuf);
7187                 }
7188 #ifdef PERL_MAD
7189                 if (PL_madskills) {     /* just add type to declarator token */
7190                     sv_catsv(PL_thistoken, PL_nextwhite);
7191                     PL_nextwhite = 0;
7192                     sv_catpvn(PL_thistoken, start, s - start);
7193                 }
7194 #endif
7195             }
7196             pl_yylval.ival = 1;
7197             OPERATOR(MY);
7198
7199         case KEY_next:
7200             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7201             LOOPX(OP_NEXT);
7202
7203         case KEY_ne:
7204             Eop(OP_SNE);
7205
7206         case KEY_no:
7207             s = tokenize_use(0, s);
7208             OPERATOR(USE);
7209
7210         case KEY_not:
7211             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7212                 FUN1(OP_NOT);
7213             else
7214                 OPERATOR(NOTOP);
7215
7216         case KEY_open:
7217             s = SKIPSPACE1(s);
7218             if (isIDFIRST_lazy_if(s,UTF)) {
7219                 const char *t;
7220                 for (d = s; isALNUM_lazy_if(d,UTF);)
7221                     d++;
7222                 for (t=d; isSPACE(*t);)
7223                     t++;
7224                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7225                     /* [perl #16184] */
7226                     && !(t[0] == '=' && t[1] == '>')
7227                 ) {
7228                     int parms_len = (int)(d-s);
7229                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7230                            "Precedence problem: open %.*s should be open(%.*s)",
7231                             parms_len, s, parms_len, s);
7232                 }
7233             }
7234             LOP(OP_OPEN,XTERM);
7235
7236         case KEY_or:
7237             pl_yylval.ival = OP_OR;
7238             OPERATOR(OROP);
7239
7240         case KEY_ord:
7241             UNI(OP_ORD);
7242
7243         case KEY_oct:
7244             UNI(OP_OCT);
7245
7246         case KEY_opendir:
7247             LOP(OP_OPEN_DIR,XTERM);
7248
7249         case KEY_print:
7250             checkcomma(s,PL_tokenbuf,"filehandle");
7251             LOP(OP_PRINT,XREF);
7252
7253         case KEY_printf:
7254             checkcomma(s,PL_tokenbuf,"filehandle");
7255             LOP(OP_PRTF,XREF);
7256
7257         case KEY_prototype:
7258             UNI(OP_PROTOTYPE);
7259
7260         case KEY_push:
7261             LOP(OP_PUSH,XTERM);
7262
7263         case KEY_pop:
7264             UNIDOR(OP_POP);
7265
7266         case KEY_pos:
7267             UNIDOR(OP_POS);
7268
7269         case KEY_pack:
7270             LOP(OP_PACK,XTERM);
7271
7272         case KEY_package:
7273             s = force_word(s,WORD,FALSE,TRUE,FALSE);
7274             s = SKIPSPACE1(s);
7275             s = force_strict_version(s);
7276             PL_lex_expect = XBLOCK;
7277             OPERATOR(PACKAGE);
7278
7279         case KEY_pipe:
7280             LOP(OP_PIPE_OP,XTERM);
7281
7282         case KEY_q:
7283             s = scan_str(s,!!PL_madskills,FALSE);
7284             if (!s)
7285                 missingterm(NULL);
7286             pl_yylval.ival = OP_CONST;
7287             TERM(sublex_start());
7288
7289         case KEY_quotemeta:
7290             UNI(OP_QUOTEMETA);
7291
7292         case KEY_qw:
7293             s = scan_str(s,!!PL_madskills,FALSE);
7294             if (!s)
7295                 missingterm(NULL);
7296             PL_expect = XOPERATOR;
7297             force_next(')');
7298             if (SvCUR(PL_lex_stuff)) {
7299                 OP *words = NULL;
7300                 int warned = 0;
7301                 d = SvPV_force(PL_lex_stuff, len);
7302                 while (len) {
7303                     for (; isSPACE(*d) && len; --len, ++d)
7304                         /**/;
7305                     if (len) {
7306                         SV *sv;
7307                         const char *b = d;
7308                         if (!warned && ckWARN(WARN_QW)) {
7309                             for (; !isSPACE(*d) && len; --len, ++d) {
7310                                 if (*d == ',') {
7311                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7312                                         "Possible attempt to separate words with commas");
7313                                     ++warned;
7314                                 }
7315                                 else if (*d == '#') {
7316                                     Perl_warner(aTHX_ packWARN(WARN_QW),
7317                                         "Possible attempt to put comments in qw() list");
7318                                     ++warned;
7319                                 }
7320                             }
7321                         }
7322                         else {
7323                             for (; !isSPACE(*d) && len; --len, ++d)
7324                                 /**/;
7325                         }
7326                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7327                         words = append_elem(OP_LIST, words,
7328                                             newSVOP(OP_CONST, 0, tokeq(sv)));
7329                     }
7330                 }
7331                 if (words) {
7332                     start_force(PL_curforce);
7333                     NEXTVAL_NEXTTOKE.opval = words;
7334                     force_next(THING);
7335                 }
7336             }
7337             if (PL_lex_stuff) {
7338                 SvREFCNT_dec(PL_lex_stuff);
7339                 PL_lex_stuff = NULL;
7340             }
7341             PL_expect = XTERM;
7342             TOKEN('(');
7343
7344         case KEY_qq:
7345             s = scan_str(s,!!PL_madskills,FALSE);
7346             if (!s)
7347                 missingterm(NULL);
7348             pl_yylval.ival = OP_STRINGIFY;
7349             if (SvIVX(PL_lex_stuff) == '\'')
7350                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
7351             TERM(sublex_start());
7352
7353         case KEY_qr:
7354             s = scan_pat(s,OP_QR);
7355             TERM(sublex_start());
7356
7357         case KEY_qx:
7358             s = scan_str(s,!!PL_madskills,FALSE);
7359             if (!s)
7360                 missingterm(NULL);
7361             readpipe_override();
7362             TERM(sublex_start());
7363
7364         case KEY_return:
7365             OLDLOP(OP_RETURN);
7366
7367         case KEY_require:
7368             s = SKIPSPACE1(s);
7369             if (isDIGIT(*s)) {
7370                 s = force_version(s, FALSE);
7371             }
7372             else if (*s != 'v' || !isDIGIT(s[1])
7373                     || (s = force_version(s, TRUE), *s == 'v'))
7374             {
7375                 *PL_tokenbuf = '\0';
7376                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7377                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7378                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7379                 else if (*s == '<')
7380                     yyerror("<> should be quotes");
7381             }
7382             if (orig_keyword == KEY_require) {
7383                 orig_keyword = 0;
7384                 pl_yylval.ival = 1;
7385             }
7386             else
7387                 pl_yylval.ival = 0;
7388             PL_expect = XTERM;
7389             PL_bufptr = s;
7390             PL_last_uni = PL_oldbufptr;
7391             PL_last_lop_op = OP_REQUIRE;
7392             s = skipspace(s);
7393             return REPORT( (int)REQUIRE );
7394
7395         case KEY_reset:
7396             UNI(OP_RESET);
7397
7398         case KEY_redo:
7399             s = force_word(s,WORD,TRUE,FALSE,FALSE);
7400             LOOPX(OP_REDO);
7401
7402         case KEY_rename:
7403             LOP(OP_RENAME,XTERM);
7404
7405         case KEY_rand:
7406             UNI(OP_RAND);
7407
7408         case KEY_rmdir:
7409             UNI(OP_RMDIR);
7410
7411         case KEY_rindex:
7412             LOP(OP_RINDEX,XTERM);
7413
7414         case KEY_read:
7415             LOP(OP_READ,XTERM);
7416
7417         case KEY_readdir:
7418             UNI(OP_READDIR);
7419
7420         case KEY_readline:
7421             UNIDOR(OP_READLINE);
7422
7423         case KEY_readpipe:
7424             UNIDOR(OP_BACKTICK);
7425
7426         case KEY_rewinddir:
7427             UNI(OP_REWINDDIR);
7428
7429         case KEY_recv:
7430             LOP(OP_RECV,XTERM);
7431
7432         case KEY_reverse:
7433             LOP(OP_REVERSE,XTERM);
7434
7435         case KEY_readlink:
7436             UNIDOR(OP_READLINK);
7437
7438         case KEY_ref:
7439             UNI(OP_REF);
7440
7441         case KEY_s:
7442             s = scan_subst(s);
7443             if (pl_yylval.opval)
7444                 TERM(sublex_start());
7445             else
7446                 TOKEN(1);       /* force error */
7447
7448         case KEY_say:
7449             checkcomma(s,PL_tokenbuf,"filehandle");
7450             LOP(OP_SAY,XREF);
7451
7452         case KEY_chomp:
7453             UNI(OP_CHOMP);
7454
7455         case KEY_scalar:
7456             UNI(OP_SCALAR);
7457
7458         case KEY_select:
7459             LOP(OP_SELECT,XTERM);
7460
7461         case KEY_seek:
7462             LOP(OP_SEEK,XTERM);
7463
7464         case KEY_semctl:
7465             LOP(OP_SEMCTL,XTERM);
7466
7467         case KEY_semget:
7468             LOP(OP_SEMGET,XTERM);
7469
7470         case KEY_semop:
7471             LOP(OP_SEMOP,XTERM);
7472
7473         case KEY_send:
7474             LOP(OP_SEND,XTERM);
7475
7476         case KEY_setpgrp:
7477             LOP(OP_SETPGRP,XTERM);
7478
7479         case KEY_setpriority:
7480             LOP(OP_SETPRIORITY,XTERM);
7481
7482         case KEY_sethostent:
7483             UNI(OP_SHOSTENT);
7484
7485         case KEY_setnetent:
7486             UNI(OP_SNETENT);
7487
7488         case KEY_setservent:
7489             UNI(OP_SSERVENT);
7490
7491         case KEY_setprotoent:
7492             UNI(OP_SPROTOENT);
7493
7494         case KEY_setpwent:
7495             FUN0(OP_SPWENT);
7496
7497         case KEY_setgrent:
7498             FUN0(OP_SGRENT);
7499
7500         case KEY_seekdir:
7501             LOP(OP_SEEKDIR,XTERM);
7502
7503         case KEY_setsockopt:
7504             LOP(OP_SSOCKOPT,XTERM);
7505
7506         case KEY_shift:
7507             UNIDOR(OP_SHIFT);
7508
7509         case KEY_shmctl:
7510             LOP(OP_SHMCTL,XTERM);
7511
7512         case KEY_shmget:
7513             LOP(OP_SHMGET,XTERM);
7514
7515         case KEY_shmread:
7516             LOP(OP_SHMREAD,XTERM);
7517
7518         case KEY_shmwrite:
7519             LOP(OP_SHMWRITE,XTERM);
7520
7521         case KEY_shutdown:
7522             LOP(OP_SHUTDOWN,XTERM);
7523
7524         case KEY_sin:
7525             UNI(OP_SIN);
7526
7527         case KEY_sleep:
7528             UNI(OP_SLEEP);
7529
7530         case KEY_socket:
7531             LOP(OP_SOCKET,XTERM);
7532
7533         case KEY_socketpair:
7534             LOP(OP_SOCKPAIR,XTERM);
7535
7536         case KEY_sort:
7537             checkcomma(s,PL_tokenbuf,"subroutine name");
7538             s = SKIPSPACE1(s);
7539             if (*s == ';' || *s == ')')         /* probably a close */
7540                 Perl_croak(aTHX_ "sort is now a reserved word");
7541             PL_expect = XTERM;
7542             s = force_word(s,WORD,TRUE,TRUE,FALSE);
7543             LOP(OP_SORT,XREF);
7544
7545         case KEY_split:
7546             LOP(OP_SPLIT,XTERM);
7547
7548         case KEY_sprintf:
7549             LOP(OP_SPRINTF,XTERM);
7550
7551         case KEY_splice:
7552             LOP(OP_SPLICE,XTERM);
7553
7554         case KEY_sqrt:
7555             UNI(OP_SQRT);
7556
7557         case KEY_srand:
7558             UNI(OP_SRAND);
7559
7560         case KEY_stat:
7561             UNI(OP_STAT);
7562
7563         case KEY_study:
7564             UNI(OP_STUDY);
7565
7566         case KEY_substr:
7567             LOP(OP_SUBSTR,XTERM);
7568
7569         case KEY_format:
7570         case KEY_sub:
7571           really_sub:
7572             {
7573                 char tmpbuf[sizeof PL_tokenbuf];
7574                 SSize_t tboffset = 0;
7575                 expectation attrful;
7576                 bool have_name, have_proto;
7577                 const int key = tmp;
7578
7579 #ifdef PERL_MAD
7580                 SV *tmpwhite = 0;
7581
7582                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7583                 SV *subtoken = newSVpvn(tstart, s - tstart);
7584                 PL_thistoken = 0;
7585
7586                 d = s;
7587                 s = SKIPSPACE2(s,tmpwhite);
7588 #else
7589                 s = skipspace(s);
7590 #endif
7591
7592                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7593                     (*s == ':' && s[1] == ':'))
7594                 {
7595 #ifdef PERL_MAD
7596                     SV *nametoke = NULL;
7597 #endif
7598
7599                     PL_expect = XBLOCK;
7600                     attrful = XATTRBLOCK;
7601                     /* remember buffer pos'n for later force_word */
7602                     tboffset = s - PL_oldbufptr;
7603                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7604 #ifdef PERL_MAD
7605                     if (PL_madskills)
7606                         nametoke = newSVpvn(s, d - s);
7607 #endif
7608                     if (memchr(tmpbuf, ':', len))
7609                         sv_setpvn(PL_subname, tmpbuf, len);
7610                     else {
7611                         sv_setsv(PL_subname,PL_curstname);
7612                         sv_catpvs(PL_subname,"::");
7613                         sv_catpvn(PL_subname,tmpbuf,len);
7614                     }
7615                     have_name = TRUE;
7616
7617 #ifdef PERL_MAD
7618
7619                     start_force(0);
7620                     CURMAD('X', nametoke);
7621                     CURMAD('_', tmpwhite);
7622                     (void) force_word(PL_oldbufptr + tboffset, WORD,
7623                                       FALSE, TRUE, TRUE);
7624
7625                     s = SKIPSPACE2(d,tmpwhite);
7626 #else
7627                     s = skipspace(d);
7628 #endif
7629                 }
7630                 else {
7631                     if (key == KEY_my)
7632                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
7633                     PL_expect = XTERMBLOCK;
7634                     attrful = XATTRTERM;
7635                     sv_setpvs(PL_subname,"?");
7636                     have_name = FALSE;
7637                 }
7638
7639                 if (key == KEY_format) {
7640                     if (*s == '=')
7641                         PL_lex_formbrack = PL_lex_brackets + 1;
7642 #ifdef PERL_MAD
7643                     PL_thistoken = subtoken;
7644                     s = d;
7645 #else
7646                     if (have_name)
7647                         (void) force_word(PL_oldbufptr + tboffset, WORD,
7648                                           FALSE, TRUE, TRUE);
7649 #endif
7650                     OPERATOR(FORMAT);
7651                 }
7652
7653                 /* Look for a prototype */
7654                 if (*s == '(') {
7655                     char *p;
7656                     bool bad_proto = FALSE;
7657                     bool in_brackets = FALSE;
7658                     char greedy_proto = ' ';
7659                     bool proto_after_greedy_proto = FALSE;
7660                     bool must_be_last = FALSE;
7661                     bool underscore = FALSE;
7662                     bool seen_underscore = FALSE;
7663                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7664
7665                     s = scan_str(s,!!PL_madskills,FALSE);
7666                     if (!s)
7667                         Perl_croak(aTHX_ "Prototype not terminated");
7668                     /* strip spaces and check for bad characters */
7669                     d = SvPVX(PL_lex_stuff);
7670                     tmp = 0;
7671                     for (p = d; *p; ++p) {
7672                         if (!isSPACE(*p)) {
7673                             d[tmp++] = *p;
7674
7675                             if (warnillegalproto) {
7676                                 if (must_be_last)
7677                                     proto_after_greedy_proto = TRUE;
7678                                 if (!strchr("$@%*;[]&\\_", *p)) {
7679                                     bad_proto = TRUE;
7680                                 }
7681                                 else {
7682                                     if ( underscore ) {
7683                                         if ( *p != ';' )
7684                                             bad_proto = TRUE;
7685                                         underscore = FALSE;
7686                                     }
7687                                     if ( *p == '[' ) {
7688                                         in_brackets = TRUE;
7689                                     }
7690                                     else if ( *p == ']' ) {
7691                                         in_brackets = FALSE;
7692                                     }
7693                                     else if ( (*p == '@' || *p == '%') &&
7694                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
7695                                          !in_brackets ) {
7696                                         must_be_last = TRUE;
7697                                         greedy_proto = *p;
7698                                     }
7699                                     else if ( *p == '_' ) {
7700                                         underscore = seen_underscore = TRUE;
7701                                     }
7702                                 }
7703                             }
7704                         }
7705                     }
7706                     d[tmp] = '\0';
7707                     if (proto_after_greedy_proto)
7708                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7709                                     "Prototype after '%c' for %"SVf" : %s",
7710                                     greedy_proto, SVfARG(PL_subname), d);
7711                     if (bad_proto)
7712                         Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7713                                     "Illegal character %sin prototype for %"SVf" : %s",
7714                                     seen_underscore ? "after '_' " : "",
7715                                     SVfARG(PL_subname), d);
7716                     SvCUR_set(PL_lex_stuff, tmp);
7717                     have_proto = TRUE;
7718
7719 #ifdef PERL_MAD
7720                     start_force(0);
7721                     CURMAD('q', PL_thisopen);
7722                     CURMAD('_', tmpwhite);
7723                     CURMAD('=', PL_thisstuff);
7724                     CURMAD('Q', PL_thisclose);
7725                     NEXTVAL_NEXTTOKE.opval =
7726                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7727                     PL_lex_stuff = NULL;
7728                     force_next(THING);
7729
7730                     s = SKIPSPACE2(s,tmpwhite);
7731 #else
7732                     s = skipspace(s);
7733 #endif
7734                 }
7735                 else
7736                     have_proto = FALSE;
7737
7738                 if (*s == ':' && s[1] != ':')
7739                     PL_expect = attrful;
7740                 else if (*s != '{' && key == KEY_sub) {
7741                     if (!have_name)
7742                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7743                     else if (*s != ';' && *s != '}')
7744                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7745                 }
7746
7747 #ifdef PERL_MAD
7748                 start_force(0);
7749                 if (tmpwhite) {
7750                     if (PL_madskills)
7751                         curmad('^', newSVpvs(""));
7752                     CURMAD('_', tmpwhite);
7753                 }
7754                 force_next(0);
7755
7756                 PL_thistoken = subtoken;
7757 #else
7758                 if (have_proto) {
7759                     NEXTVAL_NEXTTOKE.opval =
7760                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7761                     PL_lex_stuff = NULL;
7762                     force_next(THING);
7763                 }
7764 #endif
7765                 if (!have_name) {
7766                     if (PL_curstash)
7767                         sv_setpvs(PL_subname, "__ANON__");
7768                     else
7769                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
7770                     TOKEN(ANONSUB);
7771                 }
7772 #ifndef PERL_MAD
7773                 (void) force_word(PL_oldbufptr + tboffset, WORD,
7774                                   FALSE, TRUE, TRUE);
7775 #endif
7776                 if (key == KEY_my)
7777                     TOKEN(MYSUB);
7778                 TOKEN(SUB);
7779             }
7780
7781         case KEY_system:
7782             LOP(OP_SYSTEM,XREF);
7783
7784         case KEY_symlink:
7785             LOP(OP_SYMLINK,XTERM);
7786
7787         case KEY_syscall:
7788             LOP(OP_SYSCALL,XTERM);
7789
7790         case KEY_sysopen:
7791             LOP(OP_SYSOPEN,XTERM);
7792
7793         case KEY_sysseek:
7794             LOP(OP_SYSSEEK,XTERM);
7795
7796         case KEY_sysread:
7797             LOP(OP_SYSREAD,XTERM);
7798
7799         case KEY_syswrite:
7800             LOP(OP_SYSWRITE,XTERM);
7801
7802         case KEY_tr:
7803             s = scan_trans(s);
7804             TERM(sublex_start());
7805
7806         case KEY_tell:
7807             UNI(OP_TELL);
7808
7809         case KEY_telldir:
7810             UNI(OP_TELLDIR);
7811
7812         case KEY_tie:
7813             LOP(OP_TIE,XTERM);
7814
7815         case KEY_tied:
7816             UNI(OP_TIED);
7817
7818         case KEY_time:
7819             FUN0(OP_TIME);
7820
7821         case KEY_times:
7822             FUN0(OP_TMS);
7823
7824         case KEY_truncate:
7825             LOP(OP_TRUNCATE,XTERM);
7826
7827         case KEY_uc:
7828             UNI(OP_UC);
7829
7830         case KEY_ucfirst:
7831             UNI(OP_UCFIRST);
7832
7833         case KEY_untie:
7834             UNI(OP_UNTIE);
7835
7836         case KEY_until:
7837             pl_yylval.ival = CopLINE(PL_curcop);
7838             OPERATOR(UNTIL);
7839
7840         case KEY_unless:
7841             pl_yylval.ival = CopLINE(PL_curcop);
7842             OPERATOR(UNLESS);
7843
7844         case KEY_unlink:
7845             LOP(OP_UNLINK,XTERM);
7846
7847         case KEY_undef:
7848             UNIDOR(OP_UNDEF);
7849
7850         case KEY_unpack:
7851             LOP(OP_UNPACK,XTERM);
7852
7853         case KEY_utime:
7854             LOP(OP_UTIME,XTERM);
7855
7856         case KEY_umask:
7857             UNIDOR(OP_UMASK);
7858
7859         case KEY_unshift:
7860             LOP(OP_UNSHIFT,XTERM);
7861
7862         case KEY_use:
7863             s = tokenize_use(1, s);
7864             OPERATOR(USE);
7865
7866         case KEY_values:
7867             UNI(OP_VALUES);
7868
7869         case KEY_vec:
7870             LOP(OP_VEC,XTERM);
7871
7872         case KEY_when:
7873             pl_yylval.ival = CopLINE(PL_curcop);
7874             OPERATOR(WHEN);
7875
7876         case KEY_while:
7877             pl_yylval.ival = CopLINE(PL_curcop);
7878             OPERATOR(WHILE);
7879
7880         case KEY_warn:
7881             PL_hints |= HINT_BLOCK_SCOPE;
7882             LOP(OP_WARN,XTERM);
7883
7884         case KEY_wait:
7885             FUN0(OP_WAIT);
7886
7887         case KEY_waitpid:
7888             LOP(OP_WAITPID,XTERM);
7889
7890         case KEY_wantarray:
7891             FUN0(OP_WANTARRAY);
7892
7893         case KEY_write:
7894 #ifdef EBCDIC
7895         {
7896             char ctl_l[2];
7897             ctl_l[0] = toCTRL('L');
7898             ctl_l[1] = '\0';
7899             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7900         }
7901 #else
7902             /* Make sure $^L is defined */
7903             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7904 #endif
7905             UNI(OP_ENTERWRITE);
7906
7907         case KEY_x:
7908             if (PL_expect == XOPERATOR)
7909                 Mop(OP_REPEAT);
7910             check_uni();
7911             goto just_a_word;
7912
7913         case KEY_xor:
7914             pl_yylval.ival = OP_XOR;
7915             OPERATOR(OROP);
7916
7917         case KEY_y:
7918             s = scan_trans(s);
7919             TERM(sublex_start());
7920         }
7921     }}
7922 }
7923 #ifdef __SC__
7924 #pragma segment Main
7925 #endif
7926
7927 static int
7928 S_pending_ident(pTHX)
7929 {
         /*
          * Turn the identifier currently held in PL_tokenbuf (leading sigil
          * character in PL_tokenbuf[0], name after it) into a token, storing
          * the op built for it in pl_yylval.opval.
          *
          * Returns PRIVATEREF (with an OP_PADANY op) when the name is being
          * declared by my/state or was found in the pad, and WORD (with an
          * OP_CONST op holding the name without its sigil) otherwise.
          *
          * Reports a parse error via yyerror() for a name containing ':' inside
          * an our/my/state declaration, croaks on a lexical $a/$b used with
          * "<=>" or "cmp" after a sort, and may emit a WARN_AMBIGUOUS warning
          * for an unknown @array met while PL_lex_state is not LEX_NORMAL.
          * PL_pending_ident is reset to 0 on every call.
          */
7930     dVAR;
7931     register char *d;
7932     PADOFFSET tmp = 0;
7933     /* pit keeps the value PL_pending_ident had on entry (tested against '@' below); the global itself is reset */
7934     char pit = PL_pending_ident;
7935     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7936     /* All routes through this function want to know if there is a colon.  */
7937     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7938     PL_pending_ident = 0;
7939
7940     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7941     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7942           "### Pending identifier '%s'\n", PL_tokenbuf); });
7943
7944     /* if we're in a my(), we can't allow dynamics here.
7945        $foo'bar has already been turned into $foo::bar, so
7946        just check for colons.
7947
7948        if it's a legal name, the OP is a PADANY.
7949     */
7950     if (PL_in_my) {
7951         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7952             if (has_colon)
7953                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7954                                   "variable %s in \"our\"",
7955                                   PL_tokenbuf));
                 /* No return here: the "our" case falls through to the code
                    below with tmp holding whatever allocmy() returned. */
7956             tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7957         }
7958         else {
                 /* PL_in_my is tested only against KEY_my here; any other
                    value is reported as "state". */
7959             if (has_colon)
7960                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7961                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7962
7963             pl_yylval.opval = newOP(OP_PADANY, 0);
7964             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7965             return PRIVATEREF;
7966         }
7967     }
7968
7969     /*
7970        build the ops for accesses to a my() variable.
7971
7972        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7973        then used in a comparison.  This catches most, but not
7974        all cases.  For instance, it catches
7975            sort { my($a); $a <=> $b }
7976        but not
7977            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7978        (although why you'd do that is anyone's guess).
7979     */
7980
7981     if (!has_colon) {
             /* Outside a declaration tmp comes from the pad lookup; inside
                "our" it is still the allocmy() result from above. */
7982         if (!PL_in_my)
7983             tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
             /* NOTE(review): NOT_IN_PAD is presumably pad_findmy()'s "no such
                name" result -- confirm against its definition. */
7984         if (tmp != NOT_IN_PAD) {
7985             /* might be an "our" variable */
7986             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7987                 /* build ops for a bareword */
                     /* sym becomes "<stash name>::<name without sigil>" */
7988                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7989                 HEK * const stashname = HvNAME_HEK(stash);
7990                 SV *  const sym = newSVhek(stashname);
7991                 sv_catpvs(sym, "::");
7992                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7993                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7994                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
                     /* the sigil picks the SV type requested for the glob:
                        '$' -> SVt_PV, '@' -> SVt_PVAV, anything else -> SVt_PVHV */
7995                 gv_fetchsv(sym,
7996                     (PL_in_eval
7997                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7998                         : GV_ADDMULTI
7999                     ),
8000                     ((PL_tokenbuf[0] == '$') ? SVt_PV
8001                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8002                      : SVt_PVHV));
8003                 return WORD;
8004             }
8005
8006             /* if it's a sort block and they're naming $a or $b */
8007             if (PL_last_lop_op == OP_SORT &&
8008                 PL_tokenbuf[0] == '$' &&
8009                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8010                 && !PL_tokenbuf[2])
8011             {
                     /* scan forward to the first newline or PL_bufend, starting
                        at PL_oldoldbufptr inside an eval and at PL_linestart
                        otherwise; any "<=>" or "cmp" found is fatal */
8012                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8013                      d < PL_bufend && *d != '\n';
8014                      d++)
8015                 {
8016                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8017                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8018                               PL_tokenbuf);
8019                     }
8020                 }
8021             }
8022
                 /* ordinary lexical: hand back its pad offset in op_targ */
8023             pl_yylval.opval = newOP(OP_PADANY, 0);
8024             pl_yylval.opval->op_targ = tmp;
8025             return PRIVATEREF;
8026         }
8027     }
8028
8029     /*
8030        Whine if they've said @foo in a doublequoted string,
8031        and @foo isn't a variable we can find in the symbol
8032        table.
8033     */
8034     if (ckWARN(WARN_AMBIGUOUS) &&
8035         pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
             /* Flags are 0 here, unlike the GV_ADD-style fetch at the end of the
                function, and the result is checked for NULL; presumably this is
                a lookup that does not create the glob -- confirm. */
8036         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8037                                          SVt_PVAV);
8038         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8039                 /* DO NOT warn for @- and @+ */
8040                 && !( PL_tokenbuf[2] == '\0' &&
8041                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8042            )
8043         {
8044             /* Downgraded from fatal to warning 20000522 mjd */
8045             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8046                         "Possible unintended interpolation of %s in string",
8047                         PL_tokenbuf);
8048         }
8049     }
8050
8051     /* build ops for a bareword */
         /* Reached for names containing ':' and for names not found in the pad:
            the constant holds the name without its sigil, and the glob is
            fetched with the SV type selected by that sigil. */
8052     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8053                                                       tokenbuf_len - 1));
8054     pl_yylval.opval->op_private = OPpCONST_ENTERED;
8055     gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8056                      PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8057                      ((PL_tokenbuf[0] == '$') ? SVt_PV
8058                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8059                       : SVt_PVHV));
8060     return WORD;
8061 }
8062
8063 /*
8064  *  The following code was generated by perl_keyword.pl.
8065  */
8066
8067 I32
8068 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8069 {
8070     dVAR;
8071
8072     PERL_ARGS_ASSERT_KEYWORD;
8073
8074   switch (len)
8075   {
8076     case 1: /* 5 tokens of length 1 */
8077       switch (name[0])
8078       {
8079         case 'm':
8080           {                                       /* m          */
8081             return KEY_m;
8082           }
8083
8084         case 'q':
8085           {                                       /* q          */
8086             return KEY_q;
8087           }
8088
8089         case 's':
8090           {                                       /* s          */
8091             return KEY_s;
8092           }
8093
8094         case 'x':
8095           {                                       /* x          */
8096             return -KEY_x;
8097           }
8098
8099         case 'y':
8100           {                                       /* y          */
8101             return KEY_y;
8102           }
8103
8104         default:
8105           goto unknown;
8106       }
8107
8108     case 2: /* 18 tokens of length 2 */
8109       switch (name[0])
8110       {
8111         case 'd':
8112           if (name[1] == 'o')
8113           {                                       /* do         */
8114             return KEY_do;
8115           }
8116
8117           goto unknown;
8118
8119         case 'e':
8120           if (name[1] == 'q')
8121           {                                       /* eq         */
8122             return -KEY_eq;
8123           }
8124
8125           goto unknown;
8126
8127         case 'g':
8128           switch (name[1])
8129           {
8130             case 'e':
8131               {                                   /* ge         */
8132                 return -KEY_ge;
8133               }
8134
8135             case 't':
8136               {                                   /* gt         */
8137                 return -KEY_gt;
8138               }
8139
8140             default:
8141               goto unknown;
8142           }
8143
8144         case 'i':
8145           if (name[1] == 'f')
8146           {                                       /* if         */
8147             return KEY_if;
8148           }
8149
8150           goto unknown;
8151
8152         case 'l':
8153           switch (name[1])
8154           {
8155             case 'c':
8156               {                                   /* lc         */
8157                 return -KEY_lc;
8158               }
8159
8160             case 'e':
8161               {                                   /* le         */
8162                 return -KEY_le;
8163               }
8164
8165             case 't':
8166               {                                   /* lt         */
8167                 return -KEY_lt;
8168               }
8169
8170             default:
8171               goto unknown;
8172           }
8173
8174         case 'm':
8175           if (name[1] == 'y')
8176           {                                       /* my         */
8177             return KEY_my;
8178           }
8179
8180           goto unknown;
8181
8182         case 'n':
8183           switch (name[1])
8184           {
8185             case 'e':
8186               {                                   /* ne         */
8187                 return -KEY_ne;
8188               }
8189
8190             case 'o':
8191               {                                   /* no         */
8192                 return KEY_no;
8193               }
8194
8195             default:
8196               goto unknown;
8197           }
8198
8199         case 'o':
8200           if (name[1] == 'r')
8201           {                                       /* or         */
8202             return -KEY_or;
8203           }
8204
8205           goto unknown;
8206
8207         case 'q':
8208           switch (name[1])
8209           {
8210             case 'q':
8211               {                                   /* qq         */
8212                 return KEY_qq;
8213               }
8214
8215             case 'r':
8216               {                                   /* qr         */
8217                 return KEY_qr;
8218               }
8219
8220             case 'w':
8221               {                                   /* qw         */
8222                 return KEY_qw;
8223               }
8224
8225             case 'x':
8226               {                                   /* qx         */
8227                 return KEY_qx;
8228               }
8229
8230             default:
8231               goto unknown;
8232           }
8233
8234         case 't':
8235           if (name[1] == 'r')
8236           {                                       /* tr         */
8237             return KEY_tr;
8238           }
8239
8240           goto unknown;
8241
8242         case 'u':
8243           if (name[1] == 'c')
8244           {                                       /* uc         */
8245             return -KEY_uc;
8246           }
8247
8248           goto unknown;
8249
8250         default:
8251           goto unknown;
8252       }
8253
8254     case 3: /* 28 tokens of length 3 */
8255       switch (name[0])
8256       {
8257         case 'E':
8258           if (name[1] == 'N' &&
8259               name[2] == 'D')
8260           {                                       /* END        */
8261             return KEY_END;
8262           }
8263
8264           goto unknown;
8265
8266         case 'a':
8267           switch (name[1])
8268           {
8269             case 'b':
8270               if (name[2] == 's')
8271               {                                   /* abs        */
8272                 return -KEY_abs;
8273               }
8274
8275               goto unknown;
8276
8277             case 'n':
8278               if (name[2] == 'd')
8279               {                                   /* and        */
8280                 return -KEY_and;
8281               }
8282
8283               goto unknown;
8284
8285             default:
8286               goto unknown;
8287           }
8288
8289         case 'c':
8290           switch (name[1])
8291           {
8292             case 'h':
8293               if (name[2] == 'r')
8294               {                                   /* chr        */
8295                 return -KEY_chr;
8296               }
8297
8298               goto unknown;
8299
8300             case 'm':
8301               if (name[2] == 'p')
8302               {                                   /* cmp        */
8303                 return -KEY_cmp;
8304               }
8305
8306               goto unknown;
8307
8308             case 'o':
8309               if (name[2] == 's')
8310               {                                   /* cos        */
8311                 return -KEY_cos;
8312               }
8313
8314               goto unknown;
8315
8316             default:
8317               goto unknown;
8318           }
8319
8320         case 'd':
8321           if (name[1] == 'i' &&
8322               name[2] == 'e')
8323           {                                       /* die        */
8324             return -KEY_die;
8325           }
8326
8327           goto unknown;
8328
8329         case 'e':
8330           switch (name[1])
8331           {
8332             case 'o':
8333               if (name[2] == 'f')
8334               {                                   /* eof        */
8335                 return -KEY_eof;
8336               }
8337
8338               goto unknown;
8339
8340             case 'x':
8341               if (name[2] == 'p')
8342               {                                   /* exp        */
8343                 return -KEY_exp;
8344               }
8345
8346               goto unknown;
8347
8348             default:
8349               goto unknown;
8350           }
8351
8352         case 'f':
8353           if (name[1] == 'o' &&
8354               name[2] == 'r')
8355           {                                       /* for        */
8356             return KEY_for;
8357           }
8358
8359           goto unknown;
8360
8361         case 'h':
8362           if (name[1] == 'e' &&
8363               name[2] == 'x')
8364           {                                       /* hex        */
8365             return -KEY_hex;
8366           }
8367
8368           goto unknown;
8369
8370         case 'i':
8371           if (name[1] == 'n' &&
8372               name[2] == 't')
8373           {                                       /* int        */
8374             return -KEY_int;
8375           }
8376
8377           goto unknown;
8378
8379         case 'l':
8380           if (name[1] == 'o' &&
8381               name[2] == 'g')
8382           {                                       /* log        */
8383             return -KEY_log;
8384           }
8385
8386           goto unknown;
8387
8388         case 'm':
8389           if (name[1] == 'a' &&
8390               name[2] == 'p')
8391           {                                       /* map        */
8392             return KEY_map;
8393           }
8394
8395           goto unknown;
8396
8397         case 'n':
8398           if (name[1] == 'o' &&
8399               name[2] == 't')
8400           {                                       /* not        */
8401             return -KEY_not;
8402           }
8403
8404           goto unknown;
8405
8406         case 'o':
8407           switch (name[1])
8408           {
8409             case 'c':
8410               if (name[2] == 't')
8411               {                                   /* oct        */
8412                 return -KEY_oct;
8413               }
8414
8415               goto unknown;
8416
8417             case 'r':
8418               if (name[2] == 'd')
8419               {                                   /* ord        */
8420                 return -KEY_ord;
8421               }
8422
8423               goto unknown;
8424
8425             case 'u':
8426               if (name[2] == 'r')
8427               {                                   /* our        */
8428                 return KEY_our;
8429               }
8430
8431               goto unknown;
8432
8433             default:
8434               goto unknown;
8435           }
8436
8437         case 'p':
8438           if (name[1] == 'o')
8439           {
8440             switch (name[2])
8441             {
8442               case 'p':
8443                 {                                 /* pop        */
8444                   return -KEY_pop;
8445                 }
8446
8447               case 's':
8448                 {                                 /* pos        */
8449                   return KEY_pos;
8450                 }
8451
8452               default:
8453                 goto unknown;
8454             }
8455           }
8456
8457           goto unknown;
8458
8459         case 'r':
8460           if (name[1] == 'e' &&
8461               name[2] == 'f')
8462           {                                       /* ref        */
8463             return -KEY_ref;
8464           }
8465
8466           goto unknown;
8467
8468         case 's':
8469           switch (name[1])
8470           {
8471             case 'a':
8472               if (name[2] == 'y')
8473               {                                   /* say        */
8474                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8475               }
8476
8477               goto unknown;
8478
8479             case 'i':
8480               if (name[2] == 'n')
8481               {                                   /* sin        */
8482                 return -KEY_sin;
8483               }
8484
8485               goto unknown;
8486
8487             case 'u':
8488               if (name[2] == 'b')
8489               {                                   /* sub        */
8490                 return KEY_sub;
8491               }
8492
8493               goto unknown;
8494
8495             default:
8496               goto unknown;
8497           }
8498
8499         case 't':
8500           if (name[1] == 'i' &&
8501               name[2] == 'e')
8502           {                                       /* tie        */
8503             return KEY_tie;
8504           }
8505
8506           goto unknown;
8507
8508         case 'u':
8509           if (name[1] == 's' &&
8510               name[2] == 'e')
8511           {                                       /* use        */
8512             return KEY_use;
8513           }
8514
8515           goto unknown;
8516
8517         case 'v':
8518           if (name[1] == 'e' &&
8519               name[2] == 'c')
8520           {                                       /* vec        */
8521             return -KEY_vec;
8522           }
8523
8524           goto unknown;
8525
8526         case 'x':
8527           if (name[1] == 'o' &&
8528               name[2] == 'r')
8529           {                                       /* xor        */
8530             return -KEY_xor;
8531           }
8532
8533           goto unknown;
8534
8535         default:
8536           goto unknown;
8537       }
8538
8539     case 4: /* 41 tokens of length 4 */
8540       switch (name[0])
8541       {
8542         case 'C':
8543           if (name[1] == 'O' &&
8544               name[2] == 'R' &&
8545               name[3] == 'E')
8546           {                                       /* CORE       */
8547             return -KEY_CORE;
8548           }
8549
8550           goto unknown;
8551
8552         case 'I':
8553           if (name[1] == 'N' &&
8554               name[2] == 'I' &&
8555               name[3] == 'T')
8556           {                                       /* INIT       */
8557             return KEY_INIT;
8558           }
8559
8560           goto unknown;
8561
8562         case 'b':
8563           if (name[1] == 'i' &&
8564               name[2] == 'n' &&
8565               name[3] == 'd')
8566           {                                       /* bind       */
8567             return -KEY_bind;
8568           }
8569
8570           goto unknown;
8571
8572         case 'c':
8573           if (name[1] == 'h' &&
8574               name[2] == 'o' &&
8575               name[3] == 'p')
8576           {                                       /* chop       */
8577             return -KEY_chop;
8578           }
8579
8580           goto unknown;
8581
8582         case 'd':
8583           if (name[1] == 'u' &&
8584               name[2] == 'm' &&
8585               name[3] == 'p')
8586           {                                       /* dump       */
8587             return -KEY_dump;
8588           }
8589
8590           goto unknown;
8591
8592         case 'e':
8593           switch (name[1])
8594           {
8595             case 'a':
8596               if (name[2] == 'c' &&
8597                   name[3] == 'h')
8598               {                                   /* each       */
8599                 return -KEY_each;
8600               }
8601
8602               goto unknown;
8603
8604             case 'l':
8605               if (name[2] == 's' &&
8606                   name[3] == 'e')
8607               {                                   /* else       */
8608                 return KEY_else;
8609               }
8610
8611               goto unknown;
8612
8613             case 'v':
8614               if (name[2] == 'a' &&
8615                   name[3] == 'l')
8616               {                                   /* eval       */
8617                 return KEY_eval;
8618               }
8619
8620               goto unknown;
8621
8622             case 'x':
8623               switch (name[2])
8624               {
8625                 case 'e':
8626                   if (name[3] == 'c')
8627                   {                               /* exec       */
8628                     return -KEY_exec;
8629                   }
8630
8631                   goto unknown;
8632
8633                 case 'i':
8634                   if (name[3] == 't')
8635                   {                               /* exit       */
8636                     return -KEY_exit;
8637                   }
8638
8639                   goto unknown;
8640
8641                 default:
8642                   goto unknown;
8643               }
8644
8645             default:
8646               goto unknown;
8647           }
8648
8649         case 'f':
8650           if (name[1] == 'o' &&
8651               name[2] == 'r' &&
8652               name[3] == 'k')
8653           {                                       /* fork       */
8654             return -KEY_fork;
8655           }
8656
8657           goto unknown;
8658
8659         case 'g':
8660           switch (name[1])
8661           {
8662             case 'e':
8663               if (name[2] == 't' &&
8664                   name[3] == 'c')
8665               {                                   /* getc       */
8666                 return -KEY_getc;
8667               }
8668
8669               goto unknown;
8670
8671             case 'l':
8672               if (name[2] == 'o' &&
8673                   name[3] == 'b')
8674               {                                   /* glob       */
8675                 return KEY_glob;
8676               }
8677
8678               goto unknown;
8679
8680             case 'o':
8681               if (name[2] == 't' &&
8682                   name[3] == 'o')
8683               {                                   /* goto       */
8684                 return KEY_goto;
8685               }
8686
8687               goto unknown;
8688
8689             case 'r':
8690               if (name[2] == 'e' &&
8691                   name[3] == 'p')
8692               {                                   /* grep       */
8693                 return KEY_grep;
8694               }
8695
8696               goto unknown;
8697
8698             default:
8699               goto unknown;
8700           }
8701
8702         case 'j':
8703           if (name[1] == 'o' &&
8704               name[2] == 'i' &&
8705               name[3] == 'n')
8706           {                                       /* join       */
8707             return -KEY_join;
8708           }
8709
8710           goto unknown;
8711
8712         case 'k':
8713           switch (name[1])
8714           {
8715             case 'e':
8716               if (name[2] == 'y' &&
8717                   name[3] == 's')
8718               {                                   /* keys       */
8719                 return -KEY_keys;
8720               }
8721
8722               goto unknown;
8723
8724             case 'i':
8725               if (name[2] == 'l' &&
8726                   name[3] == 'l')
8727               {                                   /* kill       */
8728                 return -KEY_kill;
8729               }
8730
8731               goto unknown;
8732
8733             default:
8734               goto unknown;
8735           }
8736
8737         case 'l':
8738           switch (name[1])
8739           {
8740             case 'a':
8741               if (name[2] == 's' &&
8742                   name[3] == 't')
8743               {                                   /* last       */
8744                 return KEY_last;
8745               }
8746
8747               goto unknown;
8748
8749             case 'i':
8750               if (name[2] == 'n' &&
8751                   name[3] == 'k')
8752               {                                   /* link       */
8753                 return -KEY_link;
8754               }
8755
8756               goto unknown;
8757
8758             case 'o':
8759               if (name[2] == 'c' &&
8760                   name[3] == 'k')
8761               {                                   /* lock       */
8762                 return -KEY_lock;
8763               }
8764
8765               goto unknown;
8766
8767             default:
8768               goto unknown;
8769           }
8770
8771         case 'n':
8772           if (name[1] == 'e' &&
8773               name[2] == 'x' &&
8774               name[3] == 't')
8775           {                                       /* next       */
8776             return KEY_next;
8777           }
8778
8779           goto unknown;
8780
8781         case 'o':
8782           if (name[1] == 'p' &&
8783               name[2] == 'e' &&
8784               name[3] == 'n')
8785           {                                       /* open       */
8786             return -KEY_open;
8787           }
8788
8789           goto unknown;
8790
8791         case 'p':
8792           switch (name[1])
8793           {
8794             case 'a':
8795               if (name[2] == 'c' &&
8796                   name[3] == 'k')
8797               {                                   /* pack       */
8798                 return -KEY_pack;
8799               }
8800
8801               goto unknown;
8802
8803             case 'i':
8804               if (name[2] == 'p' &&
8805                   name[3] == 'e')
8806               {                                   /* pipe       */
8807                 return -KEY_pipe;
8808               }
8809
8810               goto unknown;
8811
8812             case 'u':
8813               if (name[2] == 's' &&
8814                   name[3] == 'h')
8815               {                                   /* push       */
8816                 return -KEY_push;
8817               }
8818
8819               goto unknown;
8820
8821             default:
8822               goto unknown;
8823           }
8824
8825         case 'r':
8826           switch (name[1])
8827           {
8828             case 'a':
8829               if (name[2] == 'n' &&
8830                   name[3] == 'd')
8831               {                                   /* rand       */
8832                 return -KEY_rand;
8833               }
8834
8835               goto unknown;
8836
8837             case 'e':
8838               switch (name[2])
8839               {
8840                 case 'a':
8841                   if (name[3] == 'd')
8842                   {                               /* read       */
8843                     return -KEY_read;
8844                   }
8845
8846                   goto unknown;
8847
8848                 case 'c':
8849                   if (name[3] == 'v')
8850                   {                               /* recv       */
8851                     return -KEY_recv;
8852                   }
8853
8854                   goto unknown;
8855
8856                 case 'd':
8857                   if (name[3] == 'o')
8858                   {                               /* redo       */
8859                     return KEY_redo;
8860                   }
8861
8862                   goto unknown;
8863
8864                 default:
8865                   goto unknown;
8866               }
8867
8868             default:
8869               goto unknown;
8870           }
8871
8872         case 's':
8873           switch (name[1])
8874           {
8875             case 'e':
8876               switch (name[2])
8877               {
8878                 case 'e':
8879                   if (name[3] == 'k')
8880                   {                               /* seek       */
8881                     return -KEY_seek;
8882                   }
8883
8884                   goto unknown;
8885
8886                 case 'n':
8887                   if (name[3] == 'd')
8888                   {                               /* send       */
8889                     return -KEY_send;
8890                   }
8891
8892                   goto unknown;
8893
8894                 default:
8895                   goto unknown;
8896               }
8897
8898             case 'o':
8899               if (name[2] == 'r' &&
8900                   name[3] == 't')
8901               {                                   /* sort       */
8902                 return KEY_sort;
8903               }
8904
8905               goto unknown;
8906
8907             case 'q':
8908               if (name[2] == 'r' &&
8909                   name[3] == 't')
8910               {                                   /* sqrt       */
8911                 return -KEY_sqrt;
8912               }
8913
8914               goto unknown;
8915
8916             case 't':
8917               if (name[2] == 'a' &&
8918                   name[3] == 't')
8919               {                                   /* stat       */
8920                 return -KEY_stat;
8921               }
8922
8923               goto unknown;
8924
8925             default:
8926               goto unknown;
8927           }
8928
8929         case 't':
8930           switch (name[1])
8931           {
8932             case 'e':
8933               if (name[2] == 'l' &&
8934                   name[3] == 'l')
8935               {                                   /* tell       */
8936                 return -KEY_tell;
8937               }
8938
8939               goto unknown;
8940
8941             case 'i':
8942               switch (name[2])
8943               {
8944                 case 'e':
8945                   if (name[3] == 'd')
8946                   {                               /* tied       */
8947                     return KEY_tied;
8948                   }
8949
8950                   goto unknown;
8951
8952                 case 'm':
8953                   if (name[3] == 'e')
8954                   {                               /* time       */
8955                     return -KEY_time;
8956                   }
8957
8958                   goto unknown;
8959
8960                 default:
8961                   goto unknown;
8962               }
8963
8964             default:
8965               goto unknown;
8966           }
8967
8968         case 'w':
8969           switch (name[1])
8970           {
8971             case 'a':
8972               switch (name[2])
8973               {
8974                 case 'i':
8975                   if (name[3] == 't')
8976                   {                               /* wait       */
8977                     return -KEY_wait;
8978                   }
8979
8980                   goto unknown;
8981
8982                 case 'r':
8983                   if (name[3] == 'n')
8984                   {                               /* warn       */
8985                     return -KEY_warn;
8986                   }
8987
8988                   goto unknown;
8989
8990                 default:
8991                   goto unknown;
8992               }
8993
8994             case 'h':
8995               if (name[2] == 'e' &&
8996                   name[3] == 'n')
8997               {                                   /* when       */
8998                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8999               }
9000
9001               goto unknown;
9002
9003             default:
9004               goto unknown;
9005           }
9006
9007         default:
9008           goto unknown;
9009       }
9010
9011     case 5: /* 39 tokens of length 5 */
9012       switch (name[0])
9013       {
9014         case 'B':
9015           if (name[1] == 'E' &&
9016               name[2] == 'G' &&
9017               name[3] == 'I' &&
9018               name[4] == 'N')
9019           {                                       /* BEGIN      */
9020             return KEY_BEGIN;
9021           }
9022
9023           goto unknown;
9024
9025         case 'C':
9026           if (name[1] == 'H' &&
9027               name[2] == 'E' &&
9028               name[3] == 'C' &&
9029               name[4] == 'K')
9030           {                                       /* CHECK      */
9031             return KEY_CHECK;
9032           }
9033
9034           goto unknown;
9035
9036         case 'a':
9037           switch (name[1])
9038           {
9039             case 'l':
9040               if (name[2] == 'a' &&
9041                   name[3] == 'r' &&
9042                   name[4] == 'm')
9043               {                                   /* alarm      */
9044                 return -KEY_alarm;
9045               }
9046
9047               goto unknown;
9048
9049             case 't':
9050               if (name[2] == 'a' &&
9051                   name[3] == 'n' &&
9052                   name[4] == '2')
9053               {                                   /* atan2      */
9054                 return -KEY_atan2;
9055               }
9056
9057               goto unknown;
9058
9059             default:
9060               goto unknown;
9061           }
9062
9063         case 'b':
9064           switch (name[1])
9065           {
9066             case 'l':
9067               if (name[2] == 'e' &&
9068                   name[3] == 's' &&
9069                   name[4] == 's')
9070               {                                   /* bless      */
9071                 return -KEY_bless;
9072               }
9073
9074               goto unknown;
9075
9076             case 'r':
9077               if (name[2] == 'e' &&
9078                   name[3] == 'a' &&
9079                   name[4] == 'k')
9080               {                                   /* break      */
9081                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9082               }
9083
9084               goto unknown;
9085
9086             default:
9087               goto unknown;
9088           }
9089
9090         case 'c':
9091           switch (name[1])
9092           {
9093             case 'h':
9094               switch (name[2])
9095               {
9096                 case 'd':
9097                   if (name[3] == 'i' &&
9098                       name[4] == 'r')
9099                   {                               /* chdir      */
9100                     return -KEY_chdir;
9101                   }
9102
9103                   goto unknown;
9104
9105                 case 'm':
9106                   if (name[3] == 'o' &&
9107                       name[4] == 'd')
9108                   {                               /* chmod      */
9109                     return -KEY_chmod;
9110                   }
9111
9112                   goto unknown;
9113
9114                 case 'o':
9115                   switch (name[3])
9116                   {
9117                     case 'm':
9118                       if (name[4] == 'p')
9119                       {                           /* chomp      */
9120                         return -KEY_chomp;
9121                       }
9122
9123                       goto unknown;
9124
9125                     case 'w':
9126                       if (name[4] == 'n')
9127                       {                           /* chown      */
9128                         return -KEY_chown;
9129                       }
9130
9131                       goto unknown;
9132
9133                     default:
9134                       goto unknown;
9135                   }
9136
9137                 default:
9138                   goto unknown;
9139               }
9140
9141             case 'l':
9142               if (name[2] == 'o' &&
9143                   name[3] == 's' &&
9144                   name[4] == 'e')
9145               {                                   /* close      */
9146                 return -KEY_close;
9147               }
9148
9149               goto unknown;
9150
9151             case 'r':
9152               if (name[2] == 'y' &&
9153                   name[3] == 'p' &&
9154                   name[4] == 't')
9155               {                                   /* crypt      */
9156                 return -KEY_crypt;
9157               }
9158
9159               goto unknown;
9160
9161             default:
9162               goto unknown;
9163           }
9164
9165         case 'e':
9166           if (name[1] == 'l' &&
9167               name[2] == 's' &&
9168               name[3] == 'i' &&
9169               name[4] == 'f')
9170           {                                       /* elsif      */
9171             return KEY_elsif;
9172           }
9173
9174           goto unknown;
9175
9176         case 'f':
9177           switch (name[1])
9178           {
9179             case 'c':
9180               if (name[2] == 'n' &&
9181                   name[3] == 't' &&
9182                   name[4] == 'l')
9183               {                                   /* fcntl      */
9184                 return -KEY_fcntl;
9185               }
9186
9187               goto unknown;
9188
9189             case 'l':
9190               if (name[2] == 'o' &&
9191                   name[3] == 'c' &&
9192                   name[4] == 'k')
9193               {                                   /* flock      */
9194                 return -KEY_flock;
9195               }
9196
9197               goto unknown;
9198
9199             default:
9200               goto unknown;
9201           }
9202
9203         case 'g':
9204           if (name[1] == 'i' &&
9205               name[2] == 'v' &&
9206               name[3] == 'e' &&
9207               name[4] == 'n')
9208           {                                       /* given      */
9209             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9210           }
9211
9212           goto unknown;
9213
9214         case 'i':
9215           switch (name[1])
9216           {
9217             case 'n':
9218               if (name[2] == 'd' &&
9219                   name[3] == 'e' &&
9220                   name[4] == 'x')
9221               {                                   /* index      */
9222                 return -KEY_index;
9223               }
9224
9225               goto unknown;
9226
9227             case 'o':
9228               if (name[2] == 'c' &&
9229                   name[3] == 't' &&
9230                   name[4] == 'l')
9231               {                                   /* ioctl      */
9232                 return -KEY_ioctl;
9233               }
9234
9235               goto unknown;
9236
9237             default:
9238               goto unknown;
9239           }
9240
9241         case 'l':
9242           switch (name[1])
9243           {
9244             case 'o':
9245               if (name[2] == 'c' &&
9246                   name[3] == 'a' &&
9247                   name[4] == 'l')
9248               {                                   /* local      */
9249                 return KEY_local;
9250               }
9251
9252               goto unknown;
9253
9254             case 's':
9255               if (name[2] == 't' &&
9256                   name[3] == 'a' &&
9257                   name[4] == 't')
9258               {                                   /* lstat      */
9259                 return -KEY_lstat;
9260               }
9261
9262               goto unknown;
9263
9264             default:
9265               goto unknown;
9266           }
9267
9268         case 'm':
9269           if (name[1] == 'k' &&
9270               name[2] == 'd' &&
9271               name[3] == 'i' &&
9272               name[4] == 'r')
9273           {                                       /* mkdir      */
9274             return -KEY_mkdir;
9275           }
9276
9277           goto unknown;
9278
9279         case 'p':
9280           if (name[1] == 'r' &&
9281               name[2] == 'i' &&
9282               name[3] == 'n' &&
9283               name[4] == 't')
9284           {                                       /* print      */
9285             return KEY_print;
9286           }
9287
9288           goto unknown;
9289
9290         case 'r':
9291           switch (name[1])
9292           {
9293             case 'e':
9294               if (name[2] == 's' &&
9295                   name[3] == 'e' &&
9296                   name[4] == 't')
9297               {                                   /* reset      */
9298                 return -KEY_reset;
9299               }
9300
9301               goto unknown;
9302
9303             case 'm':
9304               if (name[2] == 'd' &&
9305                   name[3] == 'i' &&
9306                   name[4] == 'r')
9307               {                                   /* rmdir      */
9308                 return -KEY_rmdir;
9309               }
9310
9311               goto unknown;
9312
9313             default:
9314               goto unknown;
9315           }
9316
9317         case 's':
9318           switch (name[1])
9319           {
9320             case 'e':
9321               if (name[2] == 'm' &&
9322                   name[3] == 'o' &&
9323                   name[4] == 'p')
9324               {                                   /* semop      */
9325                 return -KEY_semop;
9326               }
9327
9328               goto unknown;
9329
9330             case 'h':
9331               if (name[2] == 'i' &&
9332                   name[3] == 'f' &&
9333                   name[4] == 't')
9334               {                                   /* shift      */
9335                 return -KEY_shift;
9336               }
9337
9338               goto unknown;
9339
9340             case 'l':
9341               if (name[2] == 'e' &&
9342                   name[3] == 'e' &&
9343                   name[4] == 'p')
9344               {                                   /* sleep      */
9345                 return -KEY_sleep;
9346               }
9347
9348               goto unknown;
9349
9350             case 'p':
9351               if (name[2] == 'l' &&
9352                   name[3] == 'i' &&
9353                   name[4] == 't')
9354               {                                   /* split      */
9355                 return KEY_split;
9356               }
9357
9358               goto unknown;
9359
9360             case 'r':
9361               if (name[2] == 'a' &&
9362                   name[3] == 'n' &&
9363                   name[4] == 'd')
9364               {                                   /* srand      */
9365                 return -KEY_srand;
9366               }
9367
9368               goto unknown;
9369
9370             case 't':
9371               switch (name[2])
9372               {
9373                 case 'a':
9374                   if (name[3] == 't' &&
9375                       name[4] == 'e')
9376                   {                               /* state      */
9377                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9378                   }
9379
9380                   goto unknown;
9381
9382                 case 'u':
9383                   if (name[3] == 'd' &&
9384                       name[4] == 'y')
9385                   {                               /* study      */
9386                     return KEY_study;
9387                   }
9388
9389                   goto unknown;
9390
9391                 default:
9392                   goto unknown;
9393               }
9394
9395             default:
9396               goto unknown;
9397           }
9398
9399         case 't':
9400           if (name[1] == 'i' &&
9401               name[2] == 'm' &&
9402               name[3] == 'e' &&
9403               name[4] == 's')
9404           {                                       /* times      */
9405             return -KEY_times;
9406           }
9407
9408           goto unknown;
9409
9410         case 'u':
9411           switch (name[1])
9412           {
9413             case 'm':
9414               if (name[2] == 'a' &&
9415                   name[3] == 's' &&
9416                   name[4] == 'k')
9417               {                                   /* umask      */
9418                 return -KEY_umask;
9419               }
9420
9421               goto unknown;
9422
9423             case 'n':
9424               switch (name[2])
9425               {
9426                 case 'd':
9427                   if (name[3] == 'e' &&
9428                       name[4] == 'f')
9429                   {                               /* undef      */
9430                     return KEY_undef;
9431                   }
9432
9433                   goto unknown;
9434
9435                 case 't':
9436                   if (name[3] == 'i')
9437                   {
9438                     switch (name[4])
9439                     {
9440                       case 'e':
9441                         {                         /* untie      */
9442                           return KEY_untie;
9443                         }
9444
9445                       case 'l':
9446                         {                         /* until      */
9447                           return KEY_until;
9448                         }
9449
9450                       default:
9451                         goto unknown;
9452                     }
9453                   }
9454
9455                   goto unknown;
9456
9457                 default:
9458                   goto unknown;
9459               }
9460
9461             case 't':
9462               if (name[2] == 'i' &&
9463                   name[3] == 'm' &&
9464                   name[4] == 'e')
9465               {                                   /* utime      */
9466                 return -KEY_utime;
9467               }
9468
9469               goto unknown;
9470
9471             default:
9472               goto unknown;
9473           }
9474
9475         case 'w':
9476           switch (name[1])
9477           {
9478             case 'h':
9479               if (name[2] == 'i' &&
9480                   name[3] == 'l' &&
9481                   name[4] == 'e')
9482               {                                   /* while      */
9483                 return KEY_while;
9484               }
9485
9486               goto unknown;
9487
9488             case 'r':
9489               if (name[2] == 'i' &&
9490                   name[3] == 't' &&
9491                   name[4] == 'e')
9492               {                                   /* write      */
9493                 return -KEY_write;
9494               }
9495
9496               goto unknown;
9497
9498             default:
9499               goto unknown;
9500           }
9501
9502         default:
9503           goto unknown;
9504       }
9505
9506     case 6: /* 33 tokens of length 6 */
9507       switch (name[0])
9508       {
9509         case 'a':
9510           if (name[1] == 'c' &&
9511               name[2] == 'c' &&
9512               name[3] == 'e' &&
9513               name[4] == 'p' &&
9514               name[5] == 't')
9515           {                                       /* accept     */
9516             return -KEY_accept;
9517           }
9518
9519           goto unknown;
9520
9521         case 'c':
9522           switch (name[1])
9523           {
9524             case 'a':
9525               if (name[2] == 'l' &&
9526                   name[3] == 'l' &&
9527                   name[4] == 'e' &&
9528                   name[5] == 'r')
9529               {                                   /* caller     */
9530                 return -KEY_caller;
9531               }
9532
9533               goto unknown;
9534
9535             case 'h':
9536               if (name[2] == 'r' &&
9537                   name[3] == 'o' &&
9538                   name[4] == 'o' &&
9539                   name[5] == 't')
9540               {                                   /* chroot     */
9541                 return -KEY_chroot;
9542               }
9543
9544               goto unknown;
9545
9546             default:
9547               goto unknown;
9548           }
9549
9550         case 'd':
9551           if (name[1] == 'e' &&
9552               name[2] == 'l' &&
9553               name[3] == 'e' &&
9554               name[4] == 't' &&
9555               name[5] == 'e')
9556           {                                       /* delete     */
9557             return KEY_delete;
9558           }
9559
9560           goto unknown;
9561
9562         case 'e':
9563           switch (name[1])
9564           {
9565             case 'l':
9566               if (name[2] == 's' &&
9567                   name[3] == 'e' &&
9568                   name[4] == 'i' &&
9569                   name[5] == 'f')
9570               {                                   /* elseif     */
9571                   Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9572               }
9573
9574               goto unknown;
9575
9576             case 'x':
9577               if (name[2] == 'i' &&
9578                   name[3] == 's' &&
9579                   name[4] == 't' &&
9580                   name[5] == 's')
9581               {                                   /* exists     */
9582                 return KEY_exists;
9583               }
9584
9585               goto unknown;
9586
9587             default:
9588               goto unknown;
9589           }
9590
9591         case 'f':
9592           switch (name[1])
9593           {
9594             case 'i':
9595               if (name[2] == 'l' &&
9596                   name[3] == 'e' &&
9597                   name[4] == 'n' &&
9598                   name[5] == 'o')
9599               {                                   /* fileno     */
9600                 return -KEY_fileno;
9601               }
9602
9603               goto unknown;
9604
9605             case 'o':
9606               if (name[2] == 'r' &&
9607                   name[3] == 'm' &&
9608                   name[4] == 'a' &&
9609                   name[5] == 't')
9610               {                                   /* format     */
9611                 return KEY_format;
9612               }
9613
9614               goto unknown;
9615
9616             default:
9617               goto unknown;
9618           }
9619
9620         case 'g':
9621           if (name[1] == 'm' &&
9622               name[2] == 't' &&
9623               name[3] == 'i' &&
9624               name[4] == 'm' &&
9625               name[5] == 'e')
9626           {                                       /* gmtime     */
9627             return -KEY_gmtime;
9628           }
9629
9630           goto unknown;
9631
9632         case 'l':
9633           switch (name[1])
9634           {
9635             case 'e':
9636               if (name[2] == 'n' &&
9637                   name[3] == 'g' &&
9638                   name[4] == 't' &&
9639                   name[5] == 'h')
9640               {                                   /* length     */
9641                 return -KEY_length;
9642               }
9643
9644               goto unknown;
9645
9646             case 'i':
9647               if (name[2] == 's' &&
9648                   name[3] == 't' &&
9649                   name[4] == 'e' &&
9650                   name[5] == 'n')
9651               {                                   /* listen     */
9652                 return -KEY_listen;
9653               }
9654
9655               goto unknown;
9656
9657             default:
9658               goto unknown;
9659           }
9660
9661         case 'm':
9662           if (name[1] == 's' &&
9663               name[2] == 'g')
9664           {
9665             switch (name[3])
9666             {
9667               case 'c':
9668                 if (name[4] == 't' &&
9669                     name[5] == 'l')
9670                 {                                 /* msgctl     */
9671                   return -KEY_msgctl;
9672                 }
9673
9674                 goto unknown;
9675
9676               case 'g':
9677                 if (name[4] == 'e' &&
9678                     name[5] == 't')
9679                 {                                 /* msgget     */
9680                   return -KEY_msgget;
9681                 }
9682
9683                 goto unknown;
9684
9685               case 'r':
9686                 if (name[4] == 'c' &&
9687                     name[5] == 'v')
9688                 {                                 /* msgrcv     */
9689                   return -KEY_msgrcv;
9690                 }
9691
9692                 goto unknown;
9693
9694               case 's':
9695                 if (name[4] == 'n' &&
9696                     name[5] == 'd')
9697                 {                                 /* msgsnd     */
9698                   return -KEY_msgsnd;
9699                 }
9700
9701                 goto unknown;
9702
9703               default:
9704                 goto unknown;
9705             }
9706           }
9707
9708           goto unknown;
9709
9710         case 'p':
9711           if (name[1] == 'r' &&
9712               name[2] == 'i' &&
9713               name[3] == 'n' &&
9714               name[4] == 't' &&
9715               name[5] == 'f')
9716           {                                       /* printf     */
9717             return KEY_printf;
9718           }
9719
9720           goto unknown;
9721
9722         case 'r':
9723           switch (name[1])
9724           {
9725             case 'e':
9726               switch (name[2])
9727               {
9728                 case 'n':
9729                   if (name[3] == 'a' &&
9730                       name[4] == 'm' &&
9731                       name[5] == 'e')
9732                   {                               /* rename     */
9733                     return -KEY_rename;
9734                   }
9735
9736                   goto unknown;
9737
9738                 case 't':
9739                   if (name[3] == 'u' &&
9740                       name[4] == 'r' &&
9741                       name[5] == 'n')
9742                   {                               /* return     */
9743                     return KEY_return;
9744                   }
9745
9746                   goto unknown;
9747
9748                 default:
9749                   goto unknown;
9750               }
9751
9752             case 'i':
9753               if (name[2] == 'n' &&
9754                   name[3] == 'd' &&
9755                   name[4] == 'e' &&
9756                   name[5] == 'x')
9757               {                                   /* rindex     */
9758                 return -KEY_rindex;
9759               }
9760
9761               goto unknown;
9762
9763             default:
9764               goto unknown;
9765           }
9766
9767         case 's':
9768           switch (name[1])
9769           {
9770             case 'c':
9771               if (name[2] == 'a' &&
9772                   name[3] == 'l' &&
9773                   name[4] == 'a' &&
9774                   name[5] == 'r')
9775               {                                   /* scalar     */
9776                 return KEY_scalar;
9777               }
9778
9779               goto unknown;
9780
9781             case 'e':
9782               switch (name[2])
9783               {
9784                 case 'l':
9785                   if (name[3] == 'e' &&
9786                       name[4] == 'c' &&
9787                       name[5] == 't')
9788                   {                               /* select     */
9789                     return -KEY_select;
9790                   }
9791
9792                   goto unknown;
9793
9794                 case 'm':
9795                   switch (name[3])
9796                   {
9797                     case 'c':
9798                       if (name[4] == 't' &&
9799                           name[5] == 'l')
9800                       {                           /* semctl     */
9801                         return -KEY_semctl;
9802                       }
9803
9804                       goto unknown;
9805
9806                     case 'g':
9807                       if (name[4] == 'e' &&
9808                           name[5] == 't')
9809                       {                           /* semget     */
9810                         return -KEY_semget;
9811                       }
9812
9813                       goto unknown;
9814
9815                     default:
9816                       goto unknown;
9817                   }
9818
9819                 default:
9820                   goto unknown;
9821               }
9822
9823             case 'h':
9824               if (name[2] == 'm')
9825               {
9826                 switch (name[3])
9827                 {
9828                   case 'c':
9829                     if (name[4] == 't' &&
9830                         name[5] == 'l')
9831                     {                             /* shmctl     */
9832                       return -KEY_shmctl;
9833                     }
9834
9835                     goto unknown;
9836
9837                   case 'g':
9838                     if (name[4] == 'e' &&
9839                         name[5] == 't')
9840                     {                             /* shmget     */
9841                       return -KEY_shmget;
9842                     }
9843
9844                     goto unknown;
9845
9846                   default:
9847                     goto unknown;
9848                 }
9849               }
9850
9851               goto unknown;
9852
9853             case 'o':
9854               if (name[2] == 'c' &&
9855                   name[3] == 'k' &&
9856                   name[4] == 'e' &&
9857                   name[5] == 't')
9858               {                                   /* socket     */
9859                 return -KEY_socket;
9860               }
9861
9862               goto unknown;
9863
9864             case 'p':
9865               if (name[2] == 'l' &&
9866                   name[3] == 'i' &&
9867                   name[4] == 'c' &&
9868                   name[5] == 'e')
9869               {                                   /* splice     */
9870                 return -KEY_splice;
9871               }
9872
9873               goto unknown;
9874
9875             case 'u':
9876               if (name[2] == 'b' &&
9877                   name[3] == 's' &&
9878                   name[4] == 't' &&
9879                   name[5] == 'r')
9880               {                                   /* substr     */
9881                 return -KEY_substr;
9882               }
9883
9884               goto unknown;
9885
9886             case 'y':
9887               if (name[2] == 's' &&
9888                   name[3] == 't' &&
9889                   name[4] == 'e' &&
9890                   name[5] == 'm')
9891               {                                   /* system     */
9892                 return -KEY_system;
9893               }
9894
9895               goto unknown;
9896
9897             default:
9898               goto unknown;
9899           }
9900
9901         case 'u':
9902           if (name[1] == 'n')
9903           {
9904             switch (name[2])
9905             {
9906               case 'l':
9907                 switch (name[3])
9908                 {
9909                   case 'e':
9910                     if (name[4] == 's' &&
9911                         name[5] == 's')
9912                     {                             /* unless     */
9913                       return KEY_unless;
9914                     }
9915
9916                     goto unknown;
9917
9918                   case 'i':
9919                     if (name[4] == 'n' &&
9920                         name[5] == 'k')
9921                     {                             /* unlink     */
9922                       return -KEY_unlink;
9923                     }
9924
9925                     goto unknown;
9926
9927                   default:
9928                     goto unknown;
9929                 }
9930
9931               case 'p':
9932                 if (name[3] == 'a' &&
9933                     name[4] == 'c' &&
9934                     name[5] == 'k')
9935                 {                                 /* unpack     */
9936                   return -KEY_unpack;
9937                 }
9938
9939                 goto unknown;
9940
9941               default:
9942                 goto unknown;
9943             }
9944           }
9945
9946           goto unknown;
9947
9948         case 'v':
9949           if (name[1] == 'a' &&
9950               name[2] == 'l' &&
9951               name[3] == 'u' &&
9952               name[4] == 'e' &&
9953               name[5] == 's')
9954           {                                       /* values     */
9955             return -KEY_values;
9956           }
9957
9958           goto unknown;
9959
9960         default:
9961           goto unknown;
9962       }
9963
9964     case 7: /* 29 tokens of length 7 */
9965       switch (name[0])
9966       {
9967         case 'D':
9968           if (name[1] == 'E' &&
9969               name[2] == 'S' &&
9970               name[3] == 'T' &&
9971               name[4] == 'R' &&
9972               name[5] == 'O' &&
9973               name[6] == 'Y')
9974           {                                       /* DESTROY    */
9975             return KEY_DESTROY;
9976           }
9977
9978           goto unknown;
9979
9980         case '_':
9981           if (name[1] == '_' &&
9982               name[2] == 'E' &&
9983               name[3] == 'N' &&
9984               name[4] == 'D' &&
9985               name[5] == '_' &&
9986               name[6] == '_')
9987           {                                       /* __END__    */
9988             return KEY___END__;
9989           }
9990
9991           goto unknown;
9992
9993         case 'b':
9994           if (name[1] == 'i' &&
9995               name[2] == 'n' &&
9996               name[3] == 'm' &&
9997               name[4] == 'o' &&
9998               name[5] == 'd' &&
9999               name[6] == 'e')
10000           {                                       /* binmode    */
10001             return -KEY_binmode;
10002           }
10003
10004           goto unknown;
10005
10006         case 'c':
10007           if (name[1] == 'o' &&
10008               name[2] == 'n' &&
10009               name[3] == 'n' &&
10010               name[4] == 'e' &&
10011               name[5] == 'c' &&
10012               name[6] == 't')
10013           {                                       /* connect    */
10014             return -KEY_connect;
10015           }
10016
10017           goto unknown;
10018
10019         case 'd':
10020           switch (name[1])
10021           {
10022             case 'b':
10023               if (name[2] == 'm' &&
10024                   name[3] == 'o' &&
10025                   name[4] == 'p' &&
10026                   name[5] == 'e' &&
10027                   name[6] == 'n')
10028               {                                   /* dbmopen    */
10029                 return -KEY_dbmopen;
10030               }
10031
10032               goto unknown;
10033
10034             case 'e':
10035               if (name[2] == 'f')
10036               {
10037                 switch (name[3])
10038                 {
10039                   case 'a':
10040                     if (name[4] == 'u' &&
10041                         name[5] == 'l' &&
10042                         name[6] == 't')
10043                     {                             /* default    */
10044                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10045                     }
10046
10047                     goto unknown;
10048
10049                   case 'i':
10050                     if (name[4] == 'n' &&
10051                         name[5] == 'e' &&
10052                         name[6] == 'd')
10053                     {                             /* defined    */
10054                       return KEY_defined;
10055                     }
10056
10057                     goto unknown;
10058
10059                   default:
10060                     goto unknown;
10061                 }
10062               }
10063
10064               goto unknown;
10065
10066             default:
10067               goto unknown;
10068           }
10069
10070         case 'f':
10071           if (name[1] == 'o' &&
10072               name[2] == 'r' &&
10073               name[3] == 'e' &&
10074               name[4] == 'a' &&
10075               name[5] == 'c' &&
10076               name[6] == 'h')
10077           {                                       /* foreach    */
10078             return KEY_foreach;
10079           }
10080
10081           goto unknown;
10082
10083         case 'g':
10084           if (name[1] == 'e' &&
10085               name[2] == 't' &&
10086               name[3] == 'p')
10087           {
10088             switch (name[4])
10089             {
10090               case 'g':
10091                 if (name[5] == 'r' &&
10092                     name[6] == 'p')
10093                 {                                 /* getpgrp    */
10094                   return -KEY_getpgrp;
10095                 }
10096
10097                 goto unknown;
10098
10099               case 'p':
10100                 if (name[5] == 'i' &&
10101                     name[6] == 'd')
10102                 {                                 /* getppid    */
10103                   return -KEY_getppid;
10104                 }
10105
10106                 goto unknown;
10107
10108               default:
10109                 goto unknown;
10110             }
10111           }
10112
10113           goto unknown;
10114
10115         case 'l':
10116           if (name[1] == 'c' &&
10117               name[2] == 'f' &&
10118               name[3] == 'i' &&
10119               name[4] == 'r' &&
10120               name[5] == 's' &&
10121               name[6] == 't')
10122           {                                       /* lcfirst    */
10123             return -KEY_lcfirst;
10124           }
10125
10126           goto unknown;
10127
10128         case 'o':
10129           if (name[1] == 'p' &&
10130               name[2] == 'e' &&
10131               name[3] == 'n' &&
10132               name[4] == 'd' &&
10133               name[5] == 'i' &&
10134               name[6] == 'r')
10135           {                                       /* opendir    */
10136             return -KEY_opendir;
10137           }
10138
10139           goto unknown;
10140
10141         case 'p':
10142           if (name[1] == 'a' &&
10143               name[2] == 'c' &&
10144               name[3] == 'k' &&
10145               name[4] == 'a' &&
10146               name[5] == 'g' &&
10147               name[6] == 'e')
10148           {                                       /* package    */
10149             return KEY_package;
10150           }
10151
10152           goto unknown;
10153
10154         case 'r':
10155           if (name[1] == 'e')
10156           {
10157             switch (name[2])
10158             {
10159               case 'a':
10160                 if (name[3] == 'd' &&
10161                     name[4] == 'd' &&
10162                     name[5] == 'i' &&
10163                     name[6] == 'r')
10164                 {                                 /* readdir    */
10165                   return -KEY_readdir;
10166                 }
10167
10168                 goto unknown;
10169
10170               case 'q':
10171                 if (name[3] == 'u' &&
10172                     name[4] == 'i' &&
10173                     name[5] == 'r' &&
10174                     name[6] == 'e')
10175                 {                                 /* require    */
10176                   return KEY_require;
10177                 }
10178
10179                 goto unknown;
10180
10181               case 'v':
10182                 if (name[3] == 'e' &&
10183                     name[4] == 'r' &&
10184                     name[5] == 's' &&
10185                     name[6] == 'e')
10186                 {                                 /* reverse    */
10187                   return -KEY_reverse;
10188                 }
10189
10190                 goto unknown;
10191
10192               default:
10193                 goto unknown;
10194             }
10195           }
10196
10197           goto unknown;
10198
10199         case 's':
10200           switch (name[1])
10201           {
10202             case 'e':
10203               switch (name[2])
10204               {
10205                 case 'e':
10206                   if (name[3] == 'k' &&
10207                       name[4] == 'd' &&
10208                       name[5] == 'i' &&
10209                       name[6] == 'r')
10210                   {                               /* seekdir    */
10211                     return -KEY_seekdir;
10212                   }
10213
10214                   goto unknown;
10215
10216                 case 't':
10217                   if (name[3] == 'p' &&
10218                       name[4] == 'g' &&
10219                       name[5] == 'r' &&
10220                       name[6] == 'p')
10221                   {                               /* setpgrp    */
10222                     return -KEY_setpgrp;
10223                   }
10224
10225                   goto unknown;
10226
10227                 default:
10228                   goto unknown;
10229               }
10230
10231             case 'h':
10232               if (name[2] == 'm' &&
10233                   name[3] == 'r' &&
10234                   name[4] == 'e' &&
10235                   name[5] == 'a' &&
10236                   name[6] == 'd')
10237               {                                   /* shmread    */
10238                 return -KEY_shmread;
10239               }
10240
10241               goto unknown;
10242
10243             case 'p':
10244               if (name[2] == 'r' &&
10245                   name[3] == 'i' &&
10246                   name[4] == 'n' &&
10247                   name[5] == 't' &&
10248                   name[6] == 'f')
10249               {                                   /* sprintf    */
10250                 return -KEY_sprintf;
10251               }
10252
10253               goto unknown;
10254
10255             case 'y':
10256               switch (name[2])
10257               {
10258                 case 'm':
10259                   if (name[3] == 'l' &&
10260                       name[4] == 'i' &&
10261                       name[5] == 'n' &&
10262                       name[6] == 'k')
10263                   {                               /* symlink    */
10264                     return -KEY_symlink;
10265                   }
10266
10267                   goto unknown;
10268
10269                 case 's':
10270                   switch (name[3])
10271                   {
10272                     case 'c':
10273                       if (name[4] == 'a' &&
10274                           name[5] == 'l' &&
10275                           name[6] == 'l')
10276                       {                           /* syscall    */
10277                         return -KEY_syscall;
10278                       }
10279
10280                       goto unknown;
10281
10282                     case 'o':
10283                       if (name[4] == 'p' &&
10284                           name[5] == 'e' &&
10285                           name[6] == 'n')
10286                       {                           /* sysopen    */
10287                         return -KEY_sysopen;
10288                       }
10289
10290                       goto unknown;
10291
10292                     case 'r':
10293                       if (name[4] == 'e' &&
10294                           name[5] == 'a' &&
10295                           name[6] == 'd')
10296                       {                           /* sysread    */
10297                         return -KEY_sysread;
10298                       }
10299
10300                       goto unknown;
10301
10302                     case 's':
10303                       if (name[4] == 'e' &&
10304                           name[5] == 'e' &&
10305                           name[6] == 'k')
10306                       {                           /* sysseek    */
10307                         return -KEY_sysseek;
10308                       }
10309
10310                       goto unknown;
10311
10312                     default:
10313                       goto unknown;
10314                   }
10315
10316                 default:
10317                   goto unknown;
10318               }
10319
10320             default:
10321               goto unknown;
10322           }
10323
10324         case 't':
10325           if (name[1] == 'e' &&
10326               name[2] == 'l' &&
10327               name[3] == 'l' &&
10328               name[4] == 'd' &&
10329               name[5] == 'i' &&
10330               name[6] == 'r')
10331           {                                       /* telldir    */
10332             return -KEY_telldir;
10333           }
10334
10335           goto unknown;
10336
10337         case 'u':
10338           switch (name[1])
10339           {
10340             case 'c':
10341               if (name[2] == 'f' &&
10342                   name[3] == 'i' &&
10343                   name[4] == 'r' &&
10344                   name[5] == 's' &&
10345                   name[6] == 't')
10346               {                                   /* ucfirst    */
10347                 return -KEY_ucfirst;
10348               }
10349
10350               goto unknown;
10351
10352             case 'n':
10353               if (name[2] == 's' &&
10354                   name[3] == 'h' &&
10355                   name[4] == 'i' &&
10356                   name[5] == 'f' &&
10357                   name[6] == 't')
10358               {                                   /* unshift    */
10359                 return -KEY_unshift;
10360               }
10361
10362               goto unknown;
10363
10364             default:
10365               goto unknown;
10366           }
10367
10368         case 'w':
10369           if (name[1] == 'a' &&
10370               name[2] == 'i' &&
10371               name[3] == 't' &&
10372               name[4] == 'p' &&
10373               name[5] == 'i' &&
10374               name[6] == 'd')
10375           {                                       /* waitpid    */
10376             return -KEY_waitpid;
10377           }
10378
10379           goto unknown;
10380
10381         default:
10382           goto unknown;
10383       }
10384
10385     case 8: /* 26 tokens of length 8 */
10386       switch (name[0])
10387       {
10388         case 'A':
10389           if (name[1] == 'U' &&
10390               name[2] == 'T' &&
10391               name[3] == 'O' &&
10392               name[4] == 'L' &&
10393               name[5] == 'O' &&
10394               name[6] == 'A' &&
10395               name[7] == 'D')
10396           {                                       /* AUTOLOAD   */
10397             return KEY_AUTOLOAD;
10398           }
10399
10400           goto unknown;
10401
10402         case '_':
10403           if (name[1] == '_')
10404           {
10405             switch (name[2])
10406             {
10407               case 'D':
10408                 if (name[3] == 'A' &&
10409                     name[4] == 'T' &&
10410                     name[5] == 'A' &&
10411                     name[6] == '_' &&
10412                     name[7] == '_')
10413                 {                                 /* __DATA__   */
10414                   return KEY___DATA__;
10415                 }
10416
10417                 goto unknown;
10418
10419               case 'F':
10420                 if (name[3] == 'I' &&
10421                     name[4] == 'L' &&
10422                     name[5] == 'E' &&
10423                     name[6] == '_' &&
10424                     name[7] == '_')
10425                 {                                 /* __FILE__   */
10426                   return -KEY___FILE__;
10427                 }
10428
10429                 goto unknown;
10430
10431               case 'L':
10432                 if (name[3] == 'I' &&
10433                     name[4] == 'N' &&
10434                     name[5] == 'E' &&
10435                     name[6] == '_' &&
10436                     name[7] == '_')
10437                 {                                 /* __LINE__   */
10438                   return -KEY___LINE__;
10439                 }
10440
10441                 goto unknown;
10442
10443               default:
10444                 goto unknown;
10445             }
10446           }
10447
10448           goto unknown;
10449
10450         case 'c':
10451           switch (name[1])
10452           {
10453             case 'l':
10454               if (name[2] == 'o' &&
10455                   name[3] == 's' &&
10456                   name[4] == 'e' &&
10457                   name[5] == 'd' &&
10458                   name[6] == 'i' &&
10459                   name[7] == 'r')
10460               {                                   /* closedir   */
10461                 return -KEY_closedir;
10462               }
10463
10464               goto unknown;
10465
10466             case 'o':
10467               if (name[2] == 'n' &&
10468                   name[3] == 't' &&
10469                   name[4] == 'i' &&
10470                   name[5] == 'n' &&
10471                   name[6] == 'u' &&
10472                   name[7] == 'e')
10473               {                                   /* continue   */
10474                 return -KEY_continue;
10475               }
10476
10477               goto unknown;
10478
10479             default:
10480               goto unknown;
10481           }
10482
10483         case 'd':
10484           if (name[1] == 'b' &&
10485               name[2] == 'm' &&
10486               name[3] == 'c' &&
10487               name[4] == 'l' &&
10488               name[5] == 'o' &&
10489               name[6] == 's' &&
10490               name[7] == 'e')
10491           {                                       /* dbmclose   */
10492             return -KEY_dbmclose;
10493           }
10494
10495           goto unknown;
10496
10497         case 'e':
10498           if (name[1] == 'n' &&
10499               name[2] == 'd')
10500           {
10501             switch (name[3])
10502             {
10503               case 'g':
10504                 if (name[4] == 'r' &&
10505                     name[5] == 'e' &&
10506                     name[6] == 'n' &&
10507                     name[7] == 't')
10508                 {                                 /* endgrent   */
10509                   return -KEY_endgrent;
10510                 }
10511
10512                 goto unknown;
10513
10514               case 'p':
10515                 if (name[4] == 'w' &&
10516                     name[5] == 'e' &&
10517                     name[6] == 'n' &&
10518                     name[7] == 't')
10519                 {                                 /* endpwent   */
10520                   return -KEY_endpwent;
10521                 }
10522
10523                 goto unknown;
10524
10525               default:
10526                 goto unknown;
10527             }
10528           }
10529
10530           goto unknown;
10531
10532         case 'f':
10533           if (name[1] == 'o' &&
10534               name[2] == 'r' &&
10535               name[3] == 'm' &&
10536               name[4] == 'l' &&
10537               name[5] == 'i' &&
10538               name[6] == 'n' &&
10539               name[7] == 'e')
10540           {                                       /* formline   */
10541             return -KEY_formline;
10542           }
10543
10544           goto unknown;
10545
10546         case 'g':
10547           if (name[1] == 'e' &&
10548               name[2] == 't')
10549           {
10550             switch (name[3])
10551             {
10552               case 'g':
10553                 if (name[4] == 'r')
10554                 {
10555                   switch (name[5])
10556                   {
10557                     case 'e':
10558                       if (name[6] == 'n' &&
10559                           name[7] == 't')
10560                       {                           /* getgrent   */
10561                         return -KEY_getgrent;
10562                       }
10563
10564                       goto unknown;
10565
10566                     case 'g':
10567                       if (name[6] == 'i' &&
10568                           name[7] == 'd')
10569                       {                           /* getgrgid   */
10570                         return -KEY_getgrgid;
10571                       }
10572
10573                       goto unknown;
10574
10575                     case 'n':
10576                       if (name[6] == 'a' &&
10577                           name[7] == 'm')
10578                       {                           /* getgrnam   */
10579                         return -KEY_getgrnam;
10580                       }
10581
10582                       goto unknown;
10583
10584                     default:
10585                       goto unknown;
10586                   }
10587                 }
10588
10589                 goto unknown;
10590
10591               case 'l':
10592                 if (name[4] == 'o' &&
10593                     name[5] == 'g' &&
10594                     name[6] == 'i' &&
10595                     name[7] == 'n')
10596                 {                                 /* getlogin   */
10597                   return -KEY_getlogin;
10598                 }
10599
10600                 goto unknown;
10601
10602               case 'p':
10603                 if (name[4] == 'w')
10604                 {
10605                   switch (name[5])
10606                   {
10607                     case 'e':
10608                       if (name[6] == 'n' &&
10609                           name[7] == 't')
10610                       {                           /* getpwent   */
10611                         return -KEY_getpwent;
10612                       }
10613
10614                       goto unknown;
10615
10616                     case 'n':
10617                       if (name[6] == 'a' &&
10618                           name[7] == 'm')
10619                       {                           /* getpwnam   */
10620                         return -KEY_getpwnam;
10621                       }
10622
10623                       goto unknown;
10624
10625                     case 'u':
10626                       if (name[6] == 'i' &&
10627                           name[7] == 'd')
10628                       {                           /* getpwuid   */
10629                         return -KEY_getpwuid;
10630                       }
10631
10632                       goto unknown;
10633
10634                     default:
10635                       goto unknown;
10636                   }
10637                 }
10638
10639                 goto unknown;
10640
10641               default:
10642                 goto unknown;
10643             }
10644           }
10645
10646           goto unknown;
10647
10648         case 'r':
10649           if (name[1] == 'e' &&
10650               name[2] == 'a' &&
10651               name[3] == 'd')
10652           {
10653             switch (name[4])
10654             {
10655               case 'l':
10656                 if (name[5] == 'i' &&
10657                     name[6] == 'n')
10658                 {
10659                   switch (name[7])
10660                   {
10661                     case 'e':
10662                       {                           /* readline   */
10663                         return -KEY_readline;
10664                       }
10665
10666                     case 'k':
10667                       {                           /* readlink   */
10668                         return -KEY_readlink;
10669                       }
10670
10671                     default:
10672                       goto unknown;
10673                   }
10674                 }
10675
10676                 goto unknown;
10677
10678               case 'p':
10679                 if (name[5] == 'i' &&
10680                     name[6] == 'p' &&
10681                     name[7] == 'e')
10682                 {                                 /* readpipe   */
10683                   return -KEY_readpipe;
10684                 }
10685
10686                 goto unknown;
10687
10688               default:
10689                 goto unknown;
10690             }
10691           }
10692
10693           goto unknown;
10694
10695         case 's':
10696           switch (name[1])
10697           {
10698             case 'e':
10699               if (name[2] == 't')
10700               {
10701                 switch (name[3])
10702                 {
10703                   case 'g':
10704                     if (name[4] == 'r' &&
10705                         name[5] == 'e' &&
10706                         name[6] == 'n' &&
10707                         name[7] == 't')
10708                     {                             /* setgrent   */
10709                       return -KEY_setgrent;
10710                     }
10711
10712                     goto unknown;
10713
10714                   case 'p':
10715                     if (name[4] == 'w' &&
10716                         name[5] == 'e' &&
10717                         name[6] == 'n' &&
10718                         name[7] == 't')
10719                     {                             /* setpwent   */
10720                       return -KEY_setpwent;
10721                     }
10722
10723                     goto unknown;
10724
10725                   default:
10726                     goto unknown;
10727                 }
10728               }
10729
10730               goto unknown;
10731
10732             case 'h':
10733               switch (name[2])
10734               {
10735                 case 'm':
10736                   if (name[3] == 'w' &&
10737                       name[4] == 'r' &&
10738                       name[5] == 'i' &&
10739                       name[6] == 't' &&
10740                       name[7] == 'e')
10741                   {                               /* shmwrite   */
10742                     return -KEY_shmwrite;
10743                   }
10744
10745                   goto unknown;
10746
10747                 case 'u':
10748                   if (name[3] == 't' &&
10749                       name[4] == 'd' &&
10750                       name[5] == 'o' &&
10751                       name[6] == 'w' &&
10752                       name[7] == 'n')
10753                   {                               /* shutdown   */
10754                     return -KEY_shutdown;
10755                   }
10756
10757                   goto unknown;
10758
10759                 default:
10760                   goto unknown;
10761               }
10762
10763             case 'y':
10764               if (name[2] == 's' &&
10765                   name[3] == 'w' &&
10766                   name[4] == 'r' &&
10767                   name[5] == 'i' &&
10768                   name[6] == 't' &&
10769                   name[7] == 'e')
10770               {                                   /* syswrite   */
10771                 return -KEY_syswrite;
10772               }
10773
10774               goto unknown;
10775
10776             default:
10777               goto unknown;
10778           }
10779
10780         case 't':
10781           if (name[1] == 'r' &&
10782               name[2] == 'u' &&
10783               name[3] == 'n' &&
10784               name[4] == 'c' &&
10785               name[5] == 'a' &&
10786               name[6] == 't' &&
10787               name[7] == 'e')
10788           {                                       /* truncate   */
10789             return -KEY_truncate;
10790           }
10791
10792           goto unknown;
10793
10794         default:
10795           goto unknown;
10796       }
10797
10798     case 9: /* 9 tokens of length 9 */
10799       switch (name[0])
10800       {
10801         case 'U':
10802           if (name[1] == 'N' &&
10803               name[2] == 'I' &&
10804               name[3] == 'T' &&
10805               name[4] == 'C' &&
10806               name[5] == 'H' &&
10807               name[6] == 'E' &&
10808               name[7] == 'C' &&
10809               name[8] == 'K')
10810           {                                       /* UNITCHECK  */
10811             return KEY_UNITCHECK;
10812           }
10813
10814           goto unknown;
10815
10816         case 'e':
10817           if (name[1] == 'n' &&
10818               name[2] == 'd' &&
10819               name[3] == 'n' &&
10820               name[4] == 'e' &&
10821               name[5] == 't' &&
10822               name[6] == 'e' &&
10823               name[7] == 'n' &&
10824               name[8] == 't')
10825           {                                       /* endnetent  */
10826             return -KEY_endnetent;
10827           }
10828
10829           goto unknown;
10830
10831         case 'g':
10832           if (name[1] == 'e' &&
10833               name[2] == 't' &&
10834               name[3] == 'n' &&
10835               name[4] == 'e' &&
10836               name[5] == 't' &&
10837               name[6] == 'e' &&
10838               name[7] == 'n' &&
10839               name[8] == 't')
10840           {                                       /* getnetent  */
10841             return -KEY_getnetent;
10842           }
10843
10844           goto unknown;
10845
10846         case 'l':
10847           if (name[1] == 'o' &&
10848               name[2] == 'c' &&
10849               name[3] == 'a' &&
10850               name[4] == 'l' &&
10851               name[5] == 't' &&
10852               name[6] == 'i' &&
10853               name[7] == 'm' &&
10854               name[8] == 'e')
10855           {                                       /* localtime  */
10856             return -KEY_localtime;
10857           }
10858
10859           goto unknown;
10860
10861         case 'p':
10862           if (name[1] == 'r' &&
10863               name[2] == 'o' &&
10864               name[3] == 't' &&
10865               name[4] == 'o' &&
10866               name[5] == 't' &&
10867               name[6] == 'y' &&
10868               name[7] == 'p' &&
10869               name[8] == 'e')
10870           {                                       /* prototype  */
10871             return KEY_prototype;
10872           }
10873
10874           goto unknown;
10875
10876         case 'q':
10877           if (name[1] == 'u' &&
10878               name[2] == 'o' &&
10879               name[3] == 't' &&
10880               name[4] == 'e' &&
10881               name[5] == 'm' &&
10882               name[6] == 'e' &&
10883               name[7] == 't' &&
10884               name[8] == 'a')
10885           {                                       /* quotemeta  */
10886             return -KEY_quotemeta;
10887           }
10888
10889           goto unknown;
10890
10891         case 'r':
10892           if (name[1] == 'e' &&
10893               name[2] == 'w' &&
10894               name[3] == 'i' &&
10895               name[4] == 'n' &&
10896               name[5] == 'd' &&
10897               name[6] == 'd' &&
10898               name[7] == 'i' &&
10899               name[8] == 'r')
10900           {                                       /* rewinddir  */
10901             return -KEY_rewinddir;
10902           }
10903
10904           goto unknown;
10905
10906         case 's':
10907           if (name[1] == 'e' &&
10908               name[2] == 't' &&
10909               name[3] == 'n' &&
10910               name[4] == 'e' &&
10911               name[5] == 't' &&
10912               name[6] == 'e' &&
10913               name[7] == 'n' &&
10914               name[8] == 't')
10915           {                                       /* setnetent  */
10916             return -KEY_setnetent;
10917           }
10918
10919           goto unknown;
10920
10921         case 'w':
10922           if (name[1] == 'a' &&
10923               name[2] == 'n' &&
10924               name[3] == 't' &&
10925               name[4] == 'a' &&
10926               name[5] == 'r' &&
10927               name[6] == 'r' &&
10928               name[7] == 'a' &&
10929               name[8] == 'y')
10930           {                                       /* wantarray  */
10931             return -KEY_wantarray;
10932           }
10933
10934           goto unknown;
10935
10936         default:
10937           goto unknown;
10938       }
10939
10940     case 10: /* 9 tokens of length 10 */
10941       switch (name[0])
10942       {
10943         case 'e':
10944           if (name[1] == 'n' &&
10945               name[2] == 'd')
10946           {
10947             switch (name[3])
10948             {
10949               case 'h':
10950                 if (name[4] == 'o' &&
10951                     name[5] == 's' &&
10952                     name[6] == 't' &&
10953                     name[7] == 'e' &&
10954                     name[8] == 'n' &&
10955                     name[9] == 't')
10956                 {                                 /* endhostent */
10957                   return -KEY_endhostent;
10958                 }
10959
10960                 goto unknown;
10961
10962               case 's':
10963                 if (name[4] == 'e' &&
10964                     name[5] == 'r' &&
10965                     name[6] == 'v' &&
10966                     name[7] == 'e' &&
10967                     name[8] == 'n' &&
10968                     name[9] == 't')
10969                 {                                 /* endservent */
10970                   return -KEY_endservent;
10971                 }
10972
10973                 goto unknown;
10974
10975               default:
10976                 goto unknown;
10977             }
10978           }
10979
10980           goto unknown;
10981
10982         case 'g':
10983           if (name[1] == 'e' &&
10984               name[2] == 't')
10985           {
10986             switch (name[3])
10987             {
10988               case 'h':
10989                 if (name[4] == 'o' &&
10990                     name[5] == 's' &&
10991                     name[6] == 't' &&
10992                     name[7] == 'e' &&
10993                     name[8] == 'n' &&
10994                     name[9] == 't')
10995                 {                                 /* gethostent */
10996                   return -KEY_gethostent;
10997                 }
10998
10999                 goto unknown;
11000
11001               case 's':
11002                 switch (name[4])
11003                 {
11004                   case 'e':
11005                     if (name[5] == 'r' &&
11006                         name[6] == 'v' &&
11007                         name[7] == 'e' &&
11008                         name[8] == 'n' &&
11009                         name[9] == 't')
11010                     {                             /* getservent */
11011                       return -KEY_getservent;
11012                     }
11013
11014                     goto unknown;
11015
11016                   case 'o':
11017                     if (name[5] == 'c' &&
11018                         name[6] == 'k' &&
11019                         name[7] == 'o' &&
11020                         name[8] == 'p' &&
11021                         name[9] == 't')
11022                     {                             /* getsockopt */
11023                       return -KEY_getsockopt;
11024                     }
11025
11026                     goto unknown;
11027
11028                   default:
11029                     goto unknown;
11030                 }
11031
11032               default:
11033                 goto unknown;
11034             }
11035           }
11036
11037           goto unknown;
11038
11039         case 's':
11040           switch (name[1])
11041           {
11042             case 'e':
11043               if (name[2] == 't')
11044               {
11045                 switch (name[3])
11046                 {
11047                   case 'h':
11048                     if (name[4] == 'o' &&
11049                         name[5] == 's' &&
11050                         name[6] == 't' &&
11051                         name[7] == 'e' &&
11052                         name[8] == 'n' &&
11053                         name[9] == 't')
11054                     {                             /* sethostent */
11055                       return -KEY_sethostent;
11056                     }
11057
11058                     goto unknown;
11059
11060                   case 's':
11061                     switch (name[4])
11062                     {
11063                       case 'e':
11064                         if (name[5] == 'r' &&
11065                             name[6] == 'v' &&
11066                             name[7] == 'e' &&
11067                             name[8] == 'n' &&
11068                             name[9] == 't')
11069                         {                         /* setservent */
11070                           return -KEY_setservent;
11071                         }
11072
11073                         goto unknown;
11074
11075                       case 'o':
11076                         if (name[5] == 'c' &&
11077                             name[6] == 'k' &&
11078                             name[7] == 'o' &&
11079                             name[8] == 'p' &&
11080                             name[9] == 't')
11081                         {                         /* setsockopt */
11082                           return -KEY_setsockopt;
11083                         }
11084
11085                         goto unknown;
11086
11087                       default:
11088                         goto unknown;
11089                     }
11090
11091                   default:
11092                     goto unknown;
11093                 }
11094               }
11095
11096               goto unknown;
11097
11098             case 'o':
11099               if (name[2] == 'c' &&
11100                   name[3] == 'k' &&
11101                   name[4] == 'e' &&
11102                   name[5] == 't' &&
11103                   name[6] == 'p' &&
11104                   name[7] == 'a' &&
11105                   name[8] == 'i' &&
11106                   name[9] == 'r')
11107               {                                   /* socketpair */
11108                 return -KEY_socketpair;
11109               }
11110
11111               goto unknown;
11112
11113             default:
11114               goto unknown;
11115           }
11116
11117         default:
11118           goto unknown;
11119       }
11120
11121     case 11: /* 8 tokens of length 11 */
11122       switch (name[0])
11123       {
11124         case '_':
11125           if (name[1] == '_' &&
11126               name[2] == 'P' &&
11127               name[3] == 'A' &&
11128               name[4] == 'C' &&
11129               name[5] == 'K' &&
11130               name[6] == 'A' &&
11131               name[7] == 'G' &&
11132               name[8] == 'E' &&
11133               name[9] == '_' &&
11134               name[10] == '_')
11135           {                                       /* __PACKAGE__ */
11136             return -KEY___PACKAGE__;
11137           }
11138
11139           goto unknown;
11140
11141         case 'e':
11142           if (name[1] == 'n' &&
11143               name[2] == 'd' &&
11144               name[3] == 'p' &&
11145               name[4] == 'r' &&
11146               name[5] == 'o' &&
11147               name[6] == 't' &&
11148               name[7] == 'o' &&
11149               name[8] == 'e' &&
11150               name[9] == 'n' &&
11151               name[10] == 't')
11152           {                                       /* endprotoent */
11153             return -KEY_endprotoent;
11154           }
11155
11156           goto unknown;
11157
11158         case 'g':
11159           if (name[1] == 'e' &&
11160               name[2] == 't')
11161           {
11162             switch (name[3])
11163             {
11164               case 'p':
11165                 switch (name[4])
11166                 {
11167                   case 'e':
11168                     if (name[5] == 'e' &&
11169                         name[6] == 'r' &&
11170                         name[7] == 'n' &&
11171                         name[8] == 'a' &&
11172                         name[9] == 'm' &&
11173                         name[10] == 'e')
11174                     {                             /* getpeername */
11175                       return -KEY_getpeername;
11176                     }
11177
11178                     goto unknown;
11179
11180                   case 'r':
11181                     switch (name[5])
11182                     {
11183                       case 'i':
11184                         if (name[6] == 'o' &&
11185                             name[7] == 'r' &&
11186                             name[8] == 'i' &&
11187                             name[9] == 't' &&
11188                             name[10] == 'y')
11189                         {                         /* getpriority */
11190                           return -KEY_getpriority;
11191                         }
11192
11193                         goto unknown;
11194
11195                       case 'o':
11196                         if (name[6] == 't' &&
11197                             name[7] == 'o' &&
11198                             name[8] == 'e' &&
11199                             name[9] == 'n' &&
11200                             name[10] == 't')
11201                         {                         /* getprotoent */
11202                           return -KEY_getprotoent;
11203                         }
11204
11205                         goto unknown;
11206
11207                       default:
11208                         goto unknown;
11209                     }
11210
11211                   default:
11212                     goto unknown;
11213                 }
11214
11215               case 's':
11216                 if (name[4] == 'o' &&
11217                     name[5] == 'c' &&
11218                     name[6] == 'k' &&
11219                     name[7] == 'n' &&
11220                     name[8] == 'a' &&
11221                     name[9] == 'm' &&
11222                     name[10] == 'e')
11223                 {                                 /* getsockname */
11224                   return -KEY_getsockname;
11225                 }
11226
11227                 goto unknown;
11228
11229               default:
11230                 goto unknown;
11231             }
11232           }
11233
11234           goto unknown;
11235
11236         case 's':
11237           if (name[1] == 'e' &&
11238               name[2] == 't' &&
11239               name[3] == 'p' &&
11240               name[4] == 'r')
11241           {
11242             switch (name[5])
11243             {
11244               case 'i':
11245                 if (name[6] == 'o' &&
11246                     name[7] == 'r' &&
11247                     name[8] == 'i' &&
11248                     name[9] == 't' &&
11249                     name[10] == 'y')
11250                 {                                 /* setpriority */
11251                   return -KEY_setpriority;
11252                 }
11253
11254                 goto unknown;
11255
11256               case 'o':
11257                 if (name[6] == 't' &&
11258                     name[7] == 'o' &&
11259                     name[8] == 'e' &&
11260                     name[9] == 'n' &&
11261                     name[10] == 't')
11262                 {                                 /* setprotoent */
11263                   return -KEY_setprotoent;
11264                 }
11265
11266                 goto unknown;
11267
11268               default:
11269                 goto unknown;
11270             }
11271           }
11272
11273           goto unknown;
11274
11275         default:
11276           goto unknown;
11277       }
11278
11279     case 12: /* 2 tokens of length 12 */
11280       if (name[0] == 'g' &&
11281           name[1] == 'e' &&
11282           name[2] == 't' &&
11283           name[3] == 'n' &&
11284           name[4] == 'e' &&
11285           name[5] == 't' &&
11286           name[6] == 'b' &&
11287           name[7] == 'y')
11288       {
11289         switch (name[8])
11290         {
11291           case 'a':
11292             if (name[9] == 'd' &&
11293                 name[10] == 'd' &&
11294                 name[11] == 'r')
11295             {                                     /* getnetbyaddr */
11296               return -KEY_getnetbyaddr;
11297             }
11298
11299             goto unknown;
11300
11301           case 'n':
11302             if (name[9] == 'a' &&
11303                 name[10] == 'm' &&
11304                 name[11] == 'e')
11305             {                                     /* getnetbyname */
11306               return -KEY_getnetbyname;
11307             }
11308
11309             goto unknown;
11310
11311           default:
11312             goto unknown;
11313         }
11314       }
11315
11316       goto unknown;
11317
11318     case 13: /* 4 tokens of length 13 */
11319       if (name[0] == 'g' &&
11320           name[1] == 'e' &&
11321           name[2] == 't')
11322       {
11323         switch (name[3])
11324         {
11325           case 'h':
11326             if (name[4] == 'o' &&
11327                 name[5] == 's' &&
11328                 name[6] == 't' &&
11329                 name[7] == 'b' &&
11330                 name[8] == 'y')
11331             {
11332               switch (name[9])
11333               {
11334                 case 'a':
11335                   if (name[10] == 'd' &&
11336                       name[11] == 'd' &&
11337                       name[12] == 'r')
11338                   {                               /* gethostbyaddr */
11339                     return -KEY_gethostbyaddr;
11340                   }
11341
11342                   goto unknown;
11343
11344                 case 'n':
11345                   if (name[10] == 'a' &&
11346                       name[11] == 'm' &&
11347                       name[12] == 'e')
11348                   {                               /* gethostbyname */
11349                     return -KEY_gethostbyname;
11350                   }
11351
11352                   goto unknown;
11353
11354                 default:
11355                   goto unknown;
11356               }
11357             }
11358
11359             goto unknown;
11360
11361           case 's':
11362             if (name[4] == 'e' &&
11363                 name[5] == 'r' &&
11364                 name[6] == 'v' &&
11365                 name[7] == 'b' &&
11366                 name[8] == 'y')
11367             {
11368               switch (name[9])
11369               {
11370                 case 'n':
11371                   if (name[10] == 'a' &&
11372                       name[11] == 'm' &&
11373                       name[12] == 'e')
11374                   {                               /* getservbyname */
11375                     return -KEY_getservbyname;
11376                   }
11377
11378                   goto unknown;
11379
11380                 case 'p':
11381                   if (name[10] == 'o' &&
11382                       name[11] == 'r' &&
11383                       name[12] == 't')
11384                   {                               /* getservbyport */
11385                     return -KEY_getservbyport;
11386                   }
11387
11388                   goto unknown;
11389
11390                 default:
11391                   goto unknown;
11392               }
11393             }
11394
11395             goto unknown;
11396
11397           default:
11398             goto unknown;
11399         }
11400       }
11401
11402       goto unknown;
11403
11404     case 14: /* 1 tokens of length 14 */
11405       if (name[0] == 'g' &&
11406           name[1] == 'e' &&
11407           name[2] == 't' &&
11408           name[3] == 'p' &&
11409           name[4] == 'r' &&
11410           name[5] == 'o' &&
11411           name[6] == 't' &&
11412           name[7] == 'o' &&
11413           name[8] == 'b' &&
11414           name[9] == 'y' &&
11415           name[10] == 'n' &&
11416           name[11] == 'a' &&
11417           name[12] == 'm' &&
11418           name[13] == 'e')
11419       {                                           /* getprotobyname */
11420         return -KEY_getprotobyname;
11421       }
11422
11423       goto unknown;
11424
11425     case 16: /* 1 tokens of length 16 */
11426       if (name[0] == 'g' &&
11427           name[1] == 'e' &&
11428           name[2] == 't' &&
11429           name[3] == 'p' &&
11430           name[4] == 'r' &&
11431           name[5] == 'o' &&
11432           name[6] == 't' &&
11433           name[7] == 'o' &&
11434           name[8] == 'b' &&
11435           name[9] == 'y' &&
11436           name[10] == 'n' &&
11437           name[11] == 'u' &&
11438           name[12] == 'm' &&
11439           name[13] == 'b' &&
11440           name[14] == 'e' &&
11441           name[15] == 'r')
11442       {                                           /* getprotobynumber */
11443         return -KEY_getprotobynumber;
11444       }
11445
11446       goto unknown;
11447
11448     default:
11449       goto unknown;
11450   }
11451
11452 unknown:
11453   return 0;
11454 }
11455
11456 STATIC void
11457 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11458 {
11459     dVAR;
11460
11461     PERL_ARGS_ASSERT_CHECKCOMMA;
11462
11463     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
11464         if (ckWARN(WARN_SYNTAX)) {
11465             int level = 1;
11466             const char *w;
11467             for (w = s+2; *w && level; w++) {
11468                 if (*w == '(')
11469                     ++level;
11470                 else if (*w == ')')
11471                     --level;
11472             }
11473             while (isSPACE(*w))
11474                 ++w;
11475             /* the list of chars below is for end of statements or
11476              * block / parens, boolean operators (&&, ||, //) and branch
11477              * constructs (or, and, if, until, unless, while, err, for).
11478              * Not a very solid hack... */
11479             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11480                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11481                             "%s (...) interpreted as function",name);
11482         }
11483     }
11484     while (s < PL_bufend && isSPACE(*s))
11485         s++;
11486     if (*s == '(')
11487         s++;
11488     while (s < PL_bufend && isSPACE(*s))
11489         s++;
11490     if (isIDFIRST_lazy_if(s,UTF)) {
11491         const char * const w = s++;
11492         while (isALNUM_lazy_if(s,UTF))
11493             s++;
11494         while (s < PL_bufend && isSPACE(*s))
11495             s++;
11496         if (*s == ',') {
11497             GV* gv;
11498             if (keyword(w, s - w, 0))
11499                 return;
11500
11501             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11502             if (gv && GvCVu(gv))
11503                 return;
11504             Perl_croak(aTHX_ "No comma allowed after %s", what);
11505         }
11506     }
11507 }
11508
11509 /* Either returns sv, or mortalizes sv and returns a new SV*.
11510    Best used as sv=new_constant(..., sv, ...).
11511    If s, pv are NULL, calls subroutine with one argument,
11512    and type is used with error messages only. */
11513
11514 STATIC SV *
11515 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11516                SV *sv, SV *pv, const char *type, STRLEN typelen)
11517 {
11518     dVAR; dSP;
11519     HV * const table = GvHV(PL_hintgv);          /* ^H */
11520     SV *res;
11521     SV **cvp;
11522     SV *cv, *typesv;
11523     const char *why1 = "", *why2 = "", *why3 = "";
11524
11525     PERL_ARGS_ASSERT_NEW_CONSTANT;
11526
11527     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11528         SV *msg;
11529
11530         why2 = (const char *)
11531             (strEQ(key,"charnames")
11532              ? "(possibly a missing \"use charnames ...\")"
11533              : "");
11534         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11535                             (type ? type: "undef"), why2);
11536
11537         /* This is convoluted and evil ("goto considered harmful")
11538          * but I do not understand the intricacies of all the different
11539          * failure modes of %^H in here.  The goal here is to make
11540          * the most probable error message user-friendly. --jhi */
11541
11542         goto msgdone;
11543
11544     report:
11545         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11546                             (type ? type: "undef"), why1, why2, why3);
11547     msgdone:
11548         yyerror(SvPVX_const(msg));
11549         SvREFCNT_dec(msg);
11550         return sv;
11551     }
11552
11553     /* charnames doesn't work well if there have been errors found */
11554     if (PL_error_count > 0 && strEQ(key,"charnames"))
11555         return &PL_sv_undef;
11556
11557     cvp = hv_fetch(table, key, keylen, FALSE);
11558     if (!cvp || !SvOK(*cvp)) {
11559         why1 = "$^H{";
11560         why2 = key;
11561         why3 = "} is not defined";
11562         goto report;
11563     }
11564     sv_2mortal(sv);                     /* Parent created it permanently */
11565     cv = *cvp;
11566     if (!pv && s)
11567         pv = newSVpvn_flags(s, len, SVs_TEMP);
11568     if (type && pv)
11569         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11570     else
11571         typesv = &PL_sv_undef;
11572
11573     PUSHSTACKi(PERLSI_OVERLOAD);
11574     ENTER ;
11575     SAVETMPS;
11576
11577     PUSHMARK(SP) ;
11578     EXTEND(sp, 3);
11579     if (pv)
11580         PUSHs(pv);
11581     PUSHs(sv);
11582     if (pv)
11583         PUSHs(typesv);
11584     PUTBACK;
11585     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11586
11587     SPAGAIN ;
11588
11589     /* Check the eval first */
11590     if (!PL_in_eval && SvTRUE(ERRSV)) {
11591         sv_catpvs(ERRSV, "Propagated");
11592         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11593         (void)POPs;
11594         res = SvREFCNT_inc_simple(sv);
11595     }
11596     else {
11597         res = POPs;
11598         SvREFCNT_inc_simple_void(res);
11599     }
11600
11601     PUTBACK ;
11602     FREETMPS ;
11603     LEAVE ;
11604     POPSTACK;
11605
11606     if (!SvOK(res)) {
11607         why1 = "Call to &{$^H{";
11608         why2 = key;
11609         why3 = "}} did not return a defined value";
11610         sv = res;
11611         goto report;
11612     }
11613
11614     return res;
11615 }
11616
11617 /* Returns a NUL terminated string, with the length of the string written to
11618    *slp
11619    */
11620 STATIC char *
11621 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11622 {
11623     dVAR;
11624     register char *d = dest;
11625     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
11626
11627     PERL_ARGS_ASSERT_SCAN_WORD;
11628
11629     for (;;) {
11630         if (d >= e)
11631             Perl_croak(aTHX_ ident_too_long);
11632         if (isALNUM(*s))        /* UTF handled below */
11633             *d++ = *s++;
11634         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11635             *d++ = ':';
11636             *d++ = ':';
11637             s++;
11638         }
11639         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11640             *d++ = *s++;
11641             *d++ = *s++;
11642         }
11643         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11644             char *t = s + UTF8SKIP(s);
11645             size_t len;
11646             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11647                 t += UTF8SKIP(t);
11648             len = t - s;
11649             if (d + len > e)
11650                 Perl_croak(aTHX_ ident_too_long);
11651             Copy(s, d, len, char);
11652             d += len;
11653             s = t;
11654         }
11655         else {
11656             *d = '\0';
11657             *slp = d - dest;
11658             return s;
11659         }
11660     }
11661 }
11662
11663 STATIC char *
11664 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11665 {
11666     dVAR;
11667     char *bracket = NULL;
11668     char funny = *s++;
11669     register char *d = dest;
11670     register char * const e = d + destlen - 3;    /* two-character token, ending NUL */
11671
11672     PERL_ARGS_ASSERT_SCAN_IDENT;
11673
11674     if (isSPACE(*s))
11675         s = PEEKSPACE(s);
11676     if (isDIGIT(*s)) {
11677         while (isDIGIT(*s)) {
11678             if (d >= e)
11679                 Perl_croak(aTHX_ ident_too_long);
11680             *d++ = *s++;
11681         }
11682     }
11683     else {
11684         for (;;) {
11685             if (d >= e)
11686                 Perl_croak(aTHX_ ident_too_long);
11687             if (isALNUM(*s))    /* UTF handled below */
11688                 *d++ = *s++;
11689             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11690                 *d++ = ':';
11691                 *d++ = ':';
11692                 s++;
11693             }
11694             else if (*s == ':' && s[1] == ':') {
11695                 *d++ = *s++;
11696                 *d++ = *s++;
11697             }
11698             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11699                 char *t = s + UTF8SKIP(s);
11700                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11701                     t += UTF8SKIP(t);
11702                 if (d + (t - s) > e)
11703                     Perl_croak(aTHX_ ident_too_long);
11704                 Copy(s, d, t - s, char);
11705                 d += t - s;
11706                 s = t;
11707             }
11708             else
11709                 break;
11710         }
11711     }
11712     *d = '\0';
11713     d = dest;
11714     if (*d) {
11715         if (PL_lex_state != LEX_NORMAL)
11716             PL_lex_state = LEX_INTERPENDMAYBE;
11717         return s;
11718     }
11719     if (*s == '$' && s[1] &&
11720         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11721     {
11722         return s;
11723     }
11724     if (*s == '{') {
11725         bracket = s;
11726         s++;
11727     }
11728     else if (ck_uni)
11729         check_uni();
11730     if (s < send)
11731         *d = *s++;
11732     d[1] = '\0';
11733     if (*d == '^' && *s && isCONTROLVAR(*s)) {
11734         *d = toCTRL(*s);
11735         s++;
11736     }
11737     if (bracket) {
11738         if (isSPACE(s[-1])) {
11739             while (s < send) {
11740                 const char ch = *s++;
11741                 if (!SPACE_OR_TAB(ch)) {
11742                     *d = ch;
11743                     break;
11744                 }
11745             }
11746         }
11747         if (isIDFIRST_lazy_if(d,UTF)) {
11748             d++;
11749             if (UTF) {
11750                 char *end = s;
11751                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11752                     end += UTF8SKIP(end);
11753                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11754                         end += UTF8SKIP(end);
11755                 }
11756                 Copy(s, d, end - s, char);
11757                 d += end - s;
11758                 s = end;
11759             }
11760             else {
11761                 while ((isALNUM(*s) || *s == ':') && d < e)
11762                     *d++ = *s++;
11763                 if (d >= e)
11764                     Perl_croak(aTHX_ ident_too_long);
11765             }
11766             *d = '\0';
11767             while (s < send && SPACE_OR_TAB(*s))
11768                 s++;
11769             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11770                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11771                     const char * const brack =
11772                         (const char *)
11773                         ((*s == '[') ? "[...]" : "{...}");
11774                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11775                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11776                         funny, dest, brack, funny, dest, brack);
11777                 }
11778                 bracket++;
11779                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11780                 return s;
11781             }
11782         }
11783         /* Handle extended ${^Foo} variables
11784          * 1999-02-27 mjd-perl-patch@plover.com */
11785         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11786                  && isALNUM(*s))
11787         {
11788             d++;
11789             while (isALNUM(*s) && d < e) {
11790                 *d++ = *s++;
11791             }
11792             if (d >= e)
11793                 Perl_croak(aTHX_ ident_too_long);
11794             *d = '\0';
11795         }
11796         if (*s == '}') {
11797             s++;
11798             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11799                 PL_lex_state = LEX_INTERPEND;
11800                 PL_expect = XREF;
11801             }
11802             if (PL_lex_state == LEX_NORMAL) {
11803                 if (ckWARN(WARN_AMBIGUOUS) &&
11804                     (keyword(dest, d - dest, 0)
11805                      || get_cvn_flags(dest, d - dest, 0)))
11806                 {
11807                     if (funny == '#')
11808                         funny = '@';
11809                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11810                         "Ambiguous use of %c{%s} resolved to %c%s",
11811                         funny, dest, funny, dest);
11812                 }
11813             }
11814         }
11815         else {
11816             s = bracket;                /* let the parser handle it */
11817             *dest = '\0';
11818         }
11819     }
11820     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11821         PL_lex_state = LEX_INTERPEND;
11822     return s;
11823 }
11824
11825 static U32
11826 S_pmflag(U32 pmfl, const char ch) {
11827     switch (ch) {
11828         CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11829     case GLOBAL_PAT_MOD:      pmfl |= PMf_GLOBAL; break;
11830     case CONTINUE_PAT_MOD:    pmfl |= PMf_CONTINUE; break;
11831     case ONCE_PAT_MOD:        pmfl |= PMf_KEEP; break;
11832     case KEEPCOPY_PAT_MOD:    pmfl |= PMf_KEEPCOPY; break;
11833     case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11834     }
11835     return pmfl;
11836 }
11837
11838 STATIC char *
11839 S_scan_pat(pTHX_ char *start, I32 type)
11840 {
11841     dVAR;
11842     PMOP *pm;
11843     char *s = scan_str(start,!!PL_madskills,FALSE);
11844     const char * const valid_flags =
11845         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11846 #ifdef PERL_MAD
11847     char *modstart;
11848 #endif
11849
11850     PERL_ARGS_ASSERT_SCAN_PAT;
11851
11852     if (!s) {
11853         const char * const delimiter = skipspace(start);
11854         Perl_croak(aTHX_
11855                    (const char *)
11856                    (*delimiter == '?'
11857                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11858                     : "Search pattern not terminated" ));
11859     }
11860
11861     pm = (PMOP*)newPMOP(type, 0);
11862     if (PL_multi_open == '?') {
11863         /* This is the only point in the code that sets PMf_ONCE:  */
11864         pm->op_pmflags |= PMf_ONCE;
11865
11866         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11867            allows us to restrict the list needed by reset to just the ??
11868            matches.  */
11869         assert(type != OP_TRANS);
11870         if (PL_curstash) {
11871             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11872             U32 elements;
11873             if (!mg) {
11874                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11875                                  0);
11876             }
11877             elements = mg->mg_len / sizeof(PMOP**);
11878             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11879             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11880             mg->mg_len = elements * sizeof(PMOP**);
11881             PmopSTASH_set(pm,PL_curstash);
11882         }
11883     }
11884 #ifdef PERL_MAD
11885     modstart = s;
11886 #endif
11887     while (*s && strchr(valid_flags, *s))
11888         pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11889 #ifdef PERL_MAD
11890     if (PL_madskills && modstart != s) {
11891         SV* tmptoken = newSVpvn(modstart, s - modstart);
11892         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11893     }
11894 #endif
11895     /* issue a warning if /c is specified,but /g is not */
11896     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11897     {
11898         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11899                        "Use of /c modifier is meaningless without /g" );
11900     }
11901
11902     PL_lex_op = (OP*)pm;
11903     pl_yylval.ival = OP_MATCH;
11904     return s;
11905 }
11906
11907 STATIC char *
11908 S_scan_subst(pTHX_ char *start)
11909 {
11910     dVAR;
11911     register char *s;
11912     register PMOP *pm;
11913     I32 first_start;
11914     I32 es = 0;
11915 #ifdef PERL_MAD
11916     char *modstart;
11917 #endif
11918
11919     PERL_ARGS_ASSERT_SCAN_SUBST;
11920
11921     pl_yylval.ival = OP_NULL;
11922
11923     s = scan_str(start,!!PL_madskills,FALSE);
11924
11925     if (!s)
11926         Perl_croak(aTHX_ "Substitution pattern not terminated");
11927
11928     if (s[-1] == PL_multi_open)
11929         s--;
11930 #ifdef PERL_MAD
11931     if (PL_madskills) {
11932         CURMAD('q', PL_thisopen);
11933         CURMAD('_', PL_thiswhite);
11934         CURMAD('E', PL_thisstuff);
11935         CURMAD('Q', PL_thisclose);
11936         PL_realtokenstart = s - SvPVX(PL_linestr);
11937     }
11938 #endif
11939
11940     first_start = PL_multi_start;
11941     s = scan_str(s,!!PL_madskills,FALSE);
11942     if (!s) {
11943         if (PL_lex_stuff) {
11944             SvREFCNT_dec(PL_lex_stuff);
11945             PL_lex_stuff = NULL;
11946         }
11947         Perl_croak(aTHX_ "Substitution replacement not terminated");
11948     }
11949     PL_multi_start = first_start;       /* so whole substitution is taken together */
11950
11951     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11952
11953 #ifdef PERL_MAD
11954     if (PL_madskills) {
11955         CURMAD('z', PL_thisopen);
11956         CURMAD('R', PL_thisstuff);
11957         CURMAD('Z', PL_thisclose);
11958     }
11959     modstart = s;
11960 #endif
11961
11962     while (*s) {
11963         if (*s == EXEC_PAT_MOD) {
11964             s++;
11965             es++;
11966         }
11967         else if (strchr(S_PAT_MODS, *s))
11968             pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11969         else
11970             break;
11971     }
11972
11973 #ifdef PERL_MAD
11974     if (PL_madskills) {
11975         if (modstart != s)
11976             curmad('m', newSVpvn(modstart, s - modstart));
11977         append_madprops(PL_thismad, (OP*)pm, 0);
11978         PL_thismad = 0;
11979     }
11980 #endif
11981     if ((pm->op_pmflags & PMf_CONTINUE)) {
11982         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11983     }
11984
11985     if (es) {
11986         SV * const repl = newSVpvs("");
11987
11988         PL_sublex_info.super_bufptr = s;
11989         PL_sublex_info.super_bufend = PL_bufend;
11990         PL_multi_end = 0;
11991         pm->op_pmflags |= PMf_EVAL;
11992         while (es-- > 0) {
11993             if (es)
11994                 sv_catpvs(repl, "eval ");
11995             else
11996                 sv_catpvs(repl, "do ");
11997         }
11998         sv_catpvs(repl, "{");
11999         sv_catsv(repl, PL_lex_repl);
12000         if (strchr(SvPVX(PL_lex_repl), '#'))
12001             sv_catpvs(repl, "\n");
12002         sv_catpvs(repl, "}");
12003         SvEVALED_on(repl);
12004         SvREFCNT_dec(PL_lex_repl);
12005         PL_lex_repl = repl;
12006     }
12007
12008     PL_lex_op = (OP*)pm;
12009     pl_yylval.ival = OP_SUBST;
12010     return s;
12011 }
12012
12013 STATIC char *
12014 S_scan_trans(pTHX_ char *start)
12015 {
12016     dVAR;
12017     register char* s;
12018     OP *o;
12019     short *tbl;
12020     U8 squash;
12021     U8 del;
12022     U8 complement;
12023 #ifdef PERL_MAD
12024     char *modstart;
12025 #endif
12026
12027     PERL_ARGS_ASSERT_SCAN_TRANS;
12028
12029     pl_yylval.ival = OP_NULL;
12030
12031     s = scan_str(start,!!PL_madskills,FALSE);
12032     if (!s)
12033         Perl_croak(aTHX_ "Transliteration pattern not terminated");
12034
12035     if (s[-1] == PL_multi_open)
12036         s--;
12037 #ifdef PERL_MAD
12038     if (PL_madskills) {
12039         CURMAD('q', PL_thisopen);
12040         CURMAD('_', PL_thiswhite);
12041         CURMAD('E', PL_thisstuff);
12042         CURMAD('Q', PL_thisclose);
12043         PL_realtokenstart = s - SvPVX(PL_linestr);
12044     }
12045 #endif
12046
12047     s = scan_str(s,!!PL_madskills,FALSE);
12048     if (!s) {
12049         if (PL_lex_stuff) {
12050             SvREFCNT_dec(PL_lex_stuff);
12051             PL_lex_stuff = NULL;
12052         }
12053         Perl_croak(aTHX_ "Transliteration replacement not terminated");
12054     }
12055     if (PL_madskills) {
12056         CURMAD('z', PL_thisopen);
12057         CURMAD('R', PL_thisstuff);
12058         CURMAD('Z', PL_thisclose);
12059     }
12060
12061     complement = del = squash = 0;
12062 #ifdef PERL_MAD
12063     modstart = s;
12064 #endif
12065     while (1) {
12066         switch (*s) {
12067         case 'c':
12068             complement = OPpTRANS_COMPLEMENT;
12069             break;
12070         case 'd':
12071             del = OPpTRANS_DELETE;
12072             break;
12073         case 's':
12074             squash = OPpTRANS_SQUASH;
12075             break;
12076         default:
12077             goto no_more;
12078         }
12079         s++;
12080     }
12081   no_more:
12082
12083     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12084     o = newPVOP(OP_TRANS, 0, (char*)tbl);
12085     o->op_private &= ~OPpTRANS_ALL;
12086     o->op_private |= del|squash|complement|
12087       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12088       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
12089
12090     PL_lex_op = o;
12091     pl_yylval.ival = OP_TRANS;
12092
12093 #ifdef PERL_MAD
12094     if (PL_madskills) {
12095         if (modstart != s)
12096             curmad('m', newSVpvn(modstart, s - modstart));
12097         append_madprops(PL_thismad, o, 0);
12098         PL_thismad = 0;
12099     }
12100 #endif
12101
12102     return s;
12103 }
12104
12105 STATIC char *
12106 S_scan_heredoc(pTHX_ register char *s)
12107 {
          /* scan_heredoc
             takes: position in the input buffer of the "<<" that introduces
                    a here-document
             returns: position in the input buffer at which lexing continues
             side-effects: PL_lex_stuff receives an SV holding the body text
                  and pl_yylval.ival the op type (OP_CONST for a
                  single-quoted or backslashed tag, OP_BACKTICK for a
                  backticked tag, otherwise the initial value OP_SCALAR).
                  PL_multi_start, PL_multi_open, PL_multi_close and
                  PL_multi_end are set, PL_tokenbuf is overwritten with the
                  terminator line, and the read buffer is rewritten so that
                  the body no longer appears in it.

             Croaks if the tag does not fit in PL_tokenbuf and calls
             missingterm() if the terminator line is never found.
          */
12108     dVAR;
12109     SV *herewas;                  /* text that followed the <<TAG on its line */
12110     I32 op_type = OP_SCALAR;
12111     I32 len;                      /* length of the terminator in PL_tokenbuf */
12112     SV *tmpstr;                   /* accumulates the here-document body */
12113     char term;                    /* quote type first, later first byte of terminator */
12114     const char *found_newline;
12115     register char *d;
12116     register char *e;
12117     char *peek;
          /* "outer" selects how the body is obtained: when set, further input
             is pulled in with lex_next_chunk() in the loop near the end;
             when clear, the body is searched for in text already buffered.
             NOTE(review): PL_rsfp is presumably the source file handle --
             confirm against its definition. */
12118     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12119 #ifdef PERL_MAD
12120     I32 stuffstart = s - SvPVX(PL_linestr);
12121     char *tstart;
12122
12123     PL_realtokenstart = -1;
12124 #endif
12125
12126     PERL_ARGS_ASSERT_SCAN_HEREDOC;
12127
12128     s += 2;                       /* step over the "<<" */
          /* Build the terminator line in PL_tokenbuf: an optional leading
             newline (only when !outer), the tag, then a trailing newline. */
12129     d = PL_tokenbuf;
12130     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12131     if (!outer)
12132         *d++ = '\n';
          /* blanks are allowed between << and a quoted tag */
12133     peek = s;
12134     while (SPACE_OR_TAB(*peek))
12135         peek++;
12136     if (*peek == '`' || *peek == '\'' || *peek =='"') {
              /* quoted tag: the quote character decides the kind of
                 here-document; the tag text is copied by delimcpy(), which
                 reports the copied length in len */
12137         s = peek;
12138         term = *s++;
12139         s = delimcpy(d, e, s, PL_bufend, term, &len);
12140         d += len;
12141         if (s < PL_bufend)
12142             s++;
12143     }
12144     else {
              /* bare tag: a leading backslash makes it behave like a
                 single-quoted tag, anything else like a double-quoted one */
12145         if (*s == '\\')
12146             s++, term = '\'';
12147         else
12148             term = '"';
12149         if (!isALNUM_lazy_if(s,UTF))
12150             deprecate("bare << to mean <<\"\"");
              /* characters beyond the buffer are dropped here; the length
                 check below then rejects the over-long tag */
12151         for (; isALNUM_lazy_if(s,UTF); s++) {
12152             if (d < e)
12153                 *d++ = *s;
12154         }
12155     }
12156     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12157         Perl_croak(aTHX_ "Delimiter for here document is too long");
12158     *d++ = '\n';
12159     *d = '\0';
12160     len = d - PL_tokenbuf;
12161
12162 #ifdef PERL_MAD
12163     if (PL_madskills) {
12164         tstart = PL_tokenbuf + !outer;
12165         PL_thisclose = newSVpvn(tstart, len - !outer);
12166         tstart = SvPVX(PL_linestr) + stuffstart;
12167         PL_thisopen = newSVpvn(tstart, s - tstart);
12168         stuffstart = s - SvPVX(PL_linestr);
12169     }
12170 #endif
12171 #ifndef PERL_STRICT_CR
          /* Normalise line endings in place from the first CR onwards:
             CR LF and a lone CR become LF, and the CR of an LF CR pair is
             dropped.  The buffer can only shrink, so PL_bufend and the
             length of PL_linestr are adjusted afterwards. */
12172     d = strchr(s, '\r');
12173     if (d) {
12174         char * const olds = s;
12175         s = d;
12176         while (s < PL_bufend) {
12177             if (*s == '\r') {
12178                 *d++ = '\n';
12179                 if (*++s == '\n')
12180                     s++;
12181             }
12182             else if (*s == '\n' && s[1] == '\r') {      /* LF followed by CR (\012\015): keep the LF, skip the CR */
12183                 *d++ = *s++;
12184                 s++;
12185             }
12186             else
12187                 *d++ = *s++;
12188         }
12189         *d = '\0';
12190         PL_bufend = d;
12191         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12192         s = olds;
12193     }
12194 #endif
12195 #ifdef PERL_MAD
12196     found_newline = 0;
12197 #endif
          /* Save what follows the tag so it can be put back after the body
             has been removed: the whole remainder of the buffer when outer
             or when there is no newline, otherwise only the text up to the
             newline, starting one character before s. */
12198     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12199         herewas = newSVpvn(s,PL_bufend-s);
12200     }
12201     else {
12202 #ifdef PERL_MAD
12203         herewas = newSVpvn(s-1,found_newline-s+1);
12204 #else
12205         s--;
12206         herewas = newSVpvn(s,found_newline-s);
12207 #endif
12208     }
12209 #ifdef PERL_MAD
12210     if (PL_madskills) {
12211         tstart = SvPVX(PL_linestr) + stuffstart;
12212         if (PL_thisstuff)
12213             sv_catpvn(PL_thisstuff, tstart, s - tstart);
12214         else
12215             PL_thisstuff = newSVpvn(tstart, s - tstart);
12216     }
12217 #endif
          /* skip the saved text: s is now at PL_bufend, or at the newline
             that ends the <<TAG line */
12218     s += SvCUR(herewas);
12219
12220 #ifdef PERL_MAD
12221     stuffstart = s - SvPVX(PL_linestr);
12222
12223     if (found_newline)
12224         s--;
12225 #endif
12226
          /* The body SV; its IV slot is set to -1 for a single-quoted tag
             and to '\\' for a backticked one, and left untouched otherwise.
             NOTE(review): the consumer of that IV is not visible here. */
12227     tmpstr = newSV_type(SVt_PVIV);
12228     SvGROW(tmpstr, 80);
12229     if (term == '\'') {
12230         op_type = OP_CONST;
12231         SvIV_set(tmpstr, -1);
12232     }
12233     else if (term == '`') {
12234         op_type = OP_BACKTICK;
12235         SvIV_set(tmpstr, '\\');
12236     }
12237
12238     CLINE;
12239     PL_multi_start = CopLINE(PL_curcop);
12240     PL_multi_open = PL_multi_close = '<';
          /* from here on term is the first byte of the terminator line, used
             as a cheap pre-check before the full memNE/memEQ comparison */
12241     term = *PL_tokenbuf;
12242     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
              /* Inside a substitution within an eval with no file handle:
                 the body is looked for in the enclosing buffer recorded in
                 PL_sublex_info, starting at its first newline.  The body is
                 copied to tmpstr and the enclosing buffer is rewritten
                 without it. */
12243         char * const bufptr = PL_sublex_info.super_bufptr;
12244         char * const bufend = PL_sublex_info.super_bufend;
12245         char * const olds = s - SvCUR(herewas);
12246         s = strchr(bufptr, '\n');
12247         if (!s)
12248             s = bufend;
12249         d = s;
12250         while (s < bufend &&
12251           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12252             if (*s++ == '\n')
12253                 CopLINE_inc(PL_curcop);
12254         }
12255         if (s >= bufend) {
                  /* no terminator: report against the line the heredoc began on */
12256             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12257             missingterm(PL_tokenbuf);
12258         }
              /* herewas is reused as scratch: text up to and including the
                 newline at d, then whatever follows the terminator */
12259         sv_setpvn(herewas,bufptr,d-bufptr+1);
12260         sv_setpvn(tmpstr,d+1,s-d);
12261         s += len - 1;
12262         sv_catpvn(herewas,s,bufend-s);
12263         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12264
12265         s = olds;
12266         goto retval;
12267     }
12268     else if (!outer) {
              /* The body is already in the current buffer: find the
                 terminator, copy the text between the newline at d and the
                 terminator into tmpstr, then replace PL_linestr with the
                 saved rest-of-line plus whatever follows the terminator. */
12269         d = s;
12270         while (s < PL_bufend &&
12271           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12272             if (*s++ == '\n')
12273                 CopLINE_inc(PL_curcop);
12274         }
12275         if (s >= PL_bufend) {
12276             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12277             missingterm(PL_tokenbuf);
12278         }
12279         sv_setpvn(tmpstr,d+1,s-d);
12280 #ifdef PERL_MAD
12281         if (PL_madskills) {
12282             if (PL_thisstuff)
12283                 sv_catpvn(PL_thisstuff, d + 1, s - d);
12284             else
12285                 PL_thisstuff = newSVpvn(d + 1, s - d);
12286             stuffstart = s - SvPVX(PL_linestr);
12287         }
12288 #endif
12289         s += len - 1;
12290         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12291
12292         sv_catpvn(herewas,s,PL_bufend-s);
12293         sv_setsv(PL_linestr,herewas);
              /* the buffer was replaced, so every cursor restarts at its head */
12294         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12295         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12296         PL_last_lop = PL_last_uni = NULL;
12297     }
12298     else
12299         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
          /* Reached with s at the end of the buffer when the body has to be
             read chunk by chunk.  Each new chunk is either the terminator
             line or is appended to the body. */
12300     while (s >= PL_bufend) {    /* multiple line string? */
12301 #ifdef PERL_MAD
12302         if (PL_madskills) {
12303             tstart = SvPVX(PL_linestr) + stuffstart;
12304             if (PL_thisstuff)
12305                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12306             else
12307                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12308         }
12309 #endif
12310         PL_bufptr = s;
              /* NOTE(review): the line count is bumped around the refill and
                 restored right after it, presumably so that anything reported
                 during lex_next_chunk() carries the right line -- confirm. */
12311         CopLINE_inc(PL_curcop);
              /* running out of buffered text when !outer, or failing to get
                 more input, means the terminator is missing */
12312         if (!outer || !lex_next_chunk(0)) {
12313             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12314             missingterm(PL_tokenbuf);
12315         }
12316         CopLINE_dec(PL_curcop);
12317         s = PL_bufptr;
12318 #ifdef PERL_MAD
12319         stuffstart = s - SvPVX(PL_linestr);
12320 #endif
12321         CopLINE_inc(PL_curcop);
12322         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12323         PL_last_lop = PL_last_uni = NULL;
12324 #ifndef PERL_STRICT_CR
              /* turn a trailing CR LF, LF CR or lone CR into a single LF */
12325         if (PL_bufend - PL_linestart >= 2) {
12326             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12327                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12328             {
12329                 PL_bufend[-2] = '\n';
12330                 PL_bufend--;
12331                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12332             }
12333             else if (PL_bufend[-1] == '\r')
12334                 PL_bufend[-1] = '\n';
12335         }
12336         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12337             PL_bufend[-1] = '\n';
12338 #endif
12339         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
                  /* terminator line: blank out its last character, glue the
                     saved rest-of-line after it and leave s on that blank,
                     which ends the loop because s is now below PL_bufend */
12340             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12341             *(SvPVX(PL_linestr) + off ) = ' ';
12342             sv_catsv(PL_linestr,herewas);
12343             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12344             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12345         }
12346         else {
                  /* ordinary body line: keep it and ask for the next chunk */
12347             s = PL_bufend;
12348             sv_catsv(tmpstr,PL_linestr);
12349         }
12350     }
12351     s++;
12352 retval:
12353     PL_multi_end = CopLINE(PL_curcop);
          /* give back unused space when noticeably over-allocated */
12354     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12355         SvPV_shrink_to_cur(tmpstr);
12356     }
12357     SvREFCNT_dec(herewas);
          /* unless in bytes mode, flag the body as UTF-8 when it validates as
             such in UTF mode, or else recode it through PL_encoding if set */
12358     if (!IN_BYTES) {
12359         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12360             SvUTF8_on(tmpstr);
12361         else if (PL_encoding)
12362             sv_recode_to_utf8(tmpstr, PL_encoding);
12363     }
12364     PL_lex_stuff = tmpstr;
12365     pl_yylval.ival = op_type;
12366     return s;
12367 }
12368
12369 /* scan_inputsymbol
12370    takes: current position in input buffer (the opening '<')
12371    returns: new position in input buffer (after the closing '>', or
         whatever scan_str() returns when the contents are a glob)
12372    side-effects: pl_yylval and lex_op are set.
         pl_yylval.ival becomes OP_GLOB for a glob and OP_NULL for a
         readline; for a readline PL_lex_op holds the ops that were built.
         PL_tokenbuf is overwritten with the text between the < and >.

         Croaks with "Excessively long <> operator", "Unterminated <>
         operator" or "Glob not terminated".
12373
12374    This code handles:
12375
12376    <>           read from ARGV
12377    <FH>         read from filehandle
12378    <pkg::FH>    read from package qualified filehandle
12379    <pkg'FH>     read from package qualified filehandle
12380    <$fh>        read from filehandle in $fh
12381    <*.h>        filename glob
12382
12383 */
12384
12385 STATIC char *
12386 S_scan_inputsymbol(pTHX_ char *start)
12387 {
12388     dVAR;
12389     register char *s = start;           /* current position in buffer */
12390     char *end;
12391     I32 len;
12392     char *d = PL_tokenbuf;                                      /* start of temp holding space */
12393     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
12394
12395     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12396
          /* the closing > has to be on the same line: search no further than
             the next newline, or the end of the buffer if there is none */
12397     end = strchr(s, '\n');
12398     if (!end)
12399         end = PL_bufend;
12400     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
12401
12402     /* die if we didn't have space for the contents of the <>,
12403        or if it didn't end, or if we see a newline
12404     */
12405
12406     if (len >= (I32)sizeof PL_tokenbuf)
12407         Perl_croak(aTHX_ "Excessively long <> operator");
12408     if (s >= end)
12409         Perl_croak(aTHX_ "Unterminated <> operator");
12410
12411     s++;                                /* step over the closing > */
12412
12413     /* check for <$fh>
12414        Remember, only scalar variables are interpreted as filehandles by
12415        this code.  Anything more complex (e.g., <$fh{$num}>) will be
12416        treated as a glob() call.
12417        This code makes use of the fact that except for the $ at the front,
12418        a scalar variable and a filehandle look the same.
12419     */
12420     if (*d == '$' && d[1]) d++;
12421
12422     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12423     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12424         d++;
12425
12426     /* If we've tried to read what we allow filehandles to look like, and
12427        there's still text left, then it must be a glob() and not a getline.
12428        Use scan_str to pull out the stuff between the <> and treat it
12429        as nothing more than a string.
12430     */
12431
12432     if (d - PL_tokenbuf != len) {
              /* rescan from the opening < so that scan_str() sees the
                 delimiters itself */
12433         pl_yylval.ival = OP_GLOB;
12434         s = scan_str(start,!!PL_madskills,FALSE);
12435         if (!s)
12436            Perl_croak(aTHX_ "Glob not terminated");
12437         return s;
12438     }
12439     else {
12440         bool readline_overriden = FALSE;
12441         GV *gv_readline;
12442         GV **gvp;
12443         /* we're in a filehandle read situation */
12444         d = PL_tokenbuf;
12445
12446         /* turn <> into <ARGV> */
12447         if (!len)
12448             Copy("ARGV",d,5,char);
12449
12450         /* Check whether readline() is overridden: either a "readline"
                 sub imported into the current scope, or one imported into
                 PL_globalstash */
12451         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12452         if ((gv_readline
12453                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12454                 ||
12455                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12456                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12457                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12458             readline_overriden = TRUE;
12459
12460         /* if <$fh>, create the ops to turn the variable into a
12461            filehandle
12462         */
12463         if (*d == '$') {
12464             /* try to find it in the pad for this block, otherwise
12465                add symbol table ops
12466             */
12467             const PADOFFSET tmp = pad_findmy(d, len, 0);
12468             if (tmp != NOT_IN_PAD) {
12469                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
                          /* an "our" variable: build its fully qualified
                             name and treat it as a package variable by
                             joining the symbol-table path below */
12470                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12471                     HEK * const stashname = HvNAME_HEK(stash);
12472                     SV * const sym = sv_2mortal(newSVhek(stashname));
12473                     sv_catpvs(sym, "::");
12474                     sv_catpv(sym, d+1);
12475                     d = SvPVX(sym);
12476                     goto intro_sym;
12477                 }
12478                 else {
                          /* a pad variable: read it with OP_PADSV and hand it
                             to the overriding sub or to OP_READLINE */
12479                     OP * const o = newOP(OP_PADSV, 0);
12480                     o->op_targ = tmp;
12481                     PL_lex_op = readline_overriden
12482                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12483                                 append_elem(OP_LIST, o,
12484                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12485                         : (OP*)newUNOP(OP_READLINE, 0, o);
12486                 }
12487             }
12488             else {
12489                 GV *gv;
12490                 ++d;                    /* skip the leading $ */
                      /* also entered from the "our" case above, with d
                         already pointing at the qualified name */
12491 intro_sym:
12492                 gv = gv_fetchpv(d,
12493                                 (PL_in_eval
12494                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
12495                                  : GV_ADDMULTI),
12496                                 SVt_PV);
12497                 PL_lex_op = readline_overriden
12498                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12499                             append_elem(OP_LIST,
12500                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12501                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12502                     : (OP*)newUNOP(OP_READLINE, 0,
12503                             newUNOP(OP_RV2SV, 0,
12504                                 newGVOP(OP_GV, 0, gv)));
12505             }
                  /* only a real OP_READLINE is marked OPf_SPECIAL; the
                     entersub built for an override is left alone.
                     NOTE(review): what OPf_SPECIAL means to readline is not
                     visible here. */
12506             if (!readline_overriden)
12507                 PL_lex_op->op_flags |= OPf_SPECIAL;
12508             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12509             pl_yylval.ival = OP_NULL;
12510         }
12511
12512         /* If it's none of the above, it must be a literal filehandle
12513            (<Foo::BAR> or <FOO>) so build a simple readline OP */
12514         else {
12515             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12516             PL_lex_op = readline_overriden
12517                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12518                         append_elem(OP_LIST,
12519                             newGVOP(OP_GV, 0, gv),
12520                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12521                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12522             pl_yylval.ival = OP_NULL;
12523         }
12524     }
12525
12526     return s;
12527 }
12528
12529
12530 /* scan_str
12531    takes: start position in buffer
12532           keep_quoted preserve \ on the embedded delimiter(s)
12533           keep_delims preserve the delimiters around the string
12534    returns: position to continue reading from buffer, or NULL when the
         input ran out before the closing delimiter was found
12535    side-effects: multi_start, multi_open, multi_close, multi_end,
         lex_repl or lex_stuff, and
12536         updates the read buffer.
12537
12538    This subroutine pulls a string out of the input.  It is called for:
12539         q               single quotes           q(literal text)
12540         '               single quotes           'literal text'
12541         qq              double quotes           qq(interpolate $here please)
12542         "               double quotes           "interpolate $here please"
12543         qx              backticks               qx(/bin/ls -l)
12544         `               backticks               `/bin/ls -l`
12545         qw              quote words             @EXPORT_OK = qw( func() $spam )
12546         m//             regexp match            m/this/
12547         s///            regexp substitute       s/this/that/
12548         tr///           string transliterate    tr/this/that/
12549         y///            string transliterate    y/this/that/
12550         ($*@)           sub prototypes          sub foo ($)
12551         (stuff)         sub attr parameters     sub foo : attr(stuff)
12552         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
12553
12554    In most of these cases (all but <>, patterns and transliterate)
12555    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
12556    calls scan_str().  s/// makes yylex() call scan_subst() which calls
12557    scan_str().  tr/// and y/// make yylex() call scan_trans() which
12558    calls scan_str().
12559
12560    It skips whitespace before the string starts, and treats the first
12561    character as the delimiter.  If the delimiter is one of ([{< then
12562    the corresponding "close" character )]}> is used as the closing
12563    delimiter.  It allows quoting of delimiters, and if the string has
12564    balanced delimiters ([{<>}]) it allows nesting.
12565
12566    On success, the SV with the resulting string is put into lex_stuff or,
12567    if that is already non-NULL, into lex_repl. The second case occurs only
12568    when parsing the RHS of the special constructs s/// and tr/// (y///).
12569    For convenience, the terminating delimiter character is stuffed into
12570    SvIVX of the SV.
12571 */
12572
12573 STATIC char *
12574 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12575 {
12576     dVAR;
12577     SV *sv;                             /* scalar value: string */
12578     const char *tmps;                   /* temp string, used for delimiter matching */
12579     register char *s = start;           /* current position in the buffer */
12580     register char term;                 /* terminating character */
12581     register char *to;                  /* current position in the sv's data */
12582     I32 brackets = 1;                   /* bracket nesting level */
12583     bool has_utf8 = FALSE;              /* is there any utf8 content? */
12584     I32 termcode;                       /* terminating char. code */
12585     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
12586     STRLEN termlen;                     /* length of terminating string */
12587     int last_off = 0;                   /* last position for nesting bracket */
12588 #ifdef PERL_MAD
12589     int stuffstart;
12590     char *tstart;
12591 #endif
12592
12593     PERL_ARGS_ASSERT_SCAN_STR;
12594
12595     /* skip space before the delimiter */
12596     if (isSPACE(*s)) {
12597         s = PEEKSPACE(s);
12598     }
12599
12600 #ifdef PERL_MAD
12601     if (PL_realtokenstart >= 0) {
12602         stuffstart = PL_realtokenstart;
12603         PL_realtokenstart = -1;
12604     }
12605     else
12606         stuffstart = start - SvPVX(PL_linestr);
12607 #endif
12608     /* mark where we are, in case we need to report errors */
12609     CLINE;
12610
12611     /* after skipping whitespace, the next character is the terminator */
12612     term = *s;
12613     if (!UTF) {
12614         termcode = termstr[0] = term;
12615         termlen = 1;
12616     }
12617     else {
              /* in UTF mode the delimiter may be several bytes long: keep
                 its code point and its byte sequence; term stays the first
                 byte */
12618         termcode = utf8_to_uvchr((U8*)s, &termlen);
12619         Copy(s, termstr, termlen, U8);
12620         if (!UTF8_IS_INVARIANT(term))
12621             has_utf8 = TRUE;
12622     }
12623
12624     /* mark where we are */
12625     PL_multi_start = CopLINE(PL_curcop);
12626     PL_multi_open = term;
12627
12628     /* find corresponding closing delimiter: in the lookup string every
             character sits five places before the one to use as its close,
             so an opening bracket maps to its partner and a closing bracket
             or a space maps to itself */
12629     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12630         termcode = termstr[0] = term = tmps[5];
12631
12632     PL_multi_close = term;
12633
12634     /* create a new SV to hold the contents.  80 bytes are requested up
12635        front; the figure is arbitrary. */
12636     sv = newSV_type(SVt_PVIV);
12637     SvGROW(sv, 80);
12638     SvIV_set(sv, termcode);
12639     (void)SvPOK_only(sv);               /* validate pointer */
12640
12641     /* move past delimiter and try to read a complete string */
12642     if (keep_delims)
12643         sv_catpvn(sv, s, termlen);
12644     s += termlen;
12645 #ifdef PERL_MAD
12646     tstart = SvPVX(PL_linestr) + stuffstart;
12647     if (!PL_thisopen && !keep_delims) {
12648         PL_thisopen = newSVpvn(tstart, s - tstart);
12649         stuffstart = s - SvPVX(PL_linestr);
12650     }
12651 #endif
          /* Each pass scans what is in the buffer.  The loop is left once
             the terminator has been seen; otherwise more input is fetched
             at read_more_line and the scan carries on. */
12652     for (;;) {
12653         if (PL_encoding && !UTF) {
                  /* A source encoding is set and we are not in UTF mode:
                     sv_cat_decode() appends to sv and reports whether the
                     terminator was reached; offset comes back as the new
                     position in PL_linestr.
                     NOTE(review): presumably the appended text is the
                     decoded input up to and including the terminator --
                     confirm against sv_cat_decode(). */
12654             bool cont = TRUE;
12655
12656             while (cont) {
12657                 int offset = s - SvPVX_const(PL_linestr);
12658                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12659                                            &offset, (char*)termstr, termlen);
12660                 const char * const ns = SvPVX_const(PL_linestr) + offset;
12661                 char * const svlast = SvEND(sv) - 1;
12662
                      /* advance s over what was consumed, counting newlines */
12663                 for (; s < ns; s++) {
12664                     if (*s == '\n' && !PL_rsfp)
12665                         CopLINE_inc(PL_curcop);
12666                 }
12667                 if (!found)
12668                     goto read_more_line;
12669                 else {
12670                     /* handle quoted delimiters: an odd number of
                             backslashes right before the last character
                             means that character was escaped, so keep
                             scanning */
12671                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12672                         const char *t;
12673                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12674                             t--;
12675                         if ((svlast-1 - t) % 2) {
12676                             if (!keep_quoted) {
                                      /* drop the escaping backslash */
12677                                 *(svlast-1) = term;
12678                                 *svlast = '\0';
12679                                 SvCUR_set(sv, SvCUR(sv) - 1);
12680                             }
12681                             continue;
12682                         }
12683                     }
12684                     if (PL_multi_open == PL_multi_close) {
12685                         cont = FALSE;
12686                     }
12687                     else {
                              /* bracketing delimiters: rescan the part of sv
                                 added since last_off, counting opens and,
                                 unless keep_quoted, removing the backslash
                                 in front of an escaped open */
12688                         const char *t;
12689                         char *w;
12690                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12691                             /* At here, all closes are "was quoted" one,
12692                                so we don't check PL_multi_close. */
12693                             if (*t == '\\') {
12694                                 if (!keep_quoted && *(t+1) == PL_multi_open)
12695                                     t++;
12696                                 else
12697                                     *w++ = *t++;
12698                             }
12699                             else if (*t == PL_multi_open)
12700                                 brackets++;
12701
12702                             *w = *t;
12703                         }
                              /* if anything was removed, re-append the
                                 terminator and fix up the length */
12704                         if (w < t) {
12705                             *w++ = term;
12706                             *w = '\0';
12707                             SvCUR_set(sv, w - SvPVX_const(sv));
12708                         }
12709                         last_off = w - SvPVX(sv);
12710                         if (--brackets <= 0)
12711                             cont = FALSE;
12712                     }
12713                 }
12714             }
                  /* chop the final character of sv (presumably the
                     terminator) unless the delimiters are wanted */
12715             if (!keep_delims) {
12716                 SvCUR_set(sv, SvCUR(sv) - 1);
12717                 *SvEND(sv) = '\0';
12718             }
12719             break;
12720         }
12721
12722         /* extend sv if need be */
12723         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12724         /* set 'to' to the next character in the sv's string */
12725         to = SvPVX(sv)+SvCUR(sv);
12726
12727         /* if open delimiter is the close delimiter read unbridled */
12728         if (PL_multi_open == PL_multi_close) {
12729             for (; s < PL_bufend; s++,to++) {
12730                 /* embedded newlines increment the current line number */
12731                 if (*s == '\n' && !PL_rsfp)
12732                     CopLINE_inc(PL_curcop);
12733                 /* handle quoted delimiters */
12734                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12735                     if (!keep_quoted && s[1] == term)
12736                         s++;
12737                 /* any other quotes are simply copied straight through */
12738                     else
12739                         *to++ = *s++;
12740                 }
12741                 /* terminate when run out of buffer (the for() condition), or
12742                    have found the terminator */
12743                 else if (*s == term) {
12744                     if (termlen == 1)
12745                         break;
                          /* multi-byte terminator: the first byte matched,
                             now compare the whole sequence */
12746                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12747                         break;
12748                 }
12749                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12750                     has_utf8 = TRUE;
12751                 *to = *s;
12752             }
12753         }
12754
12755         /* if the terminator isn't the same as the start character (e.g.,
12756            matched brackets), we have to allow more in the quoting, and
12757            be prepared for nested brackets.
12758         */
12759         else {
12760             /* read until we run out of string, or we find the terminator */
12761             for (; s < PL_bufend; s++,to++) {
12762                 /* embedded newlines increment the line count */
12763                 if (*s == '\n' && !PL_rsfp)
12764                     CopLINE_inc(PL_curcop);
12765                 /* backslashes can escape the open or closing characters */
12766                 if (*s == '\\' && s+1 < PL_bufend) {
12767                     if (!keep_quoted &&
12768                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12769                         s++;
12770                     else
12771                         *to++ = *s++;
12772                 }
12773                 /* allow nested opens and closes */
12774                 else if (*s == PL_multi_close && --brackets <= 0)
12775                     break;
12776                 else if (*s == PL_multi_open)
12777                     brackets++;
12778                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12779                     has_utf8 = TRUE;
12780                 *to = *s;
12781             }
12782         }
12783         /* terminate the copied string and update the sv's end-of-string */
12784         *to = '\0';
12785         SvCUR_set(sv, to - SvPVX_const(sv));
12786
12787         /*
12788          * this next chunk reads more into the buffer if we're not done yet
12789          */
12790
12791         if (s < PL_bufend)
12792             break;              /* a copy loop stopped at the terminator: done */
12793
12794 #ifndef PERL_STRICT_CR
              /* turn a CR LF, LF CR or lone CR at the end of what has been
                 copied so far into a single LF */
12795         if (to - SvPVX_const(sv) >= 2) {
12796             if ((to[-2] == '\r' && to[-1] == '\n') ||
12797                 (to[-2] == '\n' && to[-1] == '\r'))
12798             {
12799                 to[-2] = '\n';
12800                 to--;
12801                 SvCUR_set(sv, to - SvPVX_const(sv));
12802             }
12803             else if (to[-1] == '\r')
12804                 to[-1] = '\n';
12805         }
12806         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12807             to[-1] = '\n';
12808 #endif
12809
12810      read_more_line:
12811         /* if we're out of file, or a read fails, bail and reset the current
12812            line marker so we can report where the unterminated string began
12813         */
12814 #ifdef PERL_MAD
12815         if (PL_madskills) {
12816             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12817             if (PL_thisstuff)
12818                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12819             else
12820                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12821         }
12822 #endif
12823         CopLINE_inc(PL_curcop);
12824         PL_bufptr = PL_bufend;
12825         if (!lex_next_chunk(0)) {
                  /* the partial string is discarded; the caller sees NULL */
12826             sv_free(sv);
12827             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12828             return NULL;
12829         }
12830         s = PL_bufptr;
12831 #ifdef PERL_MAD
12832         stuffstart = 0;
12833 #endif
12834     }
12835
12836     /* at this point, we have successfully read the delimited string */
12837
          /* On the byte-copy path s still points at the closing delimiter,
             so it is optionally kept and then stepped over here.  The
             decoding path is excluded because its scan already moved s. */
12838     if (!PL_encoding || UTF) {
12839 #ifdef PERL_MAD
12840         if (PL_madskills) {
12841             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12842             const int len = s - tstart;
12843             if (PL_thisstuff)
12844                 sv_catpvn(PL_thisstuff, tstart, len);
12845             else
12846                 PL_thisstuff = newSVpvn(tstart, len);
12847             if (!PL_thisclose && !keep_delims)
12848                 PL_thisclose = newSVpvn(s,termlen);
12849         }
12850 #endif
12851
12852         if (keep_delims)
12853             sv_catpvn(sv, s, termlen);
12854         s += termlen;
12855     }
12856 #ifdef PERL_MAD
12857     else {
12858         if (PL_madskills) {
12859             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12860             const int len = s - tstart - termlen;
12861             if (PL_thisstuff)
12862                 sv_catpvn(PL_thisstuff, tstart, len);
12863             else
12864                 PL_thisstuff = newSVpvn(tstart, len);
12865             if (!PL_thisclose && !keep_delims)
12866                 PL_thisclose = newSVpvn(s - termlen,termlen);
12867         }
12868     }
12869 #endif
12870     if (has_utf8 || PL_encoding)
12871         SvUTF8_on(sv);
12872
12873     PL_multi_end = CopLINE(PL_curcop);
12874
12875     /* if we allocated too much space, give some back */
12876     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12877         SvLEN_set(sv, SvCUR(sv) + 1);
12878         SvPV_renew(sv, SvLEN(sv));
12879     }
12880
12881     /* decide whether this is the first or second quoted string we've read
12882        for this op
12883     */
12884
12885     if (PL_lex_stuff)
12886         PL_lex_repl = sv;
12887     else
12888         PL_lex_stuff = sv;
12889     return s;
12890 }
12891
12892 /*
12893   scan_num
12894   takes: pointer to position in buffer
12895   returns: pointer to new position in buffer
12896   side-effects: builds ops for the constant in pl_yylval.op
12897
12898   Read a number in any of the formats that Perl accepts:
12899
12900   \d(_?\d)*(\.(\d(_?\d)*)?)?([Ee][\+\-]?(\d(_?\d)*))?   12 12.34 12.
12901   \.\d(_?\d)*([Ee][\+\-]?(\d(_?\d)*))?                  .34
12902   0b[01](_?[01])*
12903   0[0-7](_?[0-7])*
12904   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12905
12906   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12907   thing it reads.
12908
12909   If it reads a number without a decimal point or an exponent, it will
12910   try converting the number to an integer and see if it can do so
12911   without loss of precision.
12912 */
12913
12914 char *
12915 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12916 {
12917     dVAR;
12918     register const char *s = start;     /* current position in buffer */
12919     register char *d;                   /* destination in temp buffer */
12920     register char *e;                   /* end of temp buffer */
12921     NV nv;                              /* number read, as a double */
12922     SV *sv = NULL;                      /* place to put the converted number */
12923     bool floatit;                       /* boolean: int or float? */
12924     const char *lastub = NULL;          /* position of last underbar */
12925     static char const number_too_long[] = "Number too long";
12926
12927     PERL_ARGS_ASSERT_SCAN_NUM;
12928
12929     /* We use the first character to decide what type of number this is */
12930
12931     switch (*s) {
12932     default:
12933       Perl_croak(aTHX_ "panic: scan_num");
12934
12935     /* if it starts with a 0, it could be an octal number, a decimal in
12936        0.13 disguise, or a hexadecimal number, or a binary number. */
12937     case '0':
12938         {
12939           /* variables:
12940              u          holds the "number so far"
12941              shift      the power of 2 of the base
12942                         (hex == 4, octal == 3, binary == 1)
12943              overflowed was the number more than we can hold?
12944
12945              Shift is used when we add a digit.  It also serves as an "are
12946              we in octal/hex/binary?" indicator to disallow hex characters
12947              when in octal mode.
12948            */
12949             NV n = 0.0;
12950             UV u = 0;
12951             I32 shift;
12952             bool overflowed = FALSE;
12953             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12954             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12955             static const char* const bases[5] =
12956               { "", "binary", "", "octal", "hexadecimal" };
12957             static const char* const Bases[5] =
12958               { "", "Binary", "", "Octal", "Hexadecimal" };
12959             static const char* const maxima[5] =
12960               { "",
12961                 "0b11111111111111111111111111111111",
12962                 "",
12963                 "037777777777",
12964                 "0xffffffff" };
12965             const char *base, *Base, *max;
12966
12967             /* check for hex */
12968             if (s[1] == 'x') {
12969                 shift = 4;
12970                 s += 2;
12971                 just_zero = FALSE;
12972             } else if (s[1] == 'b') {
12973                 shift = 1;
12974                 s += 2;
12975                 just_zero = FALSE;
12976             }
12977             /* check for a decimal in disguise */
12978             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12979                 goto decimal;
12980             /* so it must be octal */
12981             else {
12982                 shift = 3;
12983                 s++;
12984             }
12985
12986             if (*s == '_') {
12987                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12988                                "Misplaced _ in number");
12989                lastub = s++;
12990             }
12991
12992             base = bases[shift];
12993             Base = Bases[shift];
12994             max  = maxima[shift];
12995
12996             /* read the rest of the number */
12997             for (;;) {
12998                 /* x is used in the overflow test,
12999                    b is the digit we're adding on. */
13000                 UV x, b;
13001
13002                 switch (*s) {
13003
13004                 /* if we don't mention it, we're done */
13005                 default:
13006                     goto out;
13007
13008                 /* _ are ignored -- but warned about if consecutive */
13009                 case '_':
13010                     if (lastub && s == lastub + 1)
13011                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13012                                        "Misplaced _ in number");
13013                     lastub = s++;
13014                     break;
13015
13016                 /* 8 and 9 are not octal */
13017                 case '8': case '9':
13018                     if (shift == 3)
13019                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13020                     /* FALL THROUGH */
13021
13022                 /* octal digits */
13023                 case '2': case '3': case '4':
13024                 case '5': case '6': case '7':
13025                     if (shift == 1)
13026                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13027                     /* FALL THROUGH */
13028
13029                 case '0': case '1':
13030                     b = *s++ & 15;              /* ASCII digit -> value of digit */
13031                     goto digit;
13032
13033                 /* hex digits */
13034                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13035                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13036                     /* make sure they said 0x */
13037                     if (shift != 4)
13038                         goto out;
13039                     b = (*s++ & 7) + 9;
13040
13041                     /* Prepare to put the digit we have onto the end
13042                        of the number so far.  We check for overflows.
13043                     */
13044
13045                   digit:
13046                     just_zero = FALSE;
13047                     if (!overflowed) {
13048                         x = u << shift; /* make room for the digit */
13049
13050                         if ((x >> shift) != u
13051                             && !(PL_hints & HINT_NEW_BINARY)) {
13052                             overflowed = TRUE;
13053                             n = (NV) u;
13054                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13055                                              "Integer overflow in %s number",
13056                                              base);
13057                         } else
13058                             u = x | b;          /* add the digit to the end */
13059                     }
13060                     if (overflowed) {
13061                         n *= nvshift[shift];
13062                         /* If an NV has not enough bits in its
13063                          * mantissa to represent an UV this summing of
13064                          * small low-order numbers is a waste of time
13065                          * (because the NV cannot preserve the
13066                          * low-order bits anyway): we could just
13067                          * remember when did we overflow and in the
13068                          * end just multiply n by the right
13069                          * amount. */
13070                         n += (NV) b;
13071                     }
13072                     break;
13073                 }
13074             }
13075
13076           /* if we get here, we had success: make a scalar value from
13077              the number.
13078           */
13079           out:
13080
13081             /* final misplaced underbar check */
13082             if (s[-1] == '_') {
13083                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13084             }
13085
13086             if (overflowed) {
13087                 if (n > 4294967295.0)
13088                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13089                                    "%s number > %s non-portable",
13090                                    Base, max);
13091                 sv = newSVnv(n);
13092             }
13093             else {
13094 #if UVSIZE > 4
13095                 if (u > 0xffffffff)
13096                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13097                                    "%s number > %s non-portable",
13098                                    Base, max);
13099 #endif
13100                 sv = newSVuv(u);
13101             }
13102             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13103                 sv = new_constant(start, s - start, "integer",
13104                                   sv, NULL, NULL, 0);
13105             else if (PL_hints & HINT_NEW_BINARY)
13106                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13107         }
13108         break;
13109
13110     /*
13111       handle decimal numbers.
13112       we're also sent here when we read a 0 as the first digit
13113     */
13114     case '1': case '2': case '3': case '4': case '5':
13115     case '6': case '7': case '8': case '9': case '.':
13116       decimal:
13117         d = PL_tokenbuf;
13118         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13119         floatit = FALSE;
13120
13121         /* read next group of digits and _ and copy into d */
13122         while (isDIGIT(*s) || *s == '_') {
13123             /* skip underscores, checking for misplaced ones
13124                if -w is on
13125             */
13126             if (*s == '_') {
13127                 if (lastub && s == lastub + 1)
13128                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13129                                    "Misplaced _ in number");
13130                 lastub = s++;
13131             }
13132             else {
13133                 /* check for end of fixed-length buffer */
13134                 if (d >= e)
13135                     Perl_croak(aTHX_ number_too_long);
13136                 /* if we're ok, copy the character */
13137                 *d++ = *s++;
13138             }
13139         }
13140
13141         /* final misplaced underbar check */
13142         if (lastub && s == lastub + 1) {
13143             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13144         }
13145
13146         /* read a decimal portion if there is one.  avoid
13147            3..5 being interpreted as the number 3. followed
13148            by .5
13149         */
13150         if (*s == '.' && s[1] != '.') {
13151             floatit = TRUE;
13152             *d++ = *s++;
13153
13154             if (*s == '_') {
13155                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13156                                "Misplaced _ in number");
13157                 lastub = s;
13158             }
13159
13160             /* copy, ignoring underbars, until we run out of digits.
13161             */
13162             for (; isDIGIT(*s) || *s == '_'; s++) {
13163                 /* fixed length buffer check */
13164                 if (d >= e)
13165                     Perl_croak(aTHX_ number_too_long);
13166                 if (*s == '_') {
13167                    if (lastub && s == lastub + 1)
13168                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13169                                       "Misplaced _ in number");
13170                    lastub = s;
13171                 }
13172                 else
13173                     *d++ = *s;
13174             }
13175             /* fractional part ending in underbar? */
13176             if (s[-1] == '_') {
13177                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13178                                "Misplaced _ in number");
13179             }
13180             if (*s == '.' && isDIGIT(s[1])) {
13181                 /* oops, it's really a v-string, but without the "v" */
13182                 s = start;
13183                 goto vstring;
13184             }
13185         }
13186
13187         /* read exponent part, if present */
13188         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13189             floatit = TRUE;
13190             s++;
13191
13192             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13193             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
13194
13195             /* stray preinitial _ */
13196             if (*s == '_') {
13197                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13198                                "Misplaced _ in number");
13199                 lastub = s++;
13200             }
13201
13202             /* allow positive or negative exponent */
13203             if (*s == '+' || *s == '-')
13204                 *d++ = *s++;
13205
13206             /* stray initial _ */
13207             if (*s == '_') {
13208                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13209                                "Misplaced _ in number");
13210                 lastub = s++;
13211             }
13212
13213             /* read digits of exponent */
13214             while (isDIGIT(*s) || *s == '_') {
13215                 if (isDIGIT(*s)) {
13216                     if (d >= e)
13217                         Perl_croak(aTHX_ number_too_long);
13218                     *d++ = *s++;
13219                 }
13220                 else {
13221                    if (((lastub && s == lastub + 1) ||
13222                         (!isDIGIT(s[1]) && s[1] != '_')))
13223                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13224                                       "Misplaced _ in number");
13225                    lastub = s++;
13226                 }
13227             }
13228         }
13229
13230
13231         /*
13232            We try to do an integer conversion first if no characters
13233            indicating "float" have been found.
13234          */
13235
13236         if (!floatit) {
13237             UV uv;
13238             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13239
13240             if (flags == IS_NUMBER_IN_UV) {
13241               if (uv <= IV_MAX)
13242                 sv = newSViv(uv); /* Prefer IVs over UVs. */
13243               else
13244                 sv = newSVuv(uv);
13245             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13246               if (uv <= (UV) IV_MIN)
13247                 sv = newSViv(-(IV)uv);
13248               else
13249                 floatit = TRUE;
13250             } else
13251               floatit = TRUE;
13252         }
13253         if (floatit) {
13254             /* terminate the string */
13255             *d = '\0';
13256             nv = Atof(PL_tokenbuf);
13257             sv = newSVnv(nv);
13258         }
13259
13260         if ( floatit
13261              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13262             const char *const key = floatit ? "float" : "integer";
13263             const STRLEN keylen = floatit ? 5 : 7;
13264             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13265                                 key, keylen, sv, NULL, NULL, 0);
13266         }
13267         break;
13268
13269     /* if it starts with a v, it could be a v-string */
13270     case 'v':
13271 vstring:
13272                 sv = newSV(5); /* preallocate storage space */
13273                 s = scan_vstring(s, PL_bufend, sv);
13274         break;
13275     }
13276
13277     /* make the op for the constant and return */
13278
13279     if (sv)
13280         lvalp->opval = newSVOP(OP_CONST, 0, sv);
13281     else
13282         lvalp->opval = NULL;
13283
13284     return (char *)s;
13285 }
13286
13287 STATIC char *
13288 S_scan_formline(pTHX_ register char *s)
13289 {
13290     dVAR;
13291     register char *eol;
13292     register char *t;
13293     SV * const stuff = newSVpvs("");
13294     bool needargs = FALSE;
13295     bool eofmt = FALSE;
13296 #ifdef PERL_MAD
13297     char *tokenstart = s;
13298     SV* savewhite = NULL;
13299
13300     if (PL_madskills) {
13301         savewhite = PL_thiswhite;
13302         PL_thiswhite = 0;
13303     }
13304 #endif
13305
13306     PERL_ARGS_ASSERT_SCAN_FORMLINE;
13307
13308     while (!needargs) {
13309         if (*s == '.') {
13310             t = s+1;
13311 #ifdef PERL_STRICT_CR
13312             while (SPACE_OR_TAB(*t))
13313                 t++;
13314 #else
13315             while (SPACE_OR_TAB(*t) || *t == '\r')
13316                 t++;
13317 #endif
13318             if (*t == '\n' || t == PL_bufend) {
13319                 eofmt = TRUE;
13320                 break;
13321             }
13322         }
13323         if (PL_in_eval && !PL_rsfp) {
13324             eol = (char *) memchr(s,'\n',PL_bufend-s);
13325             if (!eol++)
13326                 eol = PL_bufend;
13327         }
13328         else
13329             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13330         if (*s != '#') {
13331             for (t = s; t < eol; t++) {
13332                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13333                     needargs = FALSE;
13334                     goto enough;        /* ~~ must be first line in formline */
13335                 }
13336                 if (*t == '@' || *t == '^')
13337                     needargs = TRUE;
13338             }
13339             if (eol > s) {
13340                 sv_catpvn(stuff, s, eol-s);
13341 #ifndef PERL_STRICT_CR
13342                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13343                     char *end = SvPVX(stuff) + SvCUR(stuff);
13344                     end[-2] = '\n';
13345                     end[-1] = '\0';
13346                     SvCUR_set(stuff, SvCUR(stuff) - 1);
13347                 }
13348 #endif
13349             }
13350             else
13351               break;
13352         }
13353         s = (char*)eol;
13354         if (PL_rsfp) {
13355             bool got_some;
13356 #ifdef PERL_MAD
13357             if (PL_madskills) {
13358                 if (PL_thistoken)
13359                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13360                 else
13361                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13362             }
13363 #endif
13364             PL_bufptr = PL_bufend;
13365             CopLINE_inc(PL_curcop);
13366             got_some = lex_next_chunk(0);
13367             CopLINE_dec(PL_curcop);
13368             s = PL_bufptr;
13369 #ifdef PERL_MAD
13370             tokenstart = PL_bufptr;
13371 #endif
13372             if (!got_some)
13373                 break;
13374         }
13375         incline(s);
13376     }
13377   enough:
13378     if (SvCUR(stuff)) {
13379         PL_expect = XTERM;
13380         if (needargs) {
13381             PL_lex_state = LEX_NORMAL;
13382             start_force(PL_curforce);
13383             NEXTVAL_NEXTTOKE.ival = 0;
13384             force_next(',');
13385         }
13386         else
13387             PL_lex_state = LEX_FORMLINE;
13388         if (!IN_BYTES) {
13389             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13390                 SvUTF8_on(stuff);
13391             else if (PL_encoding)
13392                 sv_recode_to_utf8(stuff, PL_encoding);
13393         }
13394         start_force(PL_curforce);
13395         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13396         force_next(THING);
13397         start_force(PL_curforce);
13398         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13399         force_next(LSTOP);
13400     }
13401     else {
13402         SvREFCNT_dec(stuff);
13403         if (eofmt)
13404             PL_lex_formbrack = 0;
13405         PL_bufptr = s;
13406     }
13407 #ifdef PERL_MAD
13408     if (PL_madskills) {
13409         if (PL_thistoken)
13410             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13411         else
13412             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13413         PL_thiswhite = savewhite;
13414     }
13415 #endif
13416     return s;
13417 }
13418
13419 I32
13420 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13421 {
13422     dVAR;
13423     const I32 oldsavestack_ix = PL_savestack_ix;
13424     CV* const outsidecv = PL_compcv;
13425
13426     if (PL_compcv) {
13427         assert(SvTYPE(PL_compcv) == SVt_PVCV);
13428     }
13429     SAVEI32(PL_subline);
13430     save_item(PL_subname);
13431     SAVESPTR(PL_compcv);
13432
13433     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13434     CvFLAGS(PL_compcv) |= flags;
13435
13436     PL_subline = CopLINE(PL_curcop);
13437     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13438     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13439     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13440
13441     return oldsavestack_ix;
13442 }
13443
13444 #ifdef __SC__
13445 #pragma segment Perl_yylex
13446 #endif
13447 static int
13448 S_yywarn(pTHX_ const char *const s)
13449 {
13450     dVAR;
13451
13452     PERL_ARGS_ASSERT_YYWARN;
13453
13454     PL_in_eval |= EVAL_WARNONLY;
13455     yyerror(s);
13456     PL_in_eval &= ~EVAL_WARNONLY;
13457     return 0;
13458 }
13459
13460 int
13461 Perl_yyerror(pTHX_ const char *const s)
13462 {
13463     dVAR;
13464     const char *where = NULL;
13465     const char *context = NULL;
13466     int contlen = -1;
13467     SV *msg;
13468     int yychar  = PL_parser->yychar;
13469
13470     PERL_ARGS_ASSERT_YYERROR;
13471
13472     if (!yychar || (yychar == ';' && !PL_rsfp))
13473         where = "at EOF";
13474     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13475       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13476       PL_oldbufptr != PL_bufptr) {
13477         /*
13478                 Only for NetWare:
13479                 The code below is removed for NetWare because it abends/crashes on NetWare
13480                 when the script has error such as not having the closing quotes like:
13481                     if ($var eq "value)
13482                 Checking of white spaces is anyway done in NetWare code.
13483         */
13484 #ifndef NETWARE
13485         while (isSPACE(*PL_oldoldbufptr))
13486             PL_oldoldbufptr++;
13487 #endif
13488         context = PL_oldoldbufptr;
13489         contlen = PL_bufptr - PL_oldoldbufptr;
13490     }
13491     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13492       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13493         /*
13494                 Only for NetWare:
13495                 The code below is removed for NetWare because it abends/crashes on NetWare
13496                 when the script has error such as not having the closing quotes like:
13497                     if ($var eq "value)
13498                 Checking of white spaces is anyway done in NetWare code.
13499         */
13500 #ifndef NETWARE
13501         while (isSPACE(*PL_oldbufptr))
13502             PL_oldbufptr++;
13503 #endif
13504         context = PL_oldbufptr;
13505         contlen = PL_bufptr - PL_oldbufptr;
13506     }
13507     else if (yychar > 255)
13508         where = "next token ???";
13509     else if (yychar == -2) { /* YYEMPTY */
13510         if (PL_lex_state == LEX_NORMAL ||
13511            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13512             where = "at end of line";
13513         else if (PL_lex_inpat)
13514             where = "within pattern";
13515         else
13516             where = "within string";
13517     }
13518     else {
13519         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13520         if (yychar < 32)
13521             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13522         else if (isPRINT_LC(yychar)) {
13523             const char string = yychar;
13524             sv_catpvn(where_sv, &string, 1);
13525         }
13526         else
13527             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13528         where = SvPVX_const(where_sv);
13529     }
13530     msg = sv_2mortal(newSVpv(s, 0));
13531     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13532         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13533     if (context)
13534         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13535     else
13536         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13537     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13538         Perl_sv_catpvf(aTHX_ msg,
13539         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13540                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13541         PL_multi_end = 0;
13542     }
13543     if (PL_in_eval & EVAL_WARNONLY) {
13544         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13545     }
13546     else
13547         qerror(msg);
13548     if (PL_error_count >= 10) {
13549         if (PL_in_eval && SvCUR(ERRSV))
13550             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13551                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
13552         else
13553             Perl_croak(aTHX_ "%s has too many errors.\n",
13554             OutCopFILE(PL_curcop));
13555     }
13556     PL_in_my = 0;
13557     PL_in_my_stash = NULL;
13558     return 0;
13559 }
13560 #ifdef __SC__
13561 #pragma segment Main
13562 #endif
13563
13564 STATIC char*
13565 S_swallow_bom(pTHX_ U8 *s)
13566 {
13567     dVAR;
13568     const STRLEN slen = SvCUR(PL_linestr);
13569
13570     PERL_ARGS_ASSERT_SWALLOW_BOM;
13571
13572     switch (s[0]) {
13573     case 0xFF:
13574         if (s[1] == 0xFE) {
13575             /* UTF-16 little-endian? (or UTF-32LE?) */
13576             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13577                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13578 #ifndef PERL_NO_UTF16_FILTER
13579             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13580             s += 2;
13581             if (PL_bufend > (char*)s) {
13582                 s = add_utf16_textfilter(s, TRUE);
13583             }
13584 #else
13585             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13586 #endif
13587         }
13588         break;
13589     case 0xFE:
13590         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13591 #ifndef PERL_NO_UTF16_FILTER
13592             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13593             s += 2;
13594             if (PL_bufend > (char *)s) {
13595                 s = add_utf16_textfilter(s, FALSE);
13596             }
13597 #else
13598             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13599 #endif
13600         }
13601         break;
13602     case 0xEF:
13603         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13604             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13605             s += 3;                      /* UTF-8 */
13606         }
13607         break;
13608     case 0:
13609         if (slen > 3) {
13610              if (s[1] == 0) {
13611                   if (s[2] == 0xFE && s[3] == 0xFF) {
13612                        /* UTF-32 big-endian */
13613                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13614                   }
13615              }
13616              else if (s[2] == 0 && s[3] != 0) {
13617                   /* Leading bytes
13618                    * 00 xx 00 xx
13619                    * are a good indicator of UTF-16BE. */
13620 #ifndef PERL_NO_UTF16_FILTER
13621                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13622                   s = add_utf16_textfilter(s, FALSE);
13623 #else
13624                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13625 #endif
13626              }
13627         }
13628 #ifdef EBCDIC
13629     case 0xDD:
13630         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13631             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13632             s += 4;                      /* UTF-8 */
13633         }
13634         break;
13635 #endif
13636
13637     default:
13638          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13639                   /* Leading bytes
13640                    * xx 00 xx 00
13641                    * are a good indicator of UTF-16LE. */
13642 #ifndef PERL_NO_UTF16_FILTER
13643               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13644               s = add_utf16_textfilter(s, TRUE);
13645 #else
13646               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13647 #endif
13648          }
13649     }
13650     return (char*)s;
13651 }
13652
13653
13654 #ifndef PERL_NO_UTF16_FILTER
13655 static I32
13656 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13657 {
13658     dVAR;
13659     SV *const filter = FILTER_DATA(idx);
13660     /* We re-use this each time round, throwing the contents away before we
13661        return.  */
13662     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13663     SV *const utf8_buffer = filter;
13664     IV status = IoPAGE(filter);
13665     const bool reverse = cBOOL(IoLINES(filter));
13666     I32 retval;
13667
13668     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13669
13670     /* As we're automatically added, at the lowest level, and hence only called
13671        from this file, we can be sure that we're not called in block mode. Hence
13672        don't bother writing code to deal with block mode.  */
13673     if (maxlen) {
13674         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13675     }
13676     if (status < 0) {
13677         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13678     }
13679     DEBUG_P(PerlIO_printf(Perl_debug_log,
13680                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13681                           FPTR2DPTR(void *, S_utf16_textfilter),
13682                           reverse ? 'l' : 'b', idx, maxlen, status,
13683                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13684
13685     while (1) {
13686         STRLEN chars;
13687         STRLEN have;
13688         I32 newlen;
13689         U8 *end;
13690         /* First, look in our buffer of existing UTF-8 data:  */
13691         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13692
13693         if (nl) {
13694             ++nl;
13695         } else if (status == 0) {
13696             /* EOF */
13697             IoPAGE(filter) = 0;
13698             nl = SvEND(utf8_buffer);
13699         }
13700         if (nl) {
13701             STRLEN got = nl - SvPVX(utf8_buffer);
13702             /* Did we have anything to append?  */
13703             retval = got != 0;
13704             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13705             /* Everything else in this code works just fine if SVp_POK isn't
13706                set.  This, however, needs it, and we need it to work, else
13707                we loop infinitely because the buffer is never consumed.  */
13708             sv_chop(utf8_buffer, nl);
13709             break;
13710         }
13711
13712         /* OK, not a complete line there, so need to read some more UTF-16.
13713            Read an extra octect if the buffer currently has an odd number. */
13714         while (1) {
13715             if (status <= 0)
13716                 break;
13717             if (SvCUR(utf16_buffer) >= 2) {
13718                 /* Location of the high octet of the last complete code point.
13719                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13720                    *coupled* with all the benefits of partial reads and
13721                    endianness.  */
13722                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13723                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13724
13725                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13726                     break;
13727                 }
13728
13729                 /* We have the first half of a surrogate. Read more.  */
13730                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13731             }
13732
13733             status = FILTER_READ(idx + 1, utf16_buffer,
13734                                  160 + (SvCUR(utf16_buffer) & 1));
13735             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13736             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13737             if (status < 0) {
13738                 /* Error */
13739                 IoPAGE(filter) = status;
13740                 return status;
13741             }
13742         }
13743
13744         chars = SvCUR(utf16_buffer) >> 1;
13745         have = SvCUR(utf8_buffer);
13746         SvGROW(utf8_buffer, have + chars * 3 + 1);
13747
13748         if (reverse) {
13749             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13750                                          (U8*)SvPVX_const(utf8_buffer) + have,
13751                                          chars * 2, &newlen);
13752         } else {
13753             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13754                                 (U8*)SvPVX_const(utf8_buffer) + have,
13755                                 chars * 2, &newlen);
13756         }
13757         SvCUR_set(utf8_buffer, have + newlen);
13758         *end = '\0';
13759
13760         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13761            it's private to us, and utf16_to_utf8{,reversed} take a
13762            (pointer,length) pair, rather than a NUL-terminated string.  */
13763         if(SvCUR(utf16_buffer) & 1) {
13764             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13765             SvCUR_set(utf16_buffer, 1);
13766         } else {
13767             SvCUR_set(utf16_buffer, 0);
13768         }
13769     }
13770     DEBUG_P(PerlIO_printf(Perl_debug_log,
13771                           "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13772                           status,
13773                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13774     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13775     return retval;
13776 }
13777
13778 static U8 *
13779 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13780 {
13781     SV *filter = filter_add(S_utf16_textfilter, NULL);
13782
13783     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13784
13785     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13786     sv_setpvs(filter, "");
13787     IoLINES(filter) = reversed;
13788     IoPAGE(filter) = 1; /* Not EOF */
13789
13790     /* Sadly, we have to return a valid pointer, come what may, so we have to
13791        ignore any error return from this.  */
13792     SvCUR_set(PL_linestr, 0);
13793     if (FILTER_READ(0, PL_linestr, 0)) {
13794         SvUTF8_on(PL_linestr);
13795     } else {
13796         SvUTF8_on(PL_linestr);
13797     }
13798     PL_bufend = SvEND(PL_linestr);
13799     return (U8*)SvPVX(PL_linestr);
13800 }
13801 #endif
13802
13803 /*
13804 Returns a pointer to the next character after the parsed
13805 vstring, as well as updating the passed in sv.
13806
13807 Function must be called like
13808
13809         sv = newSV(5);
13810         s = scan_vstring(s,e,sv);
13811
13812 where s and e are the start and end of the string.
13813 The sv should already be large enough to store the vstring
13814 passed in, for performance reasons.
13815
13816 */
13817
13818 char *
13819 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13820 {
13821     dVAR;
13822     const char *pos = s;
13823     const char *start = s;
13824
13825     PERL_ARGS_ASSERT_SCAN_VSTRING;
13826
13827     if (*pos == 'v') pos++;  /* get past 'v' */
13828     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13829         pos++;
13830     if ( *pos != '.') {
13831         /* this may not be a v-string if followed by => */
13832         const char *next = pos;
13833         while (next < e && isSPACE(*next))
13834             ++next;
13835         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13836             /* return string not v-string */
13837             sv_setpvn(sv,(char *)s,pos-s);
13838             return (char *)pos;
13839         }
13840     }
13841
13842     if (!isALPHA(*pos)) {
13843         U8 tmpbuf[UTF8_MAXBYTES+1];
13844
13845         if (*s == 'v')
13846             s++;  /* get past 'v' */
13847
13848         sv_setpvs(sv, "");
13849
13850         for (;;) {
13851             /* this is atoi() that tolerates underscores */
13852             U8 *tmpend;
13853             UV rev = 0;
13854             const char *end = pos;
13855             UV mult = 1;
13856             while (--end >= s) {
13857                 if (*end != '_') {
13858                     const UV orev = rev;
13859                     rev += (*end - '0') * mult;
13860                     mult *= 10;
13861                     if (orev > rev)
13862                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13863                                          "Integer overflow in decimal number");
13864                 }
13865             }
13866 #ifdef EBCDIC
13867             if (rev > 0x7FFFFFFF)
13868                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13869 #endif
13870             /* Append native character for the rev point */
13871             tmpend = uvchr_to_utf8(tmpbuf, rev);
13872             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13873             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13874                  SvUTF8_on(sv);
13875             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13876                  s = ++pos;
13877             else {
13878                  s = pos;
13879                  break;
13880             }
13881             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13882                  pos++;
13883         }
13884         SvPOK_on(sv);
13885         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13886         SvRMAGICAL_on(sv);
13887     }
13888     return (char *)s;
13889 }
13890
13891 int
13892 Perl_keyword_plugin_standard(pTHX_
13893         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13894 {
13895     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13896     PERL_UNUSED_CONTEXT;
13897     PERL_UNUSED_ARG(keyword_ptr);
13898     PERL_UNUSED_ARG(keyword_len);
13899     PERL_UNUSED_ARG(op_ptr);
13900     return KEYWORD_PLUGIN_DECLINE;
13901 }
13902
13903 /*
13904  * Local variables:
13905  * c-indentation-style: bsd
13906  * c-basic-offset: 4
13907  * indent-tabs-mode: t
13908  * End:
13909  *
13910  * ex: set ts=8 sts=4 sw=4 noet:
13911  */