toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 #include "EXTERN.h"
  25 #define PERL_IN_TOKE_C
  26 #include "perl.h"
  27
  28 #define new_constant(a,b,c,d,e,f,g)     \
  29         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
  30
  31 #define pl_yylval       (PL_parser->yylval)
  32
  33 /* YYINITDEPTH -- initial size of the parser's stacks.  */
  34 #define YYINITDEPTH 200
  35
  36 /* XXX temporary backwards compatibility */
  37 #define PL_lex_brackets         (PL_parser->lex_brackets)
  38 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  39 #define PL_lex_casemods         (PL_parser->lex_casemods)
  40 #define PL_lex_casestack        (PL_parser->lex_casestack)
  41 #define PL_lex_defer            (PL_parser->lex_defer)
  42 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  43 #define PL_lex_expect           (PL_parser->lex_expect)
  44 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  45 #define PL_lex_inpat            (PL_parser->lex_inpat)
  46 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  47 #define PL_lex_op               (PL_parser->lex_op)
  48 #define PL_lex_repl             (PL_parser->lex_repl)
  49 #define PL_lex_starts           (PL_parser->lex_starts)
  50 #define PL_lex_stuff            (PL_parser->lex_stuff)
  51 #define PL_multi_start          (PL_parser->multi_start)
  52 #define PL_multi_open           (PL_parser->multi_open)
  53 #define PL_multi_close          (PL_parser->multi_close)
  54 #define PL_pending_ident        (PL_parser->pending_ident)
  55 #define PL_preambled            (PL_parser->preambled)
  56 #define PL_sublex_info          (PL_parser->sublex_info)
  57 #define PL_linestr              (PL_parser->linestr)
  58 #define PL_expect               (PL_parser->expect)
  59 #define PL_copline              (PL_parser->copline)
  60 #define PL_bufptr               (PL_parser->bufptr)
  61 #define PL_oldbufptr            (PL_parser->oldbufptr)
  62 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  63 #define PL_linestart            (PL_parser->linestart)
  64 #define PL_bufend               (PL_parser->bufend)
  65 #define PL_last_uni             (PL_parser->last_uni)
  66 #define PL_last_lop             (PL_parser->last_lop)
  67 #define PL_last_lop_op          (PL_parser->last_lop_op)
  68 #define PL_lex_state            (PL_parser->lex_state)
  69 #define PL_rsfp                 (PL_parser->rsfp)
  70 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  71 #define PL_in_my                (PL_parser->in_my)
  72 #define PL_in_my_stash          (PL_parser->in_my_stash)
  73 #define PL_tokenbuf             (PL_parser->tokenbuf)
  74 #define PL_multi_end            (PL_parser->multi_end)
  75 #define PL_error_count          (PL_parser->error_count)
  76
  77 #ifdef PERL_MAD
  78 #  define PL_endwhite           (PL_parser->endwhite)
  79 #  define PL_faketokens         (PL_parser->faketokens)
  80 #  define PL_lasttoke           (PL_parser->lasttoke)
  81 #  define PL_nextwhite          (PL_parser->nextwhite)
  82 #  define PL_realtokenstart     (PL_parser->realtokenstart)
  83 #  define PL_skipwhite          (PL_parser->skipwhite)
  84 #  define PL_thisclose          (PL_parser->thisclose)
  85 #  define PL_thismad            (PL_parser->thismad)
  86 #  define PL_thisopen           (PL_parser->thisopen)
  87 #  define PL_thisstuff          (PL_parser->thisstuff)
  88 #  define PL_thistoken          (PL_parser->thistoken)
  89 #  define PL_thiswhite          (PL_parser->thiswhite)
  90 #  define PL_thiswhite          (PL_parser->thiswhite)
  91 #  define PL_nexttoke           (PL_parser->nexttoke)
  92 #  define PL_curforce           (PL_parser->curforce)
  93 #else
  94 #  define PL_nexttoke           (PL_parser->nexttoke)
  95 #  define PL_nexttype           (PL_parser->nexttype)
  96 #  define PL_nextval            (PL_parser->nextval)
  97 #endif
  98
  99 static int
 100 S_pending_ident(pTHX);
 101
 102 static const char ident_too_long[] = "Identifier too long";
 103 static const char commaless_variable_list[] = "comma-less variable list";
 104
 105 #ifndef PERL_NO_UTF16_FILTER
 106 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 107 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 108 #endif
 109
 110 #ifdef PERL_MAD
 111 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
 112 #  define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
 113 #else
 114 #  define CURMAD(slot,sv)
 115 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 116 #endif
 117
 118 #define XFAKEBRACK 128
 119 #define XENUMMASK 127
 120
 121 #ifdef USE_UTF8_SCRIPTS
 122 #   define UTF (!IN_BYTES)
 123 #else
 124 #   define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
 125 #endif
 126
 127 /* The maximum number of characters preceding the unrecognized one to display */
 128 #define UNRECOGNIZED_PRECEDE_COUNT 10
 129
 130 /* In variables named $^X, these are the legal values for X.
 131  * 1999-02-27 mjd-perl-patch@plover.com */
 132 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
 133
 134 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
 135
 136 /* LEX_* are values for PL_lex_state, the state of the lexer.
 137  * They are arranged oddly so that the guard on the switch statement
 138  * can get by with a single comparison (if the compiler is smart enough).
 139  */
 140
 141 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 142
 143 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 144 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 145 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 146 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 147 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 148
 149                                    /* at end of code, eg "$x" followed by:  */
 150 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 151 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 152
 153 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 154                                         string or after \E, $foo, etc       */
 155 #define LEX_INTERPCONST          2 /* NOT USED */
 156 #define LEX_FORMLINE             1 /* expecting a format line               */
 157 #define LEX_KNOWNEXT             0 /* next token known; just return it      */
 158
 159
 160 #ifdef DEBUGGING
 161 static const char* const lex_state_names[] = {
 162     "KNOWNEXT",
 163     "FORMLINE",
 164     "INTERPCONST",
 165     "INTERPCONCAT",
 166     "INTERPENDMAYBE",
 167     "INTERPEND",
 168     "INTERPSTART",
 169     "INTERPPUSH",
 170     "INTERPCASEMOD",
 171     "INTERPNORMAL",
 172     "NORMAL"
 173 };
 174 #endif
 175
 176 #ifdef ff_next
 177 #undef ff_next
 178 #endif
 179
 180 #include "keywords.h"
 181
 182 /* CLINE is a macro that ensures PL_copline has a sane value */
 183
 184 #ifdef CLINE
 185 #undef CLINE
 186 #endif
 187 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 188
 189 #ifdef PERL_MAD
 190 #  define SKIPSPACE0(s) skipspace0(s)
 191 #  define SKIPSPACE1(s) skipspace1(s)
 192 #  define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
 193 #  define PEEKSPACE(s) skipspace2(s,0)
 194 #else
 195 #  define SKIPSPACE0(s) skipspace(s)
 196 #  define SKIPSPACE1(s) skipspace(s)
 197 #  define SKIPSPACE2(s,tsv) skipspace(s)
 198 #  define PEEKSPACE(s) skipspace(s)
 199 #endif
 200
 201 /*
 202  * Convenience functions to return different tokens and prime the
 203  * lexer for the next token.  They all take an argument.
 204  *
 205  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 206  * OPERATOR     : generic operator
 207  * AOPERATOR    : assignment operator
 208  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 209  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 210  * PREREF       : *EXPR where EXPR is not a simple identifier
 211  * TERM         : expression term
 212  * LOOPX        : loop exiting command (goto, last, dump, etc)
 213  * FTST         : file test operator
 214  * FUN0         : zero-argument function
 215  * FUN1         : not used, except for not, which isn't a UNIOP
 216  * BOop         : bitwise or or xor
 217  * BAop         : bitwise and
 218  * SHop         : shift operator
 219  * PWop         : power operator
 220  * PMop         : pattern-matching operator
 221  * Aop          : addition-level operator
 222  * Mop          : multiplication-level operator
 223  * Eop          : equality-testing operator
 224  * Rop          : relational operator <= != gt
 225  *
 226  * Also see LOP and lop() below.
 227  */
 228
 229 #ifdef DEBUGGING /* Serve -DT. */
 230 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 231 #else
 232 #   define REPORT(retval) (retval)
 233 #endif
 234
 235 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 236 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 237 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
 238 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 239 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 240 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 241 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 242 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
 243 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 244 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 245 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 246 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
 247 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
 248 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
 249 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
 250 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 251 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
 252 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
 253 #define Eop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
 254 #define Rop(f)   return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 255
 256 /* This bit of chicanery makes a unary function followed by
 257  * a parenthesis into a function with one argument, highest precedence.
 258  * The UNIDOR macro is for unary functions that can be followed by the //
 259  * operator (such as C<shift // 0>).
 260  */
 261 #define UNI2(f,x) { \
 262         pl_yylval.ival = f; \
 263         PL_expect = x; \
 264         PL_bufptr = s; \
 265         PL_last_uni = PL_oldbufptr; \
 266         PL_last_lop_op = f; \
 267         if (*s == '(') \
 268             return REPORT( (int)FUNC1 ); \
 269         s = PEEKSPACE(s); \
 270         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 271         }
 272 #define UNI(f)    UNI2(f,XTERM)
 273 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 274
 275 #define UNIBRACK(f) { \
 276         pl_yylval.ival = f; \
 277         PL_bufptr = s; \
 278         PL_last_uni = PL_oldbufptr; \
 279         if (*s == '(') \
 280             return REPORT( (int)FUNC1 ); \
 281         s = PEEKSPACE(s); \
 282         return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
 283         }
 284
 285 /* grandfather return to old style */
 286 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 287
 288 #ifdef DEBUGGING
 289
 290 /* how to interpret the pl_yylval associated with the token */
 291 enum token_type {
 292     TOKENTYPE_NONE,
 293     TOKENTYPE_IVAL,
 294     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 295     TOKENTYPE_PVAL,
 296     TOKENTYPE_OPVAL,
 297     TOKENTYPE_GVVAL
 298 };
 299
 300 static struct debug_tokens {
 301     const int token;
 302     enum token_type type;
 303     const char *name;
 304 } const debug_tokens[] =
 305 {
 306     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 307     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 308     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 309     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 310     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 311     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 312     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 313     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 314     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 315     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 316     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 317     { DO,               TOKENTYPE_NONE,         "DO" },
 318     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 319     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 320     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 321     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 322     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 323     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 324     { EQOP,             TOKENTYPE_OPNUM,        "EQOP" },
 325     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 326     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 327     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 328     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 329     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 330     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 331     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 332     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 333     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 334     { IF,               TOKENTYPE_IVAL,         "IF" },
 335     { LABEL,            TOKENTYPE_PVAL,         "LABEL" },
 336     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 337     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 338     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 339     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 340     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 341     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 342     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 343     { MY,               TOKENTYPE_IVAL,         "MY" },
 344     { MYSUB,            TOKENTYPE_NONE,         "MYSUB" },
 345     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 346     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 347     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 348     { OROR,             TOKENTYPE_NONE,         "OROR" },
 349     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 350     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 351     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 352     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 353     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 354     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 355     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 356     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 357     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 358     { RELOP,            TOKENTYPE_OPNUM,        "RELOP" },
 359     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 360     { SUB,              TOKENTYPE_NONE,         "SUB" },
 361     { THING,            TOKENTYPE_OPVAL,        "THING" },
 362     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 363     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 364     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 365     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 366     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 367     { USE,              TOKENTYPE_IVAL,         "USE" },
 368     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 369     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 370     { WORD,             TOKENTYPE_OPVAL,        "WORD" },
 371     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 372     { 0,                TOKENTYPE_NONE,         NULL }
 373 };
 374
 375 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 376
 377 STATIC int
 378 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 379 {
 380     dVAR;
 381
 382     PERL_ARGS_ASSERT_TOKEREPORT;
 383
 384     if (DEBUG_T_TEST) {
 385         const char *name = NULL;
 386         enum token_type type = TOKENTYPE_NONE;
 387         const struct debug_tokens *p;
 388         SV* const report = newSVpvs("<== ");
 389
 390         for (p = debug_tokens; p->token; p++) {
 391             if (p->token == (int)rv) {
 392                 name = p->name;
 393                 type = p->type;
 394                 break;
 395             }
 396         }
 397         if (name)
 398             Perl_sv_catpv(aTHX_ report, name);
 399         else if ((char)rv > ' ' && (char)rv < '~')
 400             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 401         else if (!rv)
 402             sv_catpvs(report, "EOF");
 403         else
 404             Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
 405         switch (type) {
 406         case TOKENTYPE_NONE:
 407         case TOKENTYPE_GVVAL: /* doesn't appear to be used */
 408             break;
 409         case TOKENTYPE_IVAL:
 410             Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
 411             break;
 412         case TOKENTYPE_OPNUM:
 413             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 414                                     PL_op_name[lvalp->ival]);
 415             break;
 416         case TOKENTYPE_PVAL:
 417             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 418             break;
 419         case TOKENTYPE_OPVAL:
 420             if (lvalp->opval) {
 421                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 422                                     PL_op_name[lvalp->opval->op_type]);
 423                 if (lvalp->opval->op_type == OP_CONST) {
 424                     Perl_sv_catpvf(aTHX_ report, " %s",
 425                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 426                 }
 427
 428             }
 429             else
 430                 sv_catpvs(report, "(opval=null)");
 431             break;
 432         }
 433         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 434     };
 435     return (int)rv;
 436 }
 437
 438
 439 /* print the buffer with suitable escapes */
 440
 441 STATIC void
 442 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 443 {
 444     SV* const tmp = newSVpvs("");
 445
 446     PERL_ARGS_ASSERT_PRINTBUF;
 447
 448     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 449     SvREFCNT_dec(tmp);
 450 }
 451
 452 #endif
 453
 454 /*
 455  * S_ao
 456  *
 457  * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
 458  * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
 459  */
 460
 461 STATIC int
 462 S_ao(pTHX_ int toketype)
 463 {
 464     dVAR;
 465     if (*PL_bufptr == '=') {
 466         PL_bufptr++;
 467         if (toketype == ANDAND)
 468             pl_yylval.ival = OP_ANDASSIGN;
 469         else if (toketype == OROR)
 470             pl_yylval.ival = OP_ORASSIGN;
 471         else if (toketype == DORDOR)
 472             pl_yylval.ival = OP_DORASSIGN;
 473         toketype = ASSIGNOP;
 474     }
 475     return toketype;
 476 }
 477
 478 /*
 479  * S_no_op
 480  * When Perl expects an operator and finds something else, no_op
 481  * prints the warning.  It always prints "<something> found where
 482  * operator expected.  It prints "Missing semicolon on previous line?"
 483  * if the surprise occurs at the start of the line.  "do you need to
 484  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 485  * where the compiler doesn't know if foo is a method call or a function.
 486  * It prints "Missing operator before end of line" if there's nothing
 487  * after the missing operator, or "... before <...>" if there is something
 488  * after the missing operator.
 489  */
 490
 491 STATIC void
 492 S_no_op(pTHX_ const char *const what, char *s)
 493 {
 494     dVAR;
 495     char * const oldbp = PL_bufptr;
 496     const bool is_first = (PL_oldbufptr == PL_linestart);
 497
 498     PERL_ARGS_ASSERT_NO_OP;
 499
 500     if (!s)
 501         s = oldbp;
 502     else
 503         PL_bufptr = s;
 504     yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
 505     if (ckWARN_d(WARN_SYNTAX)) {
 506         if (is_first)
 507             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 508                     "\t(Missing semicolon on previous line?)\n");
 509         else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
 510             const char *t;
 511             for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
 512                 NOOP;
 513             if (t < PL_bufptr && isSPACE(*t))
 514                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 515                         "\t(Do you need to predeclare %.*s?)\n",
 516                     (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
 517         }
 518         else {
 519             assert(s >= oldbp);
 520             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 521                     "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
 522         }
 523     }
 524     PL_bufptr = oldbp;
 525 }
 526
 527 /*
 528  * S_missingterm
 529  * Complain about missing quote/regexp/heredoc terminator.
 530  * If it's called with NULL then it cauterizes the line buffer.
 531  * If we're in a delimited string and the delimiter is a control
 532  * character, it's reformatted into a two-char sequence like ^C.
 533  * This is fatal.
 534  */
 535
 536 STATIC void
 537 S_missingterm(pTHX_ char *s)
 538 {
 539     dVAR;
 540     char tmpbuf[3];
 541     char q;
 542     if (s) {
 543         char * const nl = strrchr(s,'\n');
 544         if (nl)
 545             *nl = '\0';
 546     }
 547     else if (isCNTRL(PL_multi_close)) {
 548         *tmpbuf = '^';
 549         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 550         tmpbuf[2] = '\0';
 551         s = tmpbuf;
 552     }
 553     else {
 554         *tmpbuf = (char)PL_multi_close;
 555         tmpbuf[1] = '\0';
 556         s = tmpbuf;
 557     }
 558     q = strchr(s,'"') ? '\'' : '"';
 559     Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
 560 }
 561
 562 #define FEATURE_IS_ENABLED(name)                                        \
 563         ((0 != (PL_hints & HINT_LOCALIZE_HH))                           \
 564             && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
 565 /* The longest string we pass in.  */
 566 #define MAX_FEATURE_LEN (sizeof("switch")-1)
 567
 568 /*
 569  * S_feature_is_enabled
 570  * Check whether the named feature is enabled.
 571  */
 572 STATIC bool
 573 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
 574 {
 575     dVAR;
 576     HV * const hinthv = GvHV(PL_hintgv);
 577     char he_name[8 + MAX_FEATURE_LEN] = "feature_";
 578
 579     PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
 580
 581     assert(namelen <= MAX_FEATURE_LEN);
 582     memcpy(&he_name[8], name, namelen);
 583
 584     return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
 585 }
 586
 587 /*
 588  * Perl_deprecate
 589  */
 590
 591 void
 592 Perl_deprecate(pTHX_ const char *const s)
 593 {
 594     PERL_ARGS_ASSERT_DEPRECATE;
 595
 596     Perl_ck_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
 597 }
 598
 599 static void
 600 S_deprecate_old(pTHX_ const char *const s)
 601 {
 602     /* This function should NOT be called for any new deprecated warnings */
 603     /* Use Perl_deprecate instead                                         */
 604     /*                                                                    */
 605     /* It is here to maintain backward compatibility with the pre-5.8     */
 606     /* warnings category hierarchy. The "deprecated" category used to     */
 607     /* live under the "syntax" category. It is now a top-level category   */
 608     /* in its own right.                                                  */
 609
 610     PERL_ARGS_ASSERT_DEPRECATE_OLD;
 611
 612     Perl_ck_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
 613                    "Use of %s is deprecated", s);
 614 }
 615
 616 /*
 617  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 618  * utf16-to-utf8-reversed.
 619  */
 620
 621 #ifdef PERL_CR_FILTER
 622 static void
 623 strip_return(SV *sv)
 624 {
 625     register const char *s = SvPVX_const(sv);
 626     register const char * const e = s + SvCUR(sv);
 627
 628     PERL_ARGS_ASSERT_STRIP_RETURN;
 629
 630     /* outer loop optimized to do nothing if there are no CR-LFs */
 631     while (s < e) {
 632         if (*s++ == '\r' && *s == '\n') {
 633             /* hit a CR-LF, need to copy the rest */
 634             register char *d = s - 1;
 635             *d++ = *s++;
 636             while (s < e) {
 637                 if (*s == '\r' && s[1] == '\n')
 638                     s++;
 639                 *d++ = *s++;
 640             }
 641             SvCUR(sv) -= s - d;
 642             return;
 643         }
 644     }
 645 }
 646
 647 STATIC I32
 648 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 649 {
 650     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 651     if (count > 0 && !maxlen)
 652         strip_return(sv);
 653     return count;
 654 }
 655 #endif
 656
 657
 658
 659 /*
 660  * Perl_lex_start
 661  *
 662  * Create a parser object and initialise its parser and lexer fields
 663  *
 664  * rsfp       is the opened file handle to read from (if any),
 665  *
 666  * line       holds any initial content already read from the file (or in
 667  *            the case of no file, such as an eval, the whole contents);
 668  *
 669  * new_filter indicates that this is a new file and it shouldn't inherit
 670  *            the filters from the current parser (ie require).
 671  */
 672
 673 void
 674 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
 675 {
 676     dVAR;
 677     const char *s = NULL;
 678     STRLEN len;
 679     yy_parser *parser, *oparser;
 680
 681     /* create and initialise a parser */
 682
 683     Newxz(parser, 1, yy_parser);
 684     parser->old_parser = oparser = PL_parser;
 685     PL_parser = parser;
 686
 687     Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
 688     parser->ps = parser->stack;
 689     parser->stack_size = YYINITDEPTH;
 690
 691     parser->stack->state = 0;
 692     parser->yyerrstatus = 0;
 693     parser->yychar = YYEMPTY;           /* Cause a token to be read.  */
 694
 695     /* on scope exit, free this parser and restore any outer one */
 696     SAVEPARSER(parser);
 697     parser->saved_curcop = PL_curcop;
 698
 699     /* initialise lexer state */
 700
 701 #ifdef PERL_MAD
 702     parser->curforce = -1;
 703 #else
 704     parser->nexttoke = 0;
 705 #endif
 706     parser->error_count = oparser ? oparser->error_count : 0;
 707     parser->copline = NOLINE;
 708     parser->lex_state = LEX_NORMAL;
 709     parser->expect = XSTATE;
 710     parser->rsfp = rsfp;
 711     parser->rsfp_filters = (new_filter || !oparser) ? newAV()
 712                 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
 713
 714     Newx(parser->lex_brackstack, 120, char);
 715     Newx(parser->lex_casestack, 12, char);
 716     *parser->lex_casestack = '\0';
 717
 718     if (line) {
 719         s = SvPV_const(line, len);
 720     } else {
 721         len = 0;
 722     }
 723
 724     if (!len) {
 725         parser->linestr = newSVpvs("\n;");
 726     } else if (SvREADONLY(line) || s[len-1] != ';') {
 727         parser->linestr = newSVsv(line);
 728         if (s[len-1] != ';')
 729             sv_catpvs(parser->linestr, "\n;");
 730     } else {
 731         SvTEMP_off(line);
 732         SvREFCNT_inc_simple_void_NN(line);
 733         parser->linestr = line;
 734     }
 735     parser->oldoldbufptr =
 736         parser->oldbufptr =
 737         parser->bufptr =
 738         parser->linestart = SvPVX(parser->linestr);
 739     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 740     parser->last_lop = parser->last_uni = NULL;
 741 }
 742
 743
 744 /* delete a parser object */
 745
 746 void
 747 Perl_parser_free(pTHX_  const yy_parser *parser)
 748 {
 749     PERL_ARGS_ASSERT_PARSER_FREE;
 750
 751     PL_curcop = parser->saved_curcop;
 752     SvREFCNT_dec(parser->linestr);
 753
 754     if (parser->rsfp == PerlIO_stdin())
 755         PerlIO_clearerr(parser->rsfp);
 756     else if (parser->rsfp && (!parser->old_parser ||
 757                 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 758         PerlIO_close(parser->rsfp);
 759     SvREFCNT_dec(parser->rsfp_filters);
 760
 761     Safefree(parser->stack);
 762     Safefree(parser->lex_brackstack);
 763     Safefree(parser->lex_casestack);
 764     PL_parser = parser->old_parser;
 765     Safefree(parser);
 766 }
 767
 768
 769 /*
 770  * Perl_lex_end
 771  * Finalizer for lexing operations.  Must be called when the parser is
 772  * done with the lexer.
 773  */
 774
 775 void
 776 Perl_lex_end(pTHX)
 777 {
 778     dVAR;
 779     PL_doextract = FALSE;
 780 }
 781
 782 /*
 783  * S_incline
 784  * This subroutine has nothing to do with tilting, whether at windmills
 785  * or pinball tables.  Its name is short for "increment line".  It
 786  * increments the current line number in CopLINE(PL_curcop) and checks
 787  * to see whether the line starts with a comment of the form
 788  *    # line 500 "foo.pm"
 789  * If so, it sets the current line number and file to the values in the comment.
 790  */
 791
 792 STATIC void
 793 S_incline(pTHX_ const char *s)
 794 {
 795     dVAR;
 796     const char *t;
 797     const char *n;
 798     const char *e;
 799
 800     PERL_ARGS_ASSERT_INCLINE;
 801
 802     CopLINE_inc(PL_curcop);
 803     if (*s++ != '#')
 804         return;
 805     while (SPACE_OR_TAB(*s))
 806         s++;
 807     if (strnEQ(s, "line", 4))
 808         s += 4;
 809     else
 810         return;
 811     if (SPACE_OR_TAB(*s))
 812         s++;
 813     else
 814         return;
 815     while (SPACE_OR_TAB(*s))
 816         s++;
 817     if (!isDIGIT(*s))
 818         return;
 819
 820     n = s;
 821     while (isDIGIT(*s))
 822         s++;
 823     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
 824         return;
 825     while (SPACE_OR_TAB(*s))
 826         s++;
 827     if (*s == '"' && (t = strchr(s+1, '"'))) {
 828         s++;
 829         e = t + 1;
 830     }
 831     else {
 832         t = s;
 833         while (!isSPACE(*t))
 834             t++;
 835         e = t;
 836     }
 837     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
 838         e++;
 839     if (*e != '\n' && *e != '\0')
 840         return;         /* false alarm */
 841
 842     if (t - s > 0) {
 843         const STRLEN len = t - s;
 844 #ifndef USE_ITHREADS
 845         SV *const temp_sv = CopFILESV(PL_curcop);
 846         const char *cf;
 847         STRLEN tmplen;
 848
 849         if (temp_sv) {
 850             cf = SvPVX(temp_sv);
 851             tmplen = SvCUR(temp_sv);
 852         } else {
 853             cf = NULL;
 854             tmplen = 0;
 855         }
 856
 857         if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
 858             /* must copy *{"::_<(eval N)[oldfilename:L]"}
 859              * to *{"::_<newfilename"} */
 860             /* However, the long form of evals is only turned on by the
 861                debugger - usually they're "(eval %lu)" */
 862             char smallbuf[128];
 863             char *tmpbuf;
 864             GV **gvp;
 865             STRLEN tmplen2 = len;
 866             if (tmplen + 2 <= sizeof smallbuf)
 867                 tmpbuf = smallbuf;
 868             else
 869                 Newx(tmpbuf, tmplen + 2, char);
 870             tmpbuf[0] = '_';
 871             tmpbuf[1] = '<';
 872             memcpy(tmpbuf + 2, cf, tmplen);
 873             tmplen += 2;
 874             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
 875             if (gvp) {
 876                 char *tmpbuf2;
 877                 GV *gv2;
 878
 879                 if (tmplen2 + 2 <= sizeof smallbuf)
 880                     tmpbuf2 = smallbuf;
 881                 else
 882                     Newx(tmpbuf2, tmplen2 + 2, char);
 883
 884                 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
 885                     /* Either they malloc'd it, or we malloc'd it,
 886                        so no prefix is present in ours.  */
 887                     tmpbuf2[0] = '_';
 888                     tmpbuf2[1] = '<';
 889                 }
 890
 891                 memcpy(tmpbuf2 + 2, s, tmplen2);
 892                 tmplen2 += 2;
 893
 894                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
 895                 if (!isGV(gv2)) {
 896                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
 897                     /* adjust ${"::_<newfilename"} to store the new file name */
 898                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
 899                     GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
 900                     GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
 901                 }
 902
 903                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
 904             }
 905             if (tmpbuf != smallbuf) Safefree(tmpbuf);
 906         }
 907 #endif
 908         CopFILE_free(PL_curcop);
 909         CopFILE_setn(PL_curcop, s, len);
 910     }
 911     CopLINE_set(PL_curcop, atoi(n)-1);
 912 }
 913
 914 #ifdef PERL_MAD
 915 /* skip space before PL_thistoken */
 916
 917 STATIC char *
 918 S_skipspace0(pTHX_ register char *s)
 919 {
 920     PERL_ARGS_ASSERT_SKIPSPACE0;
 921
 922     s = skipspace(s);
 923     if (!PL_madskills)
 924         return s;
 925     if (PL_skipwhite) {
 926         if (!PL_thiswhite)
 927             PL_thiswhite = newSVpvs("");
 928         sv_catsv(PL_thiswhite, PL_skipwhite);
 929         sv_free(PL_skipwhite);
 930         PL_skipwhite = 0;
 931     }
 932     PL_realtokenstart = s - SvPVX(PL_linestr);
 933     return s;
 934 }
 935
 936 /* skip space after PL_thistoken */
 937
 938 STATIC char *
 939 S_skipspace1(pTHX_ register char *s)
 940 {
 941     const char *start = s;
 942     I32 startoff = start - SvPVX(PL_linestr);
 943
 944     PERL_ARGS_ASSERT_SKIPSPACE1;
 945
 946     s = skipspace(s);
 947     if (!PL_madskills)
 948         return s;
 949     start = SvPVX(PL_linestr) + startoff;
 950     if (!PL_thistoken && PL_realtokenstart >= 0) {
 951         const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
 952         PL_thistoken = newSVpvn(tstart, start - tstart);
 953     }
 954     PL_realtokenstart = -1;
 955     if (PL_skipwhite) {
 956         if (!PL_nextwhite)
 957             PL_nextwhite = newSVpvs("");
 958         sv_catsv(PL_nextwhite, PL_skipwhite);
 959         sv_free(PL_skipwhite);
 960         PL_skipwhite = 0;
 961     }
 962     return s;
 963 }
 964
 965 STATIC char *
 966 S_skipspace2(pTHX_ register char *s, SV **svp)
 967 {
 968     char *start;
 969     const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
 970     const I32 startoff = s - SvPVX(PL_linestr);
 971
 972     PERL_ARGS_ASSERT_SKIPSPACE2;
 973
 974     s = skipspace(s);
 975     PL_bufptr = SvPVX(PL_linestr) + bufptroff;
 976     if (!PL_madskills || !svp)
 977         return s;
 978     start = SvPVX(PL_linestr) + startoff;
 979     if (!PL_thistoken && PL_realtokenstart >= 0) {
 980         char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
 981         PL_thistoken = newSVpvn(tstart, start - tstart);
 982         PL_realtokenstart = -1;
 983     }
 984     if (PL_skipwhite) {
 985         if (!*svp)
 986             *svp = newSVpvs("");
 987         sv_setsv(*svp, PL_skipwhite);
 988         sv_free(PL_skipwhite);
 989         PL_skipwhite = 0;
 990     }
 991
 992     return s;
 993 }
 994 #endif
 995
 996 STATIC void
 997 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
 998 {
 999     AV *av = CopFILEAVx(PL_curcop);
1000     if (av) {
1001         SV * const sv = newSV_type(SVt_PVMG);
1002         if (orig_sv)
1003             sv_setsv(sv, orig_sv);
1004         else
1005             sv_setpvn(sv, buf, len);
1006         (void)SvIOK_on(sv);
1007         SvIV_set(sv, 0);
1008         av_store(av, (I32)CopLINE(PL_curcop), sv);
1009     }
1010 }
1011
1012 /*
1013  * S_skipspace
1014  * Called to gobble the appropriate amount and type of whitespace.
1015  * Skips comments as well.
1016  */
1017
1018 STATIC char *
1019 S_skipspace(pTHX_ register char *s)
1020 {
1021     dVAR;
1022 #ifdef PERL_MAD
1023     int curoff;
1024     int startoff = s - SvPVX(PL_linestr);
1025
1026     PERL_ARGS_ASSERT_SKIPSPACE;
1027
1028     if (PL_skipwhite) {
1029         sv_free(PL_skipwhite);
1030         PL_skipwhite = 0;
1031     }
1032 #endif
1033     PERL_ARGS_ASSERT_SKIPSPACE;
1034
1035     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1036         while (s < PL_bufend && SPACE_OR_TAB(*s))
1037             s++;
1038 #ifdef PERL_MAD
1039         goto done;
1040 #else
1041         return s;
1042 #endif
1043     }
1044     for (;;) {
1045         STRLEN prevlen;
1046         SSize_t oldprevlen, oldoldprevlen;
1047         SSize_t oldloplen = 0, oldunilen = 0;
1048         while (s < PL_bufend && isSPACE(*s)) {
1049             if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
1050                 incline(s);
1051         }
1052
1053         /* comment */
1054         if (s < PL_bufend && *s == '#') {
1055             while (s < PL_bufend && *s != '\n')
1056                 s++;
1057             if (s < PL_bufend) {
1058                 s++;
1059                 if (PL_in_eval && !PL_rsfp) {
1060                     incline(s);
1061                     continue;
1062                 }
1063             }
1064         }
1065
1066         /* only continue to recharge the buffer if we're at the end
1067          * of the buffer, we're not reading from a source filter, and
1068          * we're in normal lexing mode
1069          */
1070         if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
1071                 PL_lex_state == LEX_FORMLINE)
1072 #ifdef PERL_MAD
1073             goto done;
1074 #else
1075             return s;
1076 #endif
1077
1078         /* try to recharge the buffer */
1079 #ifdef PERL_MAD
1080         curoff = s - SvPVX(PL_linestr);
1081 #endif
1082
1083         if ((s = filter_gets(PL_linestr, PL_rsfp,
1084                              (prevlen = SvCUR(PL_linestr)))) == NULL)
1085         {
1086 #ifdef PERL_MAD
1087             if (PL_madskills && curoff != startoff) {
1088                 if (!PL_skipwhite)
1089                     PL_skipwhite = newSVpvs("");
1090                 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1091                                         curoff - startoff);
1092             }
1093
1094             /* mustn't throw out old stuff yet if madpropping */
1095             SvCUR(PL_linestr) = curoff;
1096             s = SvPVX(PL_linestr) + curoff;
1097             *s = 0;
1098             if (curoff && s[-1] == '\n')
1099                 s[-1] = ' ';
1100 #endif
1101
1102             /* end of file.  Add on the -p or -n magic */
1103             /* XXX these shouldn't really be added here, can't set PL_faketokens */
1104             if (PL_minus_p) {
1105 #ifdef PERL_MAD
1106                 sv_catpvs(PL_linestr,
1107                          ";}continue{print or die qq(-p destination: $!\\n);}");
1108 #else
1109                 sv_setpvs(PL_linestr,
1110                          ";}continue{print or die qq(-p destination: $!\\n);}");
1111 #endif
1112                 PL_minus_n = PL_minus_p = 0;
1113             }
1114             else if (PL_minus_n) {
1115 #ifdef PERL_MAD
1116                 sv_catpvs(PL_linestr, ";}");
1117 #else
1118                 sv_setpvs(PL_linestr, ";}");
1119 #endif
1120                 PL_minus_n = 0;
1121             }
1122             else
1123 #ifdef PERL_MAD
1124                 sv_catpvs(PL_linestr,";");
1125 #else
1126                 sv_setpvs(PL_linestr,";");
1127 #endif
1128
1129             /* reset variables for next time we lex */
1130             PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
1131                 = SvPVX(PL_linestr)
1132 #ifdef PERL_MAD
1133                 + curoff
1134 #endif
1135                 ;
1136             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1137             PL_last_lop = PL_last_uni = NULL;
1138
1139             /* Close the filehandle.  Could be from
1140              * STDIN, or a regular file.  If we were reading code from
1141              * STDIN (because the commandline held no -e or filename)
1142              * then we don't close it, we reset it so the code can
1143              * read from STDIN too.
1144              */
1145
1146             if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1147                 PerlIO_clearerr(PL_rsfp);
1148             else
1149                 (void)PerlIO_close(PL_rsfp);
1150             PL_rsfp = NULL;
1151             return s;
1152         }
1153
1154         /* not at end of file, so we only read another line */
1155         /* make corresponding updates to old pointers, for yyerror() */
1156         oldprevlen = PL_oldbufptr - PL_bufend;
1157         oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1158         if (PL_last_uni)
1159             oldunilen = PL_last_uni - PL_bufend;
1160         if (PL_last_lop)
1161             oldloplen = PL_last_lop - PL_bufend;
1162         PL_linestart = PL_bufptr = s + prevlen;
1163         PL_bufend = s + SvCUR(PL_linestr);
1164         s = PL_bufptr;
1165         PL_oldbufptr = s + oldprevlen;
1166         PL_oldoldbufptr = s + oldoldprevlen;
1167         if (PL_last_uni)
1168             PL_last_uni = s + oldunilen;
1169         if (PL_last_lop)
1170             PL_last_lop = s + oldloplen;
1171         incline(s);
1172
1173         /* debugger active and we're not compiling the debugger code,
1174          * so store the line into the debugger's array of lines
1175          */
1176         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
1177             update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
1178     }
1179
1180 #ifdef PERL_MAD
1181   done:
1182     if (PL_madskills) {
1183         if (!PL_skipwhite)
1184             PL_skipwhite = newSVpvs("");
1185         curoff = s - SvPVX(PL_linestr);
1186         if (curoff - startoff)
1187             sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
1188                                 curoff - startoff);
1189     }
1190     return s;
1191 #endif
1192 }
1193
1194 /*
1195  * S_check_uni
1196  * Check the unary operators to ensure there's no ambiguity in how they're
1197  * used.  An ambiguous piece of code would be:
1198  *     rand + 5
1199  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1200  * the +5 is its argument.
1201  */
1202
1203 STATIC void
1204 S_check_uni(pTHX)
1205 {
1206     dVAR;
1207     const char *s;
1208     const char *t;
1209
1210     if (PL_oldoldbufptr != PL_last_uni)
1211         return;
1212     while (isSPACE(*PL_last_uni))
1213         PL_last_uni++;
1214     s = PL_last_uni;
1215     while (isALNUM_lazy_if(s,UTF) || *s == '-')
1216         s++;
1217     if ((t = strchr(s, '(')) && t < PL_bufptr)
1218         return;
1219
1220     if (ckWARN_d(WARN_AMBIGUOUS)){
1221         Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1222                    "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1223                    (int)(s - PL_last_uni), PL_last_uni);
1224     }
1225 }
1226
1227 /*
1228  * LOP : macro to build a list operator.  Its behaviour has been replaced
1229  * with a subroutine, S_lop() for which LOP is just another name.
1230  */
1231
1232 #define LOP(f,x) return lop(f,x,s)
1233
1234 /*
1235  * S_lop
1236  * Build a list operator (or something that might be one).  The rules:
1237  *  - if we have a next token, then it's a list operator [why?]
1238  *  - if the next thing is an opening paren, then it's a function
1239  *  - else it's a list operator
1240  */
1241
1242 STATIC I32
1243 S_lop(pTHX_ I32 f, int x, char *s)
1244 {
1245     dVAR;
1246
1247     PERL_ARGS_ASSERT_LOP;
1248
1249     pl_yylval.ival = f;
1250     CLINE;
1251     PL_expect = x;
1252     PL_bufptr = s;
1253     PL_last_lop = PL_oldbufptr;
1254     PL_last_lop_op = (OPCODE)f;
1255 #ifdef PERL_MAD
1256     if (PL_lasttoke)
1257         return REPORT(LSTOP);
1258 #else
1259     if (PL_nexttoke)
1260         return REPORT(LSTOP);
1261 #endif
1262     if (*s == '(')
1263         return REPORT(FUNC);
1264     s = PEEKSPACE(s);
1265     if (*s == '(')
1266         return REPORT(FUNC);
1267     else
1268         return REPORT(LSTOP);
1269 }
1270
1271 #ifdef PERL_MAD
1272  /*
1273  * S_start_force
1274  * Sets up for an eventual force_next().  start_force(0) basically does
1275  * an unshift, while start_force(-1) does a push.  yylex removes items
1276  * on the "pop" end.
1277  */
1278
1279 STATIC void
1280 S_start_force(pTHX_ int where)
1281 {
1282     int i;
1283
1284     if (where < 0)      /* so people can duplicate start_force(PL_curforce) */
1285         where = PL_lasttoke;
1286     assert(PL_curforce < 0 || PL_curforce == where);
1287     if (PL_curforce != where) {
1288         for (i = PL_lasttoke; i > where; --i) {
1289             PL_nexttoke[i] = PL_nexttoke[i-1];
1290         }
1291         PL_lasttoke++;
1292     }
1293     if (PL_curforce < 0)        /* in case of duplicate start_force() */
1294         Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1295     PL_curforce = where;
1296     if (PL_nextwhite) {
1297         if (PL_madskills)
1298             curmad('^', newSVpvs(""));
1299         CURMAD('_', PL_nextwhite);
1300     }
1301 }
1302
1303 STATIC void
1304 S_curmad(pTHX_ char slot, SV *sv)
1305 {
1306     MADPROP **where;
1307
1308     if (!sv)
1309         return;
1310     if (PL_curforce < 0)
1311         where = &PL_thismad;
1312     else
1313         where = &PL_nexttoke[PL_curforce].next_mad;
1314
1315     if (PL_faketokens)
1316         sv_setpvs(sv, "");
1317     else {
1318         if (!IN_BYTES) {
1319             if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1320                 SvUTF8_on(sv);
1321             else if (PL_encoding) {
1322                 sv_recode_to_utf8(sv, PL_encoding);
1323             }
1324         }
1325     }
1326
1327     /* keep a slot open for the head of the list? */
1328     if (slot != '_' && *where && (*where)->mad_key == '^') {
1329         (*where)->mad_key = slot;
1330         sv_free(MUTABLE_SV(((*where)->mad_val)));
1331         (*where)->mad_val = (void*)sv;
1332     }
1333     else
1334         addmad(newMADsv(slot, sv), where, 0);
1335 }
1336 #else
1337 #  define start_force(where)    NOOP
1338 #  define curmad(slot, sv)      NOOP
1339 #endif
1340
1341 /*
1342  * S_force_next
1343  * When the lexer realizes it knows the next token (for instance,
1344  * it is reordering tokens for the parser) then it can call S_force_next
1345  * to know what token to return the next time the lexer is called.  Caller
1346  * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1347  * and possibly PL_expect to ensure the lexer handles the token correctly.
1348  */
1349
1350 STATIC void
1351 S_force_next(pTHX_ I32 type)
1352 {
1353     dVAR;
1354 #ifdef DEBUGGING
1355     if (DEBUG_T_TEST) {
1356         PerlIO_printf(Perl_debug_log, "### forced token:\n");
1357         tokereport(type, &NEXTVAL_NEXTTOKE);
1358     }
1359 #endif
1360 #ifdef PERL_MAD
1361     if (PL_curforce < 0)
1362         start_force(PL_lasttoke);
1363     PL_nexttoke[PL_curforce].next_type = type;
1364     if (PL_lex_state != LEX_KNOWNEXT)
1365         PL_lex_defer = PL_lex_state;
1366     PL_lex_state = LEX_KNOWNEXT;
1367     PL_lex_expect = PL_expect;
1368     PL_curforce = -1;
1369 #else
1370     PL_nexttype[PL_nexttoke] = type;
1371     PL_nexttoke++;
1372     if (PL_lex_state != LEX_KNOWNEXT) {
1373         PL_lex_defer = PL_lex_state;
1374         PL_lex_expect = PL_expect;
1375         PL_lex_state = LEX_KNOWNEXT;
1376     }
1377 #endif
1378 }
1379
1380 STATIC SV *
1381 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1382 {
1383     dVAR;
1384     SV * const sv = newSVpvn_utf8(start, len,
1385                                   !IN_BYTES
1386                                   && UTF
1387                                   && !is_ascii_string((const U8*)start, len)
1388                                   && is_utf8_string((const U8*)start, len));
1389     return sv;
1390 }
1391
1392 /*
1393  * S_force_word
1394  * When the lexer knows the next thing is a word (for instance, it has
1395  * just seen -> and it knows that the next char is a word char, then
1396  * it calls S_force_word to stick the next word into the PL_nexttoke/val
1397  * lookahead.
1398  *
1399  * Arguments:
1400  *   char *start : buffer position (must be within PL_linestr)
1401  *   int token   : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1402  *   int check_keyword : if true, Perl checks to make sure the word isn't
1403  *       a keyword (do this if the word is a label, e.g. goto FOO)
1404  *   int allow_pack : if true, : characters will also be allowed (require,
1405  *       use, etc. do this)
1406  *   int allow_initial_tick : used by the "sub" lexer only.
1407  */
1408
1409 STATIC char *
1410 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1411 {
1412     dVAR;
1413     register char *s;
1414     STRLEN len;
1415
1416     PERL_ARGS_ASSERT_FORCE_WORD;
1417
1418     start = SKIPSPACE1(start);
1419     s = start;
1420     if (isIDFIRST_lazy_if(s,UTF) ||
1421         (allow_pack && *s == ':') ||
1422         (allow_initial_tick && *s == '\'') )
1423     {
1424         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1425         if (check_keyword && keyword(PL_tokenbuf, len, 0))
1426             return start;
1427         start_force(PL_curforce);
1428         if (PL_madskills)
1429             curmad('X', newSVpvn(start,s-start));
1430         if (token == METHOD) {
1431             s = SKIPSPACE1(s);
1432             if (*s == '(')
1433                 PL_expect = XTERM;
1434             else {
1435                 PL_expect = XOPERATOR;
1436             }
1437         }
1438         if (PL_madskills)
1439             curmad('g', newSVpvs( "forced" ));
1440         NEXTVAL_NEXTTOKE.opval
1441             = (OP*)newSVOP(OP_CONST,0,
1442                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1443         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1444         force_next(token);
1445     }
1446     return s;
1447 }
1448
1449 /*
1450  * S_force_ident
1451  * Called when the lexer wants $foo *foo &foo etc, but the program
1452  * text only contains the "foo" portion.  The first argument is a pointer
1453  * to the "foo", and the second argument is the type symbol to prefix.
1454  * Forces the next token to be a "WORD".
1455  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1456  */
1457
1458 STATIC void
1459 S_force_ident(pTHX_ register const char *s, int kind)
1460 {
1461     dVAR;
1462
1463     PERL_ARGS_ASSERT_FORCE_IDENT;
1464
1465     if (*s) {
1466         const STRLEN len = strlen(s);
1467         OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1468         start_force(PL_curforce);
1469         NEXTVAL_NEXTTOKE.opval = o;
1470         force_next(WORD);
1471         if (kind) {
1472             o->op_private = OPpCONST_ENTERED;
1473             /* XXX see note in pp_entereval() for why we forgo typo
1474                warnings if the symbol must be introduced in an eval.
1475                GSAR 96-10-12 */
1476             gv_fetchpvn_flags(s, len,
1477                               PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1478                               : GV_ADD,
1479                               kind == '$' ? SVt_PV :
1480                               kind == '@' ? SVt_PVAV :
1481                               kind == '%' ? SVt_PVHV :
1482                               SVt_PVGV
1483                               );
1484         }
1485     }
1486 }
1487
1488 NV
1489 Perl_str_to_version(pTHX_ SV *sv)
1490 {
1491     NV retval = 0.0;
1492     NV nshift = 1.0;
1493     STRLEN len;
1494     const char *start = SvPV_const(sv,len);
1495     const char * const end = start + len;
1496     const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1497
1498     PERL_ARGS_ASSERT_STR_TO_VERSION;
1499
1500     while (start < end) {
1501         STRLEN skip;
1502         UV n;
1503         if (utf)
1504             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1505         else {
1506             n = *(U8*)start;
1507             skip = 1;
1508         }
1509         retval += ((NV)n)/nshift;
1510         start += skip;
1511         nshift *= 1000;
1512     }
1513     return retval;
1514 }
1515
1516 /*
1517  * S_force_version
1518  * Forces the next token to be a version number.
1519  * If the next token appears to be an invalid version number, (e.g. "v2b"),
1520  * and if "guessing" is TRUE, then no new token is created (and the caller
1521  * must use an alternative parsing method).
1522  */
1523
1524 STATIC char *
1525 S_force_version(pTHX_ char *s, int guessing)
1526 {
1527     dVAR;
1528     OP *version = NULL;
1529     char *d;
1530 #ifdef PERL_MAD
1531     I32 startoff = s - SvPVX(PL_linestr);
1532 #endif
1533
1534     PERL_ARGS_ASSERT_FORCE_VERSION;
1535
1536     s = SKIPSPACE1(s);
1537
1538     d = s;
1539     if (*d == 'v')
1540         d++;
1541     if (isDIGIT(*d)) {
1542         while (isDIGIT(*d) || *d == '_' || *d == '.')
1543             d++;
1544 #ifdef PERL_MAD
1545         if (PL_madskills) {
1546             start_force(PL_curforce);
1547             curmad('X', newSVpvn(s,d-s));
1548         }
1549 #endif
1550         if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1551             SV *ver;
1552             s = scan_num(s, &pl_yylval);
1553             version = pl_yylval.opval;
1554             ver = cSVOPx(version)->op_sv;
1555             if (SvPOK(ver) && !SvNIOK(ver)) {
1556                 SvUPGRADE(ver, SVt_PVNV);
1557                 SvNV_set(ver, str_to_version(ver));
1558                 SvNOK_on(ver);          /* hint that it is a version */
1559             }
1560         }
1561         else if (guessing) {
1562 #ifdef PERL_MAD
1563             if (PL_madskills) {
1564                 sv_free(PL_nextwhite);  /* let next token collect whitespace */
1565                 PL_nextwhite = 0;
1566                 s = SvPVX(PL_linestr) + startoff;
1567             }
1568 #endif
1569             return s;
1570         }
1571     }
1572
1573 #ifdef PERL_MAD
1574     if (PL_madskills && !version) {
1575         sv_free(PL_nextwhite);  /* let next token collect whitespace */
1576         PL_nextwhite = 0;
1577         s = SvPVX(PL_linestr) + startoff;
1578     }
1579 #endif
1580     /* NOTE: The parser sees the package name and the VERSION swapped */
1581     start_force(PL_curforce);
1582     NEXTVAL_NEXTTOKE.opval = version;
1583     force_next(WORD);
1584
1585     return s;
1586 }
1587
1588 /*
1589  * S_tokeq
1590  * Tokenize a quoted string passed in as an SV.  It finds the next
1591  * chunk, up to end of string or a backslash.  It may make a new
1592  * SV containing that chunk (if HINT_NEW_STRING is on).  It also
1593  * turns \\ into \.
1594  */
1595
1596 STATIC SV *
1597 S_tokeq(pTHX_ SV *sv)
1598 {
1599     dVAR;
1600     register char *s;
1601     register char *send;
1602     register char *d;
1603     STRLEN len = 0;
1604     SV *pv = sv;
1605
1606     PERL_ARGS_ASSERT_TOKEQ;
1607
1608     if (!SvLEN(sv))
1609         goto finish;
1610
1611     s = SvPV_force(sv, len);
1612     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1613         goto finish;
1614     send = s + len;
1615     while (s < send && *s != '\\')
1616         s++;
1617     if (s == send)
1618         goto finish;
1619     d = s;
1620     if ( PL_hints & HINT_NEW_STRING ) {
1621         pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
1622     }
1623     while (s < send) {
1624         if (*s == '\\') {
1625             if (s + 1 < send && (s[1] == '\\'))
1626                 s++;            /* all that, just for this */
1627         }
1628         *d++ = *s++;
1629     }
1630     *d = '\0';
1631     SvCUR_set(sv, d - SvPVX_const(sv));
1632   finish:
1633     if ( PL_hints & HINT_NEW_STRING )
1634        return new_constant(NULL, 0, "q", sv, pv, "q", 1);
1635     return sv;
1636 }
1637
1638 /*
1639  * Now come three functions related to double-quote context,
1640  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
1641  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
1642  * interact with PL_lex_state, and create fake ( ... ) argument lists
1643  * to handle functions and concatenation.
1644  * They assume that whoever calls them will be setting up a fake
1645  * join call, because each subthing puts a ',' after it.  This lets
1646  *   "lower \luPpEr"
1647  * become
1648  *  join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1649  *
1650  * (I'm not sure whether the spurious commas at the end of lcfirst's
1651  * arguments and join's arguments are created or not).
1652  */
1653
1654 /*
1655  * S_sublex_start
1656  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1657  *
1658  * Pattern matching will set PL_lex_op to the pattern-matching op to
1659  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
1660  *
1661  * OP_CONST and OP_READLINE are easy--just make the new op and return.
1662  *
1663  * Everything else becomes a FUNC.
1664  *
1665  * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1666  * had an OP_CONST or OP_READLINE).  This just sets us up for a
1667  * call to S_sublex_push().
1668  */
1669
1670 STATIC I32
1671 S_sublex_start(pTHX)
1672 {
1673     dVAR;
1674     register const I32 op_type = pl_yylval.ival;
1675
1676     if (op_type == OP_NULL) {
1677         pl_yylval.opval = PL_lex_op;
1678         PL_lex_op = NULL;
1679         return THING;
1680     }
1681     if (op_type == OP_CONST || op_type == OP_READLINE) {
1682         SV *sv = tokeq(PL_lex_stuff);
1683
1684         if (SvTYPE(sv) == SVt_PVIV) {
1685             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1686             STRLEN len;
1687             const char * const p = SvPV_const(sv, len);
1688             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
1689             SvREFCNT_dec(sv);
1690             sv = nsv;
1691         }
1692         pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1693         PL_lex_stuff = NULL;
1694         /* Allow <FH> // "foo" */
1695         if (op_type == OP_READLINE)
1696             PL_expect = XTERMORDORDOR;
1697         return THING;
1698     }
1699     else if (op_type == OP_BACKTICK && PL_lex_op) {
1700         /* readpipe() vas overriden */
1701         cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1702         pl_yylval.opval = PL_lex_op;
1703         PL_lex_op = NULL;
1704         PL_lex_stuff = NULL;
1705         return THING;
1706     }
1707
1708     PL_sublex_info.super_state = PL_lex_state;
1709     PL_sublex_info.sub_inwhat = (U16)op_type;
1710     PL_sublex_info.sub_op = PL_lex_op;
1711     PL_lex_state = LEX_INTERPPUSH;
1712
1713     PL_expect = XTERM;
1714     if (PL_lex_op) {
1715         pl_yylval.opval = PL_lex_op;
1716         PL_lex_op = NULL;
1717         return PMFUNC;
1718     }
1719     else
1720         return FUNC;
1721 }
1722
1723 /*
1724  * S_sublex_push
1725  * Create a new scope to save the lexing state.  The scope will be
1726  * ended in S_sublex_done.  Returns a '(', starting the function arguments
1727  * to the uc, lc, etc. found before.
1728  * Sets PL_lex_state to LEX_INTERPCONCAT.
1729  */
1730
1731 STATIC I32
1732 S_sublex_push(pTHX)
1733 {
1734     dVAR;
1735     ENTER;
1736
1737     PL_lex_state = PL_sublex_info.super_state;
1738     SAVEBOOL(PL_lex_dojoin);
1739     SAVEI32(PL_lex_brackets);
1740     SAVEI32(PL_lex_casemods);
1741     SAVEI32(PL_lex_starts);
1742     SAVEI8(PL_lex_state);
1743     SAVEVPTR(PL_lex_inpat);
1744     SAVEI16(PL_lex_inwhat);
1745     SAVECOPLINE(PL_curcop);
1746     SAVEPPTR(PL_bufptr);
1747     SAVEPPTR(PL_bufend);
1748     SAVEPPTR(PL_oldbufptr);
1749     SAVEPPTR(PL_oldoldbufptr);
1750     SAVEPPTR(PL_last_lop);
1751     SAVEPPTR(PL_last_uni);
1752     SAVEPPTR(PL_linestart);
1753     SAVESPTR(PL_linestr);
1754     SAVEGENERICPV(PL_lex_brackstack);
1755     SAVEGENERICPV(PL_lex_casestack);
1756
1757     PL_linestr = PL_lex_stuff;
1758     PL_lex_stuff = NULL;
1759
1760     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1761         = SvPVX(PL_linestr);
1762     PL_bufend += SvCUR(PL_linestr);
1763     PL_last_lop = PL_last_uni = NULL;
1764     SAVEFREESV(PL_linestr);
1765
1766     PL_lex_dojoin = FALSE;
1767     PL_lex_brackets = 0;
1768     Newx(PL_lex_brackstack, 120, char);
1769     Newx(PL_lex_casestack, 12, char);
1770     PL_lex_casemods = 0;
1771     *PL_lex_casestack = '\0';
1772     PL_lex_starts = 0;
1773     PL_lex_state = LEX_INTERPCONCAT;
1774     CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1775
1776     PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1777     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1778         PL_lex_inpat = PL_sublex_info.sub_op;
1779     else
1780         PL_lex_inpat = NULL;
1781
1782     return '(';
1783 }
1784
1785 /*
1786  * S_sublex_done
1787  * Restores lexer state after a S_sublex_push.
1788  */
1789
1790 STATIC I32
1791 S_sublex_done(pTHX)
1792 {
1793     dVAR;
1794     if (!PL_lex_starts++) {
1795         SV * const sv = newSVpvs("");
1796         if (SvUTF8(PL_linestr))
1797             SvUTF8_on(sv);
1798         PL_expect = XOPERATOR;
1799         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1800         return THING;
1801     }
1802
1803     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
1804         PL_lex_state = LEX_INTERPCASEMOD;
1805         return yylex();
1806     }
1807
1808     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1809     if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1810         PL_linestr = PL_lex_repl;
1811         PL_lex_inpat = 0;
1812         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1813         PL_bufend += SvCUR(PL_linestr);
1814         PL_last_lop = PL_last_uni = NULL;
1815         SAVEFREESV(PL_linestr);
1816         PL_lex_dojoin = FALSE;
1817         PL_lex_brackets = 0;
1818         PL_lex_casemods = 0;
1819         *PL_lex_casestack = '\0';
1820         PL_lex_starts = 0;
1821         if (SvEVALED(PL_lex_repl)) {
1822             PL_lex_state = LEX_INTERPNORMAL;
1823             PL_lex_starts++;
1824             /*  we don't clear PL_lex_repl here, so that we can check later
1825                 whether this is an evalled subst; that means we rely on the
1826                 logic to ensure sublex_done() is called again only via the
1827                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1828         }
1829         else {
1830             PL_lex_state = LEX_INTERPCONCAT;
1831             PL_lex_repl = NULL;
1832         }
1833         return ',';
1834     }
1835     else {
1836 #ifdef PERL_MAD
1837         if (PL_madskills) {
1838             if (PL_thiswhite) {
1839                 if (!PL_endwhite)
1840                     PL_endwhite = newSVpvs("");
1841                 sv_catsv(PL_endwhite, PL_thiswhite);
1842                 PL_thiswhite = 0;
1843             }
1844             if (PL_thistoken)
1845                 sv_setpvs(PL_thistoken,"");
1846             else
1847                 PL_realtokenstart = -1;
1848         }
1849 #endif
1850         LEAVE;
1851         PL_bufend = SvPVX(PL_linestr);
1852         PL_bufend += SvCUR(PL_linestr);
1853         PL_expect = XOPERATOR;
1854         PL_sublex_info.sub_inwhat = 0;
1855         return ')';
1856     }
1857 }
1858
1859 /*
1860   scan_const
1861
1862   Extracts a pattern, double-quoted string, or transliteration.  This
1863   is terrifying code.
1864
1865   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1866   processing a pattern (PL_lex_inpat is true), a transliteration
1867   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1868
1869   Returns a pointer to the character scanned up to. If this is
1870   advanced from the start pointer supplied (i.e. if anything was
1871   successfully parsed), will leave an OP for the substring scanned
1872   in pl_yylval. Caller must intuit reason for not parsing further
1873   by looking at the next characters herself.
1874
1875   In patterns:
1876     backslashes:
1877       double-quoted style: \r and \n
1878       regexp special ones: \D \s
1879       constants: \x31
1880       backrefs: \1
1881       case and quoting: \U \Q \E
1882     stops on @ and $, but not for $ as tail anchor
1883
1884   In transliterations:
1885     characters are VERY literal, except for - not at the start or end
1886     of the string, which indicates a range. If the range is in bytes,
1887     scan_const expands the range to the full set of intermediate
1888     characters. If the range is in utf8, the hyphen is replaced with
1889     a certain range mark which will be handled by pmtrans() in op.c.
1890
1891   In double-quoted strings:
1892     backslashes:
1893       double-quoted style: \r and \n
1894       constants: \x31
1895       deprecated backrefs: \1 (in substitution replacements)
1896       case and quoting: \U \Q \E
1897     stops on @ and $
1898
1899   scan_const does *not* construct ops to handle interpolated strings.
1900   It stops processing as soon as it finds an embedded $ or @ variable
1901   and leaves it to the caller to work out what's going on.
1902
1903   embedded arrays (whether in pattern or not) could be:
1904       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1905
1906   $ in double-quoted strings must be the symbol of an embedded scalar.
1907
1908   $ in pattern could be $foo or could be tail anchor.  Assumption:
1909   it's a tail anchor if $ is the last thing in the string, or if it's
1910   followed by one of "()| \r\n\t"
1911
1912   \1 (backreferences) are turned into $1
1913
1914   The structure of the code is
1915       while (there's a character to process) {
1916           handle transliteration ranges
1917           skip regexp comments /(?#comment)/ and codes /(?{code})/
1918           skip #-initiated comments in //x patterns
1919           check for embedded arrays
1920           check for embedded scalars
1921           if (backslash) {
1922               leave intact backslashes from leaveit (below)
1923               deprecate \1 in substitution replacements
1924               handle string-changing backslashes \l \U \Q \E, etc.
1925               switch (what was escaped) {
1926                   handle \- in a transliteration (becomes a literal -)
1927                   handle \132 (octal characters)
1928                   handle \x15 and \x{1234} (hex characters)
1929                   handle \N{name} (named characters)
1930                   handle \cV (control characters)
1931                   handle printf-style backslashes (\f, \r, \n, etc)
1932               } (end switch)
1933               continue
1934           } (end if backslash)
1935           handle regular character
1936     } (end while character to read)
1937
1938 */
1939
1940 STATIC char *
1941 S_scan_const(pTHX_ char *start)
1942 {
1943     dVAR;
1944     register char *send = PL_bufend;            /* end of the constant */
1945     SV *sv = newSV(send - start);               /* sv for the constant.  See
1946                                                    note below on sizing. */
1947     register char *s = start;                   /* start of the constant */
1948     register char *d = SvPVX(sv);               /* destination for copies */
1949     bool dorange = FALSE;                       /* are we in a translit range? */
1950     bool didrange = FALSE;                      /* did we just finish a range? */
1951     I32  has_utf8 = FALSE;                      /* Output constant is UTF8 */
1952     I32  this_utf8 = UTF;                       /* Is the source string assumed
1953                                                    to be UTF8?  But, this can
1954                                                    show as true when the source
1955                                                    isn't utf8, as for example
1956                                                    when it is entirely composed
1957                                                    of hex constants */
1958
1959     /* Note on sizing:  The scanned constant is placed into sv, which is
1960      * initialized by newSV() assuming one byte of output for every byte of
1961      * input.  This routine expects newSV() to allocate an extra byte for a
1962      * trailing NUL, which this routine will append if it gets to the end of
1963      * the input.  There may be more bytes of input than output (eg., \N{LATIN
1964      * CAPITAL LETTER A}), or more output than input if the constant ends up
1965      * recoded to utf8, but each time a construct is found that might increase
1966      * the needed size, SvGROW() is called.  Its size parameter each time is
1967      * based on the best guess estimate at the time, namely the length used so
1968      * far, plus the length the current construct will occupy, plus room for
1969      * the trailing NUL, plus one byte for every input byte still unscanned */
1970
1971     UV uv;
1972 #ifdef EBCDIC
1973     UV literal_endpoint = 0;
1974     bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1975 #endif
1976
1977     PERL_ARGS_ASSERT_SCAN_CONST;
1978
1979     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1980         /* If we are doing a trans and we know we want UTF8 set expectation */
1981         has_utf8   = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1982         this_utf8  = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1983     }
1984
1985
1986     while (s < send || dorange) {
1987         /* get transliterations out of the way (they're most literal) */
1988         if (PL_lex_inwhat == OP_TRANS) {
1989             /* expand a range A-Z to the full set of characters.  AIE! */
1990             if (dorange) {
1991                 I32 i;                          /* current expanded character */
1992                 I32 min;                        /* first character in range */
1993                 I32 max;                        /* last character in range */
1994
1995 #ifdef EBCDIC
1996                 UV uvmax = 0;
1997 #endif
1998
1999                 if (has_utf8
2000 #ifdef EBCDIC
2001                     && !native_range
2002 #endif
2003                     ) {
2004                     char * const c = (char*)utf8_hop((U8*)d, -1);
2005                     char *e = d++;
2006                     while (e-- > c)
2007                         *(e + 1) = *e;
2008                     *c = (char)UTF_TO_NATIVE(0xff);
2009                     /* mark the range as done, and continue */
2010                     dorange = FALSE;
2011                     didrange = TRUE;
2012                     continue;
2013                 }
2014
2015                 i = d - SvPVX_const(sv);                /* remember current offset */
2016 #ifdef EBCDIC
2017                 SvGROW(sv,
2018                        SvLEN(sv) + (has_utf8 ?
2019                                     (512 - UTF_CONTINUATION_MARK +
2020                                      UNISKIP(0x100))
2021                                     : 256));
2022                 /* How many two-byte within 0..255: 128 in UTF-8,
2023                  * 96 in UTF-8-mod. */
2024 #else
2025                 SvGROW(sv, SvLEN(sv) + 256);    /* never more than 256 chars in a range */
2026 #endif
2027                 d = SvPVX(sv) + i;              /* refresh d after realloc */
2028 #ifdef EBCDIC
2029                 if (has_utf8) {
2030                     int j;
2031                     for (j = 0; j <= 1; j++) {
2032                         char * const c = (char*)utf8_hop((U8*)d, -1);
2033                         const UV uv    = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2034                         if (j)
2035                             min = (U8)uv;
2036                         else if (uv < 256)
2037                             max = (U8)uv;
2038                         else {
2039                             max = (U8)0xff; /* only to \xff */
2040                             uvmax = uv; /* \x{100} to uvmax */
2041                         }
2042                         d = c; /* eat endpoint chars */
2043                      }
2044                 }
2045                else {
2046 #endif
2047                    d -= 2;              /* eat the first char and the - */
2048                    min = (U8)*d;        /* first char in range */
2049                    max = (U8)d[1];      /* last char in range  */
2050 #ifdef EBCDIC
2051                }
2052 #endif
2053
2054                 if (min > max) {
2055                     Perl_croak(aTHX_
2056                                "Invalid range \"%c-%c\" in transliteration operator",
2057                                (char)min, (char)max);
2058                 }
2059
2060 #ifdef EBCDIC
2061                 if (literal_endpoint == 2 &&
2062                     ((isLOWER(min) && isLOWER(max)) ||
2063                      (isUPPER(min) && isUPPER(max)))) {
2064                     if (isLOWER(min)) {
2065                         for (i = min; i <= max; i++)
2066                             if (isLOWER(i))
2067                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2068                     } else {
2069                         for (i = min; i <= max; i++)
2070                             if (isUPPER(i))
2071                                 *d++ = NATIVE_TO_NEED(has_utf8,i);
2072                     }
2073                 }
2074                 else
2075 #endif
2076                     for (i = min; i <= max; i++)
2077 #ifdef EBCDIC
2078                         if (has_utf8) {
2079                             const U8 ch = (U8)NATIVE_TO_UTF(i);
2080                             if (UNI_IS_INVARIANT(ch))
2081                                 *d++ = (U8)i;
2082                             else {
2083                                 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2084                                 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2085                             }
2086                         }
2087                         else
2088 #endif
2089                             *d++ = (char)i;
2090
2091 #ifdef EBCDIC
2092                 if (uvmax) {
2093                     d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2094                     if (uvmax > 0x101)
2095                         *d++ = (char)UTF_TO_NATIVE(0xff);
2096                     if (uvmax > 0x100)
2097                         d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2098                 }
2099 #endif
2100
2101                 /* mark the range as done, and continue */
2102                 dorange = FALSE;
2103                 didrange = TRUE;
2104 #ifdef EBCDIC
2105                 literal_endpoint = 0;
2106 #endif
2107                 continue;
2108             }
2109
2110             /* range begins (ignore - as first or last char) */
2111             else if (*s == '-' && s+1 < send  && s != start) {
2112                 if (didrange) {
2113                     Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2114                 }
2115                 if (has_utf8
2116 #ifdef EBCDIC
2117                     && !native_range
2118 #endif
2119                     ) {
2120                     *d++ = (char)UTF_TO_NATIVE(0xff);   /* use illegal utf8 byte--see pmtrans */
2121                     s++;
2122                     continue;
2123                 }
2124                 dorange = TRUE;
2125                 s++;
2126             }
2127             else {
2128                 didrange = FALSE;
2129 #ifdef EBCDIC
2130                 literal_endpoint = 0;
2131                 native_range = TRUE;
2132 #endif
2133             }
2134         }
2135
2136         /* if we get here, we're not doing a transliteration */
2137
2138         /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2139            except for the last char, which will be done separately. */
2140         else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2141             if (s[2] == '#') {
2142                 while (s+1 < send && *s != ')')
2143                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2144             }
2145             else if (s[2] == '{' /* This should match regcomp.c */
2146                     || (s[2] == '?' && s[3] == '{'))
2147             {
2148                 I32 count = 1;
2149                 char *regparse = s + (s[2] == '{' ? 3 : 4);
2150                 char c;
2151
2152                 while (count && (c = *regparse)) {
2153                     if (c == '\\' && regparse[1])
2154                         regparse++;
2155                     else if (c == '{')
2156                         count++;
2157                     else if (c == '}')
2158                         count--;
2159                     regparse++;
2160                 }
2161                 if (*regparse != ')')
2162                     regparse--;         /* Leave one char for continuation. */
2163                 while (s < regparse)
2164                     *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2165             }
2166         }
2167
2168         /* likewise skip #-initiated comments in //x patterns */
2169         else if (*s == '#' && PL_lex_inpat &&
2170           ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2171             while (s+1 < send && *s != '\n')
2172                 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2173         }
2174
2175         /* check for embedded arrays
2176            (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2177            */
2178         else if (*s == '@' && s[1]) {
2179             if (isALNUM_lazy_if(s+1,UTF))
2180                 break;
2181             if (strchr(":'{$", s[1]))
2182                 break;
2183             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2184                 break; /* in regexp, neither @+ nor @- are interpolated */
2185         }
2186
2187         /* check for embedded scalars.  only stop if we're sure it's a
2188            variable.
2189         */
2190         else if (*s == '$') {
2191             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
2192                 break;
2193             if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2194                 if (s[1] == '\\') {
2195                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2196                                    "Possible unintended interpolation of $\\ in regex");
2197                 }
2198                 break;          /* in regexp, $ might be tail anchor */
2199             }
2200         }
2201
2202         /* End of else if chain - OP_TRANS rejoin rest */
2203
2204         /* backslashes */
2205         if (*s == '\\' && s+1 < send) {
2206             s++;
2207
2208             /* deprecate \1 in strings and substitution replacements */
2209             if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2210                 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2211             {
2212                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2213                 *--s = '$';
2214                 break;
2215             }
2216
2217             /* string-change backslash escapes */
2218             if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2219                 --s;
2220                 break;
2221             }
2222             /* skip any other backslash escapes in a pattern */
2223             else if (PL_lex_inpat) {
2224                 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2225                 goto default_action;
2226             }
2227
2228             /* if we get here, it's either a quoted -, or a digit */
2229             switch (*s) {
2230
2231             /* quoted - in transliterations */
2232             case '-':
2233                 if (PL_lex_inwhat == OP_TRANS) {
2234                     *d++ = *s++;
2235                     continue;
2236                 }
2237                 /* FALL THROUGH */
2238             default:
2239                 {
2240                     if ((isALPHA(*s) || isDIGIT(*s)))
2241                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2242                                        "Unrecognized escape \\%c passed through",
2243                                        *s);
2244                     /* default action is to copy the quoted character */
2245                     goto default_action;
2246                 }
2247
2248             /* eg. \132 indicates the octal constant 0x132 */
2249             case '0': case '1': case '2': case '3':
2250             case '4': case '5': case '6': case '7':
2251                 {
2252                     I32 flags = 0;
2253                     STRLEN len = 3;
2254                     uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2255                     s += len;
2256                 }
2257                 goto NUM_ESCAPE_INSERT;
2258
2259             /* eg. \x24 indicates the hex constant 0x24 */
2260             case 'x':
2261                 ++s;
2262                 if (*s == '{') {
2263                     char* const e = strchr(s, '}');
2264                     I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2265                       PERL_SCAN_DISALLOW_PREFIX;
2266                     STRLEN len;
2267
2268                     ++s;
2269                     if (!e) {
2270                         yyerror("Missing right brace on \\x{}");
2271                         continue;
2272                     }
2273                     len = e - s;
2274                     uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2275                     s = e + 1;
2276                 }
2277                 else {
2278                     {
2279                         STRLEN len = 2;
2280                         I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2281                         uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2282                         s += len;
2283                     }
2284                 }
2285
2286               NUM_ESCAPE_INSERT:
2287                 /* Insert oct, hex, or \N{U+...} escaped character.  There will
2288                  * always be enough room in sv since such escapes will be
2289                  * longer than any UTF-8 sequence they can end up as, except if
2290                  * they force us to recode the rest of the string into utf8 */
2291
2292                 /* Here uv is the ordinal of the next character being added in
2293                  * unicode (converted from native).  (It has to be done before
2294                  * here because \N is interpreted as unicode, and oct and hex
2295                  * as native.) */
2296                 if (!UNI_IS_INVARIANT(uv)) {
2297                     if (!has_utf8 && uv > 255) {
2298                         /* Might need to recode whatever we have accumulated so
2299                          * far if it contains any chars variant in utf8 or
2300                          * utf-ebcdic. */
2301
2302                         SvCUR_set(sv, d - SvPVX_const(sv));
2303                         SvPOK_on(sv);
2304                         *d = '\0';
2305                         /* See Note on sizing above.  */
2306                         sv_utf8_upgrade_flags_grow(sv,
2307                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2308                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
2309                         d = SvPVX(sv) + SvCUR(sv);
2310                         has_utf8 = TRUE;
2311                     }
2312
2313                     if (has_utf8) {
2314                         d = (char*)uvuni_to_utf8((U8*)d, uv);
2315                         if (PL_lex_inwhat == OP_TRANS &&
2316                             PL_sublex_info.sub_op) {
2317                             PL_sublex_info.sub_op->op_private |=
2318                                 (PL_lex_repl ? OPpTRANS_FROM_UTF
2319                                              : OPpTRANS_TO_UTF);
2320                         }
2321 #ifdef EBCDIC
2322                         if (uv > 255 && !dorange)
2323                             native_range = FALSE;
2324 #endif
2325                     }
2326                     else {
2327                         *d++ = (char)uv;
2328                     }
2329                 }
2330                 else {
2331                     *d++ = (char) uv;
2332                 }
2333                 continue;
2334
2335             /* \N{LATIN SMALL LETTER A} is a named character, and so is
2336              * \N{U+0041} */
2337             case 'N':
2338                 ++s;
2339                 if (*s == '{') {
2340                     char* e = strchr(s, '}');
2341                     SV *res;
2342                     STRLEN len;
2343                     const char *str;
2344
2345                     if (!e) {
2346                         yyerror("Missing right brace on \\N{}");
2347                         e = s - 1;
2348                         goto cont_scan;
2349                     }
2350                     if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2351                         /* \N{U+...} The ... is a unicode value even on EBCDIC
2352                          * machines */
2353                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2354                           PERL_SCAN_DISALLOW_PREFIX;
2355                         s += 3;
2356                         len = e - s;
2357                         uv = grok_hex(s, &len, &flags, NULL);
2358                         if ( e > s && len != (STRLEN)(e - s) ) {
2359                             uv = 0xFFFD;
2360                         }
2361                         s = e + 1;
2362                         goto NUM_ESCAPE_INSERT;
2363                     }
2364                     res = newSVpvn(s + 1, e - s - 1);
2365                     res = new_constant( NULL, 0, "charnames",
2366                                         res, NULL, s - 2, e - s + 3 );
2367                     if (has_utf8)
2368                         sv_utf8_upgrade(res);
2369                     str = SvPV_const(res,len);
2370 #ifdef EBCDIC_NEVER_MIND
2371                     /* charnames uses pack U and that has been
2372                      * recently changed to do the below uni->native
2373                      * mapping, so this would be redundant (and wrong,
2374                      * the code point would be doubly converted).
2375                      * But leave this in just in case the pack U change
2376                      * gets revoked, but the semantics is still
2377                      * desireable for charnames. --jhi */
2378                     {
2379                          UV uv = utf8_to_uvchr((const U8*)str, 0);
2380
2381                          if (uv < 0x100) {
2382                               U8 tmpbuf[UTF8_MAXBYTES+1], *d;
2383
2384                               d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2385                               sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
2386                               str = SvPV_const(res, len);
2387                          }
2388                     }
2389 #endif
2390                     /* If destination is not in utf8 but this new character is,
2391                      * recode the dest to utf8 */
2392                     if (!has_utf8 && SvUTF8(res)) {
2393                         SvCUR_set(sv, d - SvPVX_const(sv));
2394                         SvPOK_on(sv);
2395                         *d = '\0';
2396                         /* See Note on sizing above.  */
2397                         sv_utf8_upgrade_flags_grow(sv,
2398                                             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2399                                             len + (STRLEN)(send - s) + 1);
2400                         d = SvPVX(sv) + SvCUR(sv);
2401                         has_utf8 = TRUE;
2402                     } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
2403
2404                         /* See Note on sizing above.  (NOTE: SvCUR() is not set
2405                          * correctly here). */
2406                         const STRLEN off = d - SvPVX_const(sv);
2407                         d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
2408                     }
2409 #ifdef EBCDIC
2410                     if (!dorange)
2411                         native_range = FALSE; /* \N{} is guessed to be Unicode */
2412 #endif
2413                     Copy(str, d, len, char);
2414                     d += len;
2415                     SvREFCNT_dec(res);
2416                   cont_scan:
2417                     s = e + 1;
2418                 }
2419                 else
2420                     yyerror("Missing braces on \\N{}");
2421                 continue;
2422
2423             /* \c is a control character */
2424             case 'c':
2425                 s++;
2426                 if (s < send) {
2427                     U8 c = *s++;
2428 #ifdef EBCDIC
2429                     if (isLOWER(c))
2430                         c = toUPPER(c);
2431 #endif
2432                     *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
2433                 }
2434                 else {
2435                     yyerror("Missing control char name in \\c");
2436                 }
2437                 continue;
2438
2439             /* printf-style backslashes, formfeeds, newlines, etc */
2440             case 'b':
2441                 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
2442                 break;
2443             case 'n':
2444                 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
2445                 break;
2446             case 'r':
2447                 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
2448                 break;
2449             case 'f':
2450                 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
2451                 break;
2452             case 't':
2453                 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
2454                 break;
2455             case 'e':
2456                 *d++ = ASCII_TO_NEED(has_utf8,'\033');
2457                 break;
2458             case 'a':
2459                 *d++ = ASCII_TO_NEED(has_utf8,'\007');
2460                 break;
2461             } /* end switch */
2462
2463             s++;
2464             continue;
2465         } /* end if (backslash) */
2466 #ifdef EBCDIC
2467         else
2468             literal_endpoint++;
2469 #endif
2470
2471     default_action:
2472         /* If we started with encoded form, or already know we want it,
2473            then encode the next character */
2474         if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2475             STRLEN len  = 1;
2476
2477
2478             /* One might think that it is wasted effort in the case of the
2479              * source being utf8 (this_utf8 == TRUE) to take the next character
2480              * in the source, convert it to an unsigned value, and then convert
2481              * it back again.  But the source has not been validated here.  The
2482              * routine that does the conversion checks for errors like
2483              * malformed utf8 */
2484
2485             const UV nextuv   = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2486             const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2487             if (!has_utf8) {
2488                 SvCUR_set(sv, d - SvPVX_const(sv));
2489                 SvPOK_on(sv);
2490                 *d = '\0';
2491                 /* See Note on sizing above.  */
2492                 sv_utf8_upgrade_flags_grow(sv,
2493                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2494                                         need + (STRLEN)(send - s) + 1);
2495                 d = SvPVX(sv) + SvCUR(sv);
2496                 has_utf8 = TRUE;
2497             } else if (need > len) {
2498                 /* encoded value larger than old, may need extra space (NOTE:
2499                  * SvCUR() is not set correctly here).   See Note on sizing
2500                  * above.  */
2501                 const STRLEN off = d - SvPVX_const(sv);
2502                 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2503             }
2504             s += len;
2505
2506             d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2507 #ifdef EBCDIC
2508             if (uv > 255 && !dorange)
2509                 native_range = FALSE;
2510 #endif
2511         }
2512         else {
2513             *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2514         }
2515     } /* while loop to process each character */
2516
2517     /* terminate the string and set up the sv */
2518     *d = '\0';
2519     SvCUR_set(sv, d - SvPVX_const(sv));
2520     if (SvCUR(sv) >= SvLEN(sv))
2521         Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2522
2523     SvPOK_on(sv);
2524     if (PL_encoding && !has_utf8) {
2525         sv_recode_to_utf8(sv, PL_encoding);
2526         if (SvUTF8(sv))
2527             has_utf8 = TRUE;
2528     }
2529     if (has_utf8) {
2530         SvUTF8_on(sv);
2531         if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2532             PL_sublex_info.sub_op->op_private |=
2533                     (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2534         }
2535     }
2536
2537     /* shrink the sv if we allocated more than we used */
2538     if (SvCUR(sv) + 5 < SvLEN(sv)) {
2539         SvPV_shrink_to_cur(sv);
2540     }
2541
2542     /* return the substring (via pl_yylval) only if we parsed anything */
2543     if (s > PL_bufptr) {
2544         if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
2545             const char *const key = PL_lex_inpat ? "qr" : "q";
2546             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
2547             const char *type;
2548             STRLEN typelen;
2549
2550             if (PL_lex_inwhat == OP_TRANS) {
2551                 type = "tr";
2552                 typelen = 2;
2553             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
2554                 type = "s";
2555                 typelen = 1;
2556             } else  {
2557                 type = "qq";
2558                 typelen = 2;
2559             }
2560
2561             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
2562                                 type, typelen);
2563         }
2564         pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2565     } else
2566         SvREFCNT_dec(sv);
2567     return s;
2568 }
2569
2570 /* S_intuit_more
2571  * Returns TRUE if there's more to the expression (e.g., a subscript),
2572  * FALSE otherwise.
2573  *
2574  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2575  *
2576  * ->[ and ->{ return TRUE
2577  * { and [ outside a pattern are always subscripts, so return TRUE
2578  * if we're outside a pattern and it's not { or [, then return FALSE
2579  * if we're in a pattern and the first char is a {
2580  *   {4,5} (any digits around the comma) returns FALSE
2581  * if we're in a pattern and the first char is a [
2582  *   [] returns FALSE
2583  *   [SOMETHING] has a funky algorithm to decide whether it's a
2584  *      character class or not.  It has to deal with things like
2585  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2586  * anything else returns TRUE
2587  */
2588
2589 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2590
2591 STATIC int
2592 S_intuit_more(pTHX_ register char *s)
2593 {
2594     dVAR;
2595
2596     PERL_ARGS_ASSERT_INTUIT_MORE;
2597
2598     if (PL_lex_brackets)
2599         return TRUE;
2600     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2601         return TRUE;
2602     if (*s != '{' && *s != '[')
2603         return FALSE;
2604     if (!PL_lex_inpat)
2605         return TRUE;
2606
2607     /* In a pattern, so maybe we have {n,m}. */
2608     if (*s == '{') {
2609         s++;
2610         if (!isDIGIT(*s))
2611             return TRUE;
2612         while (isDIGIT(*s))
2613             s++;
2614         if (*s == ',')
2615             s++;
2616         while (isDIGIT(*s))
2617             s++;
2618         if (*s == '}')
2619             return FALSE;
2620         return TRUE;
2621
2622     }
2623
2624     /* On the other hand, maybe we have a character class */
2625
2626     s++;
2627     if (*s == ']' || *s == '^')
2628         return FALSE;
2629     else {
2630         /* this is terrifying, and it works */
2631         int weight = 2;         /* let's weigh the evidence */
2632         char seen[256];
2633         unsigned char un_char = 255, last_un_char;
2634         const char * const send = strchr(s,']');
2635         char tmpbuf[sizeof PL_tokenbuf * 4];
2636
2637         if (!send)              /* has to be an expression */
2638             return TRUE;
2639
2640         Zero(seen,256,char);
2641         if (*s == '$')
2642             weight -= 3;
2643         else if (isDIGIT(*s)) {
2644             if (s[1] != ']') {
2645                 if (isDIGIT(s[1]) && s[2] == ']')
2646                     weight -= 10;
2647             }
2648             else
2649                 weight -= 100;
2650         }
2651         for (; s < send; s++) {
2652             last_un_char = un_char;
2653             un_char = (unsigned char)*s;
2654             switch (*s) {
2655             case '@':
2656             case '&':
2657             case '$':
2658                 weight -= seen[un_char] * 10;
2659                 if (isALNUM_lazy_if(s+1,UTF)) {
2660                     int len;
2661                     scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2662                     len = (int)strlen(tmpbuf);
2663                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2664                         weight -= 100;
2665                     else
2666                         weight -= 10;
2667                 }
2668                 else if (*s == '$' && s[1] &&
2669                   strchr("[#!%*<>()-=",s[1])) {
2670                     if (/*{*/ strchr("])} =",s[2]))
2671                         weight -= 10;
2672                     else
2673                         weight -= 1;
2674                 }
2675                 break;
2676             case '\\':
2677                 un_char = 254;
2678                 if (s[1]) {
2679                     if (strchr("wds]",s[1]))
2680                         weight += 100;
2681                     else if (seen[(U8)'\''] || seen[(U8)'"'])
2682                         weight += 1;
2683                     else if (strchr("rnftbxcav",s[1]))
2684                         weight += 40;
2685                     else if (isDIGIT(s[1])) {
2686                         weight += 40;
2687                         while (s[1] && isDIGIT(s[1]))
2688                             s++;
2689                     }
2690                 }
2691                 else
2692                     weight += 100;
2693                 break;
2694             case '-':
2695                 if (s[1] == '\\')
2696                     weight += 50;
2697                 if (strchr("aA01! ",last_un_char))
2698                     weight += 30;
2699                 if (strchr("zZ79~",s[1]))
2700                     weight += 30;
2701                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2702                     weight -= 5;        /* cope with negative subscript */
2703                 break;
2704             default:
2705                 if (!isALNUM(last_un_char)
2706                     && !(last_un_char == '$' || last_un_char == '@'
2707                          || last_un_char == '&')
2708                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2709                     char *d = tmpbuf;
2710                     while (isALPHA(*s))
2711                         *d++ = *s++;
2712                     *d = '\0';
2713                     if (keyword(tmpbuf, d - tmpbuf, 0))
2714                         weight -= 150;
2715                 }
2716                 if (un_char == last_un_char + 1)
2717                     weight += 5;
2718                 weight -= seen[un_char];
2719                 break;
2720             }
2721             seen[un_char]++;
2722         }
2723         if (weight >= 0)        /* probably a character class */
2724             return FALSE;
2725     }
2726
2727     return TRUE;
2728 }
2729
2730 /*
2731  * S_intuit_method
2732  *
2733  * Does all the checking to disambiguate
2734  *   foo bar
2735  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
2736  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2737  *
2738  * First argument is the stuff after the first token, e.g. "bar".
2739  *
2740  * Not a method if bar is a filehandle.
2741  * Not a method if foo is a subroutine prototyped to take a filehandle.
2742  * Not a method if it's really "Foo $bar"
2743  * Method if it's "foo $bar"
2744  * Not a method if it's really "print foo $bar"
2745  * Method if it's really "foo package::" (interpreted as package->foo)
2746  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2747  * Not a method if bar is a filehandle or package, but is quoted with
2748  *   =>
2749  */
2750
2751 STATIC int
2752 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2753 {
2754     dVAR;
2755     char *s = start + (*start == '$');
2756     char tmpbuf[sizeof PL_tokenbuf];
2757     STRLEN len;
2758     GV* indirgv;
2759 #ifdef PERL_MAD
2760     int soff;
2761 #endif
2762
2763     PERL_ARGS_ASSERT_INTUIT_METHOD;
2764
2765     if (gv) {
2766         if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2767             return 0;
2768         if (cv) {
2769             if (SvPOK(cv)) {
2770                 const char *proto = SvPVX_const(cv);
2771                 if (proto) {
2772                     if (*proto == ';')
2773                         proto++;
2774                     if (*proto == '*')
2775                         return 0;
2776                 }
2777             }
2778         } else
2779             gv = NULL;
2780     }
2781     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2782     /* start is the beginning of the possible filehandle/object,
2783      * and s is the end of it
2784      * tmpbuf is a copy of it
2785      */
2786
2787     if (*start == '$') {
2788         if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
2789                 isUPPER(*PL_tokenbuf))
2790             return 0;
2791 #ifdef PERL_MAD
2792         len = start - SvPVX(PL_linestr);
2793 #endif
2794         s = PEEKSPACE(s);
2795 #ifdef PERL_MAD
2796         start = SvPVX(PL_linestr) + len;
2797 #endif
2798         PL_bufptr = start;
2799         PL_expect = XREF;
2800         return *s == '(' ? FUNCMETH : METHOD;
2801     }
2802     if (!keyword(tmpbuf, len, 0)) {
2803         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2804             len -= 2;
2805             tmpbuf[len] = '\0';
2806 #ifdef PERL_MAD
2807             soff = s - SvPVX(PL_linestr);
2808 #endif
2809             goto bare_package;
2810         }
2811         indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2812         if (indirgv && GvCVu(indirgv))
2813             return 0;
2814         /* filehandle or package name makes it a method */
2815         if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
2816 #ifdef PERL_MAD
2817             soff = s - SvPVX(PL_linestr);
2818 #endif
2819             s = PEEKSPACE(s);
2820             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2821                 return 0;       /* no assumptions -- "=>" quotes bearword */
2822       bare_package:
2823             start_force(PL_curforce);
2824             NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2825                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
2826             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2827             if (PL_madskills)
2828                 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
2829             PL_expect = XTERM;
2830             force_next(WORD);
2831             PL_bufptr = s;
2832 #ifdef PERL_MAD
2833             PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2834 #endif
2835             return *s == '(' ? FUNCMETH : METHOD;
2836         }
2837     }
2838     return 0;
2839 }
2840
2841 /* Encoded script support. filter_add() effectively inserts a
2842  * 'pre-processing' function into the current source input stream.
2843  * Note that the filter function only applies to the current source file
2844  * (e.g., it will not affect files 'require'd or 'use'd by this one).
2845  *
2846  * The datasv parameter (which may be NULL) can be used to pass
2847  * private data to this instance of the filter. The filter function
2848  * can recover the SV using the FILTER_DATA macro and use it to
2849  * store private buffers and state information.
2850  *
2851  * The supplied datasv parameter is upgraded to a PVIO type
2852  * and the IoDIRP/IoANY field is used to store the function pointer,
2853  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2854  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2855  * private use must be set using malloc'd pointers.
2856  */
2857
2858 SV *
2859 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2860 {
2861     dVAR;
2862     if (!funcp)
2863         return NULL;
2864
2865     if (!PL_parser)
2866         return NULL;
2867
2868     if (!PL_rsfp_filters)
2869         PL_rsfp_filters = newAV();
2870     if (!datasv)
2871         datasv = newSV(0);
2872     SvUPGRADE(datasv, SVt_PVIO);
2873     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2874     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2875     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2876                           FPTR2DPTR(void *, IoANY(datasv)),
2877                           SvPV_nolen(datasv)));
2878     av_unshift(PL_rsfp_filters, 1);
2879     av_store(PL_rsfp_filters, 0, datasv) ;
2880     return(datasv);
2881 }
2882
2883
2884 /* Delete most recently added instance of this filter function. */
2885 void
2886 Perl_filter_del(pTHX_ filter_t funcp)
2887 {
2888     dVAR;
2889     SV *datasv;
2890
2891     PERL_ARGS_ASSERT_FILTER_DEL;
2892
2893 #ifdef DEBUGGING
2894     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2895                           FPTR2DPTR(void*, funcp)));
2896 #endif
2897     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2898         return;
2899     /* if filter is on top of stack (usual case) just pop it off */
2900     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2901     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2902         IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2903         IoANY(datasv) = (void *)NULL;
2904         sv_free(av_pop(PL_rsfp_filters));
2905
2906         return;
2907     }
2908     /* we need to search for the correct entry and clear it     */
2909     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2910 }
2911
2912
2913 /* Invoke the idxth filter function for the current rsfp.        */
2914 /* maxlen 0 = read one text line */
2915 I32
2916 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2917 {
2918     dVAR;
2919     filter_t funcp;
2920     SV *datasv = NULL;
2921     /* This API is bad. It should have been using unsigned int for maxlen.
2922        Not sure if we want to change the API, but if not we should sanity
2923        check the value here.  */
2924     const unsigned int correct_length
2925         = maxlen < 0 ?
2926 #ifdef PERL_MICRO
2927         0x7FFFFFFF
2928 #else
2929         INT_MAX
2930 #endif
2931         : maxlen;
2932
2933     PERL_ARGS_ASSERT_FILTER_READ;
2934
2935     if (!PL_parser || !PL_rsfp_filters)
2936         return -1;
2937     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
2938         /* Provide a default input filter to make life easy.    */
2939         /* Note that we append to the line. This is handy.      */
2940         DEBUG_P(PerlIO_printf(Perl_debug_log,
2941                               "filter_read %d: from rsfp\n", idx));
2942         if (correct_length) {
2943             /* Want a block */
2944             int len ;
2945             const int old_len = SvCUR(buf_sv);
2946
2947             /* ensure buf_sv is large enough */
2948             SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2949             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2950                                    correct_length)) <= 0) {
2951                 if (PerlIO_error(PL_rsfp))
2952                     return -1;          /* error */
2953                 else
2954                     return 0 ;          /* end of file */
2955             }
2956             SvCUR_set(buf_sv, old_len + len) ;
2957         } else {
2958             /* Want a line */
2959             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2960                 if (PerlIO_error(PL_rsfp))
2961                     return -1;          /* error */
2962                 else
2963                     return 0 ;          /* end of file */
2964             }
2965         }
2966         return SvCUR(buf_sv);
2967     }
2968     /* Skip this filter slot if filter has been deleted */
2969     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2970         DEBUG_P(PerlIO_printf(Perl_debug_log,
2971                               "filter_read %d: skipped (filter deleted)\n",
2972                               idx));
2973         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2974     }
2975     /* Get function pointer hidden within datasv        */
2976     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2977     DEBUG_P(PerlIO_printf(Perl_debug_log,
2978                           "filter_read %d: via function %p (%s)\n",
2979                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
2980     /* Call function. The function is expected to       */
2981     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
2982     /* Return: <0:error, =0:eof, >0:not eof             */
2983     return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2984 }
2985
2986 STATIC char *
2987 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2988 {
2989     dVAR;
2990
2991     PERL_ARGS_ASSERT_FILTER_GETS;
2992
2993 #ifdef PERL_CR_FILTER
2994     if (!PL_rsfp_filters) {
2995         filter_add(S_cr_textfilter,NULL);
2996     }
2997 #endif
2998     if (PL_rsfp_filters) {
2999         if (!append)
3000             SvCUR_set(sv, 0);   /* start with empty line        */
3001         if (FILTER_READ(0, sv, 0) > 0)
3002             return ( SvPVX(sv) ) ;
3003         else
3004             return NULL ;
3005     }
3006     else
3007         return (sv_gets(sv, fp, append));
3008 }
3009
3010 STATIC HV *
3011 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3012 {
3013     dVAR;
3014     GV *gv;
3015
3016     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3017
3018     if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3019         return PL_curstash;
3020
3021     if (len > 2 &&
3022         (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3023         (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3024     {
3025         return GvHV(gv);                        /* Foo:: */
3026     }
3027
3028     /* use constant CLASS => 'MyClass' */
3029     gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3030     if (gv && GvCV(gv)) {
3031         SV * const sv = cv_const_sv(GvCV(gv));
3032         if (sv)
3033             pkgname = SvPV_const(sv, len);
3034     }
3035
3036     return gv_stashpvn(pkgname, len, 0);
3037 }
3038
3039 /*
3040  * S_readpipe_override
3041  * Check whether readpipe() is overriden, and generates the appropriate
3042  * optree, provided sublex_start() is called afterwards.
3043  */
3044 STATIC void
3045 S_readpipe_override(pTHX)
3046 {
3047     GV **gvp;
3048     GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3049     pl_yylval.ival = OP_BACKTICK;
3050     if ((gv_readpipe
3051                 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3052             ||
3053             ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3054              && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3055              && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3056     {
3057         PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3058             append_elem(OP_LIST,
3059                 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3060                 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3061     }
3062 }
3063
3064 #ifdef PERL_MAD
3065  /*
3066  * Perl_madlex
3067  * The intent of this yylex wrapper is to minimize the changes to the
3068  * tokener when we aren't interested in collecting madprops.  It remains
3069  * to be seen how successful this strategy will be...
3070  */
3071
3072 int
3073 Perl_madlex(pTHX)
3074 {
3075     int optype;
3076     char *s = PL_bufptr;
3077
3078     /* make sure PL_thiswhite is initialized */
3079     PL_thiswhite = 0;
3080     PL_thismad = 0;
3081
3082     /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3083     if (PL_pending_ident)
3084         return S_pending_ident(aTHX);
3085
3086     /* previous token ate up our whitespace? */
3087     if (!PL_lasttoke && PL_nextwhite) {
3088         PL_thiswhite = PL_nextwhite;
3089         PL_nextwhite = 0;
3090     }
3091
3092     /* isolate the token, and figure out where it is without whitespace */
3093     PL_realtokenstart = -1;
3094     PL_thistoken = 0;
3095     optype = yylex();
3096     s = PL_bufptr;
3097     assert(PL_curforce < 0);
3098
3099     if (!PL_thismad || PL_thismad->mad_key == '^') {    /* not forced already? */
3100         if (!PL_thistoken) {
3101             if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3102                 PL_thistoken = newSVpvs("");
3103             else {
3104                 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3105                 PL_thistoken = newSVpvn(tstart, s - tstart);
3106             }
3107         }
3108         if (PL_thismad) /* install head */
3109             CURMAD('X', PL_thistoken);
3110     }
3111
3112     /* last whitespace of a sublex? */
3113     if (optype == ')' && PL_endwhite) {
3114         CURMAD('X', PL_endwhite);
3115     }
3116
3117     if (!PL_thismad) {
3118
3119         /* if no whitespace and we're at EOF, bail.  Otherwise fake EOF below. */
3120         if (!PL_thiswhite && !PL_endwhite && !optype) {
3121             sv_free(PL_thistoken);
3122             PL_thistoken = 0;
3123             return 0;
3124         }
3125
3126         /* put off final whitespace till peg */
3127         if (optype == ';' && !PL_rsfp) {
3128             PL_nextwhite = PL_thiswhite;
3129             PL_thiswhite = 0;
3130         }
3131         else if (PL_thisopen) {
3132             CURMAD('q', PL_thisopen);
3133             if (PL_thistoken)
3134                 sv_free(PL_thistoken);
3135             PL_thistoken = 0;
3136         }
3137         else {
3138             /* Store actual token text as madprop X */
3139             CURMAD('X', PL_thistoken);
3140         }
3141
3142         if (PL_thiswhite) {
3143             /* add preceding whitespace as madprop _ */
3144             CURMAD('_', PL_thiswhite);
3145         }
3146
3147         if (PL_thisstuff) {
3148             /* add quoted material as madprop = */
3149             CURMAD('=', PL_thisstuff);
3150         }
3151
3152         if (PL_thisclose) {
3153             /* add terminating quote as madprop Q */
3154             CURMAD('Q', PL_thisclose);
3155         }
3156     }
3157
3158     /* special processing based on optype */
3159
3160     switch (optype) {
3161
3162     /* opval doesn't need a TOKEN since it can already store mp */
3163     case WORD:
3164     case METHOD:
3165     case FUNCMETH:
3166     case THING:
3167     case PMFUNC:
3168     case PRIVATEREF:
3169     case FUNC0SUB:
3170     case UNIOPSUB:
3171     case LSTOPSUB:
3172         if (pl_yylval.opval)
3173             append_madprops(PL_thismad, pl_yylval.opval, 0);
3174         PL_thismad = 0;
3175         return optype;
3176
3177     /* fake EOF */
3178     case 0:
3179         optype = PEG;
3180         if (PL_endwhite) {
3181             addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3182             PL_endwhite = 0;
3183         }
3184         break;
3185
3186     case ']':
3187     case '}':
3188         if (PL_faketokens)
3189             break;
3190         /* remember any fake bracket that lexer is about to discard */
3191         if (PL_lex_brackets == 1 &&
3192             ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3193         {
3194             s = PL_bufptr;
3195             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3196                 s++;
3197             if (*s == '}') {
3198                 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3199                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3200                 PL_thiswhite = 0;
3201                 PL_bufptr = s - 1;
3202                 break;  /* don't bother looking for trailing comment */
3203             }
3204             else
3205                 s = PL_bufptr;
3206         }
3207         if (optype == ']')
3208             break;
3209         /* FALLTHROUGH */
3210
3211     /* attach a trailing comment to its statement instead of next token */
3212     case ';':
3213         if (PL_faketokens)
3214             break;
3215         if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3216             s = PL_bufptr;
3217             while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3218                 s++;
3219             if (*s == '\n' || *s == '#') {
3220                 while (s < PL_bufend && *s != '\n')
3221                     s++;
3222                 if (s < PL_bufend)
3223                     s++;
3224                 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3225                 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3226                 PL_thiswhite = 0;
3227                 PL_bufptr = s;
3228             }
3229         }
3230         break;
3231
3232     /* pval */
3233     case LABEL:
3234         break;
3235
3236     /* ival */
3237     default:
3238         break;
3239
3240     }
3241
3242     /* Create new token struct.  Note: opvals return early above. */
3243     pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
3244     PL_thismad = 0;
3245     return optype;
3246 }
3247 #endif
3248
3249 STATIC char *
3250 S_tokenize_use(pTHX_ int is_use, char *s) {
3251     dVAR;
3252
3253     PERL_ARGS_ASSERT_TOKENIZE_USE;
3254
3255     if (PL_expect != XSTATE)
3256         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3257                     is_use ? "use" : "no"));
3258     s = SKIPSPACE1(s);
3259     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3260         s = force_version(s, TRUE);
3261         if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
3262             start_force(PL_curforce);
3263             NEXTVAL_NEXTTOKE.opval = NULL;
3264             force_next(WORD);
3265         }
3266         else if (*s == 'v') {
3267             s = force_word(s,WORD,FALSE,TRUE,FALSE);
3268             s = force_version(s, FALSE);
3269         }
3270     }
3271     else {
3272         s = force_word(s,WORD,FALSE,TRUE,FALSE);
3273         s = force_version(s, FALSE);
3274     }
3275     pl_yylval.ival = is_use;
3276     return s;
3277 }
3278 #ifdef DEBUGGING
3279     static const char* const exp_name[] =
3280         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
3281           "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
3282         };
3283 #endif
3284
3285 /*
3286   yylex
3287
3288   Works out what to call the token just pulled out of the input
3289   stream.  The yacc parser takes care of taking the ops we return and
3290   stitching them into a tree.
3291
3292   Returns:
3293     PRIVATEREF
3294
3295   Structure:
3296       if read an identifier
3297           if we're in a my declaration
3298               croak if they tried to say my($foo::bar)
3299               build the ops for a my() declaration
3300           if it's an access to a my() variable
3301               are we in a sort block?
3302                   croak if my($a); $a <=> $b
3303               build ops for access to a my() variable
3304           if in a dq string, and they've said @foo and we can't find @foo
3305               croak
3306           build ops for a bareword
3307       if we already built the token before, use it.
3308 */
3309
3310
3311 #ifdef __SC__
3312 #pragma segment Perl_yylex
3313 #endif
3314 int
3315 Perl_yylex(pTHX)
3316 {
3317     dVAR;
3318     register char *s = PL_bufptr;
3319     register char *d;
3320     STRLEN len;
3321     bool bof = FALSE;
3322
3323     /* orig_keyword, gvp, and gv are initialized here because
3324      * jump to the label just_a_word_zero can bypass their
3325      * initialization later. */
3326     I32 orig_keyword = 0;
3327     GV *gv = NULL;
3328     GV **gvp = NULL;
3329
3330     DEBUG_T( {
3331         SV* tmp = newSVpvs("");
3332         PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3333             (IV)CopLINE(PL_curcop),
3334             lex_state_names[PL_lex_state],
3335             exp_name[PL_expect],
3336             pv_display(tmp, s, strlen(s), 0, 60));
3337         SvREFCNT_dec(tmp);
3338     } );
3339     /* check if there's an identifier for us to look at */
3340     if (PL_pending_ident)
3341         return REPORT(S_pending_ident(aTHX));
3342
3343     /* no identifier pending identification */
3344
3345     switch (PL_lex_state) {
3346 #ifdef COMMENTARY
3347     case LEX_NORMAL:            /* Some compilers will produce faster */
3348     case LEX_INTERPNORMAL:      /* code if we comment these out. */
3349         break;
3350 #endif
3351
3352     /* when we've already built the next token, just pull it out of the queue */
3353     case LEX_KNOWNEXT:
3354 #ifdef PERL_MAD
3355         PL_lasttoke--;
3356         pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
3357         if (PL_madskills) {
3358             PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
3359             PL_nexttoke[PL_lasttoke].next_mad = 0;
3360             if (PL_thismad && PL_thismad->mad_key == '_') {
3361                 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
3362                 PL_thismad->mad_val = 0;
3363                 mad_free(PL_thismad);
3364                 PL_thismad = 0;
3365             }
3366         }
3367         if (!PL_lasttoke) {
3368             PL_lex_state = PL_lex_defer;
3369             PL_expect = PL_lex_expect;
3370             PL_lex_defer = LEX_NORMAL;
3371             if (!PL_nexttoke[PL_lasttoke].next_type)
3372                 return yylex();
3373         }
3374 #else
3375         PL_nexttoke--;
3376         pl_yylval = PL_nextval[PL_nexttoke];
3377         if (!PL_nexttoke) {
3378             PL_lex_state = PL_lex_defer;
3379             PL_expect = PL_lex_expect;
3380             PL_lex_defer = LEX_NORMAL;
3381         }
3382 #endif
3383 #ifdef PERL_MAD
3384         /* FIXME - can these be merged?  */
3385         return(PL_nexttoke[PL_lasttoke].next_type);
3386 #else
3387         return REPORT(PL_nexttype[PL_nexttoke]);
3388 #endif
3389
3390     /* interpolated case modifiers like \L \U, including \Q and \E.
3391        when we get here, PL_bufptr is at the \
3392     */
3393     case LEX_INTERPCASEMOD:
3394 #ifdef DEBUGGING
3395         if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
3396             Perl_croak(aTHX_ "panic: INTERPCASEMOD");
3397 #endif
3398         /* handle \E or end of string */
3399         if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
3400             /* if at a \E */
3401             if (PL_lex_casemods) {
3402                 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3403                 PL_lex_casestack[PL_lex_casemods] = '\0';
3404
3405                 if (PL_bufptr != PL_bufend
3406                     && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3407                     PL_bufptr += 2;
3408                     PL_lex_state = LEX_INTERPCONCAT;
3409 #ifdef PERL_MAD
3410                     if (PL_madskills)
3411                         PL_thistoken = newSVpvs("\\E");
3412 #endif
3413                 }
3414                 return REPORT(')');
3415             }
3416 #ifdef PERL_MAD
3417             while (PL_bufptr != PL_bufend &&
3418               PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
3419                 if (!PL_thiswhite)
3420                     PL_thiswhite = newSVpvs("");
3421                 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
3422                 PL_bufptr += 2;
3423             }
3424 #else
3425             if (PL_bufptr != PL_bufend)
3426                 PL_bufptr += 2;
3427 #endif
3428             PL_lex_state = LEX_INTERPCONCAT;
3429             return yylex();
3430         }
3431         else {
3432             DEBUG_T({ PerlIO_printf(Perl_debug_log,
3433               "### Saw case modifier\n"); });
3434             s = PL_bufptr + 1;
3435             if (s[1] == '\\' && s[2] == 'E') {
3436 #ifdef PERL_MAD
3437                 if (!PL_thiswhite)
3438                     PL_thiswhite = newSVpvs("");
3439                 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
3440 #endif
3441                 PL_bufptr = s + 3;
3442                 PL_lex_state = LEX_INTERPCONCAT;
3443                 return yylex();
3444             }
3445             else {
3446                 I32 tmp;
3447                 if (!PL_madskills) /* when just compiling don't need correct */
3448                     if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3449                         tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
3450                 if ((*s == 'L' || *s == 'U') &&
3451                     (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3452                     PL_lex_casestack[--PL_lex_casemods] = '\0';
3453                     return REPORT(')');
3454                 }
3455                 if (PL_lex_casemods > 10)
3456                     Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3457                 PL_lex_casestack[PL_lex_casemods++] = *s;
3458                 PL_lex_casestack[PL_lex_casemods] = '\0';
3459                 PL_lex_state = LEX_INTERPCONCAT;
3460                 start_force(PL_curforce);
3461                 NEXTVAL_NEXTTOKE.ival = 0;
3462                 force_next('(');
3463                 start_force(PL_curforce);
3464                 if (*s == 'l')
3465                     NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
3466                 else if (*s == 'u')
3467                     NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
3468                 else if (*s == 'L')
3469                     NEXTVAL_NEXTTOKE.ival = OP_LC;
3470                 else if (*s == 'U')
3471                     NEXTVAL_NEXTTOKE.ival = OP_UC;
3472                 else if (*s == 'Q')
3473                     NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
3474                 else
3475                     Perl_croak(aTHX_ "panic: yylex");
3476                 if (PL_madskills) {
3477                     SV* const tmpsv = newSVpvs("\\ ");
3478                     /* replace the space with the character we want to escape
3479                      */
3480                     SvPVX(tmpsv)[1] = *s;
3481                     curmad('_', tmpsv);
3482                 }
3483                 PL_bufptr = s + 1;
3484             }
3485             force_next(FUNC);
3486             if (PL_lex_starts) {
3487                 s = PL_bufptr;
3488                 PL_lex_starts = 0;
3489 #ifdef PERL_MAD
3490                 if (PL_madskills) {
3491                     if (PL_thistoken)
3492                         sv_free(PL_thistoken);
3493                     PL_thistoken = newSVpvs("");
3494                 }
3495 #endif
3496                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3497                 if (PL_lex_casemods == 1 && PL_lex_inpat)
3498                     OPERATOR(',');
3499                 else
3500                     Aop(OP_CONCAT);
3501             }
3502             else
3503                 return yylex();
3504         }
3505
3506     case LEX_INTERPPUSH:
3507         return REPORT(sublex_push());
3508
3509     case LEX_INTERPSTART:
3510         if (PL_bufptr == PL_bufend)
3511             return REPORT(sublex_done());
3512         DEBUG_T({ PerlIO_printf(Perl_debug_log,
3513               "### Interpolated variable\n"); });
3514         PL_expect = XTERM;
3515         PL_lex_dojoin = (*PL_bufptr == '@');
3516         PL_lex_state = LEX_INTERPNORMAL;
3517         if (PL_lex_dojoin) {
3518             start_force(PL_curforce);
3519             NEXTVAL_NEXTTOKE.ival = 0;
3520             force_next(',');
3521             start_force(PL_curforce);
3522             force_ident("\"", '$');
3523             start_force(PL_curforce);
3524             NEXTVAL_NEXTTOKE.ival = 0;
3525             force_next('$');
3526             start_force(PL_curforce);
3527             NEXTVAL_NEXTTOKE.ival = 0;
3528             force_next('(');
3529             start_force(PL_curforce);
3530             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
3531             force_next(FUNC);
3532         }
3533         if (PL_lex_starts++) {
3534             s = PL_bufptr;
3535 #ifdef PERL_MAD
3536             if (PL_madskills) {
3537                 if (PL_thistoken)
3538                     sv_free(PL_thistoken);
3539                 PL_thistoken = newSVpvs("");
3540             }
3541 #endif
3542             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3543             if (!PL_lex_casemods && PL_lex_inpat)
3544                 OPERATOR(',');
3545             else
3546                 Aop(OP_CONCAT);
3547         }
3548         return yylex();
3549
3550     case LEX_INTERPENDMAYBE:
3551         if (intuit_more(PL_bufptr)) {
3552             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
3553             break;
3554         }
3555         /* FALL THROUGH */
3556
3557     case LEX_INTERPEND:
3558         if (PL_lex_dojoin) {
3559             PL_lex_dojoin = FALSE;
3560             PL_lex_state = LEX_INTERPCONCAT;
3561 #ifdef PERL_MAD
3562             if (PL_madskills) {
3563                 if (PL_thistoken)
3564                     sv_free(PL_thistoken);
3565                 PL_thistoken = newSVpvs("");
3566             }
3567 #endif
3568             return REPORT(')');
3569         }
3570         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
3571             && SvEVALED(PL_lex_repl))
3572         {
3573             if (PL_bufptr != PL_bufend)
3574                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
3575             PL_lex_repl = NULL;
3576         }
3577         /* FALLTHROUGH */
3578     case LEX_INTERPCONCAT:
3579 #ifdef DEBUGGING
3580         if (PL_lex_brackets)
3581             Perl_croak(aTHX_ "panic: INTERPCONCAT");
3582 #endif
3583         if (PL_bufptr == PL_bufend)
3584             return REPORT(sublex_done());
3585
3586         if (SvIVX(PL_linestr) == '\'') {
3587             SV *sv = newSVsv(PL_linestr);
3588             if (!PL_lex_inpat)
3589                 sv = tokeq(sv);
3590             else if ( PL_hints & HINT_NEW_RE )
3591                 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
3592             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3593             s = PL_bufend;
3594         }
3595         else {
3596             s = scan_const(PL_bufptr);
3597             if (*s == '\\')
3598                 PL_lex_state = LEX_INTERPCASEMOD;
3599             else
3600                 PL_lex_state = LEX_INTERPSTART;
3601         }
3602
3603         if (s != PL_bufptr) {
3604             start_force(PL_curforce);
3605             if (PL_madskills) {
3606                 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3607             }
3608             NEXTVAL_NEXTTOKE = pl_yylval;
3609             PL_expect = XTERM;
3610             force_next(THING);
3611             if (PL_lex_starts++) {
3612 #ifdef PERL_MAD
3613                 if (PL_madskills) {
3614                     if (PL_thistoken)
3615                         sv_free(PL_thistoken);
3616                     PL_thistoken = newSVpvs("");
3617                 }
3618 #endif
3619                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3620                 if (!PL_lex_casemods && PL_lex_inpat)
3621                     OPERATOR(',');
3622                 else
3623                     Aop(OP_CONCAT);
3624             }
3625             else {
3626                 PL_bufptr = s;
3627                 return yylex();
3628             }
3629         }
3630
3631         return yylex();
3632     case LEX_FORMLINE:
3633         PL_lex_state = LEX_NORMAL;
3634         s = scan_formline(PL_bufptr);
3635         if (!PL_lex_formbrack)
3636             goto rightbracket;
3637         OPERATOR(';');
3638     }
3639
3640     s = PL_bufptr;
3641     PL_oldoldbufptr = PL_oldbufptr;
3642     PL_oldbufptr = s;
3643
3644   retry:
3645 #ifdef PERL_MAD
3646     if (PL_thistoken) {
3647         sv_free(PL_thistoken);
3648         PL_thistoken = 0;
3649     }
3650     PL_realtokenstart = s - SvPVX(PL_linestr);  /* assume but undo on ws */
3651 #endif
3652     switch (*s) {
3653     default:
3654         if (isIDFIRST_lazy_if(s,UTF))
3655             goto keylookup;
3656         {
3657         unsigned char c = *s;
3658         len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
3659         if (len > UNRECOGNIZED_PRECEDE_COUNT) {
3660             d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
3661         } else {
3662             d = PL_linestart;
3663         }
3664         *s = '\0';
3665         Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
3666     }
3667     case 4:
3668     case 26:
3669         goto fake_eof;                  /* emulate EOF on ^D or ^Z */
3670     case 0:
3671 #ifdef PERL_MAD
3672         if (PL_madskills)
3673             PL_faketokens = 0;
3674 #endif
3675         if (!PL_rsfp) {
3676             PL_last_uni = 0;
3677             PL_last_lop = 0;
3678             if (PL_lex_brackets) {
3679                 yyerror((const char *)
3680                         (PL_lex_formbrack
3681                          ? "Format not terminated"
3682                          : "Missing right curly or square bracket"));
3683             }
3684             DEBUG_T( { PerlIO_printf(Perl_debug_log,
3685                         "### Tokener got EOF\n");
3686             } );
3687             TOKEN(0);
3688         }
3689         if (s++ < PL_bufend)
3690             goto retry;                 /* ignore stray nulls */
3691         PL_last_uni = 0;
3692         PL_last_lop = 0;
3693         if (!PL_in_eval && !PL_preambled) {
3694             PL_preambled = TRUE;
3695 #ifdef PERL_MAD
3696             if (PL_madskills)
3697                 PL_faketokens = 1;
3698 #endif
3699             if (PL_perldb) {
3700                 /* Generate a string of Perl code to load the debugger.
3701                  * If PERL5DB is set, it will return the contents of that,
3702                  * otherwise a compile-time require of perl5db.pl.  */
3703
3704                 const char * const pdb = PerlEnv_getenv("PERL5DB");
3705
3706                 if (pdb) {
3707                     sv_setpv(PL_linestr, pdb);
3708                     sv_catpvs(PL_linestr,";");
3709                 } else {
3710                     SETERRNO(0,SS_NORMAL);
3711                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
3712                 }
3713             } else
3714                 sv_setpvs(PL_linestr,"");
3715             if (PL_preambleav) {
3716                 SV **svp = AvARRAY(PL_preambleav);
3717                 SV **const end = svp + AvFILLp(PL_preambleav);
3718                 while(svp <= end) {
3719                     sv_catsv(PL_linestr, *svp);
3720                     ++svp;
3721                     sv_catpvs(PL_linestr, ";");
3722                 }
3723                 sv_free(MUTABLE_SV(PL_preambleav));
3724                 PL_preambleav = NULL;
3725             }
3726             if (PL_minus_E)
3727                 sv_catpvs(PL_linestr,
3728                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3729             if (PL_minus_n || PL_minus_p) {
3730                 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3731                 if (PL_minus_l)
3732                     sv_catpvs(PL_linestr,"chomp;");
3733                 if (PL_minus_a) {
3734                     if (PL_minus_F) {
3735                         if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3736                              || *PL_splitstr == '"')
3737                               && strchr(PL_splitstr + 1, *PL_splitstr))
3738                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
3739                         else {
3740                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3741                                bytes can be used as quoting characters.  :-) */
3742                             const char *splits = PL_splitstr;
3743                             sv_catpvs(PL_linestr, "our @F=split(q\0");
3744                             do {
3745                                 /* Need to \ \s  */
3746                                 if (*splits == '\\')
3747                                     sv_catpvn(PL_linestr, splits, 1);
3748                                 sv_catpvn(PL_linestr, splits, 1);
3749                             } while (*splits++);
3750                             /* This loop will embed the trailing NUL of
3751                                PL_linestr as the last thing it does before
3752                                terminating.  */
3753                             sv_catpvs(PL_linestr, ");");
3754                         }
3755                     }
3756                     else
3757                         sv_catpvs(PL_linestr,"our @F=split(' ');");
3758                 }
3759             }
3760             sv_catpvs(PL_linestr, "\n");
3761             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3762             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3763             PL_last_lop = PL_last_uni = NULL;
3764             if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3765                 update_debugger_info(PL_linestr, NULL, 0);
3766             goto retry;
3767         }
3768         do {
3769             bof = PL_rsfp ? TRUE : FALSE;
3770             if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
3771               fake_eof:
3772 #ifdef PERL_MAD
3773                 PL_realtokenstart = -1;
3774 #endif
3775                 if (PL_rsfp) {
3776                     if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3777                         PerlIO_clearerr(PL_rsfp);
3778                     else
3779                         (void)PerlIO_close(PL_rsfp);
3780                     PL_rsfp = NULL;
3781                     PL_doextract = FALSE;
3782                 }
3783                 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
3784 #ifdef PERL_MAD
3785                     if (PL_madskills)
3786                         PL_faketokens = 1;
3787 #endif
3788                     if (PL_minus_p)
3789                         sv_setpvs(PL_linestr, ";}continue{print;}");
3790                     else
3791                         sv_setpvs(PL_linestr, ";}");
3792                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3793                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3794                     PL_last_lop = PL_last_uni = NULL;
3795                     PL_minus_n = PL_minus_p = 0;
3796                     goto retry;
3797                 }
3798                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3799                 PL_last_lop = PL_last_uni = NULL;
3800                 sv_setpvs(PL_linestr,"");
3801                 TOKEN(';');     /* not infinite loop because rsfp is NULL now */
3802             }
3803             /* If it looks like the start of a BOM or raw UTF-16,
3804              * check if it in fact is. */
3805             else if (bof &&
3806                      (*s == 0 ||
3807                       *(U8*)s == 0xEF ||
3808                       *(U8*)s >= 0xFE ||
3809                       s[1] == 0)) {
3810 #ifdef PERLIO_IS_STDIO
3811 #  ifdef __GNU_LIBRARY__
3812 #    if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
3813 #      define FTELL_FOR_PIPE_IS_BROKEN
3814 #    endif
3815 #  else
3816 #    ifdef __GLIBC__
3817 #      if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3818 #        define FTELL_FOR_PIPE_IS_BROKEN
3819 #      endif
3820 #    endif
3821 #  endif
3822 #endif
3823                 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
3824                 if (bof) {
3825                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3826                     s = swallow_bom((U8*)s);
3827                 }
3828             }
3829             if (PL_doextract) {
3830                 /* Incest with pod. */
3831 #ifdef PERL_MAD
3832                 if (PL_madskills)
3833                     sv_catsv(PL_thiswhite, PL_linestr);
3834 #endif
3835                 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
3836                     sv_setpvs(PL_linestr, "");
3837                     PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3838                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3839                     PL_last_lop = PL_last_uni = NULL;
3840                     PL_doextract = FALSE;
3841                 }
3842             }
3843             incline(s);
3844         } while (PL_doextract);
3845         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3846         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
3847             update_debugger_info(PL_linestr, NULL, 0);
3848         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3849         PL_last_lop = PL_last_uni = NULL;
3850         if (CopLINE(PL_curcop) == 1) {
3851             while (s < PL_bufend && isSPACE(*s))
3852                 s++;
3853             if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
3854                 s++;
3855 #ifdef PERL_MAD
3856             if (PL_madskills)
3857                 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
3858 #endif
3859             d = NULL;
3860             if (!PL_in_eval) {
3861                 if (*s == '#' && *(s+1) == '!')
3862                     d = s + 2;
3863 #ifdef ALTERNATE_SHEBANG
3864                 else {
3865                     static char const as[] = ALTERNATE_SHEBANG;
3866                     if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
3867                         d = s + (sizeof(as) - 1);
3868                 }
3869 #endif /* ALTERNATE_SHEBANG */
3870             }
3871             if (d) {
3872                 char *ipath;
3873                 char *ipathend;
3874
3875                 while (isSPACE(*d))
3876                     d++;
3877                 ipath = d;
3878                 while (*d && !isSPACE(*d))
3879                     d++;
3880                 ipathend = d;
3881
3882 #ifdef ARG_ZERO_IS_SCRIPT
3883                 if (ipathend > ipath) {
3884                     /*
3885                      * HP-UX (at least) sets argv[0] to the script name,
3886                      * which makes $^X incorrect.  And Digital UNIX and Linux,
3887                      * at least, set argv[0] to the basename of the Perl
3888                      * interpreter. So, having found "#!", we'll set it right.
3889                      */
3890                     SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3891                                                     SVt_PV)); /* $^X */
3892                     assert(SvPOK(x) || SvGMAGICAL(x));
3893                     if (sv_eq(x, CopFILESV(PL_curcop))) {
3894                         sv_setpvn(x, ipath, ipathend - ipath);
3895                         SvSETMAGIC(x);
3896                     }
3897                     else {
3898                         STRLEN blen;
3899                         STRLEN llen;
3900                         const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3901                         const char * const lstart = SvPV_const(x,llen);
3902                         if (llen < blen) {
3903                             bstart += blen - llen;
3904                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3905                                 sv_setpvn(x, ipath, ipathend - ipath);
3906                                 SvSETMAGIC(x);
3907                             }
3908                         }
3909                     }
3910                     TAINT_NOT;  /* $^X is always tainted, but that's OK */
3911                 }
3912 #endif /* ARG_ZERO_IS_SCRIPT */
3913
3914                 /*
3915                  * Look for options.
3916                  */
3917                 d = instr(s,"perl -");
3918                 if (!d) {
3919                     d = instr(s,"perl");
3920 #if defined(DOSISH)
3921                     /* avoid getting into infinite loops when shebang
3922                      * line contains "Perl" rather than "perl" */
3923                     if (!d) {
3924                         for (d = ipathend-4; d >= ipath; --d) {
3925                             if ((*d == 'p' || *d == 'P')
3926                                 && !ibcmp(d, "perl", 4))
3927                             {
3928                                 break;
3929                             }
3930                         }
3931                         if (d < ipath)
3932                             d = NULL;
3933                     }
3934 #endif
3935                 }
3936 #ifdef ALTERNATE_SHEBANG
3937                 /*
3938                  * If the ALTERNATE_SHEBANG on this system starts with a
3939                  * character that can be part of a Perl expression, then if
3940                  * we see it but not "perl", we're probably looking at the
3941                  * start of Perl code, not a request to hand off to some
3942                  * other interpreter.  Similarly, if "perl" is there, but
3943                  * not in the first 'word' of the line, we assume the line
3944                  * contains the start of the Perl program.
3945                  */
3946                 if (d && *s != '#') {
3947                     const char *c = ipath;
3948                     while (*c && !strchr("; \t\r\n\f\v#", *c))
3949                         c++;
3950                     if (c < d)
3951                         d = NULL;       /* "perl" not in first word; ignore */
3952                     else
3953                         *s = '#';       /* Don't try to parse shebang line */
3954                 }
3955 #endif /* ALTERNATE_SHEBANG */
3956                 if (!d &&
3957                     *s == '#' &&
3958                     ipathend > ipath &&
3959                     !PL_minus_c &&
3960                     !instr(s,"indir") &&
3961                     instr(PL_origargv[0],"perl"))
3962                 {
3963                     dVAR;
3964                     char **newargv;
3965
3966                     *ipathend = '\0';
3967                     s = ipathend + 1;
3968                     while (s < PL_bufend && isSPACE(*s))
3969                         s++;
3970                     if (s < PL_bufend) {
3971                         Newx(newargv,PL_origargc+3,char*);
3972                         newargv[1] = s;
3973                         while (s < PL_bufend && !isSPACE(*s))
3974                             s++;
3975                         *s = '\0';
3976                         Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3977                     }
3978                     else
3979                         newargv = PL_origargv;
3980                     newargv[0] = ipath;
3981                     PERL_FPU_PRE_EXEC
3982                     PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3983                     PERL_FPU_POST_EXEC
3984                     Perl_croak(aTHX_ "Can't exec %s", ipath);
3985                 }
3986                 if (d) {
3987                     while (*d && !isSPACE(*d))
3988                         d++;
3989                     while (SPACE_OR_TAB(*d))
3990                         d++;
3991
3992                     if (*d++ == '-') {
3993                         const bool switches_done = PL_doswitches;
3994                         const U32 oldpdb = PL_perldb;
3995                         const bool oldn = PL_minus_n;
3996                         const bool oldp = PL_minus_p;
3997                         const char *d1 = d;
3998
3999                         do {
4000                             bool baduni = FALSE;
4001                             if (*d1 == 'C') {
4002                                 const char *d2 = d1 + 1;
4003                                 if (parse_unicode_opts((const char **)&d2)
4004                                     != PL_unicode)
4005                                     baduni = TRUE;
4006                             }
4007                             if (baduni || *d1 == 'M' || *d1 == 'm') {
4008                                 const char * const m = d1;
4009                                 while (*d1 && !isSPACE(*d1))
4010                                     d1++;
4011                                 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4012                                       (int)(d1 - m), m);
4013                             }
4014                             d1 = moreswitches(d1);
4015                         } while (d1);
4016                         if (PL_doswitches && !switches_done) {
4017                             int argc = PL_origargc;
4018                             char **argv = PL_origargv;
4019                             do {
4020                                 argc--,argv++;
4021                             } while (argc && argv[0][0] == '-' && argv[0][1]);
4022                             init_argv_symbols(argc,argv);
4023                         }
4024                         if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4025                             ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4026                               /* if we have already added "LINE: while (<>) {",
4027                                  we must not do it again */
4028                         {
4029                             sv_setpvs(PL_linestr, "");
4030                             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4031                             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4032                             PL_last_lop = PL_last_uni = NULL;
4033                             PL_preambled = FALSE;
4034                             if (PERLDB_LINE || PERLDB_SAVESRC)
4035                                 (void)gv_fetchfile(PL_origfilename);
4036                             goto retry;
4037                         }
4038                     }
4039                 }
4040             }
4041         }
4042         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4043             PL_bufptr = s;
4044             PL_lex_state = LEX_FORMLINE;
4045             return yylex();
4046         }
4047         goto retry;
4048     case '\r':
4049 #ifdef PERL_STRICT_CR
4050         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4051         Perl_croak(aTHX_
4052       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4053 #endif
4054     case ' ': case '\t': case '\f': case 013:
4055 #ifdef PERL_MAD
4056         PL_realtokenstart = -1;
4057         if (!PL_thiswhite)
4058             PL_thiswhite = newSVpvs("");
4059         sv_catpvn(PL_thiswhite, s, 1);
4060 #endif
4061         s++;
4062         goto retry;
4063     case '#':
4064     case '\n':
4065 #ifdef PERL_MAD
4066         PL_realtokenstart = -1;
4067         if (PL_madskills)
4068             PL_faketokens = 0;
4069 #endif
4070         if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4071             if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4072                 /* handle eval qq[#line 1 "foo"\n ...] */
4073                 CopLINE_dec(PL_curcop);
4074                 incline(s);
4075             }
4076             if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4077                 s = SKIPSPACE0(s);
4078                 if (!PL_in_eval || PL_rsfp)
4079                     incline(s);
4080             }
4081             else {
4082                 d = s;
4083                 while (d < PL_bufend && *d != '\n')
4084                     d++;
4085                 if (d < PL_bufend)
4086                     d++;
4087                 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4088                   Perl_croak(aTHX_ "panic: input overflow");
4089 #ifdef PERL_MAD
4090                 if (PL_madskills)
4091                     PL_thiswhite = newSVpvn(s, d - s);
4092 #endif
4093                 s = d;
4094                 incline(s);
4095             }
4096             if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4097                 PL_bufptr = s;
4098                 PL_lex_state = LEX_FORMLINE;
4099                 return yylex();
4100             }
4101         }
4102         else {
4103 #ifdef PERL_MAD
4104             if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4105                 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4106                     PL_faketokens = 0;
4107                     s = SKIPSPACE0(s);
4108                     TOKEN(PEG); /* make sure any #! line is accessible */
4109                 }
4110                 s = SKIPSPACE0(s);
4111             }
4112             else {
4113 /*              if (PL_madskills && PL_lex_formbrack) { */
4114                     d = s;
4115                     while (d < PL_bufend && *d != '\n')
4116                         d++;
4117                     if (d < PL_bufend)
4118                         d++;
4119                     else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4120                       Perl_croak(aTHX_ "panic: input overflow");
4121                     if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4122                         if (!PL_thiswhite)
4123                             PL_thiswhite = newSVpvs("");
4124                         if (CopLINE(PL_curcop) == 1) {
4125                             sv_setpvs(PL_thiswhite, "");
4126                             PL_faketokens = 0;
4127                         }
4128                         sv_catpvn(PL_thiswhite, s, d - s);
4129                     }
4130                     s = d;
4131 /*              }
4132                 *s = '\0';
4133                 PL_bufend = s; */
4134             }
4135 #else
4136             *s = '\0';
4137             PL_bufend = s;
4138 #endif
4139         }
4140         goto retry;
4141     case '-':
4142         if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
4143             I32 ftst = 0;
4144             char tmp;
4145
4146             s++;
4147             PL_bufptr = s;
4148             tmp = *s++;
4149
4150             while (s < PL_bufend && SPACE_OR_TAB(*s))
4151                 s++;
4152
4153             if (strnEQ(s,"=>",2)) {
4154                 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4155                 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
4156                 OPERATOR('-');          /* unary minus */
4157             }
4158             PL_last_uni = PL_oldbufptr;
4159             switch (tmp) {
4160             case 'r': ftst = OP_FTEREAD;        break;
4161             case 'w': ftst = OP_FTEWRITE;       break;
4162             case 'x': ftst = OP_FTEEXEC;        break;
4163             case 'o': ftst = OP_FTEOWNED;       break;
4164             case 'R': ftst = OP_FTRREAD;        break;
4165             case 'W': ftst = OP_FTRWRITE;       break;
4166             case 'X': ftst = OP_FTREXEC;        break;
4167             case 'O': ftst = OP_FTROWNED;       break;
4168             case 'e': ftst = OP_FTIS;           break;
4169             case 'z': ftst = OP_FTZERO;         break;
4170             case 's': ftst = OP_FTSIZE;         break;
4171             case 'f': ftst = OP_FTFILE;         break;
4172             case 'd': ftst = OP_FTDIR;          break;
4173             case 'l': ftst = OP_FTLINK;         break;
4174             case 'p': ftst = OP_FTPIPE;         break;
4175             case 'S': ftst = OP_FTSOCK;         break;
4176             case 'u': ftst = OP_FTSUID;         break;
4177             case 'g': ftst = OP_FTSGID;         break;
4178             case 'k': ftst = OP_FTSVTX;         break;
4179             case 'b': ftst = OP_FTBLK;          break;
4180             case 'c': ftst = OP_FTCHR;          break;
4181             case 't': ftst = OP_FTTTY;          break;
4182             case 'T': ftst = OP_FTTEXT;         break;
4183             case 'B': ftst = OP_FTBINARY;       break;
4184             case 'M': case 'A': case 'C':
4185                 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
4186                 switch (tmp) {
4187                 case 'M': ftst = OP_FTMTIME;    break;
4188                 case 'A': ftst = OP_FTATIME;    break;
4189                 case 'C': ftst = OP_FTCTIME;    break;
4190                 default:                        break;
4191                 }
4192                 break;
4193             default:
4194                 break;
4195             }
4196             if (ftst) {
4197                 PL_last_lop_op = (OPCODE)ftst;
4198                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4199                         "### Saw file test %c\n", (int)tmp);
4200                 } );
4201                 FTST(ftst);
4202             }
4203             else {
4204                 /* Assume it was a minus followed by a one-letter named
4205                  * subroutine call (or a -bareword), then. */
4206                 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4207                         "### '-%c' looked like a file test but was not\n",
4208                         (int) tmp);
4209                 } );
4210                 s = --PL_bufptr;
4211             }
4212         }
4213         {
4214             const char tmp = *s++;
4215             if (*s == tmp) {
4216                 s++;
4217                 if (PL_expect == XOPERATOR)
4218                     TERM(POSTDEC);
4219                 else
4220                     OPERATOR(PREDEC);
4221             }
4222             else if (*s == '>') {
4223                 s++;
4224                 s = SKIPSPACE1(s);
4225                 if (isIDFIRST_lazy_if(s,UTF)) {
4226                     s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4227                     TOKEN(ARROW);
4228                 }
4229                 else if (*s == '$')
4230                     OPERATOR(ARROW);
4231                 else
4232                     TERM(ARROW);
4233             }
4234             if (PL_expect == XOPERATOR)
4235                 Aop(OP_SUBTRACT);
4236             else {
4237                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4238                     check_uni();
4239                 OPERATOR('-');          /* unary minus */
4240             }
4241         }
4242
4243     case '+':
4244         {
4245             const char tmp = *s++;
4246             if (*s == tmp) {
4247                 s++;
4248                 if (PL_expect == XOPERATOR)
4249                     TERM(POSTINC);
4250                 else
4251                     OPERATOR(PREINC);
4252             }
4253             if (PL_expect == XOPERATOR)
4254                 Aop(OP_ADD);
4255             else {
4256                 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4257                     check_uni();
4258                 OPERATOR('+');
4259             }
4260         }
4261
4262     case '*':
4263         if (PL_expect != XOPERATOR) {
4264             s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4265             PL_expect = XOPERATOR;
4266             force_ident(PL_tokenbuf, '*');
4267             if (!*PL_tokenbuf)
4268                 PREREF('*');
4269             TERM('*');
4270         }
4271         s++;
4272         if (*s == '*') {
4273             s++;
4274             PWop(OP_POW);
4275         }
4276         Mop(OP_MULTIPLY);
4277
4278     case '%':
4279         if (PL_expect == XOPERATOR) {
4280             ++s;
4281             Mop(OP_MODULO);
4282         }
4283         PL_tokenbuf[0] = '%';
4284         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4285                 sizeof PL_tokenbuf - 1, FALSE);
4286         if (!PL_tokenbuf[1]) {
4287             PREREF('%');
4288         }
4289         PL_pending_ident = '%';
4290         TERM('%');
4291
4292     case '^':
4293         s++;
4294         BOop(OP_BIT_XOR);
4295     case '[':
4296         PL_lex_brackets++;
4297         {
4298             const char tmp = *s++;
4299             OPERATOR(tmp);
4300         }
4301     case '~':
4302         if (s[1] == '~'
4303             && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
4304         {
4305             s += 2;
4306             Eop(OP_SMARTMATCH);
4307         }
4308     case ',':
4309         {
4310             const char tmp = *s++;
4311             OPERATOR(tmp);
4312         }
4313     case ':':
4314         if (s[1] == ':') {
4315             len = 0;
4316             goto just_a_word_zero_gv;
4317         }
4318         s++;
4319         switch (PL_expect) {
4320             OP *attrs;
4321 #ifdef PERL_MAD
4322             I32 stuffstart;
4323 #endif
4324         case XOPERATOR:
4325             if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4326                 break;
4327             PL_bufptr = s;      /* update in case we back off */
4328             goto grabattrs;
4329         case XATTRBLOCK:
4330             PL_expect = XBLOCK;
4331             goto grabattrs;
4332         case XATTRTERM:
4333             PL_expect = XTERMBLOCK;
4334          grabattrs:
4335 #ifdef PERL_MAD
4336             stuffstart = s - SvPVX(PL_linestr) - 1;
4337 #endif
4338             s = PEEKSPACE(s);
4339             attrs = NULL;
4340             while (isIDFIRST_lazy_if(s,UTF)) {
4341                 I32 tmp;
4342                 SV *sv;
4343                 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4344                 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
4345                     if (tmp < 0) tmp = -tmp;
4346                     switch (tmp) {
4347                     case KEY_or:
4348                     case KEY_and:
4349                     case KEY_for:
4350                     case KEY_foreach:
4351                     case KEY_unless:
4352                     case KEY_if:
4353                     case KEY_while:
4354                     case KEY_until:
4355                         goto got_attrs;
4356                     default:
4357                         break;
4358                     }
4359                 }
4360                 sv = newSVpvn(s, len);
4361                 if (*d == '(') {
4362                     d = scan_str(d,TRUE,TRUE);
4363                     if (!d) {
4364                         /* MUST advance bufptr here to avoid bogus
4365                            "at end of line" context messages from yyerror().
4366                          */
4367                         PL_bufptr = s + len;
4368                         yyerror("Unterminated attribute parameter in attribute list");
4369                         if (attrs)
4370                             op_free(attrs);
4371                         sv_free(sv);
4372                         return REPORT(0);       /* EOF indicator */
4373                     }
4374                 }
4375                 if (PL_lex_stuff) {
4376                     sv_catsv(sv, PL_lex_stuff);
4377                     attrs = append_elem(OP_LIST, attrs,
4378                                         newSVOP(OP_CONST, 0, sv));
4379                     SvREFCNT_dec(PL_lex_stuff);
4380                     PL_lex_stuff = NULL;
4381                 }
4382                 else {
4383                     if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4384                         sv_free(sv);
4385                         if (PL_in_my == KEY_our) {
4386                             deprecate(":unique");
4387                         }
4388                         else
4389                             Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4390                     }
4391
4392                     /* NOTE: any CV attrs applied here need to be part of
4393                        the CVf_BUILTIN_ATTRS define in cv.h! */
4394                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4395                         sv_free(sv);
4396                         CvLVALUE_on(PL_compcv);
4397                     }
4398                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4399                         sv_free(sv);
4400                         deprecate(":locked");
4401                     }
4402                     else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4403                         sv_free(sv);
4404                         CvMETHOD_on(PL_compcv);
4405                     }
4406                     /* After we've set the flags, it could be argued that
4407                        we don't need to do the attributes.pm-based setting
4408                        process, and shouldn't bother appending recognized
4409                        flags.  To experiment with that, uncomment the
4410                        following "else".  (Note that's already been
4411                        uncommented.  That keeps the above-applied built-in
4412                        attributes from being intercepted (and possibly
4413                        rejected) by a package's attribute routines, but is
4414                        justified by the performance win for the common case
4415                        of applying only built-in attributes.) */
4416                     else
4417                         attrs = append_elem(OP_LIST, attrs,
4418                                             newSVOP(OP_CONST, 0,
4419                                                     sv));
4420                 }
4421                 s = PEEKSPACE(d);
4422                 if (*s == ':' && s[1] != ':')
4423                     s = PEEKSPACE(s+1);
4424                 else if (s == d)
4425                     break;      /* require real whitespace or :'s */
4426                 /* XXX losing whitespace on sequential attributes here */
4427             }
4428             {
4429                 const char tmp
4430                     = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4431                 if (*s != ';' && *s != '}' && *s != tmp
4432                     && (tmp != '=' || *s != ')')) {
4433                     const char q = ((*s == '\'') ? '"' : '\'');
4434                     /* If here for an expression, and parsed no attrs, back
4435                        off. */
4436                     if (tmp == '=' && !attrs) {
4437                         s = PL_bufptr;
4438                         break;
4439                     }
4440                     /* MUST advance bufptr here to avoid bogus "at end of line"
4441                        context messages from yyerror().
4442                     */
4443                     PL_bufptr = s;
4444                     yyerror( (const char *)
4445                              (*s
4446                               ? Perl_form(aTHX_ "Invalid separator character "
4447                                           "%c%c%c in attribute list", q, *s, q)
4448                               : "Unterminated attribute list" ) );
4449                     if (attrs)
4450                         op_free(attrs);
4451                     OPERATOR(':');
4452                 }
4453             }
4454         got_attrs:
4455             if (attrs) {
4456                 start_force(PL_curforce);
4457                 NEXTVAL_NEXTTOKE.opval = attrs;
4458                 CURMAD('_', PL_nextwhite);
4459                 force_next(THING);
4460             }
4461 #ifdef PERL_MAD
4462             if (PL_madskills) {
4463                 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
4464                                      (s - SvPVX(PL_linestr)) - stuffstart);
4465             }
4466 #endif
4467             TOKEN(COLONATTR);
4468         }
4469         OPERATOR(':');
4470     case '(':
4471         s++;
4472         if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4473             PL_oldbufptr = PL_oldoldbufptr;             /* allow print(STDOUT 123) */
4474         else
4475             PL_expect = XTERM;
4476         s = SKIPSPACE1(s);
4477         TOKEN('(');
4478     case ';':
4479         CLINE;
4480         {
4481             const char tmp = *s++;
4482             OPERATOR(tmp);
4483         }
4484     case ')':
4485         {
4486             const char tmp = *s++;
4487             s = SKIPSPACE1(s);
4488             if (*s == '{')
4489                 PREBLOCK(tmp);
4490             TERM(tmp);
4491         }
4492     case ']':
4493         s++;
4494         if (PL_lex_brackets <= 0)
4495             yyerror("Unmatched right square bracket");
4496         else
4497             --PL_lex_brackets;
4498         if (PL_lex_state == LEX_INTERPNORMAL) {
4499             if (PL_lex_brackets == 0) {
4500                 if (*s == '-' && s[1] == '>')
4501                     PL_lex_state = LEX_INTERPENDMAYBE;
4502                 else if (*s != '[' && *s != '{')
4503                     PL_lex_state = LEX_INTERPEND;
4504             }
4505         }
4506         TERM(']');
4507     case '{':
4508       leftbracket:
4509         s++;
4510         if (PL_lex_brackets > 100) {
4511             Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4512         }
4513         switch (PL_expect) {
4514         case XTERM:
4515             if (PL_lex_formbrack) {
4516                 s--;
4517                 PRETERMBLOCK(DO);
4518             }
4519             if (PL_oldoldbufptr == PL_last_lop)
4520                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4521             else
4522                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4523             OPERATOR(HASHBRACK);
4524         case XOPERATOR:
4525             while (s < PL_bufend && SPACE_OR_TAB(*s))
4526                 s++;
4527             d = s;
4528             PL_tokenbuf[0] = '\0';
4529             if (d < PL_bufend && *d == '-') {
4530                 PL_tokenbuf[0] = '-';
4531                 d++;
4532                 while (d < PL_bufend && SPACE_OR_TAB(*d))
4533                     d++;
4534             }
4535             if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
4536                 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
4537                               FALSE, &len);
4538                 while (d < PL_bufend && SPACE_OR_TAB(*d))
4539                     d++;
4540                 if (*d == '}') {
4541                     const char minus = (PL_tokenbuf[0] == '-');
4542                     s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
4543                     if (minus)
4544                         force_next('-');
4545                 }
4546             }
4547             /* FALL THROUGH */
4548         case XATTRBLOCK:
4549         case XBLOCK:
4550             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
4551             PL_expect = XSTATE;
4552             break;
4553         case XATTRTERM:
4554         case XTERMBLOCK:
4555             PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4556             PL_expect = XSTATE;
4557             break;
4558         default: {
4559                 const char *t;
4560                 if (PL_oldoldbufptr == PL_last_lop)
4561                     PL_lex_brackstack[PL_lex_brackets++] = XTERM;
4562                 else
4563                     PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
4564                 s = SKIPSPACE1(s);
4565                 if (*s == '}') {
4566                     if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
4567                         PL_expect = XTERM;
4568                         /* This hack is to get the ${} in the message. */
4569                         PL_bufptr = s+1;
4570                         yyerror("syntax error");
4571                         break;
4572                     }
4573                     OPERATOR(HASHBRACK);
4574                 }
4575                 /* This hack serves to disambiguate a pair of curlies
4576                  * as being a block or an anon hash.  Normally, expectation
4577                  * determines that, but in cases where we're not in a
4578                  * position to expect anything in particular (like inside
4579                  * eval"") we have to resolve the ambiguity.  This code
4580                  * covers the case where the first term in the curlies is a
4581                  * quoted string.  Most other cases need to be explicitly
4582                  * disambiguated by prepending a "+" before the opening
4583                  * curly in order to force resolution as an anon hash.
4584                  *
4585                  * XXX should probably propagate the outer expectation
4586                  * into eval"" to rely less on this hack, but that could
4587                  * potentially break current behavior of eval"".
4588                  * GSAR 97-07-21
4589                  */
4590                 t = s;
4591                 if (*s == '\'' || *s == '"' || *s == '`') {
4592                     /* common case: get past first string, handling escapes */
4593                     for (t++; t < PL_bufend && *t != *s;)
4594                         if (*t++ == '\\' && (*t == '\\' || *t == *s))
4595                             t++;
4596                     t++;
4597                 }
4598                 else if (*s == 'q') {
4599                     if (++t < PL_bufend
4600                         && (!isALNUM(*t)
4601                             || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
4602                                 && !isALNUM(*t))))
4603                     {
4604                         /* skip q//-like construct */
4605                         const char *tmps;
4606                         char open, close, term;
4607                         I32 brackets = 1;
4608
4609                         while (t < PL_bufend && isSPACE(*t))
4610                             t++;
4611                         /* check for q => */
4612                         if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
4613                             OPERATOR(HASHBRACK);
4614                         }
4615                         term = *t;
4616                         open = term;
4617                         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
4618                             term = tmps[5];
4619                         close = term;
4620                         if (open == close)
4621                             for (t++; t < PL_bufend; t++) {
4622                                 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
4623                                     t++;
4624                                 else if (*t == open)
4625                                     break;
4626                             }
4627                         else {
4628                             for (t++; t < PL_bufend; t++) {
4629                                 if (*t == '\\' && t+1 < PL_bufend)
4630                                     t++;
4631                                 else if (*t == close && --brackets <= 0)
4632                                     break;
4633                                 else if (*t == open)
4634                                     brackets++;
4635                             }
4636                         }
4637                         t++;
4638                     }
4639                     else
4640                         /* skip plain q word */
4641                         while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4642                              t += UTF8SKIP(t);
4643                 }
4644                 else if (isALNUM_lazy_if(t,UTF)) {
4645                     t += UTF8SKIP(t);
4646                     while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
4647                          t += UTF8SKIP(t);
4648                 }
4649                 while (t < PL_bufend && isSPACE(*t))
4650                     t++;
4651                 /* if comma follows first term, call it an anon hash */
4652                 /* XXX it could be a comma expression with loop modifiers */
4653                 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
4654                                    || (*t == '=' && t[1] == '>')))
4655                     OPERATOR(HASHBRACK);
4656                 if (PL_expect == XREF)
4657                     PL_expect = XTERM;
4658                 else {
4659                     PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
4660                     PL_expect = XSTATE;
4661                 }
4662             }
4663             break;
4664         }
4665         pl_yylval.ival = CopLINE(PL_curcop);
4666         if (isSPACE(*s) || *s == '#')
4667             PL_copline = NOLINE;   /* invalidate current command line number */
4668         TOKEN('{');
4669     case '}':
4670       rightbracket:
4671         s++;
4672         if (PL_lex_brackets <= 0)
4673             yyerror("Unmatched right curly bracket");
4674         else
4675             PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
4676         if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
4677             PL_lex_formbrack = 0;
4678         if (PL_lex_state == LEX_INTERPNORMAL) {
4679             if (PL_lex_brackets == 0) {
4680                 if (PL_expect & XFAKEBRACK) {
4681                     PL_expect &= XENUMMASK;
4682                     PL_lex_state = LEX_INTERPEND;
4683                     PL_bufptr = s;
4684 #if 0
4685                     if (PL_madskills) {
4686                         if (!PL_thiswhite)
4687                             PL_thiswhite = newSVpvs("");
4688                         sv_catpvs(PL_thiswhite,"}");
4689                     }
4690 #endif
4691                     return yylex();     /* ignore fake brackets */
4692                 }
4693                 if (*s == '-' && s[1] == '>')
4694                     PL_lex_state = LEX_INTERPENDMAYBE;
4695                 else if (*s != '[' && *s != '{')
4696                     PL_lex_state = LEX_INTERPEND;
4697             }
4698         }
4699         if (PL_expect & XFAKEBRACK) {
4700             PL_expect &= XENUMMASK;
4701             PL_bufptr = s;
4702             return yylex();             /* ignore fake brackets */
4703         }
4704         start_force(PL_curforce);
4705         if (PL_madskills) {
4706             curmad('X', newSVpvn(s-1,1));
4707             CURMAD('_', PL_thiswhite);
4708         }
4709         force_next('}');
4710 #ifdef PERL_MAD
4711         if (!PL_thistoken)
4712             PL_thistoken = newSVpvs("");
4713 #endif
4714         TOKEN(';');
4715     case '&':
4716         s++;
4717         if (*s++ == '&')
4718             AOPERATOR(ANDAND);
4719         s--;
4720         if (PL_expect == XOPERATOR) {
4721             if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
4722                 && isIDFIRST_lazy_if(s,UTF))
4723             {
4724                 CopLINE_dec(PL_curcop);
4725                 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
4726                 CopLINE_inc(PL_curcop);
4727             }
4728             BAop(OP_BIT_AND);
4729         }
4730
4731         s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4732         if (*PL_tokenbuf) {
4733             PL_expect = XOPERATOR;
4734             force_ident(PL_tokenbuf, '&');
4735         }
4736         else
4737             PREREF('&');
4738         pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
4739         TERM('&');
4740
4741     case '|':
4742         s++;
4743         if (*s++ == '|')
4744             AOPERATOR(OROR);
4745         s--;
4746         BOop(OP_BIT_OR);
4747     case '=':
4748         s++;
4749         {
4750             const char tmp = *s++;
4751             if (tmp == '=')
4752                 Eop(OP_EQ);
4753             if (tmp == '>')
4754                 OPERATOR(',');
4755             if (tmp == '~')
4756                 PMop(OP_MATCH);
4757             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
4758                 && strchr("+-*/%.^&|<",tmp))
4759                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4760                             "Reversed %c= operator",(int)tmp);
4761             s--;
4762             if (PL_expect == XSTATE && isALPHA(tmp) &&
4763                 (s == PL_linestart+1 || s[-2] == '\n') )
4764                 {
4765                     if (PL_in_eval && !PL_rsfp) {
4766                         d = PL_bufend;
4767                         while (s < d) {
4768                             if (*s++ == '\n') {
4769                                 incline(s);
4770                                 if (strnEQ(s,"=cut",4)) {
4771                                     s = strchr(s,'\n');
4772                                     if (s)
4773                                         s++;
4774                                     else
4775                                         s = d;
4776                                     incline(s);
4777                                     goto retry;
4778                                 }
4779                             }
4780                         }
4781                         goto retry;
4782                     }
4783 #ifdef PERL_MAD
4784                     if (PL_madskills) {
4785                         if (!PL_thiswhite)
4786                             PL_thiswhite = newSVpvs("");
4787                         sv_catpvn(PL_thiswhite, PL_linestart,
4788                                   PL_bufend - PL_linestart);
4789                     }
4790 #endif
4791                     s = PL_bufend;
4792                     PL_doextract = TRUE;
4793                     goto retry;
4794                 }
4795         }
4796         if (PL_lex_brackets < PL_lex_formbrack) {
4797             const char *t = s;
4798 #ifdef PERL_STRICT_CR
4799             while (SPACE_OR_TAB(*t))
4800 #else
4801             while (SPACE_OR_TAB(*t) || *t == '\r')
4802 #endif
4803                 t++;
4804             if (*t == '\n' || *t == '#') {
4805                 s--;
4806                 PL_expect = XBLOCK;
4807                 goto leftbracket;
4808             }
4809         }
4810         pl_yylval.ival = 0;
4811         OPERATOR(ASSIGNOP);
4812     case '!':
4813         s++;
4814         {
4815             const char tmp = *s++;
4816             if (tmp == '=') {
4817                 /* was this !=~ where !~ was meant?
4818                  * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
4819
4820                 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
4821                     const char *t = s+1;
4822
4823                     while (t < PL_bufend && isSPACE(*t))
4824                         ++t;
4825
4826                     if (*t == '/' || *t == '?' ||
4827                         ((*t == 'm' || *t == 's' || *t == 'y')
4828                          && !isALNUM(t[1])) ||
4829                         (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
4830                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4831                                     "!=~ should be !~");
4832                 }
4833                 Eop(OP_NE);
4834             }
4835             if (tmp == '~')
4836                 PMop(OP_NOT);
4837         }
4838         s--;
4839         OPERATOR('!');
4840     case '<':
4841         if (PL_expect != XOPERATOR) {
4842             if (s[1] != '<' && !strchr(s,'>'))
4843                 check_uni();
4844             if (s[1] == '<')
4845                 s = scan_heredoc(s);
4846             else
4847                 s = scan_inputsymbol(s);
4848             TERM(sublex_start());
4849         }
4850         s++;
4851         {
4852             char tmp = *s++;
4853             if (tmp == '<')
4854                 SHop(OP_LEFT_SHIFT);
4855             if (tmp == '=') {
4856                 tmp = *s++;
4857                 if (tmp == '>')
4858                     Eop(OP_NCMP);
4859                 s--;
4860                 Rop(OP_LE);
4861             }
4862         }
4863         s--;
4864         Rop(OP_LT);
4865     case '>':
4866         s++;
4867         {
4868             const char tmp = *s++;
4869             if (tmp == '>')
4870                 SHop(OP_RIGHT_SHIFT);
4871             else if (tmp == '=')
4872                 Rop(OP_GE);
4873         }
4874         s--;
4875         Rop(OP_GT);
4876
4877     case '$':
4878         CLINE;
4879
4880         if (PL_expect == XOPERATOR) {
4881             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4882                 PL_expect = XTERM;
4883                 deprecate_old(commaless_variable_list);
4884                 return REPORT(','); /* grandfather non-comma-format format */
4885             }
4886         }
4887
4888         if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
4889             PL_tokenbuf[0] = '@';
4890             s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
4891                            sizeof PL_tokenbuf - 1, FALSE);
4892             if (PL_expect == XOPERATOR)
4893                 no_op("Array length", s);
4894             if (!PL_tokenbuf[1])
4895                 PREREF(DOLSHARP);
4896             PL_expect = XOPERATOR;
4897             PL_pending_ident = '#';
4898             TOKEN(DOLSHARP);
4899         }
4900
4901         PL_tokenbuf[0] = '$';
4902         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4903                        sizeof PL_tokenbuf - 1, FALSE);
4904         if (PL_expect == XOPERATOR)
4905             no_op("Scalar", s);
4906         if (!PL_tokenbuf[1]) {
4907             if (s == PL_bufend)
4908                 yyerror("Final $ should be \\$ or $name");
4909             PREREF('$');
4910         }
4911
4912         /* This kludge not intended to be bulletproof. */
4913         if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
4914             pl_yylval.opval = newSVOP(OP_CONST, 0,
4915                                    newSViv(CopARYBASE_get(&PL_compiling)));
4916             pl_yylval.opval->op_private = OPpCONST_ARYBASE;
4917             TERM(THING);
4918         }
4919
4920         d = s;
4921         {
4922             const char tmp = *s;
4923             if (PL_lex_state == LEX_NORMAL)
4924                 s = SKIPSPACE1(s);
4925
4926             if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4927                 && intuit_more(s)) {
4928                 if (*s == '[') {
4929                     PL_tokenbuf[0] = '@';
4930                     if (ckWARN(WARN_SYNTAX)) {
4931                         char *t = s+1;
4932
4933                         while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
4934                             t++;
4935                         if (*t++ == ',') {
4936                             PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4937                             while (t < PL_bufend && *t != ']')
4938                                 t++;
4939                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4940                                         "Multidimensional syntax %.*s not supported",
4941                                     (int)((t - PL_bufptr) + 1), PL_bufptr);
4942                         }
4943                     }
4944                 }
4945                 else if (*s == '{') {
4946                     char *t;
4947                     PL_tokenbuf[0] = '%';
4948                     if (strEQ(PL_tokenbuf+1, "SIG")  && ckWARN(WARN_SYNTAX)
4949                         && (t = strchr(s, '}')) && (t = strchr(t, '=')))
4950                         {
4951                             char tmpbuf[sizeof PL_tokenbuf];
4952                             do {
4953                                 t++;
4954                             } while (isSPACE(*t));
4955                             if (isIDFIRST_lazy_if(t,UTF)) {
4956                                 STRLEN len;
4957                                 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4958                                               &len);
4959                                 while (isSPACE(*t))
4960                                     t++;
4961                                 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
4962                                     Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4963                                                 "You need to quote \"%s\"",
4964                                                 tmpbuf);
4965                             }
4966                         }
4967                 }
4968             }
4969
4970             PL_expect = XOPERATOR;
4971             if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
4972                 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4973                 if (!islop || PL_last_lop_op == OP_GREPSTART)
4974                     PL_expect = XOPERATOR;
4975                 else if (strchr("$@\"'`q", *s))
4976                     PL_expect = XTERM;          /* e.g. print $fh "foo" */
4977                 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
4978                     PL_expect = XTERM;          /* e.g. print $fh &sub */
4979                 else if (isIDFIRST_lazy_if(s,UTF)) {
4980                     char tmpbuf[sizeof PL_tokenbuf];
4981                     int t2;
4982                     scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4983                     if ((t2 = keyword(tmpbuf, len, 0))) {
4984                         /* binary operators exclude handle interpretations */
4985                         switch (t2) {
4986                         case -KEY_x:
4987                         case -KEY_eq:
4988                         case -KEY_ne:
4989                         case -KEY_gt:
4990                         case -KEY_lt:
4991                         case -KEY_ge:
4992                         case -KEY_le:
4993                         case -KEY_cmp:
4994                             break;
4995                         default:
4996                             PL_expect = XTERM;  /* e.g. print $fh length() */
4997                             break;
4998                         }
4999                     }
5000                     else {
5001                         PL_expect = XTERM;      /* e.g. print $fh subr() */
5002                     }
5003                 }
5004                 else if (isDIGIT(*s))
5005                     PL_expect = XTERM;          /* e.g. print $fh 3 */
5006                 else if (*s == '.' && isDIGIT(s[1]))
5007                     PL_expect = XTERM;          /* e.g. print $fh .3 */
5008                 else if ((*s == '?' || *s == '-' || *s == '+')
5009                          && !isSPACE(s[1]) && s[1] != '=')
5010                     PL_expect = XTERM;          /* e.g. print $fh -1 */
5011                 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5012                          && s[1] != '/')
5013                     PL_expect = XTERM;          /* e.g. print $fh /.../
5014                                                    XXX except DORDOR operator
5015                                                 */
5016                 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5017                          && s[2] != '=')
5018                     PL_expect = XTERM;          /* print $fh <<"EOF" */
5019             }
5020         }
5021         PL_pending_ident = '$';
5022         TOKEN('$');
5023
5024     case '@':
5025         if (PL_expect == XOPERATOR)
5026             no_op("Array", s);
5027         PL_tokenbuf[0] = '@';
5028         s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5029         if (!PL_tokenbuf[1]) {
5030             PREREF('@');
5031         }
5032         if (PL_lex_state == LEX_NORMAL)
5033             s = SKIPSPACE1(s);
5034         if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5035             if (*s == '{')
5036                 PL_tokenbuf[0] = '%';
5037
5038             /* Warn about @ where they meant $. */
5039             if (*s == '[' || *s == '{') {
5040                 if (ckWARN(WARN_SYNTAX)) {
5041                     const char *t = s + 1;
5042                     while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5043                         t++;
5044                     if (*t == '}' || *t == ']') {
5045                         t++;
5046                         PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5047                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5048                             "Scalar value %.*s better written as $%.*s",
5049                             (int)(t-PL_bufptr), PL_bufptr,
5050                             (int)(t-PL_bufptr-1), PL_bufptr+1);
5051                     }
5052                 }
5053             }
5054         }
5055         PL_pending_ident = '@';
5056         TERM('@');
5057
5058      case '/':                  /* may be division, defined-or, or pattern */
5059         if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5060             s += 2;
5061             AOPERATOR(DORDOR);
5062         }
5063      case '?':                  /* may either be conditional or pattern */
5064         if (PL_expect == XOPERATOR) {
5065              char tmp = *s++;
5066              if(tmp == '?') {
5067                 OPERATOR('?');
5068              }
5069              else {
5070                  tmp = *s++;
5071                  if(tmp == '/') {
5072                      /* A // operator. */
5073                     AOPERATOR(DORDOR);
5074                  }
5075                  else {
5076                      s--;
5077                      Mop(OP_DIVIDE);
5078                  }
5079              }
5080          }
5081          else {
5082              /* Disable warning on "study /blah/" */
5083              if (PL_oldoldbufptr == PL_last_uni
5084               && (*PL_last_uni != 's' || s - PL_last_uni < 5
5085                   || memNE(PL_last_uni, "study", 5)
5086                   || isALNUM_lazy_if(PL_last_uni+5,UTF)
5087               ))
5088                  check_uni();
5089              s = scan_pat(s,OP_MATCH);
5090              TERM(sublex_start());
5091          }
5092
5093     case '.':
5094         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5095 #ifdef PERL_STRICT_CR
5096             && s[1] == '\n'
5097 #else
5098             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5099 #endif
5100             && (s == PL_linestart || s[-1] == '\n') )
5101         {
5102             PL_lex_formbrack = 0;
5103             PL_expect = XSTATE;
5104             goto rightbracket;
5105         }
5106         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5107             s += 3;
5108             OPERATOR(YADAYADA);
5109         }
5110         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5111             char tmp = *s++;
5112             if (*s == tmp) {
5113                 s++;
5114                 if (*s == tmp) {
5115                     s++;
5116                     pl_yylval.ival = OPf_SPECIAL;
5117                 }
5118                 else
5119                     pl_yylval.ival = 0;
5120                 OPERATOR(DOTDOT);
5121             }
5122             if (PL_expect != XOPERATOR)
5123                 check_uni();
5124             Aop(OP_CONCAT);
5125         }
5126         /* FALL THROUGH */
5127     case '0': case '1': case '2': case '3': case '4':
5128     case '5': case '6': case '7': case '8': case '9':
5129         s = scan_num(s, &pl_yylval);
5130         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5131         if (PL_expect == XOPERATOR)
5132             no_op("Number",s);
5133         TERM(THING);
5134
5135     case '\'':
5136         s = scan_str(s,!!PL_madskills,FALSE);
5137         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5138         if (PL_expect == XOPERATOR) {
5139             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5140                 PL_expect = XTERM;
5141                 deprecate_old(commaless_variable_list);
5142                 return REPORT(','); /* grandfather non-comma-format format */
5143             }
5144             else
5145                 no_op("String",s);
5146         }
5147         if (!s)
5148             missingterm(NULL);
5149         pl_yylval.ival = OP_CONST;
5150         TERM(sublex_start());
5151
5152     case '"':
5153         s = scan_str(s,!!PL_madskills,FALSE);
5154         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5155         if (PL_expect == XOPERATOR) {
5156             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5157                 PL_expect = XTERM;
5158                 deprecate_old(commaless_variable_list);
5159                 return REPORT(','); /* grandfather non-comma-format format */
5160             }
5161             else
5162                 no_op("String",s);
5163         }
5164         if (!s)
5165             missingterm(NULL);
5166         pl_yylval.ival = OP_CONST;
5167         /* FIXME. I think that this can be const if char *d is replaced by
5168            more localised variables.  */
5169         for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
5170             if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
5171                 pl_yylval.ival = OP_STRINGIFY;
5172                 break;
5173             }
5174         }
5175         TERM(sublex_start());
5176
5177     case '`':
5178         s = scan_str(s,!!PL_madskills,FALSE);
5179         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
5180         if (PL_expect == XOPERATOR)
5181             no_op("Backticks",s);
5182         if (!s)
5183             missingterm(NULL);
5184         readpipe_override();
5185         TERM(sublex_start());
5186
5187     case '\\':
5188         s++;
5189         if (PL_lex_inwhat && isDIGIT(*s))
5190             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5191                            *s, *s);
5192         if (PL_expect == XOPERATOR)
5193             no_op("Backslash",s);
5194         OPERATOR(REFGEN);
5195
5196     case 'v':
5197         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
5198             char *start = s + 2;
5199             while (isDIGIT(*start) || *start == '_')
5200                 start++;
5201             if (*start == '.' && isDIGIT(start[1])) {
5202                 s = scan_num(s, &pl_yylval);
5203                 TERM(THING);
5204             }
5205             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
5206             else if (!isALPHA(*start) && (PL_expect == XTERM
5207                         || PL_expect == XREF || PL_expect == XSTATE
5208                         || PL_expect == XTERMORDORDOR)) {
5209                 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
5210                 if (!gv) {
5211                     s = scan_num(s, &pl_yylval);
5212                     TERM(THING);
5213                 }
5214             }
5215         }
5216         goto keylookup;
5217     case 'x':
5218         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
5219             s++;
5220             Mop(OP_REPEAT);
5221         }
5222         goto keylookup;
5223
5224     case '_':
5225     case 'a': case 'A':
5226     case 'b': case 'B':
5227     case 'c': case 'C':
5228     case 'd': case 'D':
5229     case 'e': case 'E':
5230     case 'f': case 'F':
5231     case 'g': case 'G':
5232     case 'h': case 'H':
5233     case 'i': case 'I':
5234     case 'j': case 'J':
5235     case 'k': case 'K':
5236     case 'l': case 'L':
5237     case 'm': case 'M':
5238     case 'n': case 'N':
5239     case 'o': case 'O':
5240     case 'p': case 'P':
5241     case 'q': case 'Q':
5242     case 'r': case 'R':
5243     case 's': case 'S':
5244     case 't': case 'T':
5245     case 'u': case 'U':
5246               case 'V':
5247     case 'w': case 'W':
5248               case 'X':
5249     case 'y': case 'Y':
5250     case 'z': case 'Z':
5251
5252       keylookup: {
5253         I32 tmp;
5254
5255         orig_keyword = 0;
5256         gv = NULL;
5257         gvp = NULL;
5258
5259         PL_bufptr = s;
5260         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5261
5262         /* Some keywords can be followed by any delimiter, including ':' */
5263         tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
5264                (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5265                              (PL_tokenbuf[0] == 'q' &&
5266                               strchr("qwxr", PL_tokenbuf[1])))));
5267
5268         /* x::* is just a word, unless x is "CORE" */
5269         if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
5270             goto just_a_word;
5271
5272         d = s;
5273         while (d < PL_bufend && isSPACE(*d))
5274                 d++;    /* no comments skipped here, or s### is misparsed */
5275
5276         /* Is this a label? */
5277         if (!tmp && PL_expect == XSTATE
5278               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5279             tmp = keyword(PL_tokenbuf, len, 0);
5280             if (tmp)
5281                 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5282             s = d + 1;
5283             pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5284             CLINE;
5285             TOKEN(LABEL);
5286         }
5287         else
5288             /* Check for keywords */
5289             tmp = keyword(PL_tokenbuf, len, 0);
5290
5291         /* Is this a word before a => operator? */
5292         if (*d == '=' && d[1] == '>') {
5293             CLINE;
5294             pl_yylval.opval
5295                 = (OP*)newSVOP(OP_CONST, 0,
5296                                S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
5297             pl_yylval.opval->op_private = OPpCONST_BARE;
5298             TERM(WORD);
5299         }
5300
5301         if (tmp < 0) {                  /* second-class keyword? */
5302             GV *ogv = NULL;     /* override (winner) */
5303             GV *hgv = NULL;     /* hidden (loser) */
5304             if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5305                 CV *cv;
5306                 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
5307                     (cv = GvCVu(gv)))
5308                 {
5309                     if (GvIMPORTED_CV(gv))
5310                         ogv = gv;
5311                     else if (! CvMETHOD(cv))
5312                         hgv = gv;
5313                 }
5314                 if (!ogv &&
5315                     (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
5316                     (gv = *gvp) && isGV_with_GP(gv) &&
5317                     GvCVu(gv) && GvIMPORTED_CV(gv))
5318                 {
5319                     ogv = gv;
5320                 }
5321             }
5322             if (ogv) {
5323                 orig_keyword = tmp;
5324                 tmp = 0;                /* overridden by import or by GLOBAL */
5325             }
5326             else if (gv && !gvp
5327                      && -tmp==KEY_lock  /* XXX generalizable kludge */
5328                      && GvCVu(gv))
5329             {
5330                 tmp = 0;                /* any sub overrides "weak" keyword */
5331             }
5332             else {                      /* no override */
5333                 tmp = -tmp;
5334                 if (tmp == KEY_dump) {
5335                     Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5336                                    "dump() better written as CORE::dump()");
5337                 }
5338                 gv = NULL;
5339                 gvp = 0;
5340                 if (hgv && tmp != KEY_x && tmp != KEY_CORE)     /* never ambiguous */
5341                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5342                                    "Ambiguous call resolved as CORE::%s(), %s",
5343                                    GvENAME(hgv), "qualify as such or use &");
5344             }
5345         }
5346
5347       reserved_word:
5348         switch (tmp) {
5349
5350         default:                        /* not a keyword */
5351             /* Trade off - by using this evil construction we can pull the
5352                variable gv into the block labelled keylookup. If not, then
5353                we have to give it function scope so that the goto from the
5354                earlier ':' case doesn't bypass the initialisation.  */
5355             if (0) {
5356             just_a_word_zero_gv:
5357                 gv = NULL;
5358                 gvp = NULL;
5359                 orig_keyword = 0;
5360             }
5361           just_a_word: {
5362                 SV *sv;
5363                 int pkgname = 0;
5364                 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5365                 CV *cv;
5366 #ifdef PERL_MAD
5367                 SV *nextPL_nextwhite = 0;
5368 #endif
5369
5370
5371                 /* Get the rest if it looks like a package qualifier */
5372
5373                 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
5374                     STRLEN morelen;
5375                     s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
5376                                   TRUE, &morelen);
5377                     if (!morelen)
5378                         Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
5379                                 *s == '\'' ? "'" : "::");
5380                     len += morelen;
5381                     pkgname = 1;
5382                 }
5383
5384                 if (PL_expect == XOPERATOR) {
5385                     if (PL_bufptr == PL_linestart) {
5386                         CopLINE_dec(PL_curcop);
5387                         Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5388                         CopLINE_inc(PL_curcop);
5389                     }
5390                     else
5391                         no_op("Bareword",s);
5392                 }
5393
5394                 /* Look for a subroutine with this name in current package,
5395                    unless name is "Foo::", in which case Foo is a bearword
5396                    (and a package name). */
5397
5398                 if (len > 2 && !PL_madskills &&
5399                     PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
5400                 {
5401                     if (ckWARN(WARN_BAREWORD)
5402                         && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
5403                         Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
5404                             "Bareword \"%s\" refers to nonexistent package",
5405                              PL_tokenbuf);
5406                     len -= 2;
5407                     PL_tokenbuf[len] = '\0';
5408                     gv = NULL;
5409                     gvp = 0;
5410                 }
5411                 else {
5412                     if (!gv) {
5413                         /* Mustn't actually add anything to a symbol table.
5414                            But also don't want to "initialise" any placeholder
5415                            constants that might already be there into full
5416                            blown PVGVs with attached PVCV.  */
5417                         gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5418                                                GV_NOADD_NOINIT, SVt_PVCV);
5419                     }
5420                     len = 0;
5421                 }
5422
5423                 /* if we saw a global override before, get the right name */
5424
5425                 if (gvp) {
5426                     sv = newSVpvs("CORE::GLOBAL::");
5427                     sv_catpv(sv,PL_tokenbuf);
5428                 }
5429                 else {
5430                     /* If len is 0, newSVpv does strlen(), which is correct.
5431                        If len is non-zero, then it will be the true length,
5432                        and so the scalar will be created correctly.  */
5433                     sv = newSVpv(PL_tokenbuf,len);
5434                 }
5435 #ifdef PERL_MAD
5436                 if (PL_madskills && !PL_thistoken) {
5437                     char *start = SvPVX(PL_linestr) + PL_realtokenstart;
5438                     PL_thistoken = newSVpvn(start,s - start);
5439                     PL_realtokenstart = s - SvPVX(PL_linestr);
5440                 }
5441 #endif
5442
5443                 /* Presume this is going to be a bareword of some sort. */
5444
5445                 CLINE;
5446                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5447                 pl_yylval.opval->op_private = OPpCONST_BARE;
5448                 /* UTF-8 package name? */
5449                 if (UTF && !IN_BYTES &&
5450                     is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
5451                     SvUTF8_on(sv);
5452
5453                 /* And if "Foo::", then that's what it certainly is. */
5454
5455                 if (len)
5456                     goto safe_bareword;
5457
5458                 /* Do the explicit type check so that we don't need to force
5459                    the initialisation of the symbol table to have a real GV.
5460                    Beware - gv may not really be a PVGV, cv may not really be
5461                    a PVCV, (because of the space optimisations that gv_init
5462                    understands) But they're true if for this symbol there is
5463                    respectively a typeglob and a subroutine.
5464                 */
5465                 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5466                     /* Real typeglob, so get the real subroutine: */
5467                            ? GvCVu(gv)
5468                     /* A proxy for a subroutine in this package? */
5469                            : SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5470                     : NULL;
5471
5472                 /* See if it's the indirect object for a list operator. */
5473
5474                 if (PL_oldoldbufptr &&
5475                     PL_oldoldbufptr < PL_bufptr &&
5476                     (PL_oldoldbufptr == PL_last_lop
5477                      || PL_oldoldbufptr == PL_last_uni) &&
5478                     /* NO SKIPSPACE BEFORE HERE! */
5479                     (PL_expect == XREF ||
5480                      ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
5481                 {
5482                     bool immediate_paren = *s == '(';
5483
5484                     /* (Now we can afford to cross potential line boundary.) */
5485                     s = SKIPSPACE2(s,nextPL_nextwhite);
5486 #ifdef PERL_MAD
5487                     PL_nextwhite = nextPL_nextwhite;    /* assume no & deception */
5488 #endif
5489
5490                     /* Two barewords in a row may indicate method call. */
5491
5492                     if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5493                         (tmp = intuit_method(s, gv, cv)))
5494                         return REPORT(tmp);
5495
5496                     /* If not a declared subroutine, it's an indirect object. */
5497                     /* (But it's an indir obj regardless for sort.) */
5498                     /* Also, if "_" follows a filetest operator, it's a bareword */
5499
5500                     if (
5501                         ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5502                          ((!gv || !cv) &&
5503                         (PL_last_lop_op != OP_MAPSTART &&
5504                          PL_last_lop_op != OP_GREPSTART))))
5505                        || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
5506                             && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
5507                        )
5508                     {
5509                         PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
5510                         goto bareword;
5511                     }
5512                 }
5513
5514                 PL_expect = XOPERATOR;
5515 #ifdef PERL_MAD
5516                 if (isSPACE(*s))
5517                     s = SKIPSPACE2(s,nextPL_nextwhite);
5518                 PL_nextwhite = nextPL_nextwhite;
5519 #else
5520                 s = skipspace(s);
5521 #endif
5522
5523                 /* Is this a word before a => operator? */
5524                 if (*s == '=' && s[1] == '>' && !pkgname) {
5525                     CLINE;
5526                     sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
5527                     if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
5528                       SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
5529                     TERM(WORD);
5530                 }
5531
5532                 /* If followed by a paren, it's certainly a subroutine. */
5533                 if (*s == '(') {
5534                     CLINE;
5535                     if (cv) {
5536                         d = s + 1;
5537                         while (SPACE_OR_TAB(*d))
5538                             d++;
5539                         if (*d == ')' && (sv = gv_const_sv(gv))) {
5540                             s = d + 1;
5541                             goto its_constant;
5542                         }
5543                     }
5544 #ifdef PERL_MAD
5545                     if (PL_madskills) {
5546                         PL_nextwhite = PL_thiswhite;
5547                         PL_thiswhite = 0;
5548                     }
5549                     start_force(PL_curforce);
5550 #endif
5551                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5552                     PL_expect = XOPERATOR;
5553 #ifdef PERL_MAD
5554                     if (PL_madskills) {
5555                         PL_nextwhite = nextPL_nextwhite;
5556                         curmad('X', PL_thistoken);
5557                         PL_thistoken = newSVpvs("");
5558                     }
5559 #endif
5560                     force_next(WORD);
5561                     pl_yylval.ival = 0;
5562                     TOKEN('&');
5563                 }
5564
5565                 /* If followed by var or block, call it a method (unless sub) */
5566
5567                 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5568                     PL_last_lop = PL_oldbufptr;
5569                     PL_last_lop_op = OP_METHOD;
5570                     PREBLOCK(METHOD);
5571                 }
5572
5573                 /* If followed by a bareword, see if it looks like indir obj. */
5574
5575                 if (!orig_keyword
5576                         && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5577                         && (tmp = intuit_method(s, gv, cv)))
5578                     return REPORT(tmp);
5579
5580                 /* Not a method, so call it a subroutine (if defined) */
5581
5582                 if (cv) {
5583                     if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
5584                         Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5585                                 "Ambiguous use of -%s resolved as -&%s()",
5586                                 PL_tokenbuf, PL_tokenbuf);
5587                     /* Check for a constant sub */
5588                     if ((sv = gv_const_sv(gv))) {
5589                   its_constant:
5590                         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
5591                         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
5592                         pl_yylval.opval->op_private = 0;
5593                         TOKEN(WORD);
5594                     }
5595
5596                     /* Resolve to GV now. */
5597                     if (SvTYPE(gv) != SVt_PVGV) {
5598                         gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5599                         assert (SvTYPE(gv) == SVt_PVGV);
5600                         /* cv must have been some sort of placeholder, so
5601                            now needs replacing with a real code reference.  */
5602                         cv = GvCV(gv);
5603                     }
5604
5605                     op_free(pl_yylval.opval);
5606                     pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5607                     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5608                     PL_last_lop = PL_oldbufptr;
5609                     PL_last_lop_op = OP_ENTERSUB;
5610                     /* Is there a prototype? */
5611                     if (
5612 #ifdef PERL_MAD
5613                         cv &&
5614 #endif
5615                         SvPOK(cv))
5616                     {
5617                         STRLEN protolen;
5618                         const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5619                         if (!protolen)
5620                             TERM(FUNC0SUB);
5621                         if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
5622                             OPERATOR(UNIOPSUB);
5623                         while (*proto == ';')
5624                             proto++;
5625                         if (*proto == '&' && *s == '{') {
5626                             if (PL_curstash)
5627                                 sv_setpvs(PL_subname, "__ANON__");
5628                             else
5629                                 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5630                             PREBLOCK(LSTOPSUB);
5631                         }
5632                     }
5633 #ifdef PERL_MAD
5634                     {
5635                         if (PL_madskills) {
5636                             PL_nextwhite = PL_thiswhite;
5637                             PL_thiswhite = 0;
5638                         }
5639                         start_force(PL_curforce);
5640                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5641                         PL_expect = XTERM;
5642                         if (PL_madskills) {
5643                             PL_nextwhite = nextPL_nextwhite;
5644                             curmad('X', PL_thistoken);
5645                             PL_thistoken = newSVpvs("");
5646                         }
5647                         force_next(WORD);
5648                         TOKEN(NOAMP);
5649                     }
5650                 }
5651
5652                 /* Guess harder when madskills require "best effort". */
5653                 if (PL_madskills && (!gv || !GvCVu(gv))) {
5654                     int probable_sub = 0;
5655                     if (strchr("\"'`$@%0123456789!*+{[<", *s))
5656                         probable_sub = 1;
5657                     else if (isALPHA(*s)) {
5658                         char tmpbuf[1024];
5659                         STRLEN tmplen;
5660                         d = s;
5661                         d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5662                         if (!keyword(tmpbuf, tmplen, 0))
5663                             probable_sub = 1;
5664                         else {
5665                             while (d < PL_bufend && isSPACE(*d))
5666                                 d++;
5667                             if (*d == '=' && d[1] == '>')
5668                                 probable_sub = 1;
5669                         }
5670                     }
5671                     if (probable_sub) {
5672                         gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
5673                         op_free(pl_yylval.opval);
5674                         pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5675                         pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5676                         PL_last_lop = PL_oldbufptr;
5677                         PL_last_lop_op = OP_ENTERSUB;
5678                         PL_nextwhite = PL_thiswhite;
5679                         PL_thiswhite = 0;
5680                         start_force(PL_curforce);
5681                         NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5682                         PL_expect = XTERM;
5683                         PL_nextwhite = nextPL_nextwhite;
5684                         curmad('X', PL_thistoken);
5685                         PL_thistoken = newSVpvs("");
5686                         force_next(WORD);
5687                         TOKEN(NOAMP);
5688                     }
5689 #else
5690                     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5691                     PL_expect = XTERM;
5692                     force_next(WORD);
5693                     TOKEN(NOAMP);
5694 #endif
5695                 }
5696
5697                 /* Call it a bare word */
5698
5699                 if (PL_hints & HINT_STRICT_SUBS)
5700                     pl_yylval.opval->op_private |= OPpCONST_STRICT;
5701                 else {
5702                 bareword:
5703                     /* after "print" and similar functions (corresponding to
5704                      * "F? L" in opcode.pl), whatever wasn't already parsed as
5705                      * a filehandle should be subject to "strict subs".
5706                      * Likewise for the optional indirect-object argument to system
5707                      * or exec, which can't be a bareword */
5708                     if ((PL_last_lop_op == OP_PRINT
5709                             || PL_last_lop_op == OP_PRTF
5710                             || PL_last_lop_op == OP_SAY
5711                             || PL_last_lop_op == OP_SYSTEM
5712                             || PL_last_lop_op == OP_EXEC)
5713                             && (PL_hints & HINT_STRICT_SUBS))
5714                         pl_yylval.opval->op_private |= OPpCONST_STRICT;
5715                     if (lastchar != '-') {
5716                         if (ckWARN(WARN_RESERVED)) {
5717                             d = PL_tokenbuf;
5718                             while (isLOWER(*d))
5719                                 d++;
5720                             if (!*d && !gv_stashpv(PL_tokenbuf, 0))
5721                                 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5722                                        PL_tokenbuf);
5723                         }
5724                     }
5725                 }
5726
5727             safe_bareword:
5728                 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
5729                     && ckWARN_d(WARN_AMBIGUOUS)) {
5730                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5731                         "Operator or semicolon missing before %c%s",
5732                         lastchar, PL_tokenbuf);
5733                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5734                         "Ambiguous use of %c resolved as operator %c",
5735                         lastchar, lastchar);
5736                 }
5737                 TOKEN(WORD);
5738             }
5739
5740         case KEY___FILE__:
5741             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5742                                         newSVpv(CopFILE(PL_curcop),0));
5743             TERM(THING);
5744
5745         case KEY___LINE__:
5746             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5747                                     Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
5748             TERM(THING);
5749
5750         case KEY___PACKAGE__:
5751             pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
5752                                         (PL_curstash
5753                                          ? newSVhek(HvNAME_HEK(PL_curstash))
5754                                          : &PL_sv_undef));
5755             TERM(THING);
5756
5757         case KEY___DATA__:
5758         case KEY___END__: {
5759             GV *gv;
5760             if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
5761                 const char *pname = "main";
5762                 if (PL_tokenbuf[2] == 'D')
5763                     pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
5764                 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
5765                                 SVt_PVIO);
5766                 GvMULTI_on(gv);
5767                 if (!GvIO(gv))
5768                     GvIOp(gv) = newIO();
5769                 IoIFP(GvIOp(gv)) = PL_rsfp;
5770 #if defined(HAS_FCNTL) && defined(F_SETFD)
5771                 {
5772                     const int fd = PerlIO_fileno(PL_rsfp);
5773                     fcntl(fd,F_SETFD,fd >= 3);
5774                 }
5775 #endif
5776                 /* Mark this internal pseudo-handle as clean */
5777                 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
5778                 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
5779                     IoTYPE(GvIOp(gv)) = IoTYPE_STD;
5780                 else
5781                     IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
5782 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
5783                 /* if the script was opened in binmode, we need to revert
5784                  * it to text mode for compatibility; but only iff it has CRs
5785                  * XXX this is a questionable hack at best. */
5786                 if (PL_bufend-PL_bufptr > 2
5787                     && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
5788                 {
5789                     Off_t loc = 0;
5790                     if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
5791                         loc = PerlIO_tell(PL_rsfp);
5792                         (void)PerlIO_seek(PL_rsfp, 0L, 0);
5793                     }
5794 #ifdef NETWARE
5795                         if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
5796 #else
5797                     if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
5798 #endif  /* NETWARE */
5799 #ifdef PERLIO_IS_STDIO /* really? */
5800 #  if defined(__BORLANDC__)
5801                         /* XXX see note in do_binmode() */
5802                         ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
5803 #  endif
5804 #endif
5805                         if (loc > 0)
5806                             PerlIO_seek(PL_rsfp, loc, 0);
5807                     }
5808                 }
5809 #endif
5810 #ifdef PERLIO_LAYERS
5811                 if (!IN_BYTES) {
5812                     if (UTF)
5813                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
5814                     else if (PL_encoding) {
5815                         SV *name;
5816                         dSP;
5817                         ENTER;
5818                         SAVETMPS;
5819                         PUSHMARK(sp);
5820                         EXTEND(SP, 1);
5821                         XPUSHs(PL_encoding);
5822                         PUTBACK;
5823                         call_method("name", G_SCALAR);
5824                         SPAGAIN;
5825                         name = POPs;
5826                         PUTBACK;
5827                         PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
5828                                             Perl_form(aTHX_ ":encoding(%"SVf")",
5829                                                       SVfARG(name)));
5830                         FREETMPS;
5831                         LEAVE;
5832                     }
5833                 }
5834 #endif
5835 #ifdef PERL_MAD
5836                 if (PL_madskills) {
5837                     if (PL_realtokenstart >= 0) {
5838                         char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5839                         if (!PL_endwhite)
5840                             PL_endwhite = newSVpvs("");
5841                         sv_catsv(PL_endwhite, PL_thiswhite);
5842                         PL_thiswhite = 0;
5843                         sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
5844                         PL_realtokenstart = -1;
5845                     }
5846                     while ((s = filter_gets(PL_endwhite, PL_rsfp,
5847                                  SvCUR(PL_endwhite))) != NULL) ;
5848                 }
5849 #endif
5850                 PL_rsfp = NULL;
5851             }
5852             goto fake_eof;
5853         }
5854
5855         case KEY_AUTOLOAD:
5856         case KEY_DESTROY:
5857         case KEY_BEGIN:
5858         case KEY_UNITCHECK:
5859         case KEY_CHECK:
5860         case KEY_INIT:
5861         case KEY_END:
5862             if (PL_expect == XSTATE) {
5863                 s = PL_bufptr;
5864                 goto really_sub;
5865             }
5866             goto just_a_word;
5867
5868         case KEY_CORE:
5869             if (*s == ':' && s[1] == ':') {
5870                 s += 2;
5871                 d = s;
5872                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5873                 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
5874                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
5875                 if (tmp < 0)
5876                     tmp = -tmp;
5877                 else if (tmp == KEY_require || tmp == KEY_do)
5878                     /* that's a way to remember we saw "CORE::" */
5879                     orig_keyword = tmp;
5880                 goto reserved_word;
5881             }
5882             goto just_a_word;
5883
5884         case KEY_abs:
5885             UNI(OP_ABS);
5886
5887         case KEY_alarm:
5888             UNI(OP_ALARM);
5889
5890         case KEY_accept:
5891             LOP(OP_ACCEPT,XTERM);
5892
5893         case KEY_and:
5894             OPERATOR(ANDOP);
5895
5896         case KEY_atan2:
5897             LOP(OP_ATAN2,XTERM);
5898
5899         case KEY_bind:
5900             LOP(OP_BIND,XTERM);
5901
5902         case KEY_binmode:
5903             LOP(OP_BINMODE,XTERM);
5904
5905         case KEY_bless:
5906             LOP(OP_BLESS,XTERM);
5907
5908         case KEY_break:
5909             FUN0(OP_BREAK);
5910
5911         case KEY_chop:
5912             UNI(OP_CHOP);
5913
5914         case KEY_continue:
5915             /* When 'use switch' is in effect, continue has a dual
5916                life as a control operator. */
5917             {
5918                 if (!FEATURE_IS_ENABLED("switch"))
5919                     PREBLOCK(CONTINUE);
5920                 else {
5921                     /* We have to disambiguate the two senses of
5922                       "continue". If the next token is a '{' then
5923                       treat it as the start of a continue block;
5924                       otherwise treat it as a control operator.
5925                      */
5926                     s = skipspace(s);
5927                     if (*s == '{')
5928             PREBLOCK(CONTINUE);
5929                     else
5930                         FUN0(OP_CONTINUE);
5931                 }
5932             }
5933
5934         case KEY_chdir:
5935             /* may use HOME */
5936             (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
5937             UNI(OP_CHDIR);
5938
5939         case KEY_close:
5940             UNI(OP_CLOSE);
5941
5942         case KEY_closedir:
5943             UNI(OP_CLOSEDIR);
5944
5945         case KEY_cmp:
5946             Eop(OP_SCMP);
5947
5948         case KEY_caller:
5949             UNI(OP_CALLER);
5950
5951         case KEY_crypt:
5952 #ifdef FCRYPT
5953             if (!PL_cryptseen) {
5954                 PL_cryptseen = TRUE;
5955                 init_des();
5956             }
5957 #endif
5958             LOP(OP_CRYPT,XTERM);
5959
5960         case KEY_chmod:
5961             LOP(OP_CHMOD,XTERM);
5962
5963         case KEY_chown:
5964             LOP(OP_CHOWN,XTERM);
5965
5966         case KEY_connect:
5967             LOP(OP_CONNECT,XTERM);
5968
5969         case KEY_chr:
5970             UNI(OP_CHR);
5971
5972         case KEY_cos:
5973             UNI(OP_COS);
5974
5975         case KEY_chroot:
5976             UNI(OP_CHROOT);
5977
5978         case KEY_default:
5979             PREBLOCK(DEFAULT);
5980
5981         case KEY_do:
5982             s = SKIPSPACE1(s);
5983             if (*s == '{')
5984                 PRETERMBLOCK(DO);
5985             if (*s != '\'')
5986                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5987             if (orig_keyword == KEY_do) {
5988                 orig_keyword = 0;
5989                 pl_yylval.ival = 1;
5990             }
5991             else
5992                 pl_yylval.ival = 0;
5993             OPERATOR(DO);
5994
5995         case KEY_die:
5996             PL_hints |= HINT_BLOCK_SCOPE;
5997             LOP(OP_DIE,XTERM);
5998
5999         case KEY_defined:
6000             UNI(OP_DEFINED);
6001
6002         case KEY_delete:
6003             UNI(OP_DELETE);
6004
6005         case KEY_dbmopen:
6006             gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6007             LOP(OP_DBMOPEN,XTERM);
6008
6009         case KEY_dbmclose:
6010             UNI(OP_DBMCLOSE);
6011
6012         case KEY_dump:
6013             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6014             LOOPX(OP_DUMP);
6015
6016         case KEY_else:
6017             PREBLOCK(ELSE);
6018
6019         case KEY_elsif:
6020             pl_yylval.ival = CopLINE(PL_curcop);
6021             OPERATOR(ELSIF);
6022
6023         case KEY_eq:
6024             Eop(OP_SEQ);
6025
6026         case KEY_exists:
6027             UNI(OP_EXISTS);
6028
6029         case KEY_exit:
6030             if (PL_madskills)
6031                 UNI(OP_INT);
6032             UNI(OP_EXIT);
6033
6034         case KEY_eval:
6035             s = SKIPSPACE1(s);
6036             PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
6037             UNIBRACK(OP_ENTEREVAL);
6038
6039         case KEY_eof:
6040             UNI(OP_EOF);
6041
6042         case KEY_exp:
6043             UNI(OP_EXP);
6044
6045         case KEY_each:
6046             UNI(OP_EACH);
6047
6048         case KEY_exec:
6049             LOP(OP_EXEC,XREF);
6050
6051         case KEY_endhostent:
6052             FUN0(OP_EHOSTENT);
6053
6054         case KEY_endnetent:
6055             FUN0(OP_ENETENT);
6056
6057         case KEY_endservent:
6058             FUN0(OP_ESERVENT);
6059
6060         case KEY_endprotoent:
6061             FUN0(OP_EPROTOENT);
6062
6063         case KEY_endpwent:
6064             FUN0(OP_EPWENT);
6065
6066         case KEY_endgrent:
6067             FUN0(OP_EGRENT);
6068
6069         case KEY_for:
6070         case KEY_foreach:
6071             pl_yylval.ival = CopLINE(PL_curcop);
6072             s = SKIPSPACE1(s);
6073             if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6074                 char *p = s;
6075 #ifdef PERL_MAD
6076                 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6077 #endif
6078
6079                 if ((PL_bufend - p) >= 3 &&
6080                     strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6081                     p += 2;
6082                 else if ((PL_bufend - p) >= 4 &&
6083                     strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6084                     p += 3;
6085                 p = PEEKSPACE(p);
6086                 if (isIDFIRST_lazy_if(p,UTF)) {
6087                     p = scan_ident(p, PL_bufend,
6088                         PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6089                     p = PEEKSPACE(p);
6090                 }
6091                 if (*p != '$')
6092                     Perl_croak(aTHX_ "Missing $ on loop variable");
6093 #ifdef PERL_MAD
6094                 s = SvPVX(PL_linestr) + soff;
6095 #endif
6096             }
6097             OPERATOR(FOR);
6098
6099         case KEY_formline:
6100             LOP(OP_FORMLINE,XTERM);
6101
6102         case KEY_fork:
6103             FUN0(OP_FORK);
6104
6105         case KEY_fcntl:
6106             LOP(OP_FCNTL,XTERM);
6107
6108         case KEY_fileno:
6109             UNI(OP_FILENO);
6110
6111         case KEY_flock:
6112             LOP(OP_FLOCK,XTERM);
6113
6114         case KEY_gt:
6115             Rop(OP_SGT);
6116
6117         case KEY_ge:
6118             Rop(OP_SGE);
6119
6120         case KEY_grep:
6121             LOP(OP_GREPSTART, XREF);
6122
6123         case KEY_goto:
6124             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6125             LOOPX(OP_GOTO);
6126
6127         case KEY_gmtime:
6128             UNI(OP_GMTIME);
6129
6130         case KEY_getc:
6131             UNIDOR(OP_GETC);
6132
6133         case KEY_getppid:
6134             FUN0(OP_GETPPID);
6135
6136         case KEY_getpgrp:
6137             UNI(OP_GETPGRP);
6138
6139         case KEY_getpriority:
6140             LOP(OP_GETPRIORITY,XTERM);
6141
6142         case KEY_getprotobyname:
6143             UNI(OP_GPBYNAME);
6144
6145         case KEY_getprotobynumber:
6146             LOP(OP_GPBYNUMBER,XTERM);
6147
6148         case KEY_getprotoent:
6149             FUN0(OP_GPROTOENT);
6150
6151         case KEY_getpwent:
6152             FUN0(OP_GPWENT);
6153
6154         case KEY_getpwnam:
6155             UNI(OP_GPWNAM);
6156
6157         case KEY_getpwuid:
6158             UNI(OP_GPWUID);
6159
6160         case KEY_getpeername:
6161             UNI(OP_GETPEERNAME);
6162
6163         case KEY_gethostbyname:
6164             UNI(OP_GHBYNAME);
6165
6166         case KEY_gethostbyaddr:
6167             LOP(OP_GHBYADDR,XTERM);
6168
6169         case KEY_gethostent:
6170             FUN0(OP_GHOSTENT);
6171
6172         case KEY_getnetbyname:
6173             UNI(OP_GNBYNAME);
6174
6175         case KEY_getnetbyaddr:
6176             LOP(OP_GNBYADDR,XTERM);
6177
6178         case KEY_getnetent:
6179             FUN0(OP_GNETENT);
6180
6181         case KEY_getservbyname:
6182             LOP(OP_GSBYNAME,XTERM);
6183
6184         case KEY_getservbyport:
6185             LOP(OP_GSBYPORT,XTERM);
6186
6187         case KEY_getservent:
6188             FUN0(OP_GSERVENT);
6189
6190         case KEY_getsockname:
6191             UNI(OP_GETSOCKNAME);
6192
6193         case KEY_getsockopt:
6194             LOP(OP_GSOCKOPT,XTERM);
6195
6196         case KEY_getgrent:
6197             FUN0(OP_GGRENT);
6198
6199         case KEY_getgrnam:
6200             UNI(OP_GGRNAM);
6201
6202         case KEY_getgrgid:
6203             UNI(OP_GGRGID);
6204
6205         case KEY_getlogin:
6206             FUN0(OP_GETLOGIN);
6207
6208         case KEY_given:
6209             pl_yylval.ival = CopLINE(PL_curcop);
6210             OPERATOR(GIVEN);
6211
6212         case KEY_glob:
6213             LOP(OP_GLOB,XTERM);
6214
6215         case KEY_hex:
6216             UNI(OP_HEX);
6217
6218         case KEY_if:
6219             pl_yylval.ival = CopLINE(PL_curcop);
6220             OPERATOR(IF);
6221
6222         case KEY_index:
6223             LOP(OP_INDEX,XTERM);
6224
6225         case KEY_int:
6226             UNI(OP_INT);
6227
6228         case KEY_ioctl:
6229             LOP(OP_IOCTL,XTERM);
6230
6231         case KEY_join:
6232             LOP(OP_JOIN,XTERM);
6233
6234         case KEY_keys:
6235             UNI(OP_KEYS);
6236
6237         case KEY_kill:
6238             LOP(OP_KILL,XTERM);
6239
6240         case KEY_last:
6241             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6242             LOOPX(OP_LAST);
6243
6244         case KEY_lc:
6245             UNI(OP_LC);
6246
6247         case KEY_lcfirst:
6248             UNI(OP_LCFIRST);
6249
6250         case KEY_local:
6251             pl_yylval.ival = 0;
6252             OPERATOR(LOCAL);
6253
6254         case KEY_length:
6255             UNI(OP_LENGTH);
6256
6257         case KEY_lt:
6258             Rop(OP_SLT);
6259
6260         case KEY_le:
6261             Rop(OP_SLE);
6262
6263         case KEY_localtime:
6264             UNI(OP_LOCALTIME);
6265
6266         case KEY_log:
6267             UNI(OP_LOG);
6268
6269         case KEY_link:
6270             LOP(OP_LINK,XTERM);
6271
6272         case KEY_listen:
6273             LOP(OP_LISTEN,XTERM);
6274
6275         case KEY_lock:
6276             UNI(OP_LOCK);
6277
6278         case KEY_lstat:
6279             UNI(OP_LSTAT);
6280
6281         case KEY_m:
6282             s = scan_pat(s,OP_MATCH);
6283             TERM(sublex_start());
6284
6285         case KEY_map:
6286             LOP(OP_MAPSTART, XREF);
6287
6288         case KEY_mkdir:
6289             LOP(OP_MKDIR,XTERM);
6290
6291         case KEY_msgctl:
6292             LOP(OP_MSGCTL,XTERM);
6293
6294         case KEY_msgget:
6295             LOP(OP_MSGGET,XTERM);
6296
6297         case KEY_msgrcv:
6298             LOP(OP_MSGRCV,XTERM);
6299
6300         case KEY_msgsnd:
6301             LOP(OP_MSGSND,XTERM);
6302
6303         case KEY_our:
6304         case KEY_my:
6305         case KEY_state:
6306             PL_in_my = (U16)tmp;
6307             s = SKIPSPACE1(s);
6308             if (isIDFIRST_lazy_if(s,UTF)) {
6309 #ifdef PERL_MAD
6310                 char* start = s;
6311 #endif
6312                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6313                 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6314                     goto really_sub;
6315                 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6316                 if (!PL_in_my_stash) {
6317                     char tmpbuf[1024];
6318                     PL_bufptr = s;
6319                     my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6320                     yyerror(tmpbuf);
6321                 }
6322 #ifdef PERL_MAD
6323                 if (PL_madskills) {     /* just add type to declarator token */
6324                     sv_catsv(PL_thistoken, PL_nextwhite);
6325                     PL_nextwhite = 0;
6326                     sv_catpvn(PL_thistoken, start, s - start);
6327                 }
6328 #endif
6329             }
6330             pl_yylval.ival = 1;
6331             OPERATOR(MY);
6332
6333         case KEY_next:
6334             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6335             LOOPX(OP_NEXT);
6336
6337         case KEY_ne:
6338             Eop(OP_SNE);
6339
6340         case KEY_no:
6341             s = tokenize_use(0, s);
6342             OPERATOR(USE);
6343
6344         case KEY_not:
6345             if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
6346                 FUN1(OP_NOT);
6347             else
6348                 OPERATOR(NOTOP);
6349
6350         case KEY_open:
6351             s = SKIPSPACE1(s);
6352             if (isIDFIRST_lazy_if(s,UTF)) {
6353                 const char *t;
6354                 for (d = s; isALNUM_lazy_if(d,UTF);)
6355                     d++;
6356                 for (t=d; isSPACE(*t);)
6357                     t++;
6358                 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
6359                     /* [perl #16184] */
6360                     && !(t[0] == '=' && t[1] == '>')
6361                 ) {
6362                     int parms_len = (int)(d-s);
6363                     Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
6364                            "Precedence problem: open %.*s should be open(%.*s)",
6365                             parms_len, s, parms_len, s);
6366                 }
6367             }
6368             LOP(OP_OPEN,XTERM);
6369
6370         case KEY_or:
6371             pl_yylval.ival = OP_OR;
6372             OPERATOR(OROP);
6373
6374         case KEY_ord:
6375             UNI(OP_ORD);
6376
6377         case KEY_oct:
6378             UNI(OP_OCT);
6379
6380         case KEY_opendir:
6381             LOP(OP_OPEN_DIR,XTERM);
6382
6383         case KEY_print:
6384             checkcomma(s,PL_tokenbuf,"filehandle");
6385             LOP(OP_PRINT,XREF);
6386
6387         case KEY_printf:
6388             checkcomma(s,PL_tokenbuf,"filehandle");
6389             LOP(OP_PRTF,XREF);
6390
6391         case KEY_prototype:
6392             UNI(OP_PROTOTYPE);
6393
6394         case KEY_push:
6395             LOP(OP_PUSH,XTERM);
6396
6397         case KEY_pop:
6398             UNIDOR(OP_POP);
6399
6400         case KEY_pos:
6401             UNIDOR(OP_POS);
6402
6403         case KEY_pack:
6404             LOP(OP_PACK,XTERM);
6405
6406         case KEY_package:
6407             s = force_word(s,WORD,FALSE,TRUE,FALSE);
6408             s = force_version(s, FALSE);
6409             OPERATOR(PACKAGE);
6410
6411         case KEY_pipe:
6412             LOP(OP_PIPE_OP,XTERM);
6413
6414         case KEY_q:
6415             s = scan_str(s,!!PL_madskills,FALSE);
6416             if (!s)
6417                 missingterm(NULL);
6418             pl_yylval.ival = OP_CONST;
6419             TERM(sublex_start());
6420
6421         case KEY_quotemeta:
6422             UNI(OP_QUOTEMETA);
6423
6424         case KEY_qw:
6425             s = scan_str(s,!!PL_madskills,FALSE);
6426             if (!s)
6427                 missingterm(NULL);
6428             PL_expect = XOPERATOR;
6429             force_next(')');
6430             if (SvCUR(PL_lex_stuff)) {
6431                 OP *words = NULL;
6432                 int warned = 0;
6433                 d = SvPV_force(PL_lex_stuff, len);
6434                 while (len) {
6435                     for (; isSPACE(*d) && len; --len, ++d)
6436                         /**/;
6437                     if (len) {
6438                         SV *sv;
6439                         const char *b = d;
6440                         if (!warned && ckWARN(WARN_QW)) {
6441                             for (; !isSPACE(*d) && len; --len, ++d) {
6442                                 if (*d == ',') {
6443                                     Perl_warner(aTHX_ packWARN(WARN_QW),
6444                                         "Possible attempt to separate words with commas");
6445                                     ++warned;
6446                                 }
6447                                 else if (*d == '#') {
6448                                     Perl_warner(aTHX_ packWARN(WARN_QW),
6449                                         "Possible attempt to put comments in qw() list");
6450                                     ++warned;
6451                                 }
6452                             }
6453                         }
6454                         else {
6455                             for (; !isSPACE(*d) && len; --len, ++d)
6456                                 /**/;
6457                         }
6458                         sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
6459                         words = append_elem(OP_LIST, words,
6460                                             newSVOP(OP_CONST, 0, tokeq(sv)));
6461                     }
6462                 }
6463                 if (words) {
6464                     start_force(PL_curforce);
6465                     NEXTVAL_NEXTTOKE.opval = words;
6466                     force_next(THING);
6467                 }
6468             }
6469             if (PL_lex_stuff) {
6470                 SvREFCNT_dec(PL_lex_stuff);
6471                 PL_lex_stuff = NULL;
6472             }
6473             PL_expect = XTERM;
6474             TOKEN('(');
6475
6476         case KEY_qq:
6477             s = scan_str(s,!!PL_madskills,FALSE);
6478             if (!s)
6479                 missingterm(NULL);
6480             pl_yylval.ival = OP_STRINGIFY;
6481             if (SvIVX(PL_lex_stuff) == '\'')
6482                 SvIV_set(PL_lex_stuff, 0);      /* qq'$foo' should intepolate */
6483             TERM(sublex_start());
6484
6485         case KEY_qr:
6486             s = scan_pat(s,OP_QR);
6487             TERM(sublex_start());
6488
6489         case KEY_qx:
6490             s = scan_str(s,!!PL_madskills,FALSE);
6491             if (!s)
6492                 missingterm(NULL);
6493             readpipe_override();
6494             TERM(sublex_start());
6495
6496         case KEY_return:
6497             OLDLOP(OP_RETURN);
6498
6499         case KEY_require:
6500             s = SKIPSPACE1(s);
6501             if (isDIGIT(*s)) {
6502                 s = force_version(s, FALSE);
6503             }
6504             else if (*s != 'v' || !isDIGIT(s[1])
6505                     || (s = force_version(s, TRUE), *s == 'v'))
6506             {
6507                 *PL_tokenbuf = '\0';
6508                 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6509                 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
6510                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
6511                 else if (*s == '<')
6512                     yyerror("<> should be quotes");
6513             }
6514             if (orig_keyword == KEY_require) {
6515                 orig_keyword = 0;
6516                 pl_yylval.ival = 1;
6517             }
6518             else
6519                 pl_yylval.ival = 0;
6520             PL_expect = XTERM;
6521             PL_bufptr = s;
6522             PL_last_uni = PL_oldbufptr;
6523             PL_last_lop_op = OP_REQUIRE;
6524             s = skipspace(s);
6525             return REPORT( (int)REQUIRE );
6526
6527         case KEY_reset:
6528             UNI(OP_RESET);
6529
6530         case KEY_redo:
6531             s = force_word(s,WORD,TRUE,FALSE,FALSE);
6532             LOOPX(OP_REDO);
6533
6534         case KEY_rename:
6535             LOP(OP_RENAME,XTERM);
6536
6537         case KEY_rand:
6538             UNI(OP_RAND);
6539
6540         case KEY_rmdir:
6541             UNI(OP_RMDIR);
6542
6543         case KEY_rindex:
6544             LOP(OP_RINDEX,XTERM);
6545
6546         case KEY_read:
6547             LOP(OP_READ,XTERM);
6548
6549         case KEY_readdir:
6550             UNI(OP_READDIR);
6551
6552         case KEY_readline:
6553             UNIDOR(OP_READLINE);
6554
6555         case KEY_readpipe:
6556             UNIDOR(OP_BACKTICK);
6557
6558         case KEY_rewinddir:
6559             UNI(OP_REWINDDIR);
6560
6561         case KEY_recv:
6562             LOP(OP_RECV,XTERM);
6563
6564         case KEY_reverse:
6565             LOP(OP_REVERSE,XTERM);
6566
6567         case KEY_readlink:
6568             UNIDOR(OP_READLINK);
6569
6570         case KEY_ref:
6571             UNI(OP_REF);
6572
6573         case KEY_s:
6574             s = scan_subst(s);
6575             if (pl_yylval.opval)
6576                 TERM(sublex_start());
6577             else
6578                 TOKEN(1);       /* force error */
6579
6580         case KEY_say:
6581             checkcomma(s,PL_tokenbuf,"filehandle");
6582             LOP(OP_SAY,XREF);
6583
6584         case KEY_chomp:
6585             UNI(OP_CHOMP);
6586
6587         case KEY_scalar:
6588             UNI(OP_SCALAR);
6589
6590         case KEY_select:
6591             LOP(OP_SELECT,XTERM);
6592
6593         case KEY_seek:
6594             LOP(OP_SEEK,XTERM);
6595
6596         case KEY_semctl:
6597             LOP(OP_SEMCTL,XTERM);
6598
6599         case KEY_semget:
6600             LOP(OP_SEMGET,XTERM);
6601
6602         case KEY_semop:
6603             LOP(OP_SEMOP,XTERM);
6604
6605         case KEY_send:
6606             LOP(OP_SEND,XTERM);
6607
6608         case KEY_setpgrp:
6609             LOP(OP_SETPGRP,XTERM);
6610
6611         case KEY_setpriority:
6612             LOP(OP_SETPRIORITY,XTERM);
6613
6614         case KEY_sethostent:
6615             UNI(OP_SHOSTENT);
6616
6617         case KEY_setnetent:
6618             UNI(OP_SNETENT);
6619
6620         case KEY_setservent:
6621             UNI(OP_SSERVENT);
6622
6623         case KEY_setprotoent:
6624             UNI(OP_SPROTOENT);
6625
6626         case KEY_setpwent:
6627             FUN0(OP_SPWENT);
6628
6629         case KEY_setgrent:
6630             FUN0(OP_SGRENT);
6631
6632         case KEY_seekdir:
6633             LOP(OP_SEEKDIR,XTERM);
6634
6635         case KEY_setsockopt:
6636             LOP(OP_SSOCKOPT,XTERM);
6637
6638         case KEY_shift:
6639             UNIDOR(OP_SHIFT);
6640
6641         case KEY_shmctl:
6642             LOP(OP_SHMCTL,XTERM);
6643
6644         case KEY_shmget:
6645             LOP(OP_SHMGET,XTERM);
6646
6647         case KEY_shmread:
6648             LOP(OP_SHMREAD,XTERM);
6649
6650         case KEY_shmwrite:
6651             LOP(OP_SHMWRITE,XTERM);
6652
6653         case KEY_shutdown:
6654             LOP(OP_SHUTDOWN,XTERM);
6655
6656         case KEY_sin:
6657             UNI(OP_SIN);
6658
6659         case KEY_sleep:
6660             UNI(OP_SLEEP);
6661
6662         case KEY_socket:
6663             LOP(OP_SOCKET,XTERM);
6664
6665         case KEY_socketpair:
6666             LOP(OP_SOCKPAIR,XTERM);
6667
6668         case KEY_sort:
6669             checkcomma(s,PL_tokenbuf,"subroutine name");
6670             s = SKIPSPACE1(s);
6671             if (*s == ';' || *s == ')')         /* probably a close */
6672                 Perl_croak(aTHX_ "sort is now a reserved word");
6673             PL_expect = XTERM;
6674             s = force_word(s,WORD,TRUE,TRUE,FALSE);
6675             LOP(OP_SORT,XREF);
6676
6677         case KEY_split:
6678             LOP(OP_SPLIT,XTERM);
6679
6680         case KEY_sprintf:
6681             LOP(OP_SPRINTF,XTERM);
6682
6683         case KEY_splice:
6684             LOP(OP_SPLICE,XTERM);
6685
6686         case KEY_sqrt:
6687             UNI(OP_SQRT);
6688
6689         case KEY_srand:
6690             UNI(OP_SRAND);
6691
6692         case KEY_stat:
6693             UNI(OP_STAT);
6694
6695         case KEY_study:
6696             UNI(OP_STUDY);
6697
6698         case KEY_substr:
6699             LOP(OP_SUBSTR,XTERM);
6700
6701         case KEY_format:
6702         case KEY_sub:
6703           really_sub:
6704             {
6705                 char tmpbuf[sizeof PL_tokenbuf];
6706                 SSize_t tboffset = 0;
6707                 expectation attrful;
6708                 bool have_name, have_proto;
6709                 const int key = tmp;
6710
6711 #ifdef PERL_MAD
6712                 SV *tmpwhite = 0;
6713
6714                 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6715                 SV *subtoken = newSVpvn(tstart, s - tstart);
6716                 PL_thistoken = 0;
6717
6718                 d = s;
6719                 s = SKIPSPACE2(s,tmpwhite);
6720 #else
6721                 s = skipspace(s);
6722 #endif
6723
6724                 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
6725                     (*s == ':' && s[1] == ':'))
6726                 {
6727 #ifdef PERL_MAD
6728                     SV *nametoke = NULL;
6729 #endif
6730
6731                     PL_expect = XBLOCK;
6732                     attrful = XATTRBLOCK;
6733                     /* remember buffer pos'n for later force_word */
6734                     tboffset = s - PL_oldbufptr;
6735                     d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6736 #ifdef PERL_MAD
6737                     if (PL_madskills)
6738                         nametoke = newSVpvn(s, d - s);
6739 #endif
6740                     if (memchr(tmpbuf, ':', len))
6741                         sv_setpvn(PL_subname, tmpbuf, len);
6742                     else {
6743                         sv_setsv(PL_subname,PL_curstname);
6744                         sv_catpvs(PL_subname,"::");
6745                         sv_catpvn(PL_subname,tmpbuf,len);
6746                     }
6747                     have_name = TRUE;
6748
6749 #ifdef PERL_MAD
6750
6751                     start_force(0);
6752                     CURMAD('X', nametoke);
6753                     CURMAD('_', tmpwhite);
6754                     (void) force_word(PL_oldbufptr + tboffset, WORD,
6755                                       FALSE, TRUE, TRUE);
6756
6757                     s = SKIPSPACE2(d,tmpwhite);
6758 #else
6759                     s = skipspace(d);
6760 #endif
6761                 }
6762                 else {
6763                     if (key == KEY_my)
6764                         Perl_croak(aTHX_ "Missing name in \"my sub\"");
6765                     PL_expect = XTERMBLOCK;
6766                     attrful = XATTRTERM;
6767                     sv_setpvs(PL_subname,"?");
6768                     have_name = FALSE;
6769                 }
6770
6771                 if (key == KEY_format) {
6772                     if (*s == '=')
6773                         PL_lex_formbrack = PL_lex_brackets + 1;
6774 #ifdef PERL_MAD
6775                     PL_thistoken = subtoken;
6776                     s = d;
6777 #else
6778                     if (have_name)
6779                         (void) force_word(PL_oldbufptr + tboffset, WORD,
6780                                           FALSE, TRUE, TRUE);
6781 #endif
6782                     OPERATOR(FORMAT);
6783                 }
6784
6785                 /* Look for a prototype */
6786                 if (*s == '(') {
6787                     char *p;
6788                     bool bad_proto = FALSE;
6789                     bool in_brackets = FALSE;
6790                     char greedy_proto = ' ';
6791                     bool proto_after_greedy_proto = FALSE;
6792                     bool must_be_last = FALSE;
6793                     bool underscore = FALSE;
6794                     bool seen_underscore = FALSE;
6795                     const bool warnsyntax = ckWARN(WARN_SYNTAX);
6796
6797                     s = scan_str(s,!!PL_madskills,FALSE);
6798                     if (!s)
6799                         Perl_croak(aTHX_ "Prototype not terminated");
6800                     /* strip spaces and check for bad characters */
6801                     d = SvPVX(PL_lex_stuff);
6802                     tmp = 0;
6803                     for (p = d; *p; ++p) {
6804                         if (!isSPACE(*p)) {
6805                             d[tmp++] = *p;
6806
6807                             if (warnsyntax) {
6808                                 if (must_be_last)
6809                                     proto_after_greedy_proto = TRUE;
6810                                 if (!strchr("$@%*;[]&\\_", *p)) {
6811                                     bad_proto = TRUE;
6812                                 }
6813                                 else {
6814                                     if ( underscore ) {
6815                                         if ( *p != ';' )
6816                                             bad_proto = TRUE;
6817                                         underscore = FALSE;
6818                                     }
6819                                     if ( *p == '[' ) {
6820                                         in_brackets = TRUE;
6821                                     }
6822                                     else if ( *p == ']' ) {
6823                                         in_brackets = FALSE;
6824                                     }
6825                                     else if ( (*p == '@' || *p == '%') &&
6826                                          ( tmp < 2 || d[tmp-2] != '\\' ) &&
6827                                          !in_brackets ) {
6828                                         must_be_last = TRUE;
6829                                         greedy_proto = *p;
6830                                     }
6831                                     else if ( *p == '_' ) {
6832                                         underscore = seen_underscore = TRUE;
6833                                     }
6834                                 }
6835                             }
6836                         }
6837                     }
6838                     d[tmp] = '\0';
6839                     if (proto_after_greedy_proto)
6840                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6841                                     "Prototype after '%c' for %"SVf" : %s",
6842                                     greedy_proto, SVfARG(PL_subname), d);
6843                     if (bad_proto)
6844                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6845                                     "Illegal character %sin prototype for %"SVf" : %s",
6846                                     seen_underscore ? "after '_' " : "",
6847                                     SVfARG(PL_subname), d);
6848                     SvCUR_set(PL_lex_stuff, tmp);
6849                     have_proto = TRUE;
6850
6851 #ifdef PERL_MAD
6852                     start_force(0);
6853                     CURMAD('q', PL_thisopen);
6854                     CURMAD('_', tmpwhite);
6855                     CURMAD('=', PL_thisstuff);
6856                     CURMAD('Q', PL_thisclose);
6857                     NEXTVAL_NEXTTOKE.opval =
6858                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6859                     PL_lex_stuff = NULL;
6860                     force_next(THING);
6861
6862                     s = SKIPSPACE2(s,tmpwhite);
6863 #else
6864                     s = skipspace(s);
6865 #endif
6866                 }
6867                 else
6868                     have_proto = FALSE;
6869
6870                 if (*s == ':' && s[1] != ':')
6871                     PL_expect = attrful;
6872                 else if (*s != '{' && key == KEY_sub) {
6873                     if (!have_name)
6874                         Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
6875                     else if (*s != ';')
6876                         Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
6877                 }
6878
6879 #ifdef PERL_MAD
6880                 start_force(0);
6881                 if (tmpwhite) {
6882                     if (PL_madskills)
6883                         curmad('^', newSVpvs(""));
6884                     CURMAD('_', tmpwhite);
6885                 }
6886                 force_next(0);
6887
6888                 PL_thistoken = subtoken;
6889 #else
6890                 if (have_proto) {
6891                     NEXTVAL_NEXTTOKE.opval =
6892                         (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
6893                     PL_lex_stuff = NULL;
6894                     force_next(THING);
6895                 }
6896 #endif
6897                 if (!have_name) {
6898                     if (PL_curstash)
6899                         sv_setpvs(PL_subname, "__ANON__");
6900                     else
6901                         sv_setpvs(PL_subname, "__ANON__::__ANON__");
6902                     TOKEN(ANONSUB);
6903                 }
6904 #ifndef PERL_MAD
6905                 (void) force_word(PL_oldbufptr + tboffset, WORD,
6906                                   FALSE, TRUE, TRUE);
6907 #endif
6908                 if (key == KEY_my)
6909                     TOKEN(MYSUB);
6910                 TOKEN(SUB);
6911             }
6912
6913         case KEY_system:
6914             LOP(OP_SYSTEM,XREF);
6915
6916         case KEY_symlink:
6917             LOP(OP_SYMLINK,XTERM);
6918
6919         case KEY_syscall:
6920             LOP(OP_SYSCALL,XTERM);
6921
6922         case KEY_sysopen:
6923             LOP(OP_SYSOPEN,XTERM);
6924
6925         case KEY_sysseek:
6926             LOP(OP_SYSSEEK,XTERM);
6927
6928         case KEY_sysread:
6929             LOP(OP_SYSREAD,XTERM);
6930
6931         case KEY_syswrite:
6932             LOP(OP_SYSWRITE,XTERM);
6933
6934         case KEY_tr:
6935             s = scan_trans(s);
6936             TERM(sublex_start());
6937
6938         case KEY_tell:
6939             UNI(OP_TELL);
6940
6941         case KEY_telldir:
6942             UNI(OP_TELLDIR);
6943
6944         case KEY_tie:
6945             LOP(OP_TIE,XTERM);
6946
6947         case KEY_tied:
6948             UNI(OP_TIED);
6949
6950         case KEY_time:
6951             FUN0(OP_TIME);
6952
6953         case KEY_times:
6954             FUN0(OP_TMS);
6955
6956         case KEY_truncate:
6957             LOP(OP_TRUNCATE,XTERM);
6958
6959         case KEY_uc:
6960             UNI(OP_UC);
6961
6962         case KEY_ucfirst:
6963             UNI(OP_UCFIRST);
6964
6965         case KEY_untie:
6966             UNI(OP_UNTIE);
6967
6968         case KEY_until:
6969             pl_yylval.ival = CopLINE(PL_curcop);
6970             OPERATOR(UNTIL);
6971
6972         case KEY_unless:
6973             pl_yylval.ival = CopLINE(PL_curcop);
6974             OPERATOR(UNLESS);
6975
6976         case KEY_unlink:
6977             LOP(OP_UNLINK,XTERM);
6978
6979         case KEY_undef:
6980             UNIDOR(OP_UNDEF);
6981
6982         case KEY_unpack:
6983             LOP(OP_UNPACK,XTERM);
6984
6985         case KEY_utime:
6986             LOP(OP_UTIME,XTERM);
6987
6988         case KEY_umask:
6989             UNIDOR(OP_UMASK);
6990
6991         case KEY_unshift:
6992             LOP(OP_UNSHIFT,XTERM);
6993
6994         case KEY_use:
6995             s = tokenize_use(1, s);
6996             OPERATOR(USE);
6997
6998         case KEY_values:
6999             UNI(OP_VALUES);
7000
7001         case KEY_vec:
7002             LOP(OP_VEC,XTERM);
7003
7004         case KEY_when:
7005             pl_yylval.ival = CopLINE(PL_curcop);
7006             OPERATOR(WHEN);
7007
7008         case KEY_while:
7009             pl_yylval.ival = CopLINE(PL_curcop);
7010             OPERATOR(WHILE);
7011
7012         case KEY_warn:
7013             PL_hints |= HINT_BLOCK_SCOPE;
7014             LOP(OP_WARN,XTERM);
7015
7016         case KEY_wait:
7017             FUN0(OP_WAIT);
7018
7019         case KEY_waitpid:
7020             LOP(OP_WAITPID,XTERM);
7021
7022         case KEY_wantarray:
7023             FUN0(OP_WANTARRAY);
7024
7025         case KEY_write:
7026 #ifdef EBCDIC
7027         {
7028             char ctl_l[2];
7029             ctl_l[0] = toCTRL('L');
7030             ctl_l[1] = '\0';
7031             gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7032         }
7033 #else
7034             /* Make sure $^L is defined */
7035             gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7036 #endif
7037             UNI(OP_ENTERWRITE);
7038
7039         case KEY_x:
7040             if (PL_expect == XOPERATOR)
7041                 Mop(OP_REPEAT);
7042             check_uni();
7043             goto just_a_word;
7044
7045         case KEY_xor:
7046             pl_yylval.ival = OP_XOR;
7047             OPERATOR(OROP);
7048
7049         case KEY_y:
7050             s = scan_trans(s);
7051             TERM(sublex_start());
7052         }
7053     }}
7054 }
7055 #ifdef __SC__
7056 #pragma segment Main
7057 #endif
7058
7059 static int
7060 S_pending_ident(pTHX)
7061 {
7062     dVAR;
7063     register char *d;
7064     PADOFFSET tmp = 0;
7065     /* pit holds the identifier we read and pending_ident is reset */
7066     char pit = PL_pending_ident;
7067     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7068     /* All routes through this function want to know if there is a colon.  */
7069     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7070     PL_pending_ident = 0;
7071
7072     /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7073     DEBUG_T({ PerlIO_printf(Perl_debug_log,
7074           "### Pending identifier '%s'\n", PL_tokenbuf); });
7075
7076     /* if we're in a my(), we can't allow dynamics here.
7077        $foo'bar has already been turned into $foo::bar, so
7078        just check for colons.
7079
7080        if it's a legal name, the OP is a PADANY.
7081     */
7082     if (PL_in_my) {
7083         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
7084             if (has_colon)
7085                 yyerror(Perl_form(aTHX_ "No package name allowed for "
7086                                   "variable %s in \"our\"",
7087                                   PL_tokenbuf));
7088             tmp = allocmy(PL_tokenbuf);
7089         }
7090         else {
7091             if (has_colon)
7092                 yyerror(Perl_form(aTHX_ PL_no_myglob,
7093                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7094
7095             pl_yylval.opval = newOP(OP_PADANY, 0);
7096             pl_yylval.opval->op_targ = allocmy(PL_tokenbuf);
7097             return PRIVATEREF;
7098         }
7099     }
7100
7101     /*
7102        build the ops for accesses to a my() variable.
7103
7104        Deny my($a) or my($b) in a sort block, *if* $a or $b is
7105        then used in a comparison.  This catches most, but not
7106        all cases.  For instance, it catches
7107            sort { my($a); $a <=> $b }
7108        but not
7109            sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7110        (although why you'd do that is anyone's guess).
7111     */
7112
7113     if (!has_colon) {
7114         if (!PL_in_my)
7115             tmp = pad_findmy(PL_tokenbuf);
7116         if (tmp != NOT_IN_PAD) {
7117             /* might be an "our" variable" */
7118             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
7119                 /* build ops for a bareword */
7120                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
7121                 HEK * const stashname = HvNAME_HEK(stash);
7122                 SV *  const sym = newSVhek(stashname);
7123                 sv_catpvs(sym, "::");
7124                 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
7125                 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7126                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7127                 gv_fetchsv(sym,
7128                     (PL_in_eval
7129                         ? (GV_ADDMULTI | GV_ADDINEVAL)
7130                         : GV_ADDMULTI
7131                     ),
7132                     ((PL_tokenbuf[0] == '$') ? SVt_PV
7133                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7134                      : SVt_PVHV));
7135                 return WORD;
7136             }
7137
7138             /* if it's a sort block and they're naming $a or $b */
7139             if (PL_last_lop_op == OP_SORT &&
7140                 PL_tokenbuf[0] == '$' &&
7141                 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7142                 && !PL_tokenbuf[2])
7143             {
7144                 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7145                      d < PL_bufend && *d != '\n';
7146                      d++)
7147                 {
7148                     if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7149                         Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7150                               PL_tokenbuf);
7151                     }
7152                 }
7153             }
7154
7155             pl_yylval.opval = newOP(OP_PADANY, 0);
7156             pl_yylval.opval->op_targ = tmp;
7157             return PRIVATEREF;
7158         }
7159     }
7160
7161     /*
7162        Whine if they've said @foo in a doublequoted string,
7163        and @foo isn't a variable we can find in the symbol
7164        table.
7165     */
7166     if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
7167         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7168                                          SVt_PVAV);
7169         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
7170                 /* DO NOT warn for @- and @+ */
7171                 && !( PL_tokenbuf[2] == '\0' &&
7172                     ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7173            )
7174         {
7175             /* Downgraded from fatal to warning 20000522 mjd */
7176             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7177                            "Possible unintended interpolation of %s in string",
7178                            PL_tokenbuf);
7179         }
7180     }
7181
7182     /* build ops for a bareword */
7183     pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
7184                                                       tokenbuf_len - 1));
7185     pl_yylval.opval->op_private = OPpCONST_ENTERED;
7186     gv_fetchpvn_flags(
7187             PL_tokenbuf + 1, tokenbuf_len - 1,
7188             /* If the identifier refers to a stash, don't autovivify it.
7189              * Change 24660 had the side effect of causing symbol table
7190              * hashes to always be defined, even if they were freshly
7191              * created and the only reference in the entire program was
7192              * the single statement with the defined %foo::bar:: test.
7193              * It appears that all code in the wild doing this actually
7194              * wants to know whether sub-packages have been loaded, so
7195              * by avoiding auto-vivifying symbol tables, we ensure that
7196              * defined %foo::bar:: continues to be false, and the existing
7197              * tests still give the expected answers, even though what
7198              * they're actually testing has now changed subtly.
7199              */
7200             (*PL_tokenbuf == '%'
7201              && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7202              && d[-1] == ':'
7203              ? 0
7204              : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
7205             ((PL_tokenbuf[0] == '$') ? SVt_PV
7206              : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7207              : SVt_PVHV));
7208     return WORD;
7209 }
7210
7211 /*
7212  *  The following code was generated by perl_keyword.pl.
7213  */
7214
7215 I32
7216 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
7217 {
7218     dVAR;
7219
7220     PERL_ARGS_ASSERT_KEYWORD;
7221
7222   switch (len)
7223   {
7224     case 1: /* 5 tokens of length 1 */
7225       switch (name[0])
7226       {
7227         case 'm':
7228           {                                       /* m          */
7229             return KEY_m;
7230           }
7231
7232         case 'q':
7233           {                                       /* q          */
7234             return KEY_q;
7235           }
7236
7237         case 's':
7238           {                                       /* s          */
7239             return KEY_s;
7240           }
7241
7242         case 'x':
7243           {                                       /* x          */
7244             return -KEY_x;
7245           }
7246
7247         case 'y':
7248           {                                       /* y          */
7249             return KEY_y;
7250           }
7251
7252         default:
7253           goto unknown;
7254       }
7255
7256     case 2: /* 18 tokens of length 2 */
7257       switch (name[0])
7258       {
7259         case 'd':
7260           if (name[1] == 'o')
7261           {                                       /* do         */
7262             return KEY_do;
7263           }
7264
7265           goto unknown;
7266
7267         case 'e':
7268           if (name[1] == 'q')
7269           {                                       /* eq         */
7270             return -KEY_eq;
7271           }
7272
7273           goto unknown;
7274
7275         case 'g':
7276           switch (name[1])
7277           {
7278             case 'e':
7279               {                                   /* ge         */
7280                 return -KEY_ge;
7281               }
7282
7283             case 't':
7284               {                                   /* gt         */
7285                 return -KEY_gt;
7286               }
7287
7288             default:
7289               goto unknown;
7290           }
7291
7292         case 'i':
7293           if (name[1] == 'f')
7294           {                                       /* if         */
7295             return KEY_if;
7296           }
7297
7298           goto unknown;
7299
7300         case 'l':
7301           switch (name[1])
7302           {
7303             case 'c':
7304               {                                   /* lc         */
7305                 return -KEY_lc;
7306               }
7307
7308             case 'e':
7309               {                                   /* le         */
7310                 return -KEY_le;
7311               }
7312
7313             case 't':
7314               {                                   /* lt         */
7315                 return -KEY_lt;
7316               }
7317
7318             default:
7319               goto unknown;
7320           }
7321
7322         case 'm':
7323           if (name[1] == 'y')
7324           {                                       /* my         */
7325             return KEY_my;
7326           }
7327
7328           goto unknown;
7329
7330         case 'n':
7331           switch (name[1])
7332           {
7333             case 'e':
7334               {                                   /* ne         */
7335                 return -KEY_ne;
7336               }
7337
7338             case 'o':
7339               {                                   /* no         */
7340                 return KEY_no;
7341               }
7342
7343             default:
7344               goto unknown;
7345           }
7346
7347         case 'o':
7348           if (name[1] == 'r')
7349           {                                       /* or         */
7350             return -KEY_or;
7351           }
7352
7353           goto unknown;
7354
7355         case 'q':
7356           switch (name[1])
7357           {
7358             case 'q':
7359               {                                   /* qq         */
7360                 return KEY_qq;
7361               }
7362
7363             case 'r':
7364               {                                   /* qr         */
7365                 return KEY_qr;
7366               }
7367
7368             case 'w':
7369               {                                   /* qw         */
7370                 return KEY_qw;
7371               }
7372
7373             case 'x':
7374               {                                   /* qx         */
7375                 return KEY_qx;
7376               }
7377
7378             default:
7379               goto unknown;
7380           }
7381
7382         case 't':
7383           if (name[1] == 'r')
7384           {                                       /* tr         */
7385             return KEY_tr;
7386           }
7387
7388           goto unknown;
7389
7390         case 'u':
7391           if (name[1] == 'c')
7392           {                                       /* uc         */
7393             return -KEY_uc;
7394           }
7395
7396           goto unknown;
7397
7398         default:
7399           goto unknown;
7400       }
7401
7402     case 3: /* 29 tokens of length 3 */
7403       switch (name[0])
7404       {
7405         case 'E':
7406           if (name[1] == 'N' &&
7407               name[2] == 'D')
7408           {                                       /* END        */
7409             return KEY_END;
7410           }
7411
7412           goto unknown;
7413
7414         case 'a':
7415           switch (name[1])
7416           {
7417             case 'b':
7418               if (name[2] == 's')
7419               {                                   /* abs        */
7420                 return -KEY_abs;
7421               }
7422
7423               goto unknown;
7424
7425             case 'n':
7426               if (name[2] == 'd')
7427               {                                   /* and        */
7428                 return -KEY_and;
7429               }
7430
7431               goto unknown;
7432
7433             default:
7434               goto unknown;
7435           }
7436
7437         case 'c':
7438           switch (name[1])
7439           {
7440             case 'h':
7441               if (name[2] == 'r')
7442               {                                   /* chr        */
7443                 return -KEY_chr;
7444               }
7445
7446               goto unknown;
7447
7448             case 'm':
7449               if (name[2] == 'p')
7450               {                                   /* cmp        */
7451                 return -KEY_cmp;
7452               }
7453
7454               goto unknown;
7455
7456             case 'o':
7457               if (name[2] == 's')
7458               {                                   /* cos        */
7459                 return -KEY_cos;
7460               }
7461
7462               goto unknown;
7463
7464             default:
7465               goto unknown;
7466           }
7467
7468         case 'd':
7469           if (name[1] == 'i' &&
7470               name[2] == 'e')
7471           {                                       /* die        */
7472             return -KEY_die;
7473           }
7474
7475           goto unknown;
7476
7477         case 'e':
7478           switch (name[1])
7479           {
7480             case 'o':
7481               if (name[2] == 'f')
7482               {                                   /* eof        */
7483                 return -KEY_eof;
7484               }
7485
7486               goto unknown;
7487
7488             case 'x':
7489               if (name[2] == 'p')
7490               {                                   /* exp        */
7491                 return -KEY_exp;
7492               }
7493
7494               goto unknown;
7495
7496             default:
7497               goto unknown;
7498           }
7499
7500         case 'f':
7501           if (name[1] == 'o' &&
7502               name[2] == 'r')
7503           {                                       /* for        */
7504             return KEY_for;
7505           }
7506
7507           goto unknown;
7508
7509         case 'h':
7510           if (name[1] == 'e' &&
7511               name[2] == 'x')
7512           {                                       /* hex        */
7513             return -KEY_hex;
7514           }
7515
7516           goto unknown;
7517
7518         case 'i':
7519           if (name[1] == 'n' &&
7520               name[2] == 't')
7521           {                                       /* int        */
7522             return -KEY_int;
7523           }
7524
7525           goto unknown;
7526
7527         case 'l':
7528           if (name[1] == 'o' &&
7529               name[2] == 'g')
7530           {                                       /* log        */
7531             return -KEY_log;
7532           }
7533
7534           goto unknown;
7535
7536         case 'm':
7537           if (name[1] == 'a' &&
7538               name[2] == 'p')
7539           {                                       /* map        */
7540             return KEY_map;
7541           }
7542
7543           goto unknown;
7544
7545         case 'n':
7546           if (name[1] == 'o' &&
7547               name[2] == 't')
7548           {                                       /* not        */
7549             return -KEY_not;
7550           }
7551
7552           goto unknown;
7553
7554         case 'o':
7555           switch (name[1])
7556           {
7557             case 'c':
7558               if (name[2] == 't')
7559               {                                   /* oct        */
7560                 return -KEY_oct;
7561               }
7562
7563               goto unknown;
7564
7565             case 'r':
7566               if (name[2] == 'd')
7567               {                                   /* ord        */
7568                 return -KEY_ord;
7569               }
7570
7571               goto unknown;
7572
7573             case 'u':
7574               if (name[2] == 'r')
7575               {                                   /* our        */
7576                 return KEY_our;
7577               }
7578
7579               goto unknown;
7580
7581             default:
7582               goto unknown;
7583           }
7584
7585         case 'p':
7586           if (name[1] == 'o')
7587           {
7588             switch (name[2])
7589             {
7590               case 'p':
7591                 {                                 /* pop        */
7592                   return -KEY_pop;
7593                 }
7594
7595               case 's':
7596                 {                                 /* pos        */
7597                   return KEY_pos;
7598                 }
7599
7600               default:
7601                 goto unknown;
7602             }
7603           }
7604
7605           goto unknown;
7606
7607         case 'r':
7608           if (name[1] == 'e' &&
7609               name[2] == 'f')
7610           {                                       /* ref        */
7611             return -KEY_ref;
7612           }
7613
7614           goto unknown;
7615
7616         case 's':
7617           switch (name[1])
7618           {
7619             case 'a':
7620               if (name[2] == 'y')
7621               {                                   /* say        */
7622                 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
7623               }
7624
7625               goto unknown;
7626
7627             case 'i':
7628               if (name[2] == 'n')
7629               {                                   /* sin        */
7630                 return -KEY_sin;
7631               }
7632
7633               goto unknown;
7634
7635             case 'u':
7636               if (name[2] == 'b')
7637               {                                   /* sub        */
7638                 return KEY_sub;
7639               }
7640
7641               goto unknown;
7642
7643             default:
7644               goto unknown;
7645           }
7646
7647         case 't':
7648           if (name[1] == 'i' &&
7649               name[2] == 'e')
7650           {                                       /* tie        */
7651             return KEY_tie;
7652           }
7653
7654           goto unknown;
7655
7656         case 'u':
7657           if (name[1] == 's' &&
7658               name[2] == 'e')
7659           {                                       /* use        */
7660             return KEY_use;
7661           }
7662
7663           goto unknown;
7664
7665         case 'v':
7666           if (name[1] == 'e' &&
7667               name[2] == 'c')
7668           {                                       /* vec        */
7669             return -KEY_vec;
7670           }
7671
7672           goto unknown;
7673
7674         case 'x':
7675           if (name[1] == 'o' &&
7676               name[2] == 'r')
7677           {                                       /* xor        */
7678             return -KEY_xor;
7679           }
7680
7681           goto unknown;
7682
7683         default:
7684           goto unknown;
7685       }
7686
7687     case 4: /* 41 tokens of length 4 */
7688       switch (name[0])
7689       {
7690         case 'C':
7691           if (name[1] == 'O' &&
7692               name[2] == 'R' &&
7693               name[3] == 'E')
7694           {                                       /* CORE       */
7695             return -KEY_CORE;
7696           }
7697
7698           goto unknown;
7699
7700         case 'I':
7701           if (name[1] == 'N' &&
7702               name[2] == 'I' &&
7703               name[3] == 'T')
7704           {                                       /* INIT       */
7705             return KEY_INIT;
7706           }
7707
7708           goto unknown;
7709
7710         case 'b':
7711           if (name[1] == 'i' &&
7712               name[2] == 'n' &&
7713               name[3] == 'd')
7714           {                                       /* bind       */
7715             return -KEY_bind;
7716           }
7717
7718           goto unknown;
7719
7720         case 'c':
7721           if (name[1] == 'h' &&
7722               name[2] == 'o' &&
7723               name[3] == 'p')
7724           {                                       /* chop       */
7725             return -KEY_chop;
7726           }
7727
7728           goto unknown;
7729
7730         case 'd':
7731           if (name[1] == 'u' &&
7732               name[2] == 'm' &&
7733               name[3] == 'p')
7734           {                                       /* dump       */
7735             return -KEY_dump;
7736           }
7737
7738           goto unknown;
7739
7740         case 'e':
7741           switch (name[1])
7742           {
7743             case 'a':
7744               if (name[2] == 'c' &&
7745                   name[3] == 'h')
7746               {                                   /* each       */
7747                 return -KEY_each;
7748               }
7749
7750               goto unknown;
7751
7752             case 'l':
7753               if (name[2] == 's' &&
7754                   name[3] == 'e')
7755               {                                   /* else       */
7756                 return KEY_else;
7757               }
7758
7759               goto unknown;
7760
7761             case 'v':
7762               if (name[2] == 'a' &&
7763                   name[3] == 'l')
7764               {                                   /* eval       */
7765                 return KEY_eval;
7766               }
7767
7768               goto unknown;
7769
7770             case 'x':
7771               switch (name[2])
7772               {
7773                 case 'e':
7774                   if (name[3] == 'c')
7775                   {                               /* exec       */
7776                     return -KEY_exec;
7777                   }
7778
7779                   goto unknown;
7780
7781                 case 'i':
7782                   if (name[3] == 't')
7783                   {                               /* exit       */
7784                     return -KEY_exit;
7785                   }
7786
7787                   goto unknown;
7788
7789                 default:
7790                   goto unknown;
7791               }
7792
7793             default:
7794               goto unknown;
7795           }
7796
7797         case 'f':
7798           if (name[1] == 'o' &&
7799               name[2] == 'r' &&
7800               name[3] == 'k')
7801           {                                       /* fork       */
7802             return -KEY_fork;
7803           }
7804
7805           goto unknown;
7806
7807         case 'g':
7808           switch (name[1])
7809           {
7810             case 'e':
7811               if (name[2] == 't' &&
7812                   name[3] == 'c')
7813               {                                   /* getc       */
7814                 return -KEY_getc;
7815               }
7816
7817               goto unknown;
7818
7819             case 'l':
7820               if (name[2] == 'o' &&
7821                   name[3] == 'b')
7822               {                                   /* glob       */
7823                 return KEY_glob;
7824               }
7825
7826               goto unknown;
7827
7828             case 'o':
7829               if (name[2] == 't' &&
7830                   name[3] == 'o')
7831               {                                   /* goto       */
7832                 return KEY_goto;
7833               }
7834
7835               goto unknown;
7836
7837             case 'r':
7838               if (name[2] == 'e' &&
7839                   name[3] == 'p')
7840               {                                   /* grep       */
7841                 return KEY_grep;
7842               }
7843
7844               goto unknown;
7845
7846             default:
7847               goto unknown;
7848           }
7849
7850         case 'j':
7851           if (name[1] == 'o' &&
7852               name[2] == 'i' &&
7853               name[3] == 'n')
7854           {                                       /* join       */
7855             return -KEY_join;
7856           }
7857
7858           goto unknown;
7859
7860         case 'k':
7861           switch (name[1])
7862           {
7863             case 'e':
7864               if (name[2] == 'y' &&
7865                   name[3] == 's')
7866               {                                   /* keys       */
7867                 return -KEY_keys;
7868               }
7869
7870               goto unknown;
7871
7872             case 'i':
7873               if (name[2] == 'l' &&
7874                   name[3] == 'l')
7875               {                                   /* kill       */
7876                 return -KEY_kill;
7877               }
7878
7879               goto unknown;
7880
7881             default:
7882               goto unknown;
7883           }
7884
7885         case 'l':
7886           switch (name[1])
7887           {
7888             case 'a':
7889               if (name[2] == 's' &&
7890                   name[3] == 't')
7891               {                                   /* last       */
7892                 return KEY_last;
7893               }
7894
7895               goto unknown;
7896
7897             case 'i':
7898               if (name[2] == 'n' &&
7899                   name[3] == 'k')
7900               {                                   /* link       */
7901                 return -KEY_link;
7902               }
7903
7904               goto unknown;
7905
7906             case 'o':
7907               if (name[2] == 'c' &&
7908                   name[3] == 'k')
7909               {                                   /* lock       */
7910                 return -KEY_lock;
7911               }
7912
7913               goto unknown;
7914
7915             default:
7916               goto unknown;
7917           }
7918
7919         case 'n':
7920           if (name[1] == 'e' &&
7921               name[2] == 'x' &&
7922               name[3] == 't')
7923           {                                       /* next       */
7924             return KEY_next;
7925           }
7926
7927           goto unknown;
7928
7929         case 'o':
7930           if (name[1] == 'p' &&
7931               name[2] == 'e' &&
7932               name[3] == 'n')
7933           {                                       /* open       */
7934             return -KEY_open;
7935           }
7936
7937           goto unknown;
7938
7939         case 'p':
7940           switch (name[1])
7941           {
7942             case 'a':
7943               if (name[2] == 'c' &&
7944                   name[3] == 'k')
7945               {                                   /* pack       */
7946                 return -KEY_pack;
7947               }
7948
7949               goto unknown;
7950
7951             case 'i':
7952               if (name[2] == 'p' &&
7953                   name[3] == 'e')
7954               {                                   /* pipe       */
7955                 return -KEY_pipe;
7956               }
7957
7958               goto unknown;
7959
7960             case 'u':
7961               if (name[2] == 's' &&
7962                   name[3] == 'h')
7963               {                                   /* push       */
7964                 return -KEY_push;
7965               }
7966
7967               goto unknown;
7968
7969             default:
7970               goto unknown;
7971           }
7972
7973         case 'r':
7974           switch (name[1])
7975           {
7976             case 'a':
7977               if (name[2] == 'n' &&
7978                   name[3] == 'd')
7979               {                                   /* rand       */
7980                 return -KEY_rand;
7981               }
7982
7983               goto unknown;
7984
7985             case 'e':
7986               switch (name[2])
7987               {
7988                 case 'a':
7989                   if (name[3] == 'd')
7990                   {                               /* read       */
7991                     return -KEY_read;
7992                   }
7993
7994                   goto unknown;
7995
7996                 case 'c':
7997                   if (name[3] == 'v')
7998                   {                               /* recv       */
7999                     return -KEY_recv;
8000                   }
8001
8002                   goto unknown;
8003
8004                 case 'd':
8005                   if (name[3] == 'o')
8006                   {                               /* redo       */
8007                     return KEY_redo;
8008                   }
8009
8010                   goto unknown;
8011
8012                 default:
8013                   goto unknown;
8014               }
8015
8016             default:
8017               goto unknown;
8018           }
8019
8020         case 's':
8021           switch (name[1])
8022           {
8023             case 'e':
8024               switch (name[2])
8025               {
8026                 case 'e':
8027                   if (name[3] == 'k')
8028                   {                               /* seek       */
8029                     return -KEY_seek;
8030                   }
8031
8032                   goto unknown;
8033
8034                 case 'n':
8035                   if (name[3] == 'd')
8036                   {                               /* send       */
8037                     return -KEY_send;
8038                   }
8039
8040                   goto unknown;
8041
8042                 default:
8043                   goto unknown;
8044               }
8045
8046             case 'o':
8047               if (name[2] == 'r' &&
8048                   name[3] == 't')
8049               {                                   /* sort       */
8050                 return KEY_sort;
8051               }
8052
8053               goto unknown;
8054
8055             case 'q':
8056               if (name[2] == 'r' &&
8057                   name[3] == 't')
8058               {                                   /* sqrt       */
8059                 return -KEY_sqrt;
8060               }
8061
8062               goto unknown;
8063
8064             case 't':
8065               if (name[2] == 'a' &&
8066                   name[3] == 't')
8067               {                                   /* stat       */
8068                 return -KEY_stat;
8069               }
8070
8071               goto unknown;
8072
8073             default:
8074               goto unknown;
8075           }
8076
8077         case 't':
8078           switch (name[1])
8079           {
8080             case 'e':
8081               if (name[2] == 'l' &&
8082                   name[3] == 'l')
8083               {                                   /* tell       */
8084                 return -KEY_tell;
8085               }
8086
8087               goto unknown;
8088
8089             case 'i':
8090               switch (name[2])
8091               {
8092                 case 'e':
8093                   if (name[3] == 'd')
8094                   {                               /* tied       */
8095                     return KEY_tied;
8096                   }
8097
8098                   goto unknown;
8099
8100                 case 'm':
8101                   if (name[3] == 'e')
8102                   {                               /* time       */
8103                     return -KEY_time;
8104                   }
8105
8106                   goto unknown;
8107
8108                 default:
8109                   goto unknown;
8110               }
8111
8112             default:
8113               goto unknown;
8114           }
8115
8116         case 'w':
8117           switch (name[1])
8118           {
8119             case 'a':
8120               switch (name[2])
8121               {
8122                 case 'i':
8123                   if (name[3] == 't')
8124                   {                               /* wait       */
8125                     return -KEY_wait;
8126                   }
8127
8128                   goto unknown;
8129
8130                 case 'r':
8131                   if (name[3] == 'n')
8132                   {                               /* warn       */
8133                     return -KEY_warn;
8134                   }
8135
8136                   goto unknown;
8137
8138                 default:
8139                   goto unknown;
8140               }
8141
8142             case 'h':
8143               if (name[2] == 'e' &&
8144                   name[3] == 'n')
8145               {                                   /* when       */
8146                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
8147               }
8148
8149               goto unknown;
8150
8151             default:
8152               goto unknown;
8153           }
8154
8155         default:
8156           goto unknown;
8157       }
8158
8159     case 5: /* 39 tokens of length 5 */
8160       switch (name[0])
8161       {
8162         case 'B':
8163           if (name[1] == 'E' &&
8164               name[2] == 'G' &&
8165               name[3] == 'I' &&
8166               name[4] == 'N')
8167           {                                       /* BEGIN      */
8168             return KEY_BEGIN;
8169           }
8170
8171           goto unknown;
8172
8173         case 'C':
8174           if (name[1] == 'H' &&
8175               name[2] == 'E' &&
8176               name[3] == 'C' &&
8177               name[4] == 'K')
8178           {                                       /* CHECK      */
8179             return KEY_CHECK;
8180           }
8181
8182           goto unknown;
8183
8184         case 'a':
8185           switch (name[1])
8186           {
8187             case 'l':
8188               if (name[2] == 'a' &&
8189                   name[3] == 'r' &&
8190                   name[4] == 'm')
8191               {                                   /* alarm      */
8192                 return -KEY_alarm;
8193               }
8194
8195               goto unknown;
8196
8197             case 't':
8198               if (name[2] == 'a' &&
8199                   name[3] == 'n' &&
8200                   name[4] == '2')
8201               {                                   /* atan2      */
8202                 return -KEY_atan2;
8203               }
8204
8205               goto unknown;
8206
8207             default:
8208               goto unknown;
8209           }
8210
8211         case 'b':
8212           switch (name[1])
8213           {
8214             case 'l':
8215               if (name[2] == 'e' &&
8216                   name[3] == 's' &&
8217                   name[4] == 's')
8218               {                                   /* bless      */
8219                 return -KEY_bless;
8220               }
8221
8222               goto unknown;
8223
8224             case 'r':
8225               if (name[2] == 'e' &&
8226                   name[3] == 'a' &&
8227                   name[4] == 'k')
8228               {                                   /* break      */
8229                 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
8230               }
8231
8232               goto unknown;
8233
8234             default:
8235               goto unknown;
8236           }
8237
8238         case 'c':
8239           switch (name[1])
8240           {
8241             case 'h':
8242               switch (name[2])
8243               {
8244                 case 'd':
8245                   if (name[3] == 'i' &&
8246                       name[4] == 'r')
8247                   {                               /* chdir      */
8248                     return -KEY_chdir;
8249                   }
8250
8251                   goto unknown;
8252
8253                 case 'm':
8254                   if (name[3] == 'o' &&
8255                       name[4] == 'd')
8256                   {                               /* chmod      */
8257                     return -KEY_chmod;
8258                   }
8259
8260                   goto unknown;
8261
8262                 case 'o':
8263                   switch (name[3])
8264                   {
8265                     case 'm':
8266                       if (name[4] == 'p')
8267                       {                           /* chomp      */
8268                         return -KEY_chomp;
8269                       }
8270
8271                       goto unknown;
8272
8273                     case 'w':
8274                       if (name[4] == 'n')
8275                       {                           /* chown      */
8276                         return -KEY_chown;
8277                       }
8278
8279                       goto unknown;
8280
8281                     default:
8282                       goto unknown;
8283                   }
8284
8285                 default:
8286                   goto unknown;
8287               }
8288
8289             case 'l':
8290               if (name[2] == 'o' &&
8291                   name[3] == 's' &&
8292                   name[4] == 'e')
8293               {                                   /* close      */
8294                 return -KEY_close;
8295               }
8296
8297               goto unknown;
8298
8299             case 'r':
8300               if (name[2] == 'y' &&
8301                   name[3] == 'p' &&
8302                   name[4] == 't')
8303               {                                   /* crypt      */
8304                 return -KEY_crypt;
8305               }
8306
8307               goto unknown;
8308
8309             default:
8310               goto unknown;
8311           }
8312
8313         case 'e':
8314           if (name[1] == 'l' &&
8315               name[2] == 's' &&
8316               name[3] == 'i' &&
8317               name[4] == 'f')
8318           {                                       /* elsif      */
8319             return KEY_elsif;
8320           }
8321
8322           goto unknown;
8323
8324         case 'f':
8325           switch (name[1])
8326           {
8327             case 'c':
8328               if (name[2] == 'n' &&
8329                   name[3] == 't' &&
8330                   name[4] == 'l')
8331               {                                   /* fcntl      */
8332                 return -KEY_fcntl;
8333               }
8334
8335               goto unknown;
8336
8337             case 'l':
8338               if (name[2] == 'o' &&
8339                   name[3] == 'c' &&
8340                   name[4] == 'k')
8341               {                                   /* flock      */
8342                 return -KEY_flock;
8343               }
8344
8345               goto unknown;
8346
8347             default:
8348               goto unknown;
8349           }
8350
8351         case 'g':
8352           if (name[1] == 'i' &&
8353               name[2] == 'v' &&
8354               name[3] == 'e' &&
8355               name[4] == 'n')
8356           {                                       /* given      */
8357             return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
8358           }
8359
8360           goto unknown;
8361
8362         case 'i':
8363           switch (name[1])
8364           {
8365             case 'n':
8366               if (name[2] == 'd' &&
8367                   name[3] == 'e' &&
8368                   name[4] == 'x')
8369               {                                   /* index      */
8370                 return -KEY_index;
8371               }
8372
8373               goto unknown;
8374
8375             case 'o':
8376               if (name[2] == 'c' &&
8377                   name[3] == 't' &&
8378                   name[4] == 'l')
8379               {                                   /* ioctl      */
8380                 return -KEY_ioctl;
8381               }
8382
8383               goto unknown;
8384
8385             default:
8386               goto unknown;
8387           }
8388
8389         case 'l':
8390           switch (name[1])
8391           {
8392             case 'o':
8393               if (name[2] == 'c' &&
8394                   name[3] == 'a' &&
8395                   name[4] == 'l')
8396               {                                   /* local      */
8397                 return KEY_local;
8398               }
8399
8400               goto unknown;
8401
8402             case 's':
8403               if (name[2] == 't' &&
8404                   name[3] == 'a' &&
8405                   name[4] == 't')
8406               {                                   /* lstat      */
8407                 return -KEY_lstat;
8408               }
8409
8410               goto unknown;
8411
8412             default:
8413               goto unknown;
8414           }
8415
8416         case 'm':
8417           if (name[1] == 'k' &&
8418               name[2] == 'd' &&
8419               name[3] == 'i' &&
8420               name[4] == 'r')
8421           {                                       /* mkdir      */
8422             return -KEY_mkdir;
8423           }
8424
8425           goto unknown;
8426
8427         case 'p':
8428           if (name[1] == 'r' &&
8429               name[2] == 'i' &&
8430               name[3] == 'n' &&
8431               name[4] == 't')
8432           {                                       /* print      */
8433             return KEY_print;
8434           }
8435
8436           goto unknown;
8437
8438         case 'r':
8439           switch (name[1])
8440           {
8441             case 'e':
8442               if (name[2] == 's' &&
8443                   name[3] == 'e' &&
8444                   name[4] == 't')
8445               {                                   /* reset      */
8446                 return -KEY_reset;
8447               }
8448
8449               goto unknown;
8450
8451             case 'm':
8452               if (name[2] == 'd' &&
8453                   name[3] == 'i' &&
8454                   name[4] == 'r')
8455               {                                   /* rmdir      */
8456                 return -KEY_rmdir;
8457               }
8458
8459               goto unknown;
8460
8461             default:
8462               goto unknown;
8463           }
8464
8465         case 's':
8466           switch (name[1])
8467           {
8468             case 'e':
8469               if (name[2] == 'm' &&
8470                   name[3] == 'o' &&
8471                   name[4] == 'p')
8472               {                                   /* semop      */
8473                 return -KEY_semop;
8474               }
8475
8476               goto unknown;
8477
8478             case 'h':
8479               if (name[2] == 'i' &&
8480                   name[3] == 'f' &&
8481                   name[4] == 't')
8482               {                                   /* shift      */
8483                 return -KEY_shift;
8484               }
8485
8486               goto unknown;
8487
8488             case 'l':
8489               if (name[2] == 'e' &&
8490                   name[3] == 'e' &&
8491                   name[4] == 'p')
8492               {                                   /* sleep      */
8493                 return -KEY_sleep;
8494               }
8495
8496               goto unknown;
8497
8498             case 'p':
8499               if (name[2] == 'l' &&
8500                   name[3] == 'i' &&
8501                   name[4] == 't')
8502               {                                   /* split      */
8503                 return KEY_split;
8504               }
8505
8506               goto unknown;
8507
8508             case 'r':
8509               if (name[2] == 'a' &&
8510                   name[3] == 'n' &&
8511                   name[4] == 'd')
8512               {                                   /* srand      */
8513                 return -KEY_srand;
8514               }
8515
8516               goto unknown;
8517
8518             case 't':
8519               switch (name[2])
8520               {
8521                 case 'a':
8522                   if (name[3] == 't' &&
8523                       name[4] == 'e')
8524                   {                               /* state      */
8525                     return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
8526                   }
8527
8528                   goto unknown;
8529
8530                 case 'u':
8531                   if (name[3] == 'd' &&
8532                       name[4] == 'y')
8533                   {                               /* study      */
8534                     return KEY_study;
8535                   }
8536
8537                   goto unknown;
8538
8539                 default:
8540                   goto unknown;
8541               }
8542
8543             default:
8544               goto unknown;
8545           }
8546
8547         case 't':
8548           if (name[1] == 'i' &&
8549               name[2] == 'm' &&
8550               name[3] == 'e' &&
8551               name[4] == 's')
8552           {                                       /* times      */
8553             return -KEY_times;
8554           }
8555
8556           goto unknown;
8557
8558         case 'u':
8559           switch (name[1])
8560           {
8561             case 'm':
8562               if (name[2] == 'a' &&
8563                   name[3] == 's' &&
8564                   name[4] == 'k')
8565               {                                   /* umask      */
8566                 return -KEY_umask;
8567               }
8568
8569               goto unknown;
8570
8571             case 'n':
8572               switch (name[2])
8573               {
8574                 case 'd':
8575                   if (name[3] == 'e' &&
8576                       name[4] == 'f')
8577                   {                               /* undef      */
8578                     return KEY_undef;
8579                   }
8580
8581                   goto unknown;
8582
8583                 case 't':
8584                   if (name[3] == 'i')
8585                   {
8586                     switch (name[4])
8587                     {
8588                       case 'e':
8589                         {                         /* untie      */
8590                           return KEY_untie;
8591                         }
8592
8593                       case 'l':
8594                         {                         /* until      */
8595                           return KEY_until;
8596                         }
8597
8598                       default:
8599                         goto unknown;
8600                     }
8601                   }
8602
8603                   goto unknown;
8604
8605                 default:
8606                   goto unknown;
8607               }
8608
8609             case 't':
8610               if (name[2] == 'i' &&
8611                   name[3] == 'm' &&
8612                   name[4] == 'e')
8613               {                                   /* utime      */
8614                 return -KEY_utime;
8615               }
8616
8617               goto unknown;
8618
8619             default:
8620               goto unknown;
8621           }
8622
8623         case 'w':
8624           switch (name[1])
8625           {
8626             case 'h':
8627               if (name[2] == 'i' &&
8628                   name[3] == 'l' &&
8629                   name[4] == 'e')
8630               {                                   /* while      */
8631                 return KEY_while;
8632               }
8633
8634               goto unknown;
8635
8636             case 'r':
8637               if (name[2] == 'i' &&
8638                   name[3] == 't' &&
8639                   name[4] == 'e')
8640               {                                   /* write      */
8641                 return -KEY_write;
8642               }
8643
8644               goto unknown;
8645
8646             default:
8647               goto unknown;
8648           }
8649
8650         default:
8651           goto unknown;
8652       }
8653
8654     case 6: /* 33 tokens of length 6 */
8655       switch (name[0])
8656       {
8657         case 'a':
8658           if (name[1] == 'c' &&
8659               name[2] == 'c' &&
8660               name[3] == 'e' &&
8661               name[4] == 'p' &&
8662               name[5] == 't')
8663           {                                       /* accept     */
8664             return -KEY_accept;
8665           }
8666
8667           goto unknown;
8668
8669         case 'c':
8670           switch (name[1])
8671           {
8672             case 'a':
8673               if (name[2] == 'l' &&
8674                   name[3] == 'l' &&
8675                   name[4] == 'e' &&
8676                   name[5] == 'r')
8677               {                                   /* caller     */
8678                 return -KEY_caller;
8679               }
8680
8681               goto unknown;
8682
8683             case 'h':
8684               if (name[2] == 'r' &&
8685                   name[3] == 'o' &&
8686                   name[4] == 'o' &&
8687                   name[5] == 't')
8688               {                                   /* chroot     */
8689                 return -KEY_chroot;
8690               }
8691
8692               goto unknown;
8693
8694             default:
8695               goto unknown;
8696           }
8697
8698         case 'd':
8699           if (name[1] == 'e' &&
8700               name[2] == 'l' &&
8701               name[3] == 'e' &&
8702               name[4] == 't' &&
8703               name[5] == 'e')
8704           {                                       /* delete     */
8705             return KEY_delete;
8706           }
8707
8708           goto unknown;
8709
8710         case 'e':
8711           switch (name[1])
8712           {
8713             case 'l':
8714               if (name[2] == 's' &&
8715                   name[3] == 'e' &&
8716                   name[4] == 'i' &&
8717                   name[5] == 'f')
8718               {                                   /* elseif     */
8719                 if(ckWARN_d(WARN_SYNTAX))
8720                   Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
8721               }
8722
8723               goto unknown;
8724
8725             case 'x':
8726               if (name[2] == 'i' &&
8727                   name[3] == 's' &&
8728                   name[4] == 't' &&
8729                   name[5] == 's')
8730               {                                   /* exists     */
8731                 return KEY_exists;
8732               }
8733
8734               goto unknown;
8735
8736             default:
8737               goto unknown;
8738           }
8739
8740         case 'f':
8741           switch (name[1])
8742           {
8743             case 'i':
8744               if (name[2] == 'l' &&
8745                   name[3] == 'e' &&
8746                   name[4] == 'n' &&
8747                   name[5] == 'o')
8748               {                                   /* fileno     */
8749                 return -KEY_fileno;
8750               }
8751
8752               goto unknown;
8753
8754             case 'o':
8755               if (name[2] == 'r' &&
8756                   name[3] == 'm' &&
8757                   name[4] == 'a' &&
8758                   name[5] == 't')
8759               {                                   /* format     */
8760                 return KEY_format;
8761               }
8762
8763               goto unknown;
8764
8765             default:
8766               goto unknown;
8767           }
8768
8769         case 'g':
8770           if (name[1] == 'm' &&
8771               name[2] == 't' &&
8772               name[3] == 'i' &&
8773               name[4] == 'm' &&
8774               name[5] == 'e')
8775           {                                       /* gmtime     */
8776             return -KEY_gmtime;
8777           }
8778
8779           goto unknown;
8780
8781         case 'l':
8782           switch (name[1])
8783           {
8784             case 'e':
8785               if (name[2] == 'n' &&
8786                   name[3] == 'g' &&
8787                   name[4] == 't' &&
8788                   name[5] == 'h')
8789               {                                   /* length     */
8790                 return -KEY_length;
8791               }
8792
8793               goto unknown;
8794
8795             case 'i':
8796               if (name[2] == 's' &&
8797                   name[3] == 't' &&
8798                   name[4] == 'e' &&
8799                   name[5] == 'n')
8800               {                                   /* listen     */
8801                 return -KEY_listen;
8802               }
8803
8804               goto unknown;
8805
8806             default:
8807               goto unknown;
8808           }
8809
8810         case 'm':
8811           if (name[1] == 's' &&
8812               name[2] == 'g')
8813           {
8814             switch (name[3])
8815             {
8816               case 'c':
8817                 if (name[4] == 't' &&
8818                     name[5] == 'l')
8819                 {                                 /* msgctl     */
8820                   return -KEY_msgctl;
8821                 }
8822
8823                 goto unknown;
8824
8825               case 'g':
8826                 if (name[4] == 'e' &&
8827                     name[5] == 't')
8828                 {                                 /* msgget     */
8829                   return -KEY_msgget;
8830                 }
8831
8832                 goto unknown;
8833
8834               case 'r':
8835                 if (name[4] == 'c' &&
8836                     name[5] == 'v')
8837                 {                                 /* msgrcv     */
8838                   return -KEY_msgrcv;
8839                 }
8840
8841                 goto unknown;
8842
8843               case 's':
8844                 if (name[4] == 'n' &&
8845                     name[5] == 'd')
8846                 {                                 /* msgsnd     */
8847                   return -KEY_msgsnd;
8848                 }
8849
8850                 goto unknown;
8851
8852               default:
8853                 goto unknown;
8854             }
8855           }
8856
8857           goto unknown;
8858
8859         case 'p':
8860           if (name[1] == 'r' &&
8861               name[2] == 'i' &&
8862               name[3] == 'n' &&
8863               name[4] == 't' &&
8864               name[5] == 'f')
8865           {                                       /* printf     */
8866             return KEY_printf;
8867           }
8868
8869           goto unknown;
8870
8871         case 'r':
8872           switch (name[1])
8873           {
8874             case 'e':
8875               switch (name[2])
8876               {
8877                 case 'n':
8878                   if (name[3] == 'a' &&
8879                       name[4] == 'm' &&
8880                       name[5] == 'e')
8881                   {                               /* rename     */
8882                     return -KEY_rename;
8883                   }
8884
8885                   goto unknown;
8886
8887                 case 't':
8888                   if (name[3] == 'u' &&
8889                       name[4] == 'r' &&
8890                       name[5] == 'n')
8891                   {                               /* return     */
8892                     return KEY_return;
8893                   }
8894
8895                   goto unknown;
8896
8897                 default:
8898                   goto unknown;
8899               }
8900
8901             case 'i':
8902               if (name[2] == 'n' &&
8903                   name[3] == 'd' &&
8904                   name[4] == 'e' &&
8905                   name[5] == 'x')
8906               {                                   /* rindex     */
8907                 return -KEY_rindex;
8908               }
8909
8910               goto unknown;
8911
8912             default:
8913               goto unknown;
8914           }
8915
8916         case 's':
8917           switch (name[1])
8918           {
8919             case 'c':
8920               if (name[2] == 'a' &&
8921                   name[3] == 'l' &&
8922                   name[4] == 'a' &&
8923                   name[5] == 'r')
8924               {                                   /* scalar     */
8925                 return KEY_scalar;
8926               }
8927
8928               goto unknown;
8929
8930             case 'e':
8931               switch (name[2])
8932               {
8933                 case 'l':
8934                   if (name[3] == 'e' &&
8935                       name[4] == 'c' &&
8936                       name[5] == 't')
8937                   {                               /* select     */
8938                     return -KEY_select;
8939                   }
8940
8941                   goto unknown;
8942
8943                 case 'm':
8944                   switch (name[3])
8945                   {
8946                     case 'c':
8947                       if (name[4] == 't' &&
8948                           name[5] == 'l')
8949                       {                           /* semctl     */
8950                         return -KEY_semctl;
8951                       }
8952
8953                       goto unknown;
8954
8955                     case 'g':
8956                       if (name[4] == 'e' &&
8957                           name[5] == 't')
8958                       {                           /* semget     */
8959                         return -KEY_semget;
8960                       }
8961
8962                       goto unknown;
8963
8964                     default:
8965                       goto unknown;
8966                   }
8967
8968                 default:
8969                   goto unknown;
8970               }
8971
8972             case 'h':
8973               if (name[2] == 'm')
8974               {
8975                 switch (name[3])
8976                 {
8977                   case 'c':
8978                     if (name[4] == 't' &&
8979                         name[5] == 'l')
8980                     {                             /* shmctl     */
8981                       return -KEY_shmctl;
8982                     }
8983
8984                     goto unknown;
8985
8986                   case 'g':
8987                     if (name[4] == 'e' &&
8988                         name[5] == 't')
8989                     {                             /* shmget     */
8990                       return -KEY_shmget;
8991                     }
8992
8993                     goto unknown;
8994
8995                   default:
8996                     goto unknown;
8997                 }
8998               }
8999
9000               goto unknown;
9001
9002             case 'o':
9003               if (name[2] == 'c' &&
9004                   name[3] == 'k' &&
9005                   name[4] == 'e' &&
9006                   name[5] == 't')
9007               {                                   /* socket     */
9008                 return -KEY_socket;
9009               }
9010
9011               goto unknown;
9012
9013             case 'p':
9014               if (name[2] == 'l' &&
9015                   name[3] == 'i' &&
9016                   name[4] == 'c' &&
9017                   name[5] == 'e')
9018               {                                   /* splice     */
9019                 return -KEY_splice;
9020               }
9021
9022               goto unknown;
9023
9024             case 'u':
9025               if (name[2] == 'b' &&
9026                   name[3] == 's' &&
9027                   name[4] == 't' &&
9028                   name[5] == 'r')
9029               {                                   /* substr     */
9030                 return -KEY_substr;
9031               }
9032
9033               goto unknown;
9034
9035             case 'y':
9036               if (name[2] == 's' &&
9037                   name[3] == 't' &&
9038                   name[4] == 'e' &&
9039                   name[5] == 'm')
9040               {                                   /* system     */
9041                 return -KEY_system;
9042               }
9043
9044               goto unknown;
9045
9046             default:
9047               goto unknown;
9048           }
9049
9050         case 'u':
9051           if (name[1] == 'n')
9052           {
9053             switch (name[2])
9054             {
9055               case 'l':
9056                 switch (name[3])
9057                 {
9058                   case 'e':
9059                     if (name[4] == 's' &&
9060                         name[5] == 's')
9061                     {                             /* unless     */
9062                       return KEY_unless;
9063                     }
9064
9065                     goto unknown;
9066
9067                   case 'i':
9068                     if (name[4] == 'n' &&
9069                         name[5] == 'k')
9070                     {                             /* unlink     */
9071                       return -KEY_unlink;
9072                     }
9073
9074                     goto unknown;
9075
9076                   default:
9077                     goto unknown;
9078                 }
9079
9080               case 'p':
9081                 if (name[3] == 'a' &&
9082                     name[4] == 'c' &&
9083                     name[5] == 'k')
9084                 {                                 /* unpack     */
9085                   return -KEY_unpack;
9086                 }
9087
9088                 goto unknown;
9089
9090               default:
9091                 goto unknown;
9092             }
9093           }
9094
9095           goto unknown;
9096
9097         case 'v':
9098           if (name[1] == 'a' &&
9099               name[2] == 'l' &&
9100               name[3] == 'u' &&
9101               name[4] == 'e' &&
9102               name[5] == 's')
9103           {                                       /* values     */
9104             return -KEY_values;
9105           }
9106
9107           goto unknown;
9108
9109         default:
9110           goto unknown;
9111       }
9112
9113     case 7: /* 29 tokens of length 7 */
9114       switch (name[0])
9115       {
9116         case 'D':
9117           if (name[1] == 'E' &&
9118               name[2] == 'S' &&
9119               name[3] == 'T' &&
9120               name[4] == 'R' &&
9121               name[5] == 'O' &&
9122               name[6] == 'Y')
9123           {                                       /* DESTROY    */
9124             return KEY_DESTROY;
9125           }
9126
9127           goto unknown;
9128
9129         case '_':
9130           if (name[1] == '_' &&
9131               name[2] == 'E' &&
9132               name[3] == 'N' &&
9133               name[4] == 'D' &&
9134               name[5] == '_' &&
9135               name[6] == '_')
9136           {                                       /* __END__    */
9137             return KEY___END__;
9138           }
9139
9140           goto unknown;
9141
9142         case 'b':
9143           if (name[1] == 'i' &&
9144               name[2] == 'n' &&
9145               name[3] == 'm' &&
9146               name[4] == 'o' &&
9147               name[5] == 'd' &&
9148               name[6] == 'e')
9149           {                                       /* binmode    */
9150             return -KEY_binmode;
9151           }
9152
9153           goto unknown;
9154
9155         case 'c':
9156           if (name[1] == 'o' &&
9157               name[2] == 'n' &&
9158               name[3] == 'n' &&
9159               name[4] == 'e' &&
9160               name[5] == 'c' &&
9161               name[6] == 't')
9162           {                                       /* connect    */
9163             return -KEY_connect;
9164           }
9165
9166           goto unknown;
9167
9168         case 'd':
9169           switch (name[1])
9170           {
9171             case 'b':
9172               if (name[2] == 'm' &&
9173                   name[3] == 'o' &&
9174                   name[4] == 'p' &&
9175                   name[5] == 'e' &&
9176                   name[6] == 'n')
9177               {                                   /* dbmopen    */
9178                 return -KEY_dbmopen;
9179               }
9180
9181               goto unknown;
9182
9183             case 'e':
9184               if (name[2] == 'f')
9185               {
9186                 switch (name[3])
9187                 {
9188                   case 'a':
9189                     if (name[4] == 'u' &&
9190                         name[5] == 'l' &&
9191                         name[6] == 't')
9192                     {                             /* default    */
9193                       return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
9194                     }
9195
9196                     goto unknown;
9197
9198                   case 'i':
9199                     if (name[4] == 'n' &&
9200                         name[5] == 'e' &&
9201                         name[6] == 'd')
9202                     {                             /* defined    */
9203                       return KEY_defined;
9204                     }
9205
9206                     goto unknown;
9207
9208                   default:
9209                     goto unknown;
9210                 }
9211               }
9212
9213               goto unknown;
9214
9215             default:
9216               goto unknown;
9217           }
9218
9219         case 'f':
9220           if (name[1] == 'o' &&
9221               name[2] == 'r' &&
9222               name[3] == 'e' &&
9223               name[4] == 'a' &&
9224               name[5] == 'c' &&
9225               name[6] == 'h')
9226           {                                       /* foreach    */
9227             return KEY_foreach;
9228           }
9229
9230           goto unknown;
9231
9232         case 'g':
9233           if (name[1] == 'e' &&
9234               name[2] == 't' &&
9235               name[3] == 'p')
9236           {
9237             switch (name[4])
9238             {
9239               case 'g':
9240                 if (name[5] == 'r' &&
9241                     name[6] == 'p')
9242                 {                                 /* getpgrp    */
9243                   return -KEY_getpgrp;
9244                 }
9245
9246                 goto unknown;
9247
9248               case 'p':
9249                 if (name[5] == 'i' &&
9250                     name[6] == 'd')
9251                 {                                 /* getppid    */
9252                   return -KEY_getppid;
9253                 }
9254
9255                 goto unknown;
9256
9257               default:
9258                 goto unknown;
9259             }
9260           }
9261
9262           goto unknown;
9263
9264         case 'l':
9265           if (name[1] == 'c' &&
9266               name[2] == 'f' &&
9267               name[3] == 'i' &&
9268               name[4] == 'r' &&
9269               name[5] == 's' &&
9270               name[6] == 't')
9271           {                                       /* lcfirst    */
9272             return -KEY_lcfirst;
9273           }
9274
9275           goto unknown;
9276
9277         case 'o':
9278           if (name[1] == 'p' &&
9279               name[2] == 'e' &&
9280               name[3] == 'n' &&
9281               name[4] == 'd' &&
9282               name[5] == 'i' &&
9283               name[6] == 'r')
9284           {                                       /* opendir    */
9285             return -KEY_opendir;
9286           }
9287
9288           goto unknown;
9289
9290         case 'p':
9291           if (name[1] == 'a' &&
9292               name[2] == 'c' &&
9293               name[3] == 'k' &&
9294               name[4] == 'a' &&
9295               name[5] == 'g' &&
9296               name[6] == 'e')
9297           {                                       /* package    */
9298             return KEY_package;
9299           }
9300
9301           goto unknown;
9302
9303         case 'r':
9304           if (name[1] == 'e')
9305           {
9306             switch (name[2])
9307             {
9308               case 'a':
9309                 if (name[3] == 'd' &&
9310                     name[4] == 'd' &&
9311                     name[5] == 'i' &&
9312                     name[6] == 'r')
9313                 {                                 /* readdir    */
9314                   return -KEY_readdir;
9315                 }
9316
9317                 goto unknown;
9318
9319               case 'q':
9320                 if (name[3] == 'u' &&
9321                     name[4] == 'i' &&
9322                     name[5] == 'r' &&
9323                     name[6] == 'e')
9324                 {                                 /* require    */
9325                   return KEY_require;
9326                 }
9327
9328                 goto unknown;
9329
9330               case 'v':
9331                 if (name[3] == 'e' &&
9332                     name[4] == 'r' &&
9333                     name[5] == 's' &&
9334                     name[6] == 'e')
9335                 {                                 /* reverse    */
9336                   return -KEY_reverse;
9337                 }
9338
9339                 goto unknown;
9340
9341               default:
9342                 goto unknown;
9343             }
9344           }
9345
9346           goto unknown;
9347
9348         case 's':
9349           switch (name[1])
9350           {
9351             case 'e':
9352               switch (name[2])
9353               {
9354                 case 'e':
9355                   if (name[3] == 'k' &&
9356                       name[4] == 'd' &&
9357                       name[5] == 'i' &&
9358                       name[6] == 'r')
9359                   {                               /* seekdir    */
9360                     return -KEY_seekdir;
9361                   }
9362
9363                   goto unknown;
9364
9365                 case 't':
9366                   if (name[3] == 'p' &&
9367                       name[4] == 'g' &&
9368                       name[5] == 'r' &&
9369                       name[6] == 'p')
9370                   {                               /* setpgrp    */
9371                     return -KEY_setpgrp;
9372                   }
9373
9374                   goto unknown;
9375
9376                 default:
9377                   goto unknown;
9378               }
9379
9380             case 'h':
9381               if (name[2] == 'm' &&
9382                   name[3] == 'r' &&
9383                   name[4] == 'e' &&
9384                   name[5] == 'a' &&
9385                   name[6] == 'd')
9386               {                                   /* shmread    */
9387                 return -KEY_shmread;
9388               }
9389
9390               goto unknown;
9391
9392             case 'p':
9393               if (name[2] == 'r' &&
9394                   name[3] == 'i' &&
9395                   name[4] == 'n' &&
9396                   name[5] == 't' &&
9397                   name[6] == 'f')
9398               {                                   /* sprintf    */
9399                 return -KEY_sprintf;
9400               }
9401
9402               goto unknown;
9403
9404             case 'y':
9405               switch (name[2])
9406               {
9407                 case 'm':
9408                   if (name[3] == 'l' &&
9409                       name[4] == 'i' &&
9410                       name[5] == 'n' &&
9411                       name[6] == 'k')
9412                   {                               /* symlink    */
9413                     return -KEY_symlink;
9414                   }
9415
9416                   goto unknown;
9417
9418                 case 's':
9419                   switch (name[3])
9420                   {
9421                     case 'c':
9422                       if (name[4] == 'a' &&
9423                           name[5] == 'l' &&
9424                           name[6] == 'l')
9425                       {                           /* syscall    */
9426                         return -KEY_syscall;
9427                       }
9428
9429                       goto unknown;
9430
9431                     case 'o':
9432                       if (name[4] == 'p' &&
9433                           name[5] == 'e' &&
9434                           name[6] == 'n')
9435                       {                           /* sysopen    */
9436                         return -KEY_sysopen;
9437                       }
9438
9439                       goto unknown;
9440
9441                     case 'r':
9442                       if (name[4] == 'e' &&
9443                           name[5] == 'a' &&
9444                           name[6] == 'd')
9445                       {                           /* sysread    */
9446                         return -KEY_sysread;
9447                       }
9448
9449                       goto unknown;
9450
9451                     case 's':
9452                       if (name[4] == 'e' &&
9453                           name[5] == 'e' &&
9454                           name[6] == 'k')
9455                       {                           /* sysseek    */
9456                         return -KEY_sysseek;
9457                       }
9458
9459                       goto unknown;
9460
9461                     default:
9462                       goto unknown;
9463                   }
9464
9465                 default:
9466                   goto unknown;
9467               }
9468
9469             default:
9470               goto unknown;
9471           }
9472
9473         case 't':
9474           if (name[1] == 'e' &&
9475               name[2] == 'l' &&
9476               name[3] == 'l' &&
9477               name[4] == 'd' &&
9478               name[5] == 'i' &&
9479               name[6] == 'r')
9480           {                                       /* telldir    */
9481             return -KEY_telldir;
9482           }
9483
9484           goto unknown;
9485
9486         case 'u':
9487           switch (name[1])
9488           {
9489             case 'c':
9490               if (name[2] == 'f' &&
9491                   name[3] == 'i' &&
9492                   name[4] == 'r' &&
9493                   name[5] == 's' &&
9494                   name[6] == 't')
9495               {                                   /* ucfirst    */
9496                 return -KEY_ucfirst;
9497               }
9498
9499               goto unknown;
9500
9501             case 'n':
9502               if (name[2] == 's' &&
9503                   name[3] == 'h' &&
9504                   name[4] == 'i' &&
9505                   name[5] == 'f' &&
9506                   name[6] == 't')
9507               {                                   /* unshift    */
9508                 return -KEY_unshift;
9509               }
9510
9511               goto unknown;
9512
9513             default:
9514               goto unknown;
9515           }
9516
9517         case 'w':
9518           if (name[1] == 'a' &&
9519               name[2] == 'i' &&
9520               name[3] == 't' &&
9521               name[4] == 'p' &&
9522               name[5] == 'i' &&
9523               name[6] == 'd')
9524           {                                       /* waitpid    */
9525             return -KEY_waitpid;
9526           }
9527
9528           goto unknown;
9529
9530         default:
9531           goto unknown;
9532       }
9533
9534     case 8: /* 26 tokens of length 8 */
9535       switch (name[0])
9536       {
9537         case 'A':
9538           if (name[1] == 'U' &&
9539               name[2] == 'T' &&
9540               name[3] == 'O' &&
9541               name[4] == 'L' &&
9542               name[5] == 'O' &&
9543               name[6] == 'A' &&
9544               name[7] == 'D')
9545           {                                       /* AUTOLOAD   */
9546             return KEY_AUTOLOAD;
9547           }
9548
9549           goto unknown;
9550
9551         case '_':
9552           if (name[1] == '_')
9553           {
9554             switch (name[2])
9555             {
9556               case 'D':
9557                 if (name[3] == 'A' &&
9558                     name[4] == 'T' &&
9559                     name[5] == 'A' &&
9560                     name[6] == '_' &&
9561                     name[7] == '_')
9562                 {                                 /* __DATA__   */
9563                   return KEY___DATA__;
9564                 }
9565
9566                 goto unknown;
9567
9568               case 'F':
9569                 if (name[3] == 'I' &&
9570                     name[4] == 'L' &&
9571                     name[5] == 'E' &&
9572                     name[6] == '_' &&
9573                     name[7] == '_')
9574                 {                                 /* __FILE__   */
9575                   return -KEY___FILE__;
9576                 }
9577
9578                 goto unknown;
9579
9580               case 'L':
9581                 if (name[3] == 'I' &&
9582                     name[4] == 'N' &&
9583                     name[5] == 'E' &&
9584                     name[6] == '_' &&
9585                     name[7] == '_')
9586                 {                                 /* __LINE__   */
9587                   return -KEY___LINE__;
9588                 }
9589
9590                 goto unknown;
9591
9592               default:
9593                 goto unknown;
9594             }
9595           }
9596
9597           goto unknown;
9598
9599         case 'c':
9600           switch (name[1])
9601           {
9602             case 'l':
9603               if (name[2] == 'o' &&
9604                   name[3] == 's' &&
9605                   name[4] == 'e' &&
9606                   name[5] == 'd' &&
9607                   name[6] == 'i' &&
9608                   name[7] == 'r')
9609               {                                   /* closedir   */
9610                 return -KEY_closedir;
9611               }
9612
9613               goto unknown;
9614
9615             case 'o':
9616               if (name[2] == 'n' &&
9617                   name[3] == 't' &&
9618                   name[4] == 'i' &&
9619                   name[5] == 'n' &&
9620                   name[6] == 'u' &&
9621                   name[7] == 'e')
9622               {                                   /* continue   */
9623                 return -KEY_continue;
9624               }
9625
9626               goto unknown;
9627
9628             default:
9629               goto unknown;
9630           }
9631
9632         case 'd':
9633           if (name[1] == 'b' &&
9634               name[2] == 'm' &&
9635               name[3] == 'c' &&
9636               name[4] == 'l' &&
9637               name[5] == 'o' &&
9638               name[6] == 's' &&
9639               name[7] == 'e')
9640           {                                       /* dbmclose   */
9641             return -KEY_dbmclose;
9642           }
9643
9644           goto unknown;
9645
9646         case 'e':
9647           if (name[1] == 'n' &&
9648               name[2] == 'd')
9649           {
9650             switch (name[3])
9651             {
9652               case 'g':
9653                 if (name[4] == 'r' &&
9654                     name[5] == 'e' &&
9655                     name[6] == 'n' &&
9656                     name[7] == 't')
9657                 {                                 /* endgrent   */
9658                   return -KEY_endgrent;
9659                 }
9660
9661                 goto unknown;
9662
9663               case 'p':
9664                 if (name[4] == 'w' &&
9665                     name[5] == 'e' &&
9666                     name[6] == 'n' &&
9667                     name[7] == 't')
9668                 {                                 /* endpwent   */
9669                   return -KEY_endpwent;
9670                 }
9671
9672                 goto unknown;
9673
9674               default:
9675                 goto unknown;
9676             }
9677           }
9678
9679           goto unknown;
9680
9681         case 'f':
9682           if (name[1] == 'o' &&
9683               name[2] == 'r' &&
9684               name[3] == 'm' &&
9685               name[4] == 'l' &&
9686               name[5] == 'i' &&
9687               name[6] == 'n' &&
9688               name[7] == 'e')
9689           {                                       /* formline   */
9690             return -KEY_formline;
9691           }
9692
9693           goto unknown;
9694
9695         case 'g':
9696           if (name[1] == 'e' &&
9697               name[2] == 't')
9698           {
9699             switch (name[3])
9700             {
9701               case 'g':
9702                 if (name[4] == 'r')
9703                 {
9704                   switch (name[5])
9705                   {
9706                     case 'e':
9707                       if (name[6] == 'n' &&
9708                           name[7] == 't')
9709                       {                           /* getgrent   */
9710                         return -KEY_getgrent;
9711                       }
9712
9713                       goto unknown;
9714
9715                     case 'g':
9716                       if (name[6] == 'i' &&
9717                           name[7] == 'd')
9718                       {                           /* getgrgid   */
9719                         return -KEY_getgrgid;
9720                       }
9721
9722                       goto unknown;
9723
9724                     case 'n':
9725                       if (name[6] == 'a' &&
9726                           name[7] == 'm')
9727                       {                           /* getgrnam   */
9728                         return -KEY_getgrnam;
9729                       }
9730
9731                       goto unknown;
9732
9733                     default:
9734                       goto unknown;
9735                   }
9736                 }
9737
9738                 goto unknown;
9739
9740               case 'l':
9741                 if (name[4] == 'o' &&
9742                     name[5] == 'g' &&
9743                     name[6] == 'i' &&
9744                     name[7] == 'n')
9745                 {                                 /* getlogin   */
9746                   return -KEY_getlogin;
9747                 }
9748
9749                 goto unknown;
9750
9751               case 'p':
9752                 if (name[4] == 'w')
9753                 {
9754                   switch (name[5])
9755                   {
9756                     case 'e':
9757                       if (name[6] == 'n' &&
9758                           name[7] == 't')
9759                       {                           /* getpwent   */
9760                         return -KEY_getpwent;
9761                       }
9762
9763                       goto unknown;
9764
9765                     case 'n':
9766                       if (name[6] == 'a' &&
9767                           name[7] == 'm')
9768                       {                           /* getpwnam   */
9769                         return -KEY_getpwnam;
9770                       }
9771
9772                       goto unknown;
9773
9774                     case 'u':
9775                       if (name[6] == 'i' &&
9776                           name[7] == 'd')
9777                       {                           /* getpwuid   */
9778                         return -KEY_getpwuid;
9779                       }
9780
9781                       goto unknown;
9782
9783                     default:
9784                       goto unknown;
9785                   }
9786                 }
9787
9788                 goto unknown;
9789
9790               default:
9791                 goto unknown;
9792             }
9793           }
9794
9795           goto unknown;
9796
9797         case 'r':
9798           if (name[1] == 'e' &&
9799               name[2] == 'a' &&
9800               name[3] == 'd')
9801           {
9802             switch (name[4])
9803             {
9804               case 'l':
9805                 if (name[5] == 'i' &&
9806                     name[6] == 'n')
9807                 {
9808                   switch (name[7])
9809                   {
9810                     case 'e':
9811                       {                           /* readline   */
9812                         return -KEY_readline;
9813                       }
9814
9815                     case 'k':
9816                       {                           /* readlink   */
9817                         return -KEY_readlink;
9818                       }
9819
9820                     default:
9821                       goto unknown;
9822                   }
9823                 }
9824
9825                 goto unknown;
9826
9827               case 'p':
9828                 if (name[5] == 'i' &&
9829                     name[6] == 'p' &&
9830                     name[7] == 'e')
9831                 {                                 /* readpipe   */
9832                   return -KEY_readpipe;
9833                 }
9834
9835                 goto unknown;
9836
9837               default:
9838                 goto unknown;
9839             }
9840           }
9841
9842           goto unknown;
9843
9844         case 's':
9845           switch (name[1])
9846           {
9847             case 'e':
9848               if (name[2] == 't')
9849               {
9850                 switch (name[3])
9851                 {
9852                   case 'g':
9853                     if (name[4] == 'r' &&
9854                         name[5] == 'e' &&
9855                         name[6] == 'n' &&
9856                         name[7] == 't')
9857                     {                             /* setgrent   */
9858                       return -KEY_setgrent;
9859                     }
9860
9861                     goto unknown;
9862
9863                   case 'p':
9864                     if (name[4] == 'w' &&
9865                         name[5] == 'e' &&
9866                         name[6] == 'n' &&
9867                         name[7] == 't')
9868                     {                             /* setpwent   */
9869                       return -KEY_setpwent;
9870                     }
9871
9872                     goto unknown;
9873
9874                   default:
9875                     goto unknown;
9876                 }
9877               }
9878
9879               goto unknown;
9880
9881             case 'h':
9882               switch (name[2])
9883               {
9884                 case 'm':
9885                   if (name[3] == 'w' &&
9886                       name[4] == 'r' &&
9887                       name[5] == 'i' &&
9888                       name[6] == 't' &&
9889                       name[7] == 'e')
9890                   {                               /* shmwrite   */
9891                     return -KEY_shmwrite;
9892                   }
9893
9894                   goto unknown;
9895
9896                 case 'u':
9897                   if (name[3] == 't' &&
9898                       name[4] == 'd' &&
9899                       name[5] == 'o' &&
9900                       name[6] == 'w' &&
9901                       name[7] == 'n')
9902                   {                               /* shutdown   */
9903                     return -KEY_shutdown;
9904                   }
9905
9906                   goto unknown;
9907
9908                 default:
9909                   goto unknown;
9910               }
9911
9912             case 'y':
9913               if (name[2] == 's' &&
9914                   name[3] == 'w' &&
9915                   name[4] == 'r' &&
9916                   name[5] == 'i' &&
9917                   name[6] == 't' &&
9918                   name[7] == 'e')
9919               {                                   /* syswrite   */
9920                 return -KEY_syswrite;
9921               }
9922
9923               goto unknown;
9924
9925             default:
9926               goto unknown;
9927           }
9928
9929         case 't':
9930           if (name[1] == 'r' &&
9931               name[2] == 'u' &&
9932               name[3] == 'n' &&
9933               name[4] == 'c' &&
9934               name[5] == 'a' &&
9935               name[6] == 't' &&
9936               name[7] == 'e')
9937           {                                       /* truncate   */
9938             return -KEY_truncate;
9939           }
9940
9941           goto unknown;
9942
9943         default:
9944           goto unknown;
9945       }
9946
9947     case 9: /* 9 tokens of length 9 */
9948       switch (name[0])
9949       {
9950         case 'U':
9951           if (name[1] == 'N' &&
9952               name[2] == 'I' &&
9953               name[3] == 'T' &&
9954               name[4] == 'C' &&
9955               name[5] == 'H' &&
9956               name[6] == 'E' &&
9957               name[7] == 'C' &&
9958               name[8] == 'K')
9959           {                                       /* UNITCHECK  */
9960             return KEY_UNITCHECK;
9961           }
9962
9963           goto unknown;
9964
9965         case 'e':
9966           if (name[1] == 'n' &&
9967               name[2] == 'd' &&
9968               name[3] == 'n' &&
9969               name[4] == 'e' &&
9970               name[5] == 't' &&
9971               name[6] == 'e' &&
9972               name[7] == 'n' &&
9973               name[8] == 't')
9974           {                                       /* endnetent  */
9975             return -KEY_endnetent;
9976           }
9977
9978           goto unknown;
9979
9980         case 'g':
9981           if (name[1] == 'e' &&
9982               name[2] == 't' &&
9983               name[3] == 'n' &&
9984               name[4] == 'e' &&
9985               name[5] == 't' &&
9986               name[6] == 'e' &&
9987               name[7] == 'n' &&
9988               name[8] == 't')
9989           {                                       /* getnetent  */
9990             return -KEY_getnetent;
9991           }
9992
9993           goto unknown;
9994
9995         case 'l':
9996           if (name[1] == 'o' &&
9997               name[2] == 'c' &&
9998               name[3] == 'a' &&
9999               name[4] == 'l' &&
10000               name[5] == 't' &&
10001               name[6] == 'i' &&
10002               name[7] == 'm' &&
10003               name[8] == 'e')
10004           {                                       /* localtime  */
10005             return -KEY_localtime;
10006           }
10007
10008           goto unknown;
10009
10010         case 'p':
10011           if (name[1] == 'r' &&
10012               name[2] == 'o' &&
10013               name[3] == 't' &&
10014               name[4] == 'o' &&
10015               name[5] == 't' &&
10016               name[6] == 'y' &&
10017               name[7] == 'p' &&
10018               name[8] == 'e')
10019           {                                       /* prototype  */
10020             return KEY_prototype;
10021           }
10022
10023           goto unknown;
10024
10025         case 'q':
10026           if (name[1] == 'u' &&
10027               name[2] == 'o' &&
10028               name[3] == 't' &&
10029               name[4] == 'e' &&
10030               name[5] == 'm' &&
10031               name[6] == 'e' &&
10032               name[7] == 't' &&
10033               name[8] == 'a')
10034           {                                       /* quotemeta  */
10035             return -KEY_quotemeta;
10036           }
10037
10038           goto unknown;
10039
10040         case 'r':
10041           if (name[1] == 'e' &&
10042               name[2] == 'w' &&
10043               name[3] == 'i' &&
10044               name[4] == 'n' &&
10045               name[5] == 'd' &&
10046               name[6] == 'd' &&
10047               name[7] == 'i' &&
10048               name[8] == 'r')
10049           {                                       /* rewinddir  */
10050             return -KEY_rewinddir;
10051           }
10052
10053           goto unknown;
10054
10055         case 's':
10056           if (name[1] == 'e' &&
10057               name[2] == 't' &&
10058               name[3] == 'n' &&
10059               name[4] == 'e' &&
10060               name[5] == 't' &&
10061               name[6] == 'e' &&
10062               name[7] == 'n' &&
10063               name[8] == 't')
10064           {                                       /* setnetent  */
10065             return -KEY_setnetent;
10066           }
10067
10068           goto unknown;
10069
10070         case 'w':
10071           if (name[1] == 'a' &&
10072               name[2] == 'n' &&
10073               name[3] == 't' &&
10074               name[4] == 'a' &&
10075               name[5] == 'r' &&
10076               name[6] == 'r' &&
10077               name[7] == 'a' &&
10078               name[8] == 'y')
10079           {                                       /* wantarray  */
10080             return -KEY_wantarray;
10081           }
10082
10083           goto unknown;
10084
10085         default:
10086           goto unknown;
10087       }
10088
10089     case 10: /* 9 tokens of length 10 */
10090       switch (name[0])
10091       {
10092         case 'e':
10093           if (name[1] == 'n' &&
10094               name[2] == 'd')
10095           {
10096             switch (name[3])
10097             {
10098               case 'h':
10099                 if (name[4] == 'o' &&
10100                     name[5] == 's' &&
10101                     name[6] == 't' &&
10102                     name[7] == 'e' &&
10103                     name[8] == 'n' &&
10104                     name[9] == 't')
10105                 {                                 /* endhostent */
10106                   return -KEY_endhostent;
10107                 }
10108
10109                 goto unknown;
10110
10111               case 's':
10112                 if (name[4] == 'e' &&
10113                     name[5] == 'r' &&
10114                     name[6] == 'v' &&
10115                     name[7] == 'e' &&
10116                     name[8] == 'n' &&
10117                     name[9] == 't')
10118                 {                                 /* endservent */
10119                   return -KEY_endservent;
10120                 }
10121
10122                 goto unknown;
10123
10124               default:
10125                 goto unknown;
10126             }
10127           }
10128
10129           goto unknown;
10130
10131         case 'g':
10132           if (name[1] == 'e' &&
10133               name[2] == 't')
10134           {
10135             switch (name[3])
10136             {
10137               case 'h':
10138                 if (name[4] == 'o' &&
10139                     name[5] == 's' &&
10140                     name[6] == 't' &&
10141                     name[7] == 'e' &&
10142                     name[8] == 'n' &&
10143                     name[9] == 't')
10144                 {                                 /* gethostent */
10145                   return -KEY_gethostent;
10146                 }
10147
10148                 goto unknown;
10149
10150               case 's':
10151                 switch (name[4])
10152                 {
10153                   case 'e':
10154                     if (name[5] == 'r' &&
10155                         name[6] == 'v' &&
10156                         name[7] == 'e' &&
10157                         name[8] == 'n' &&
10158                         name[9] == 't')
10159                     {                             /* getservent */
10160                       return -KEY_getservent;
10161                     }
10162
10163                     goto unknown;
10164
10165                   case 'o':
10166                     if (name[5] == 'c' &&
10167                         name[6] == 'k' &&
10168                         name[7] == 'o' &&
10169                         name[8] == 'p' &&
10170                         name[9] == 't')
10171                     {                             /* getsockopt */
10172                       return -KEY_getsockopt;
10173                     }
10174
10175                     goto unknown;
10176
10177                   default:
10178                     goto unknown;
10179                 }
10180
10181               default:
10182                 goto unknown;
10183             }
10184           }
10185
10186           goto unknown;
10187
10188         case 's':
10189           switch (name[1])
10190           {
10191             case 'e':
10192               if (name[2] == 't')
10193               {
10194                 switch (name[3])
10195                 {
10196                   case 'h':
10197                     if (name[4] == 'o' &&
10198                         name[5] == 's' &&
10199                         name[6] == 't' &&
10200                         name[7] == 'e' &&
10201                         name[8] == 'n' &&
10202                         name[9] == 't')
10203                     {                             /* sethostent */
10204                       return -KEY_sethostent;
10205                     }
10206
10207                     goto unknown;
10208
10209                   case 's':
10210                     switch (name[4])
10211                     {
10212                       case 'e':
10213                         if (name[5] == 'r' &&
10214                             name[6] == 'v' &&
10215                             name[7] == 'e' &&
10216                             name[8] == 'n' &&
10217                             name[9] == 't')
10218                         {                         /* setservent */
10219                           return -KEY_setservent;
10220                         }
10221
10222                         goto unknown;
10223
10224                       case 'o':
10225                         if (name[5] == 'c' &&
10226                             name[6] == 'k' &&
10227                             name[7] == 'o' &&
10228                             name[8] == 'p' &&
10229                             name[9] == 't')
10230                         {                         /* setsockopt */
10231                           return -KEY_setsockopt;
10232                         }
10233
10234                         goto unknown;
10235
10236                       default:
10237                         goto unknown;
10238                     }
10239
10240                   default:
10241                     goto unknown;
10242                 }
10243               }
10244
10245               goto unknown;
10246
10247             case 'o':
10248               if (name[2] == 'c' &&
10249                   name[3] == 'k' &&
10250                   name[4] == 'e' &&
10251                   name[5] == 't' &&
10252                   name[6] == 'p' &&
10253                   name[7] == 'a' &&
10254                   name[8] == 'i' &&
10255                   name[9] == 'r')
10256               {                                   /* socketpair */
10257                 return -KEY_socketpair;
10258               }
10259
10260               goto unknown;
10261
10262             default:
10263               goto unknown;
10264           }
10265
10266         default:
10267           goto unknown;
10268       }
10269
10270     case 11: /* 8 tokens of length 11 */
10271       switch (name[0])
10272       {
10273         case '_':
10274           if (name[1] == '_' &&
10275               name[2] == 'P' &&
10276               name[3] == 'A' &&
10277               name[4] == 'C' &&
10278               name[5] == 'K' &&
10279               name[6] == 'A' &&
10280               name[7] == 'G' &&
10281               name[8] == 'E' &&
10282               name[9] == '_' &&
10283               name[10] == '_')
10284           {                                       /* __PACKAGE__ */
10285             return -KEY___PACKAGE__;
10286           }
10287
10288           goto unknown;
10289
10290         case 'e':
10291           if (name[1] == 'n' &&
10292               name[2] == 'd' &&
10293               name[3] == 'p' &&
10294               name[4] == 'r' &&
10295               name[5] == 'o' &&
10296               name[6] == 't' &&
10297               name[7] == 'o' &&
10298               name[8] == 'e' &&
10299               name[9] == 'n' &&
10300               name[10] == 't')
10301           {                                       /* endprotoent */
10302             return -KEY_endprotoent;
10303           }
10304
10305           goto unknown;
10306
10307         case 'g':
10308           if (name[1] == 'e' &&
10309               name[2] == 't')
10310           {
10311             switch (name[3])
10312             {
10313               case 'p':
10314                 switch (name[4])
10315                 {
10316                   case 'e':
10317                     if (name[5] == 'e' &&
10318                         name[6] == 'r' &&
10319                         name[7] == 'n' &&
10320                         name[8] == 'a' &&
10321                         name[9] == 'm' &&
10322                         name[10] == 'e')
10323                     {                             /* getpeername */
10324                       return -KEY_getpeername;
10325                     }
10326
10327                     goto unknown;
10328
10329                   case 'r':
10330                     switch (name[5])
10331                     {
10332                       case 'i':
10333                         if (name[6] == 'o' &&
10334                             name[7] == 'r' &&
10335                             name[8] == 'i' &&
10336                             name[9] == 't' &&
10337                             name[10] == 'y')
10338                         {                         /* getpriority */
10339                           return -KEY_getpriority;
10340                         }
10341
10342                         goto unknown;
10343
10344                       case 'o':
10345                         if (name[6] == 't' &&
10346                             name[7] == 'o' &&
10347                             name[8] == 'e' &&
10348                             name[9] == 'n' &&
10349                             name[10] == 't')
10350                         {                         /* getprotoent */
10351                           return -KEY_getprotoent;
10352                         }
10353
10354                         goto unknown;
10355
10356                       default:
10357                         goto unknown;
10358                     }
10359
10360                   default:
10361                     goto unknown;
10362                 }
10363
10364               case 's':
10365                 if (name[4] == 'o' &&
10366                     name[5] == 'c' &&
10367                     name[6] == 'k' &&
10368                     name[7] == 'n' &&
10369                     name[8] == 'a' &&
10370                     name[9] == 'm' &&
10371                     name[10] == 'e')
10372                 {                                 /* getsockname */
10373                   return -KEY_getsockname;
10374                 }
10375
10376                 goto unknown;
10377
10378               default:
10379                 goto unknown;
10380             }
10381           }
10382
10383           goto unknown;
10384
10385         case 's':
10386           if (name[1] == 'e' &&
10387               name[2] == 't' &&
10388               name[3] == 'p' &&
10389               name[4] == 'r')
10390           {
10391             switch (name[5])
10392             {
10393               case 'i':
10394                 if (name[6] == 'o' &&
10395                     name[7] == 'r' &&
10396                     name[8] == 'i' &&
10397                     name[9] == 't' &&
10398                     name[10] == 'y')
10399                 {                                 /* setpriority */
10400                   return -KEY_setpriority;
10401                 }
10402
10403                 goto unknown;
10404
10405               case 'o':
10406                 if (name[6] == 't' &&
10407                     name[7] == 'o' &&
10408                     name[8] == 'e' &&
10409                     name[9] == 'n' &&
10410                     name[10] == 't')
10411                 {                                 /* setprotoent */
10412                   return -KEY_setprotoent;
10413                 }
10414
10415                 goto unknown;
10416
10417               default:
10418                 goto unknown;
10419             }
10420           }
10421
10422           goto unknown;
10423
10424         default:
10425           goto unknown;
10426       }
10427
10428     case 12: /* 2 tokens of length 12 */
10429       if (name[0] == 'g' &&
10430           name[1] == 'e' &&
10431           name[2] == 't' &&
10432           name[3] == 'n' &&
10433           name[4] == 'e' &&
10434           name[5] == 't' &&
10435           name[6] == 'b' &&
10436           name[7] == 'y')
10437       {
10438         switch (name[8])
10439         {
10440           case 'a':
10441             if (name[9] == 'd' &&
10442                 name[10] == 'd' &&
10443                 name[11] == 'r')
10444             {                                     /* getnetbyaddr */
10445               return -KEY_getnetbyaddr;
10446             }
10447
10448             goto unknown;
10449
10450           case 'n':
10451             if (name[9] == 'a' &&
10452                 name[10] == 'm' &&
10453                 name[11] == 'e')
10454             {                                     /* getnetbyname */
10455               return -KEY_getnetbyname;
10456             }
10457
10458             goto unknown;
10459
10460           default:
10461             goto unknown;
10462         }
10463       }
10464
10465       goto unknown;
10466
10467     case 13: /* 4 tokens of length 13 */
10468       if (name[0] == 'g' &&
10469           name[1] == 'e' &&
10470           name[2] == 't')
10471       {
10472         switch (name[3])
10473         {
10474           case 'h':
10475             if (name[4] == 'o' &&
10476                 name[5] == 's' &&
10477                 name[6] == 't' &&
10478                 name[7] == 'b' &&
10479                 name[8] == 'y')
10480             {
10481               switch (name[9])
10482               {
10483                 case 'a':
10484                   if (name[10] == 'd' &&
10485                       name[11] == 'd' &&
10486                       name[12] == 'r')
10487                   {                               /* gethostbyaddr */
10488                     return -KEY_gethostbyaddr;
10489                   }
10490
10491                   goto unknown;
10492
10493                 case 'n':
10494                   if (name[10] == 'a' &&
10495                       name[11] == 'm' &&
10496                       name[12] == 'e')
10497                   {                               /* gethostbyname */
10498                     return -KEY_gethostbyname;
10499                   }
10500
10501                   goto unknown;
10502
10503                 default:
10504                   goto unknown;
10505               }
10506             }
10507
10508             goto unknown;
10509
10510           case 's':
10511             if (name[4] == 'e' &&
10512                 name[5] == 'r' &&
10513                 name[6] == 'v' &&
10514                 name[7] == 'b' &&
10515                 name[8] == 'y')
10516             {
10517               switch (name[9])
10518               {
10519                 case 'n':
10520                   if (name[10] == 'a' &&
10521                       name[11] == 'm' &&
10522                       name[12] == 'e')
10523                   {                               /* getservbyname */
10524                     return -KEY_getservbyname;
10525                   }
10526
10527                   goto unknown;
10528
10529                 case 'p':
10530                   if (name[10] == 'o' &&
10531                       name[11] == 'r' &&
10532                       name[12] == 't')
10533                   {                               /* getservbyport */
10534                     return -KEY_getservbyport;
10535                   }
10536
10537                   goto unknown;
10538
10539                 default:
10540                   goto unknown;
10541               }
10542             }
10543
10544             goto unknown;
10545
10546           default:
10547             goto unknown;
10548         }
10549       }
10550
10551       goto unknown;
10552
10553     case 14: /* 1 tokens of length 14 */
10554       if (name[0] == 'g' &&
10555           name[1] == 'e' &&
10556           name[2] == 't' &&
10557           name[3] == 'p' &&
10558           name[4] == 'r' &&
10559           name[5] == 'o' &&
10560           name[6] == 't' &&
10561           name[7] == 'o' &&
10562           name[8] == 'b' &&
10563           name[9] == 'y' &&
10564           name[10] == 'n' &&
10565           name[11] == 'a' &&
10566           name[12] == 'm' &&
10567           name[13] == 'e')
10568       {                                           /* getprotobyname */
10569         return -KEY_getprotobyname;
10570       }
10571
10572       goto unknown;
10573
10574     case 16: /* 1 tokens of length 16 */
10575       if (name[0] == 'g' &&
10576           name[1] == 'e' &&
10577           name[2] == 't' &&
10578           name[3] == 'p' &&
10579           name[4] == 'r' &&
10580           name[5] == 'o' &&
10581           name[6] == 't' &&
10582           name[7] == 'o' &&
10583           name[8] == 'b' &&
10584           name[9] == 'y' &&
10585           name[10] == 'n' &&
10586           name[11] == 'u' &&
10587           name[12] == 'm' &&
10588           name[13] == 'b' &&
10589           name[14] == 'e' &&
10590           name[15] == 'r')
10591       {                                           /* getprotobynumber */
10592         return -KEY_getprotobynumber;
10593       }
10594
10595       goto unknown;
10596
10597     default:
10598       goto unknown;
10599   }
10600
10601 unknown:
10602   return 0;
10603 }
10604
10605 STATIC void
10606 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10607 {
10608     dVAR;
10609
10610     PERL_ARGS_ASSERT_CHECKCOMMA;
10611
10612     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
10613         if (ckWARN(WARN_SYNTAX)) {
10614             int level = 1;
10615             const char *w;
10616             for (w = s+2; *w && level; w++) {
10617                 if (*w == '(')
10618                     ++level;
10619                 else if (*w == ')')
10620                     --level;
10621             }
10622             while (isSPACE(*w))
10623                 ++w;
10624             /* the list of chars below is for end of statements or
10625              * block / parens, boolean operators (&&, ||, //) and branch
10626              * constructs (or, and, if, until, unless, while, err, for).
10627              * Not a very solid hack... */
10628             if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
10629                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10630                             "%s (...) interpreted as function",name);
10631         }
10632     }
10633     while (s < PL_bufend && isSPACE(*s))
10634         s++;
10635     if (*s == '(')
10636         s++;
10637     while (s < PL_bufend && isSPACE(*s))
10638         s++;
10639     if (isIDFIRST_lazy_if(s,UTF)) {
10640         const char * const w = s++;
10641         while (isALNUM_lazy_if(s,UTF))
10642             s++;
10643         while (s < PL_bufend && isSPACE(*s))
10644             s++;
10645         if (*s == ',') {
10646             GV* gv;
10647             if (keyword(w, s - w, 0))
10648                 return;
10649
10650             gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
10651             if (gv && GvCVu(gv))
10652                 return;
10653             Perl_croak(aTHX_ "No comma allowed after %s", what);
10654         }
10655     }
10656 }
10657
10658 /* Either returns sv, or mortalizes sv and returns a new SV*.
10659    Best used as sv=new_constant(..., sv, ...).
10660    If s, pv are NULL, calls subroutine with one argument,
10661    and type is used with error messages only. */
10662
10663 STATIC SV *
10664 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10665                SV *sv, SV *pv, const char *type, STRLEN typelen)
10666 {
10667     dVAR; dSP;
10668     HV * const table = GvHV(PL_hintgv);          /* ^H */
10669     SV *res;
10670     SV **cvp;
10671     SV *cv, *typesv;
10672     const char *why1 = "", *why2 = "", *why3 = "";
10673
10674     PERL_ARGS_ASSERT_NEW_CONSTANT;
10675
10676     if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
10677         SV *msg;
10678
10679         why2 = (const char *)
10680             (strEQ(key,"charnames")
10681              ? "(possibly a missing \"use charnames ...\")"
10682              : "");
10683         msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
10684                             (type ? type: "undef"), why2);
10685
10686         /* This is convoluted and evil ("goto considered harmful")
10687          * but I do not understand the intricacies of all the different
10688          * failure modes of %^H in here.  The goal here is to make
10689          * the most probable error message user-friendly. --jhi */
10690
10691         goto msgdone;
10692
10693     report:
10694         msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
10695                             (type ? type: "undef"), why1, why2, why3);
10696     msgdone:
10697         yyerror(SvPVX_const(msg));
10698         SvREFCNT_dec(msg);
10699         return sv;
10700     }
10701     cvp = hv_fetch(table, key, keylen, FALSE);
10702     if (!cvp || !SvOK(*cvp)) {
10703         why1 = "$^H{";
10704         why2 = key;
10705         why3 = "} is not defined";
10706         goto report;
10707     }
10708     sv_2mortal(sv);                     /* Parent created it permanently */
10709     cv = *cvp;
10710     if (!pv && s)
10711         pv = newSVpvn_flags(s, len, SVs_TEMP);
10712     if (type && pv)
10713         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10714     else
10715         typesv = &PL_sv_undef;
10716
10717     PUSHSTACKi(PERLSI_OVERLOAD);
10718     ENTER ;
10719     SAVETMPS;
10720
10721     PUSHMARK(SP) ;
10722     EXTEND(sp, 3);
10723     if (pv)
10724         PUSHs(pv);
10725     PUSHs(sv);
10726     if (pv)
10727         PUSHs(typesv);
10728     PUTBACK;
10729     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10730
10731     SPAGAIN ;
10732
10733     /* Check the eval first */
10734     if (!PL_in_eval && SvTRUE(ERRSV)) {
10735         sv_catpvs(ERRSV, "Propagated");
10736         yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
10737         (void)POPs;
10738         res = SvREFCNT_inc_simple(sv);
10739     }
10740     else {
10741         res = POPs;
10742         SvREFCNT_inc_simple_void(res);
10743     }
10744
10745     PUTBACK ;
10746     FREETMPS ;
10747     LEAVE ;
10748     POPSTACK;
10749
10750     if (!SvOK(res)) {
10751         why1 = "Call to &{$^H{";
10752         why2 = key;
10753         why3 = "}} did not return a defined value";
10754         sv = res;
10755         goto report;
10756     }
10757
10758     return res;
10759 }
10760
10761 /* Returns a NUL terminated string, with the length of the string written to
10762    *slp
10763    */
10764 STATIC char *
10765 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10766 {
10767     dVAR;
10768     register char *d = dest;
10769     register char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10770
10771     PERL_ARGS_ASSERT_SCAN_WORD;
10772
10773     for (;;) {
10774         if (d >= e)
10775             Perl_croak(aTHX_ ident_too_long);
10776         if (isALNUM(*s))        /* UTF handled below */
10777             *d++ = *s++;
10778         else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
10779             *d++ = ':';
10780             *d++ = ':';
10781             s++;
10782         }
10783         else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
10784             *d++ = *s++;
10785             *d++ = *s++;
10786         }
10787         else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10788             char *t = s + UTF8SKIP(s);
10789             size_t len;
10790             while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10791                 t += UTF8SKIP(t);
10792             len = t - s;
10793             if (d + len > e)
10794                 Perl_croak(aTHX_ ident_too_long);
10795             Copy(s, d, len, char);
10796             d += len;
10797             s = t;
10798         }
10799         else {
10800             *d = '\0';
10801             *slp = d - dest;
10802             return s;
10803         }
10804     }
10805 }
10806
10807 STATIC char *
10808 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
10809 {
10810     dVAR;
10811     char *bracket = NULL;
10812     char funny = *s++;
10813     register char *d = dest;
10814     register char * const e = d + destlen + 3;    /* two-character token, ending NUL */
10815
10816     PERL_ARGS_ASSERT_SCAN_IDENT;
10817
10818     if (isSPACE(*s))
10819         s = PEEKSPACE(s);
10820     if (isDIGIT(*s)) {
10821         while (isDIGIT(*s)) {
10822             if (d >= e)
10823                 Perl_croak(aTHX_ ident_too_long);
10824             *d++ = *s++;
10825         }
10826     }
10827     else {
10828         for (;;) {
10829             if (d >= e)
10830                 Perl_croak(aTHX_ ident_too_long);
10831             if (isALNUM(*s))    /* UTF handled below */
10832                 *d++ = *s++;
10833             else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
10834                 *d++ = ':';
10835                 *d++ = ':';
10836                 s++;
10837             }
10838             else if (*s == ':' && s[1] == ':') {
10839                 *d++ = *s++;
10840                 *d++ = *s++;
10841             }
10842             else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
10843                 char *t = s + UTF8SKIP(s);
10844                 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
10845                     t += UTF8SKIP(t);
10846                 if (d + (t - s) > e)
10847                     Perl_croak(aTHX_ ident_too_long);
10848                 Copy(s, d, t - s, char);
10849                 d += t - s;
10850                 s = t;
10851             }
10852             else
10853                 break;
10854         }
10855     }
10856     *d = '\0';
10857     d = dest;
10858     if (*d) {
10859         if (PL_lex_state != LEX_NORMAL)
10860             PL_lex_state = LEX_INTERPENDMAYBE;
10861         return s;
10862     }
10863     if (*s == '$' && s[1] &&
10864         (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
10865     {
10866         return s;
10867     }
10868     if (*s == '{') {
10869         bracket = s;
10870         s++;
10871     }
10872     else if (ck_uni)
10873         check_uni();
10874     if (s < send)
10875         *d = *s++;
10876     d[1] = '\0';
10877     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10878         *d = toCTRL(*s);
10879         s++;
10880     }
10881     if (bracket) {
10882         if (isSPACE(s[-1])) {
10883             while (s < send) {
10884                 const char ch = *s++;
10885                 if (!SPACE_OR_TAB(ch)) {
10886                     *d = ch;
10887                     break;
10888                 }
10889             }
10890         }
10891         if (isIDFIRST_lazy_if(d,UTF)) {
10892             d++;
10893             if (UTF) {
10894                 char *end = s;
10895                 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
10896                     end += UTF8SKIP(end);
10897                     while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
10898                         end += UTF8SKIP(end);
10899                 }
10900                 Copy(s, d, end - s, char);
10901                 d += end - s;
10902                 s = end;
10903             }
10904             else {
10905                 while ((isALNUM(*s) || *s == ':') && d < e)
10906                     *d++ = *s++;
10907                 if (d >= e)
10908                     Perl_croak(aTHX_ ident_too_long);
10909             }
10910             *d = '\0';
10911             while (s < send && SPACE_OR_TAB(*s))
10912                 s++;
10913             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10914                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10915                     const char * const brack =
10916                         (const char *)
10917                         ((*s == '[') ? "[...]" : "{...}");
10918                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10919                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10920                         funny, dest, brack, funny, dest, brack);
10921                 }
10922                 bracket++;
10923                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10924                 return s;
10925             }
10926         }
10927         /* Handle extended ${^Foo} variables
10928          * 1999-02-27 mjd-perl-patch@plover.com */
10929         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
10930                  && isALNUM(*s))
10931         {
10932             d++;
10933             while (isALNUM(*s) && d < e) {
10934                 *d++ = *s++;
10935             }
10936             if (d >= e)
10937                 Perl_croak(aTHX_ ident_too_long);
10938             *d = '\0';
10939         }
10940         if (*s == '}') {
10941             s++;
10942             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10943                 PL_lex_state = LEX_INTERPEND;
10944                 PL_expect = XREF;
10945             }
10946             if (PL_lex_state == LEX_NORMAL) {
10947                 if (ckWARN(WARN_AMBIGUOUS) &&
10948                     (keyword(dest, d - dest, 0)
10949                      || get_cvn_flags(dest, d - dest, 0)))
10950                 {
10951                     if (funny == '#')
10952                         funny = '@';
10953                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10954                         "Ambiguous use of %c{%s} resolved to %c%s",
10955                         funny, dest, funny, dest);
10956                 }
10957             }
10958         }
10959         else {
10960             s = bracket;                /* let the parser handle it */
10961             *dest = '\0';
10962         }
10963     }
10964     else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
10965         PL_lex_state = LEX_INTERPEND;
10966     return s;
10967 }
10968
10969 void
10970 Perl_pmflag(pTHX_ U32* pmfl, int ch)
10971 {
10972     PERL_ARGS_ASSERT_PMFLAG;
10973
10974     PERL_UNUSED_CONTEXT;
10975     if (ch<256) {
10976         const char c = (char)ch;
10977         switch (c) {
10978             CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
10979             case GLOBAL_PAT_MOD:    *pmfl |= PMf_GLOBAL; break;
10980             case CONTINUE_PAT_MOD:  *pmfl |= PMf_CONTINUE; break;
10981             case ONCE_PAT_MOD:      *pmfl |= PMf_KEEP; break;
10982             case KEEPCOPY_PAT_MOD:  *pmfl |= PMf_KEEPCOPY; break;
10983         }
10984     }
10985 }
10986
10987 STATIC char *
10988 S_scan_pat(pTHX_ char *start, I32 type)
10989 {
10990     dVAR;
10991     PMOP *pm;
10992     char *s = scan_str(start,!!PL_madskills,FALSE);
10993     const char * const valid_flags =
10994         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10995 #ifdef PERL_MAD
10996     char *modstart;
10997 #endif
10998
10999     PERL_ARGS_ASSERT_SCAN_PAT;
11000
11001     if (!s) {
11002         const char * const delimiter = skipspace(start);
11003         Perl_croak(aTHX_
11004                    (const char *)
11005                    (*delimiter == '?'
11006                     ? "Search pattern not terminated or ternary operator parsed as search pattern"
11007                     : "Search pattern not terminated" ));
11008     }
11009
11010     pm = (PMOP*)newPMOP(type, 0);
11011     if (PL_multi_open == '?') {
11012         /* This is the only point in the code that sets PMf_ONCE:  */
11013         pm->op_pmflags |= PMf_ONCE;
11014
11015         /* Hence it's safe to do this bit of PMOP book-keeping here, which
11016            allows us to restrict the list needed by reset to just the ??
11017            matches.  */
11018         assert(type != OP_TRANS);
11019         if (PL_curstash) {
11020             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11021             U32 elements;
11022             if (!mg) {
11023                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11024                                  0);
11025             }
11026             elements = mg->mg_len / sizeof(PMOP**);
11027             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11028             ((PMOP**)mg->mg_ptr) [elements++] = pm;
11029             mg->mg_len = elements * sizeof(PMOP**);
11030             PmopSTASH_set(pm,PL_curstash);
11031         }
11032     }
11033 #ifdef PERL_MAD
11034     modstart = s;
11035 #endif
11036     while (*s && strchr(valid_flags, *s))
11037         pmflag(&pm->op_pmflags,*s++);
11038 #ifdef PERL_MAD
11039     if (PL_madskills && modstart != s) {
11040         SV* tmptoken = newSVpvn(modstart, s - modstart);
11041         append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11042     }
11043 #endif
11044     /* issue a warning if /c is specified,but /g is not */
11045     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11046     {
11047         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11048                        "Use of /c modifier is meaningless without /g" );
11049     }
11050
11051     PL_lex_op = (OP*)pm;
11052     pl_yylval.ival = OP_MATCH;
11053     return s;
11054 }
11055
11056 STATIC char *
11057 S_scan_subst(pTHX_ char *start)
11058 {
11059     dVAR;
11060     register char *s;
11061     register PMOP *pm;
11062     I32 first_start;
11063     I32 es = 0;
11064 #ifdef PERL_MAD
11065     char *modstart;
11066 #endif
11067
11068     PERL_ARGS_ASSERT_SCAN_SUBST;
11069
11070     pl_yylval.ival = OP_NULL;
11071
11072     s = scan_str(start,!!PL_madskills,FALSE);
11073
11074     if (!s)
11075         Perl_croak(aTHX_ "Substitution pattern not terminated");
11076
11077     if (s[-1] == PL_multi_open)
11078         s--;
11079 #ifdef PERL_MAD
11080     if (PL_madskills) {
11081         CURMAD('q', PL_thisopen);
11082         CURMAD('_', PL_thiswhite);
11083         CURMAD('E', PL_thisstuff);
11084         CURMAD('Q', PL_thisclose);
11085         PL_realtokenstart = s - SvPVX(PL_linestr);
11086     }
11087 #endif
11088
11089     first_start = PL_multi_start;
11090     s = scan_str(s,!!PL_madskills,FALSE);
11091     if (!s) {
11092         if (PL_lex_stuff) {
11093             SvREFCNT_dec(PL_lex_stuff);
11094             PL_lex_stuff = NULL;
11095         }
11096         Perl_croak(aTHX_ "Substitution replacement not terminated");
11097     }
11098     PL_multi_start = first_start;       /* so whole substitution is taken together */
11099
11100     pm = (PMOP*)newPMOP(OP_SUBST, 0);
11101
11102 #ifdef PERL_MAD
11103     if (PL_madskills) {
11104         CURMAD('z', PL_thisopen);
11105         CURMAD('R', PL_thisstuff);
11106         CURMAD('Z', PL_thisclose);
11107     }
11108     modstart = s;
11109 #endif
11110
11111     while (*s) {
11112         if (*s == EXEC_PAT_MOD) {
11113             s++;
11114             es++;
11115         }
11116         else if (strchr(S_PAT_MODS, *s))
11117             pmflag(&pm->op_pmflags,*s++);
11118         else
11119             break;
11120     }
11121
11122 #ifdef PERL_MAD
11123     if (PL_madskills) {
11124         if (modstart != s)
11125             curmad('m', newSVpvn(modstart, s - modstart));
11126         append_madprops(PL_thismad, (OP*)pm, 0);
11127         PL_thismad = 0;
11128     }
11129 #endif
11130     if ((pm->op_pmflags & PMf_CONTINUE)) {
11131         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
11132     }
11133
11134     if (es) {
11135         SV * const repl = newSVpvs("");
11136
11137         PL_sublex_info.super_bufptr = s;
11138         PL_sublex_info.super_bufend = PL_bufend;
11139         PL_multi_end = 0;
11140         pm->op_pmflags |= PMf_EVAL;
11141         while (es-- > 0) {
11142             if (es)
11143                 sv_catpvs(repl, "eval ");
11144             else
11145                 sv_catpvs(repl, "do ");
11146         }
11147         sv_catpvs(repl, "{");
11148         sv_catsv(repl, PL_lex_repl);
11149         if (strchr(SvPVX(PL_lex_repl), '#'))
11150             sv_catpvs(repl, "\n");
11151         sv_catpvs(repl, "}");
11152         SvEVALED_on(repl);
11153         SvREFCNT_dec(PL_lex_repl);
11154         PL_lex_repl = repl;
11155     }
11156
11157     PL_lex_op = (OP*)pm;
11158     pl_yylval.ival = OP_SUBST;
11159     return s;
11160 }
11161
11162 STATIC char *
11163 S_scan_trans(pTHX_ char *start)
11164 {
11165     dVAR;
11166     register char* s;
11167     OP *o;
11168     short *tbl;
11169     U8 squash;
11170     U8 del;
11171     U8 complement;
11172 #ifdef PERL_MAD
11173     char *modstart;
11174 #endif
11175
11176     PERL_ARGS_ASSERT_SCAN_TRANS;
11177
11178     pl_yylval.ival = OP_NULL;
11179
11180     s = scan_str(start,!!PL_madskills,FALSE);
11181     if (!s)
11182         Perl_croak(aTHX_ "Transliteration pattern not terminated");
11183
11184     if (s[-1] == PL_multi_open)
11185         s--;
11186 #ifdef PERL_MAD
11187     if (PL_madskills) {
11188         CURMAD('q', PL_thisopen);
11189         CURMAD('_', PL_thiswhite);
11190         CURMAD('E', PL_thisstuff);
11191         CURMAD('Q', PL_thisclose);
11192         PL_realtokenstart = s - SvPVX(PL_linestr);
11193     }
11194 #endif
11195
11196     s = scan_str(s,!!PL_madskills,FALSE);
11197     if (!s) {
11198         if (PL_lex_stuff) {
11199             SvREFCNT_dec(PL_lex_stuff);
11200             PL_lex_stuff = NULL;
11201         }
11202         Perl_croak(aTHX_ "Transliteration replacement not terminated");
11203     }
11204     if (PL_madskills) {
11205         CURMAD('z', PL_thisopen);
11206         CURMAD('R', PL_thisstuff);
11207         CURMAD('Z', PL_thisclose);
11208     }
11209
11210     complement = del = squash = 0;
11211 #ifdef PERL_MAD
11212     modstart = s;
11213 #endif
11214     while (1) {
11215         switch (*s) {
11216         case 'c':
11217             complement = OPpTRANS_COMPLEMENT;
11218             break;
11219         case 'd':
11220             del = OPpTRANS_DELETE;
11221             break;
11222         case 's':
11223             squash = OPpTRANS_SQUASH;
11224             break;
11225         default:
11226             goto no_more;
11227         }
11228         s++;
11229     }
11230   no_more:
11231
11232     tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
11233     o = newPVOP(OP_TRANS, 0, (char*)tbl);
11234     o->op_private &= ~OPpTRANS_ALL;
11235     o->op_private |= del|squash|complement|
11236       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11237       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
11238
11239     PL_lex_op = o;
11240     pl_yylval.ival = OP_TRANS;
11241
11242 #ifdef PERL_MAD
11243     if (PL_madskills) {
11244         if (modstart != s)
11245             curmad('m', newSVpvn(modstart, s - modstart));
11246         append_madprops(PL_thismad, o, 0);
11247         PL_thismad = 0;
11248     }
11249 #endif
11250
11251     return s;
11252 }
11253
11254 STATIC char *
11255 S_scan_heredoc(pTHX_ register char *s)
11256 {
11257     dVAR;
11258     SV *herewas;
11259     I32 op_type = OP_SCALAR;
11260     I32 len;
11261     SV *tmpstr;
11262     char term;
11263     const char *found_newline;
11264     register char *d;
11265     register char *e;
11266     char *peek;
11267     const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
11268 #ifdef PERL_MAD
11269     I32 stuffstart = s - SvPVX(PL_linestr);
11270     char *tstart;
11271
11272     PL_realtokenstart = -1;
11273 #endif
11274
11275     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11276
11277     s += 2;
11278     d = PL_tokenbuf;
11279     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11280     if (!outer)
11281         *d++ = '\n';
11282     peek = s;
11283     while (SPACE_OR_TAB(*peek))
11284         peek++;
11285     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11286         s = peek;
11287         term = *s++;
11288         s = delimcpy(d, e, s, PL_bufend, term, &len);
11289         d += len;
11290         if (s < PL_bufend)
11291             s++;
11292     }
11293     else {
11294         if (*s == '\\')
11295             s++, term = '\'';
11296         else
11297             term = '"';
11298         if (!isALNUM_lazy_if(s,UTF))
11299             deprecate_old("bare << to mean <<\"\"");
11300         for (; isALNUM_lazy_if(s,UTF); s++) {
11301             if (d < e)
11302                 *d++ = *s;
11303         }
11304     }
11305     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11306         Perl_croak(aTHX_ "Delimiter for here document is too long");
11307     *d++ = '\n';
11308     *d = '\0';
11309     len = d - PL_tokenbuf;
11310
11311 #ifdef PERL_MAD
11312     if (PL_madskills) {
11313         tstart = PL_tokenbuf + !outer;
11314         PL_thisclose = newSVpvn(tstart, len - !outer);
11315         tstart = SvPVX(PL_linestr) + stuffstart;
11316         PL_thisopen = newSVpvn(tstart, s - tstart);
11317         stuffstart = s - SvPVX(PL_linestr);
11318     }
11319 #endif
11320 #ifndef PERL_STRICT_CR
11321     d = strchr(s, '\r');
11322     if (d) {
11323         char * const olds = s;
11324         s = d;
11325         while (s < PL_bufend) {
11326             if (*s == '\r') {
11327                 *d++ = '\n';
11328                 if (*++s == '\n')
11329                     s++;
11330             }
11331             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11332                 *d++ = *s++;
11333                 s++;
11334             }
11335             else
11336                 *d++ = *s++;
11337         }
11338         *d = '\0';
11339         PL_bufend = d;
11340         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11341         s = olds;
11342     }
11343 #endif
11344 #ifdef PERL_MAD
11345     found_newline = 0;
11346 #endif
11347     if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
11348         herewas = newSVpvn(s,PL_bufend-s);
11349     }
11350     else {
11351 #ifdef PERL_MAD
11352         herewas = newSVpvn(s-1,found_newline-s+1);
11353 #else
11354         s--;
11355         herewas = newSVpvn(s,found_newline-s);
11356 #endif
11357     }
11358 #ifdef PERL_MAD
11359     if (PL_madskills) {
11360         tstart = SvPVX(PL_linestr) + stuffstart;
11361         if (PL_thisstuff)
11362             sv_catpvn(PL_thisstuff, tstart, s - tstart);
11363         else
11364             PL_thisstuff = newSVpvn(tstart, s - tstart);
11365     }
11366 #endif
11367     s += SvCUR(herewas);
11368
11369 #ifdef PERL_MAD
11370     stuffstart = s - SvPVX(PL_linestr);
11371
11372     if (found_newline)
11373         s--;
11374 #endif
11375
11376     tmpstr = newSV_type(SVt_PVIV);
11377     SvGROW(tmpstr, 80);
11378     if (term == '\'') {
11379         op_type = OP_CONST;
11380         SvIV_set(tmpstr, -1);
11381     }
11382     else if (term == '`') {
11383         op_type = OP_BACKTICK;
11384         SvIV_set(tmpstr, '\\');
11385     }
11386
11387     CLINE;
11388     PL_multi_start = CopLINE(PL_curcop);
11389     PL_multi_open = PL_multi_close = '<';
11390     term = *PL_tokenbuf;
11391     if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
11392         char * const bufptr = PL_sublex_info.super_bufptr;
11393         char * const bufend = PL_sublex_info.super_bufend;
11394         char * const olds = s - SvCUR(herewas);
11395         s = strchr(bufptr, '\n');
11396         if (!s)
11397             s = bufend;
11398         d = s;
11399         while (s < bufend &&
11400           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11401             if (*s++ == '\n')
11402                 CopLINE_inc(PL_curcop);
11403         }
11404         if (s >= bufend) {
11405             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11406             missingterm(PL_tokenbuf);
11407         }
11408         sv_setpvn(herewas,bufptr,d-bufptr+1);
11409         sv_setpvn(tmpstr,d+1,s-d);
11410         s += len - 1;
11411         sv_catpvn(herewas,s,bufend-s);
11412         Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
11413
11414         s = olds;
11415         goto retval;
11416     }
11417     else if (!outer) {
11418         d = s;
11419         while (s < PL_bufend &&
11420           (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11421             if (*s++ == '\n')
11422                 CopLINE_inc(PL_curcop);
11423         }
11424         if (s >= PL_bufend) {
11425             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11426             missingterm(PL_tokenbuf);
11427         }
11428         sv_setpvn(tmpstr,d+1,s-d);
11429 #ifdef PERL_MAD
11430         if (PL_madskills) {
11431             if (PL_thisstuff)
11432                 sv_catpvn(PL_thisstuff, d + 1, s - d);
11433             else
11434                 PL_thisstuff = newSVpvn(d + 1, s - d);
11435             stuffstart = s - SvPVX(PL_linestr);
11436         }
11437 #endif
11438         s += len - 1;
11439         CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
11440
11441         sv_catpvn(herewas,s,PL_bufend-s);
11442         sv_setsv(PL_linestr,herewas);
11443         PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
11444         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11445         PL_last_lop = PL_last_uni = NULL;
11446     }
11447     else
11448         sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
11449     while (s >= PL_bufend) {    /* multiple line string? */
11450 #ifdef PERL_MAD
11451         if (PL_madskills) {
11452             tstart = SvPVX(PL_linestr) + stuffstart;
11453             if (PL_thisstuff)
11454                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11455             else
11456                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11457         }
11458 #endif
11459         if (!outer ||
11460          !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11461             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11462             missingterm(PL_tokenbuf);
11463         }
11464 #ifdef PERL_MAD
11465         stuffstart = s - SvPVX(PL_linestr);
11466 #endif
11467         CopLINE_inc(PL_curcop);
11468         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11469         PL_last_lop = PL_last_uni = NULL;
11470 #ifndef PERL_STRICT_CR
11471         if (PL_bufend - PL_linestart >= 2) {
11472             if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
11473                 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11474             {
11475                 PL_bufend[-2] = '\n';
11476                 PL_bufend--;
11477                 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11478             }
11479             else if (PL_bufend[-1] == '\r')
11480                 PL_bufend[-1] = '\n';
11481         }
11482         else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11483             PL_bufend[-1] = '\n';
11484 #endif
11485         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11486             update_debugger_info(PL_linestr, NULL, 0);
11487         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
11488             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
11489             *(SvPVX(PL_linestr) + off ) = ' ';
11490             sv_catsv(PL_linestr,herewas);
11491             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11492             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
11493         }
11494         else {
11495             s = PL_bufend;
11496             sv_catsv(tmpstr,PL_linestr);
11497         }
11498     }
11499     s++;
11500 retval:
11501     PL_multi_end = CopLINE(PL_curcop);
11502     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11503         SvPV_shrink_to_cur(tmpstr);
11504     }
11505     SvREFCNT_dec(herewas);
11506     if (!IN_BYTES) {
11507         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11508             SvUTF8_on(tmpstr);
11509         else if (PL_encoding)
11510             sv_recode_to_utf8(tmpstr, PL_encoding);
11511     }
11512     PL_lex_stuff = tmpstr;
11513     pl_yylval.ival = op_type;
11514     return s;
11515 }
11516
11517 /* scan_inputsymbol
11518    takes: current position in input buffer
11519    returns: new position in input buffer
11520    side-effects: pl_yylval and lex_op are set.
11521
11522    This code handles:
11523
11524    <>           read from ARGV
11525    <FH>         read from filehandle
11526    <pkg::FH>    read from package qualified filehandle
11527    <pkg'FH>     read from package qualified filehandle
11528    <$fh>        read from filehandle in $fh
11529    <*.h>        filename glob
11530
11531 */
11532
11533 STATIC char *
11534 S_scan_inputsymbol(pTHX_ char *start)
11535 {
11536     dVAR;
11537     register char *s = start;           /* current position in buffer */
11538     char *end;
11539     I32 len;
11540     char *d = PL_tokenbuf;                                      /* start of temp holding space */
11541     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
11542
11543     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11544
11545     end = strchr(s, '\n');
11546     if (!end)
11547         end = PL_bufend;
11548     s = delimcpy(d, e, s + 1, end, '>', &len);  /* extract until > */
11549
11550     /* die if we didn't have space for the contents of the <>,
11551        or if it didn't end, or if we see a newline
11552     */
11553
11554     if (len >= (I32)sizeof PL_tokenbuf)
11555         Perl_croak(aTHX_ "Excessively long <> operator");
11556     if (s >= end)
11557         Perl_croak(aTHX_ "Unterminated <> operator");
11558
11559     s++;
11560
11561     /* check for <$fh>
11562        Remember, only scalar variables are interpreted as filehandles by
11563        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11564        treated as a glob() call.
11565        This code makes use of the fact that except for the $ at the front,
11566        a scalar variable and a filehandle look the same.
11567     */
11568     if (*d == '$' && d[1]) d++;
11569
11570     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11571     while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
11572         d++;
11573
11574     /* If we've tried to read what we allow filehandles to look like, and
11575        there's still text left, then it must be a glob() and not a getline.
11576        Use scan_str to pull out the stuff between the <> and treat it
11577        as nothing more than a string.
11578     */
11579
11580     if (d - PL_tokenbuf != len) {
11581         pl_yylval.ival = OP_GLOB;
11582         s = scan_str(start,!!PL_madskills,FALSE);
11583         if (!s)
11584            Perl_croak(aTHX_ "Glob not terminated");
11585         return s;
11586     }
11587     else {
11588         bool readline_overriden = FALSE;
11589         GV *gv_readline;
11590         GV **gvp;
11591         /* we're in a filehandle read situation */
11592         d = PL_tokenbuf;
11593
11594         /* turn <> into <ARGV> */
11595         if (!len)
11596             Copy("ARGV",d,5,char);
11597
11598         /* Check whether readline() is overriden */
11599         gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
11600         if ((gv_readline
11601                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
11602                 ||
11603                 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
11604                  && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
11605                 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
11606             readline_overriden = TRUE;
11607
11608         /* if <$fh>, create the ops to turn the variable into a
11609            filehandle
11610         */
11611         if (*d == '$') {
11612             /* try to find it in the pad for this block, otherwise find
11613                add symbol table ops
11614             */
11615             const PADOFFSET tmp = pad_findmy(d);
11616             if (tmp != NOT_IN_PAD) {
11617                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11618                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11619                     HEK * const stashname = HvNAME_HEK(stash);
11620                     SV * const sym = sv_2mortal(newSVhek(stashname));
11621                     sv_catpvs(sym, "::");
11622                     sv_catpv(sym, d+1);
11623                     d = SvPVX(sym);
11624                     goto intro_sym;
11625                 }
11626                 else {
11627                     OP * const o = newOP(OP_PADSV, 0);
11628                     o->op_targ = tmp;
11629                     PL_lex_op = readline_overriden
11630                         ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11631                                 append_elem(OP_LIST, o,
11632                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11633                         : (OP*)newUNOP(OP_READLINE, 0, o);
11634                 }
11635             }
11636             else {
11637                 GV *gv;
11638                 ++d;
11639 intro_sym:
11640                 gv = gv_fetchpv(d,
11641                                 (PL_in_eval
11642                                  ? (GV_ADDMULTI | GV_ADDINEVAL)
11643                                  : GV_ADDMULTI),
11644                                 SVt_PV);
11645                 PL_lex_op = readline_overriden
11646                     ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11647                             append_elem(OP_LIST,
11648                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11649                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11650                     : (OP*)newUNOP(OP_READLINE, 0,
11651                             newUNOP(OP_RV2SV, 0,
11652                                 newGVOP(OP_GV, 0, gv)));
11653             }
11654             if (!readline_overriden)
11655                 PL_lex_op->op_flags |= OPf_SPECIAL;
11656             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11657             pl_yylval.ival = OP_NULL;
11658         }
11659
11660         /* If it's none of the above, it must be a literal filehandle
11661            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11662         else {
11663             GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
11664             PL_lex_op = readline_overriden
11665                 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
11666                         append_elem(OP_LIST,
11667                             newGVOP(OP_GV, 0, gv),
11668                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11669                 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
11670             pl_yylval.ival = OP_NULL;
11671         }
11672     }
11673
11674     return s;
11675 }
11676
11677
11678 /* scan_str
11679    takes: start position in buffer
11680           keep_quoted preserve \ on the embedded delimiter(s)
11681           keep_delims preserve the delimiters around the string
11682    returns: position to continue reading from buffer
11683    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11684         updates the read buffer.
11685
11686    This subroutine pulls a string out of the input.  It is called for:
11687         q               single quotes           q(literal text)
11688         '               single quotes           'literal text'
11689         qq              double quotes           qq(interpolate $here please)
11690         "               double quotes           "interpolate $here please"
11691         qx              backticks               qx(/bin/ls -l)
11692         `               backticks               `/bin/ls -l`
11693         qw              quote words             @EXPORT_OK = qw( func() $spam )
11694         m//             regexp match            m/this/
11695         s///            regexp substitute       s/this/that/
11696         tr///           string transliterate    tr/this/that/
11697         y///            string transliterate    y/this/that/
11698         ($*@)           sub prototypes          sub foo ($)
11699         (stuff)         sub attr parameters     sub foo : attr(stuff)
11700         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11701
11702    In most of these cases (all but <>, patterns and transliterate)
11703    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11704    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11705    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11706    calls scan_str().
11707
11708    It skips whitespace before the string starts, and treats the first
11709    character as the delimiter.  If the delimiter is one of ([{< then
11710    the corresponding "close" character )]}> is used as the closing
11711    delimiter.  It allows quoting of delimiters, and if the string has
11712    balanced delimiters ([{<>}]) it allows nesting.
11713
11714    On success, the SV with the resulting string is put into lex_stuff or,
11715    if that is already non-NULL, into lex_repl. The second case occurs only
11716    when parsing the RHS of the special constructs s/// and tr/// (y///).
11717    For convenience, the terminating delimiter character is stuffed into
11718    SvIVX of the SV.
11719 */
11720
11721 STATIC char *
11722 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
11723 {
11724     dVAR;
11725     SV *sv;                             /* scalar value: string */
11726     const char *tmps;                   /* temp string, used for delimiter matching */
11727     register char *s = start;           /* current position in the buffer */
11728     register char term;                 /* terminating character */
11729     register char *to;                  /* current position in the sv's data */
11730     I32 brackets = 1;                   /* bracket nesting level */
11731     bool has_utf8 = FALSE;              /* is there any utf8 content? */
11732     I32 termcode;                       /* terminating char. code */
11733     U8 termstr[UTF8_MAXBYTES];          /* terminating string */
11734     STRLEN termlen;                     /* length of terminating string */
11735     int last_off = 0;                   /* last position for nesting bracket */
11736 #ifdef PERL_MAD
11737     int stuffstart;
11738     char *tstart;
11739 #endif
11740
11741     PERL_ARGS_ASSERT_SCAN_STR;
11742
11743     /* skip space before the delimiter */
11744     if (isSPACE(*s)) {
11745         s = PEEKSPACE(s);
11746     }
11747
11748 #ifdef PERL_MAD
11749     if (PL_realtokenstart >= 0) {
11750         stuffstart = PL_realtokenstart;
11751         PL_realtokenstart = -1;
11752     }
11753     else
11754         stuffstart = start - SvPVX(PL_linestr);
11755 #endif
11756     /* mark where we are, in case we need to report errors */
11757     CLINE;
11758
11759     /* after skipping whitespace, the next character is the terminator */
11760     term = *s;
11761     if (!UTF) {
11762         termcode = termstr[0] = term;
11763         termlen = 1;
11764     }
11765     else {
11766         termcode = utf8_to_uvchr((U8*)s, &termlen);
11767         Copy(s, termstr, termlen, U8);
11768         if (!UTF8_IS_INVARIANT(term))
11769             has_utf8 = TRUE;
11770     }
11771
11772     /* mark where we are */
11773     PL_multi_start = CopLINE(PL_curcop);
11774     PL_multi_open = term;
11775
11776     /* find corresponding closing delimiter */
11777     if (term && (tmps = strchr("([{< )]}> )]}>",term)))
11778         termcode = termstr[0] = term = tmps[5];
11779
11780     PL_multi_close = term;
11781
11782     /* create a new SV to hold the contents.  79 is the SV's initial length.
11783        What a random number. */
11784     sv = newSV_type(SVt_PVIV);
11785     SvGROW(sv, 80);
11786     SvIV_set(sv, termcode);
11787     (void)SvPOK_only(sv);               /* validate pointer */
11788
11789     /* move past delimiter and try to read a complete string */
11790     if (keep_delims)
11791         sv_catpvn(sv, s, termlen);
11792     s += termlen;
11793 #ifdef PERL_MAD
11794     tstart = SvPVX(PL_linestr) + stuffstart;
11795     if (!PL_thisopen && !keep_delims) {
11796         PL_thisopen = newSVpvn(tstart, s - tstart);
11797         stuffstart = s - SvPVX(PL_linestr);
11798     }
11799 #endif
11800     for (;;) {
11801         if (PL_encoding && !UTF) {
11802             bool cont = TRUE;
11803
11804             while (cont) {
11805                 int offset = s - SvPVX_const(PL_linestr);
11806                 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
11807                                            &offset, (char*)termstr, termlen);
11808                 const char * const ns = SvPVX_const(PL_linestr) + offset;
11809                 char * const svlast = SvEND(sv) - 1;
11810
11811                 for (; s < ns; s++) {
11812                     if (*s == '\n' && !PL_rsfp)
11813                         CopLINE_inc(PL_curcop);
11814                 }
11815                 if (!found)
11816                     goto read_more_line;
11817                 else {
11818                     /* handle quoted delimiters */
11819                     if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
11820                         const char *t;
11821                         for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
11822                             t--;
11823                         if ((svlast-1 - t) % 2) {
11824                             if (!keep_quoted) {
11825                                 *(svlast-1) = term;
11826                                 *svlast = '\0';
11827                                 SvCUR_set(sv, SvCUR(sv) - 1);
11828                             }
11829                             continue;
11830                         }
11831                     }
11832                     if (PL_multi_open == PL_multi_close) {
11833                         cont = FALSE;
11834                     }
11835                     else {
11836                         const char *t;
11837                         char *w;
11838                         for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
11839                             /* At here, all closes are "was quoted" one,
11840                                so we don't check PL_multi_close. */
11841                             if (*t == '\\') {
11842                                 if (!keep_quoted && *(t+1) == PL_multi_open)
11843                                     t++;
11844                                 else
11845                                     *w++ = *t++;
11846                             }
11847                             else if (*t == PL_multi_open)
11848                                 brackets++;
11849
11850                             *w = *t;
11851                         }
11852                         if (w < t) {
11853                             *w++ = term;
11854                             *w = '\0';
11855                             SvCUR_set(sv, w - SvPVX_const(sv));
11856                         }
11857                         last_off = w - SvPVX(sv);
11858                         if (--brackets <= 0)
11859                             cont = FALSE;
11860                     }
11861                 }
11862             }
11863             if (!keep_delims) {
11864                 SvCUR_set(sv, SvCUR(sv) - 1);
11865                 *SvEND(sv) = '\0';
11866             }
11867             break;
11868         }
11869
11870         /* extend sv if need be */
11871         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11872         /* set 'to' to the next character in the sv's string */
11873         to = SvPVX(sv)+SvCUR(sv);
11874
11875         /* if open delimiter is the close delimiter read unbridle */
11876         if (PL_multi_open == PL_multi_close) {
11877             for (; s < PL_bufend; s++,to++) {
11878                 /* embedded newlines increment the current line number */
11879                 if (*s == '\n' && !PL_rsfp)
11880                     CopLINE_inc(PL_curcop);
11881                 /* handle quoted delimiters */
11882                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11883                     if (!keep_quoted && s[1] == term)
11884                         s++;
11885                 /* any other quotes are simply copied straight through */
11886                     else
11887                         *to++ = *s++;
11888                 }
11889                 /* terminate when run out of buffer (the for() condition), or
11890                    have found the terminator */
11891                 else if (*s == term) {
11892                     if (termlen == 1)
11893                         break;
11894                     if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
11895                         break;
11896                 }
11897                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11898                     has_utf8 = TRUE;
11899                 *to = *s;
11900             }
11901         }
11902
11903         /* if the terminator isn't the same as the start character (e.g.,
11904            matched brackets), we have to allow more in the quoting, and
11905            be prepared for nested brackets.
11906         */
11907         else {
11908             /* read until we run out of string, or we find the terminator */
11909             for (; s < PL_bufend; s++,to++) {
11910                 /* embedded newlines increment the line count */
11911                 if (*s == '\n' && !PL_rsfp)
11912                     CopLINE_inc(PL_curcop);
11913                 /* backslashes can escape the open or closing characters */
11914                 if (*s == '\\' && s+1 < PL_bufend) {
11915                     if (!keep_quoted &&
11916                         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
11917                         s++;
11918                     else
11919                         *to++ = *s++;
11920                 }
11921                 /* allow nested opens and closes */
11922                 else if (*s == PL_multi_close && --brackets <= 0)
11923                     break;
11924                 else if (*s == PL_multi_open)
11925                     brackets++;
11926                 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11927                     has_utf8 = TRUE;
11928                 *to = *s;
11929             }
11930         }
11931         /* terminate the copied string and update the sv's end-of-string */
11932         *to = '\0';
11933         SvCUR_set(sv, to - SvPVX_const(sv));
11934
11935         /*
11936          * this next chunk reads more into the buffer if we're not done yet
11937          */
11938
11939         if (s < PL_bufend)
11940             break;              /* handle case where we are done yet :-) */
11941
11942 #ifndef PERL_STRICT_CR
11943         if (to - SvPVX_const(sv) >= 2) {
11944             if ((to[-2] == '\r' && to[-1] == '\n') ||
11945                 (to[-2] == '\n' && to[-1] == '\r'))
11946             {
11947                 to[-2] = '\n';
11948                 to--;
11949                 SvCUR_set(sv, to - SvPVX_const(sv));
11950             }
11951             else if (to[-1] == '\r')
11952                 to[-1] = '\n';
11953         }
11954         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11955             to[-1] = '\n';
11956 #endif
11957
11958      read_more_line:
11959         /* if we're out of file, or a read fails, bail and reset the current
11960            line marker so we can report where the unterminated string began
11961         */
11962 #ifdef PERL_MAD
11963         if (PL_madskills) {
11964             char * const tstart = SvPVX(PL_linestr) + stuffstart;
11965             if (PL_thisstuff)
11966                 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
11967             else
11968                 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
11969         }
11970 #endif
11971         if (!PL_rsfp ||
11972          !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
11973             sv_free(sv);
11974             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11975             return NULL;
11976         }
11977 #ifdef PERL_MAD
11978         stuffstart = 0;
11979 #endif
11980         /* we read a line, so increment our line counter */
11981         CopLINE_inc(PL_curcop);
11982
11983         /* update debugger info */
11984         if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
11985             update_debugger_info(PL_linestr, NULL, 0);
11986
11987         /* having changed the buffer, we must update PL_bufend */
11988         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11989         PL_last_lop = PL_last_uni = NULL;
11990     }
11991
11992     /* at this point, we have successfully read the delimited string */
11993
11994     if (!PL_encoding || UTF) {
11995 #ifdef PERL_MAD
11996         if (PL_madskills) {
11997             char * const tstart = SvPVX(PL_linestr) + stuffstart;
11998             const int len = s - tstart;
11999             if (PL_thisstuff)
12000                 sv_catpvn(PL_thisstuff, tstart, len);
12001             else
12002                 PL_thisstuff = newSVpvn(tstart, len);
12003             if (!PL_thisclose && !keep_delims)
12004                 PL_thisclose = newSVpvn(s,termlen);
12005         }
12006 #endif
12007
12008         if (keep_delims)
12009             sv_catpvn(sv, s, termlen);
12010         s += termlen;
12011     }
12012 #ifdef PERL_MAD
12013     else {
12014         if (PL_madskills) {
12015             char * const tstart = SvPVX(PL_linestr) + stuffstart;
12016             const int len = s - tstart - termlen;
12017             if (PL_thisstuff)
12018                 sv_catpvn(PL_thisstuff, tstart, len);
12019             else
12020                 PL_thisstuff = newSVpvn(tstart, len);
12021             if (!PL_thisclose && !keep_delims)
12022                 PL_thisclose = newSVpvn(s - termlen,termlen);
12023         }
12024     }
12025 #endif
12026     if (has_utf8 || PL_encoding)
12027         SvUTF8_on(sv);
12028
12029     PL_multi_end = CopLINE(PL_curcop);
12030
12031     /* if we allocated too much space, give some back */
12032     if (SvCUR(sv) + 5 < SvLEN(sv)) {
12033         SvLEN_set(sv, SvCUR(sv) + 1);
12034         SvPV_renew(sv, SvLEN(sv));
12035     }
12036
12037     /* decide whether this is the first or second quoted string we've read
12038        for this op
12039     */
12040
12041     if (PL_lex_stuff)
12042         PL_lex_repl = sv;
12043     else
12044         PL_lex_stuff = sv;
12045     return s;
12046 }
12047
12048 /*
12049   scan_num
12050   takes: pointer to position in buffer
12051   returns: pointer to new position in buffer
12052   side-effects: builds ops for the constant in pl_yylval.op
12053
12054   Read a number in any of the formats that Perl accepts:
12055
12056   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12057   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12058   0b[01](_?[01])*
12059   0[0-7](_?[0-7])*
12060   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12061
12062   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12063   thing it reads.
12064
12065   If it reads a number without a decimal point or an exponent, it will
12066   try converting the number to an integer and see if it can do so
12067   without loss of precision.
12068 */
12069
12070 char *
12071 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12072 {
12073     dVAR;
12074     register const char *s = start;     /* current position in buffer */
12075     register char *d;                   /* destination in temp buffer */
12076     register char *e;                   /* end of temp buffer */
12077     NV nv;                              /* number read, as a double */
12078     SV *sv = NULL;                      /* place to put the converted number */
12079     bool floatit;                       /* boolean: int or float? */
12080     const char *lastub = NULL;          /* position of last underbar */
12081     static char const number_too_long[] = "Number too long";
12082
12083     PERL_ARGS_ASSERT_SCAN_NUM;
12084
12085     /* We use the first character to decide what type of number this is */
12086
12087     switch (*s) {
12088     default:
12089       Perl_croak(aTHX_ "panic: scan_num");
12090
12091     /* if it starts with a 0, it could be an octal number, a decimal in
12092        0.13 disguise, or a hexadecimal number, or a binary number. */
12093     case '0':
12094         {
12095           /* variables:
12096              u          holds the "number so far"
12097              shift      the power of 2 of the base
12098                         (hex == 4, octal == 3, binary == 1)
12099              overflowed was the number more than we can hold?
12100
12101              Shift is used when we add a digit.  It also serves as an "are
12102              we in octal/hex/binary?" indicator to disallow hex characters
12103              when in octal mode.
12104            */
12105             NV n = 0.0;
12106             UV u = 0;
12107             I32 shift;
12108             bool overflowed = FALSE;
12109             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12110             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12111             static const char* const bases[5] =
12112               { "", "binary", "", "octal", "hexadecimal" };
12113             static const char* const Bases[5] =
12114               { "", "Binary", "", "Octal", "Hexadecimal" };
12115             static const char* const maxima[5] =
12116               { "",
12117                 "0b11111111111111111111111111111111",
12118                 "",
12119                 "037777777777",
12120                 "0xffffffff" };
12121             const char *base, *Base, *max;
12122
12123             /* check for hex */
12124             if (s[1] == 'x') {
12125                 shift = 4;
12126                 s += 2;
12127                 just_zero = FALSE;
12128             } else if (s[1] == 'b') {
12129                 shift = 1;
12130                 s += 2;
12131                 just_zero = FALSE;
12132             }
12133             /* check for a decimal in disguise */
12134             else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
12135                 goto decimal;
12136             /* so it must be octal */
12137             else {
12138                 shift = 3;
12139                 s++;
12140             }
12141
12142             if (*s == '_') {
12143                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12144                                "Misplaced _ in number");
12145                lastub = s++;
12146             }
12147
12148             base = bases[shift];
12149             Base = Bases[shift];
12150             max  = maxima[shift];
12151
12152             /* read the rest of the number */
12153             for (;;) {
12154                 /* x is used in the overflow test,
12155                    b is the digit we're adding on. */
12156                 UV x, b;
12157
12158                 switch (*s) {
12159
12160                 /* if we don't mention it, we're done */
12161                 default:
12162                     goto out;
12163
12164                 /* _ are ignored -- but warned about if consecutive */
12165                 case '_':
12166                     if (lastub && s == lastub + 1)
12167                         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12168                                        "Misplaced _ in number");
12169                     lastub = s++;
12170                     break;
12171
12172                 /* 8 and 9 are not octal */
12173                 case '8': case '9':
12174                     if (shift == 3)
12175                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12176                     /* FALL THROUGH */
12177
12178                 /* octal digits */
12179                 case '2': case '3': case '4':
12180                 case '5': case '6': case '7':
12181                     if (shift == 1)
12182                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12183                     /* FALL THROUGH */
12184
12185                 case '0': case '1':
12186                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12187                     goto digit;
12188
12189                 /* hex digits */
12190                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12191                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12192                     /* make sure they said 0x */
12193                     if (shift != 4)
12194                         goto out;
12195                     b = (*s++ & 7) + 9;
12196
12197                     /* Prepare to put the digit we have onto the end
12198                        of the number so far.  We check for overflows.
12199                     */
12200
12201                   digit:
12202                     just_zero = FALSE;
12203                     if (!overflowed) {
12204                         x = u << shift; /* make room for the digit */
12205
12206                         if ((x >> shift) != u
12207                             && !(PL_hints & HINT_NEW_BINARY)) {
12208                             overflowed = TRUE;
12209                             n = (NV) u;
12210                             if (ckWARN_d(WARN_OVERFLOW))
12211                                 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12212                                             "Integer overflow in %s number",
12213                                             base);
12214                         } else
12215                             u = x | b;          /* add the digit to the end */
12216                     }
12217                     if (overflowed) {
12218                         n *= nvshift[shift];
12219                         /* If an NV has not enough bits in its
12220                          * mantissa to represent an UV this summing of
12221                          * small low-order numbers is a waste of time
12222                          * (because the NV cannot preserve the
12223                          * low-order bits anyway): we could just
12224                          * remember when did we overflow and in the
12225                          * end just multiply n by the right
12226                          * amount. */
12227                         n += (NV) b;
12228                     }
12229                     break;
12230                 }
12231             }
12232
12233           /* if we get here, we had success: make a scalar value from
12234              the number.
12235           */
12236           out:
12237
12238             /* final misplaced underbar check */
12239             if (s[-1] == '_') {
12240                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12241             }
12242
12243             sv = newSV(0);
12244             if (overflowed) {
12245                 if (n > 4294967295.0)
12246                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12247                                    "%s number > %s non-portable",
12248                                    Base, max);
12249                 sv_setnv(sv, n);
12250             }
12251             else {
12252 #if UVSIZE > 4
12253                 if (u > 0xffffffff)
12254                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12255                                    "%s number > %s non-portable",
12256                                    Base, max);
12257 #endif
12258                 sv_setuv(sv, u);
12259             }
12260             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12261                 sv = new_constant(start, s - start, "integer",
12262                                   sv, NULL, NULL, 0);
12263             else if (PL_hints & HINT_NEW_BINARY)
12264                 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
12265         }
12266         break;
12267
12268     /*
12269       handle decimal numbers.
12270       we're also sent here when we read a 0 as the first digit
12271     */
12272     case '1': case '2': case '3': case '4': case '5':
12273     case '6': case '7': case '8': case '9': case '.':
12274       decimal:
12275         d = PL_tokenbuf;
12276         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12277         floatit = FALSE;
12278
12279         /* read next group of digits and _ and copy into d */
12280         while (isDIGIT(*s) || *s == '_') {
12281             /* skip underscores, checking for misplaced ones
12282                if -w is on
12283             */
12284             if (*s == '_') {
12285                 if (lastub && s == lastub + 1)
12286                     Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12287                                    "Misplaced _ in number");
12288                 lastub = s++;
12289             }
12290             else {
12291                 /* check for end of fixed-length buffer */
12292                 if (d >= e)
12293                     Perl_croak(aTHX_ number_too_long);
12294                 /* if we're ok, copy the character */
12295                 *d++ = *s++;
12296             }
12297         }
12298
12299         /* final misplaced underbar check */
12300         if (lastub && s == lastub + 1) {
12301             Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
12302         }
12303
12304         /* read a decimal portion if there is one.  avoid
12305            3..5 being interpreted as the number 3. followed
12306            by .5
12307         */
12308         if (*s == '.' && s[1] != '.') {
12309             floatit = TRUE;
12310             *d++ = *s++;
12311
12312             if (*s == '_') {
12313                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12314                                "Misplaced _ in number");
12315                 lastub = s;
12316             }
12317
12318             /* copy, ignoring underbars, until we run out of digits.
12319             */
12320             for (; isDIGIT(*s) || *s == '_'; s++) {
12321                 /* fixed length buffer check */
12322                 if (d >= e)
12323                     Perl_croak(aTHX_ number_too_long);
12324                 if (*s == '_') {
12325                    if (lastub && s == lastub + 1)
12326                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12327                                       "Misplaced _ in number");
12328                    lastub = s;
12329                 }
12330                 else
12331                     *d++ = *s;
12332             }
12333             /* fractional part ending in underbar? */
12334             if (s[-1] == '_') {
12335                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12336                                "Misplaced _ in number");
12337             }
12338             if (*s == '.' && isDIGIT(s[1])) {
12339                 /* oops, it's really a v-string, but without the "v" */
12340                 s = start;
12341                 goto vstring;
12342             }
12343         }
12344
12345         /* read exponent part, if present */
12346         if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
12347             floatit = TRUE;
12348             s++;
12349
12350             /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
12351             *d++ = 'e';         /* At least some Mach atof()s don't grok 'E' */
12352
12353             /* stray preinitial _ */
12354             if (*s == '_') {
12355                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12356                                "Misplaced _ in number");
12357                 lastub = s++;
12358             }
12359
12360             /* allow positive or negative exponent */
12361             if (*s == '+' || *s == '-')
12362                 *d++ = *s++;
12363
12364             /* stray initial _ */
12365             if (*s == '_') {
12366                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12367                                "Misplaced _ in number");
12368                 lastub = s++;
12369             }
12370
12371             /* read digits of exponent */
12372             while (isDIGIT(*s) || *s == '_') {
12373                 if (isDIGIT(*s)) {
12374                     if (d >= e)
12375                         Perl_croak(aTHX_ number_too_long);
12376                     *d++ = *s++;
12377                 }
12378                 else {
12379                    if (((lastub && s == lastub + 1) ||
12380                         (!isDIGIT(s[1]) && s[1] != '_')))
12381                        Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12382                                       "Misplaced _ in number");
12383                    lastub = s++;
12384                 }
12385             }
12386         }
12387
12388
12389         /* make an sv from the string */
12390         sv = newSV(0);
12391
12392         /*
12393            We try to do an integer conversion first if no characters
12394            indicating "float" have been found.
12395          */
12396
12397         if (!floatit) {
12398             UV uv;
12399             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12400
12401             if (flags == IS_NUMBER_IN_UV) {
12402               if (uv <= IV_MAX)
12403                 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
12404               else
12405                 sv_setuv(sv, uv);
12406             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12407               if (uv <= (UV) IV_MIN)
12408                 sv_setiv(sv, -(IV)uv);
12409               else
12410                 floatit = TRUE;
12411             } else
12412               floatit = TRUE;
12413         }
12414         if (floatit) {
12415             /* terminate the string */
12416             *d = '\0';
12417             nv = Atof(PL_tokenbuf);
12418             sv_setnv(sv, nv);
12419         }
12420
12421         if ( floatit
12422              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12423             const char *const key = floatit ? "float" : "integer";
12424             const STRLEN keylen = floatit ? 5 : 7;
12425             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12426                                 key, keylen, sv, NULL, NULL, 0);
12427         }
12428         break;
12429
12430     /* if it starts with a v, it could be a v-string */
12431     case 'v':
12432 vstring:
12433                 sv = newSV(5); /* preallocate storage space */
12434                 s = scan_vstring(s, PL_bufend, sv);
12435         break;
12436     }
12437
12438     /* make the op for the constant and return */
12439
12440     if (sv)
12441         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12442     else
12443         lvalp->opval = NULL;
12444
12445     return (char *)s;
12446 }
12447
12448 STATIC char *
12449 S_scan_formline(pTHX_ register char *s)
12450 {
12451     dVAR;
12452     register char *eol;
12453     register char *t;
12454     SV * const stuff = newSVpvs("");
12455     bool needargs = FALSE;
12456     bool eofmt = FALSE;
12457 #ifdef PERL_MAD
12458     char *tokenstart = s;
12459     SV* savewhite = NULL;
12460
12461     if (PL_madskills) {
12462         savewhite = PL_thiswhite;
12463         PL_thiswhite = 0;
12464     }
12465 #endif
12466
12467     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12468
12469     while (!needargs) {
12470         if (*s == '.') {
12471             t = s+1;
12472 #ifdef PERL_STRICT_CR
12473             while (SPACE_OR_TAB(*t))
12474                 t++;
12475 #else
12476             while (SPACE_OR_TAB(*t) || *t == '\r')
12477                 t++;
12478 #endif
12479             if (*t == '\n' || t == PL_bufend) {
12480                 eofmt = TRUE;
12481                 break;
12482             }
12483         }
12484         if (PL_in_eval && !PL_rsfp) {
12485             eol = (char *) memchr(s,'\n',PL_bufend-s);
12486             if (!eol++)
12487                 eol = PL_bufend;
12488         }
12489         else
12490             eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12491         if (*s != '#') {
12492             for (t = s; t < eol; t++) {
12493                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12494                     needargs = FALSE;
12495                     goto enough;        /* ~~ must be first line in formline */
12496                 }
12497                 if (*t == '@' || *t == '^')
12498                     needargs = TRUE;
12499             }
12500             if (eol > s) {
12501                 sv_catpvn(stuff, s, eol-s);
12502 #ifndef PERL_STRICT_CR
12503                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12504                     char *end = SvPVX(stuff) + SvCUR(stuff);
12505                     end[-2] = '\n';
12506                     end[-1] = '\0';
12507                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12508                 }
12509 #endif
12510             }
12511             else
12512               break;
12513         }
12514         s = (char*)eol;
12515         if (PL_rsfp) {
12516 #ifdef PERL_MAD
12517             if (PL_madskills) {
12518                 if (PL_thistoken)
12519                     sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
12520                 else
12521                     PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
12522             }
12523 #endif
12524             s = filter_gets(PL_linestr, PL_rsfp, 0);
12525 #ifdef PERL_MAD
12526             tokenstart = PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12527 #else
12528             PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
12529 #endif
12530             PL_bufend = PL_bufptr + SvCUR(PL_linestr);
12531             PL_last_lop = PL_last_uni = NULL;
12532             if (!s) {
12533                 s = PL_bufptr;
12534                 break;
12535             }
12536         }
12537         incline(s);
12538     }
12539   enough:
12540     if (SvCUR(stuff)) {
12541         PL_expect = XTERM;
12542         if (needargs) {
12543             PL_lex_state = LEX_NORMAL;
12544             start_force(PL_curforce);
12545             NEXTVAL_NEXTTOKE.ival = 0;
12546             force_next(',');
12547         }
12548         else
12549             PL_lex_state = LEX_FORMLINE;
12550         if (!IN_BYTES) {
12551             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12552                 SvUTF8_on(stuff);
12553             else if (PL_encoding)
12554                 sv_recode_to_utf8(stuff, PL_encoding);
12555         }
12556         start_force(PL_curforce);
12557         NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
12558         force_next(THING);
12559         start_force(PL_curforce);
12560         NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
12561         force_next(LSTOP);
12562     }
12563     else {
12564         SvREFCNT_dec(stuff);
12565         if (eofmt)
12566             PL_lex_formbrack = 0;
12567         PL_bufptr = s;
12568     }
12569 #ifdef PERL_MAD
12570     if (PL_madskills) {
12571         if (PL_thistoken)
12572             sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
12573         else
12574             PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
12575         PL_thiswhite = savewhite;
12576     }
12577 #endif
12578     return s;
12579 }
12580
12581 I32
12582 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12583 {
12584     dVAR;
12585     const I32 oldsavestack_ix = PL_savestack_ix;
12586     CV* const outsidecv = PL_compcv;
12587
12588     if (PL_compcv) {
12589         assert(SvTYPE(PL_compcv) == SVt_PVCV);
12590     }
12591     SAVEI32(PL_subline);
12592     save_item(PL_subname);
12593     SAVESPTR(PL_compcv);
12594
12595     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12596     CvFLAGS(PL_compcv) |= flags;
12597
12598     PL_subline = CopLINE(PL_curcop);
12599     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12600     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12601     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12602
12603     return oldsavestack_ix;
12604 }
12605
12606 #ifdef __SC__
12607 #pragma segment Perl_yylex
12608 #endif
12609 static int
12610 S_yywarn(pTHX_ const char *const s)
12611 {
12612     dVAR;
12613
12614     PERL_ARGS_ASSERT_YYWARN;
12615
12616     PL_in_eval |= EVAL_WARNONLY;
12617     yyerror(s);
12618     PL_in_eval &= ~EVAL_WARNONLY;
12619     return 0;
12620 }
12621
12622 int
12623 Perl_yyerror(pTHX_ const char *const s)
12624 {
12625     dVAR;
12626     const char *where = NULL;
12627     const char *context = NULL;
12628     int contlen = -1;
12629     SV *msg;
12630     int yychar  = PL_parser->yychar;
12631
12632     PERL_ARGS_ASSERT_YYERROR;
12633
12634     if (!yychar || (yychar == ';' && !PL_rsfp))
12635         where = "at EOF";
12636     else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
12637       PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
12638       PL_oldbufptr != PL_bufptr) {
12639         /*
12640                 Only for NetWare:
12641                 The code below is removed for NetWare because it abends/crashes on NetWare
12642                 when the script has error such as not having the closing quotes like:
12643                     if ($var eq "value)
12644                 Checking of white spaces is anyway done in NetWare code.
12645         */
12646 #ifndef NETWARE
12647         while (isSPACE(*PL_oldoldbufptr))
12648             PL_oldoldbufptr++;
12649 #endif
12650         context = PL_oldoldbufptr;
12651         contlen = PL_bufptr - PL_oldoldbufptr;
12652     }
12653     else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
12654       PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
12655         /*
12656                 Only for NetWare:
12657                 The code below is removed for NetWare because it abends/crashes on NetWare
12658                 when the script has error such as not having the closing quotes like:
12659                     if ($var eq "value)
12660                 Checking of white spaces is anyway done in NetWare code.
12661         */
12662 #ifndef NETWARE
12663         while (isSPACE(*PL_oldbufptr))
12664             PL_oldbufptr++;
12665 #endif
12666         context = PL_oldbufptr;
12667         contlen = PL_bufptr - PL_oldbufptr;
12668     }
12669     else if (yychar > 255)
12670         where = "next token ???";
12671     else if (yychar == -2) { /* YYEMPTY */
12672         if (PL_lex_state == LEX_NORMAL ||
12673            (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
12674             where = "at end of line";
12675         else if (PL_lex_inpat)
12676             where = "within pattern";
12677         else
12678             where = "within string";
12679     }
12680     else {
12681         SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
12682         if (yychar < 32)
12683             Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12684         else if (isPRINT_LC(yychar)) {
12685             const char string = yychar;
12686             sv_catpvn(where_sv, &string, 1);
12687         }
12688         else
12689             Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12690         where = SvPVX_const(where_sv);
12691     }
12692     msg = sv_2mortal(newSVpv(s, 0));
12693     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
12694         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
12695     if (context)
12696         Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
12697     else
12698         Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
12699     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
12700         Perl_sv_catpvf(aTHX_ msg,
12701         "  (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
12702                 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12703         PL_multi_end = 0;
12704     }
12705     if (PL_in_eval & EVAL_WARNONLY) {
12706         if (ckWARN_d(WARN_SYNTAX))
12707             Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
12708     }
12709     else
12710         qerror(msg);
12711     if (PL_error_count >= 10) {
12712         if (PL_in_eval && SvCUR(ERRSV))
12713             Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
12714                        SVfARG(ERRSV), OutCopFILE(PL_curcop));
12715         else
12716             Perl_croak(aTHX_ "%s has too many errors.\n",
12717             OutCopFILE(PL_curcop));
12718     }
12719     PL_in_my = 0;
12720     PL_in_my_stash = NULL;
12721     return 0;
12722 }
12723 #ifdef __SC__
12724 #pragma segment Main
12725 #endif
12726
12727 STATIC char*
12728 S_swallow_bom(pTHX_ U8 *s)
12729 {
12730     dVAR;
12731     const STRLEN slen = SvCUR(PL_linestr);
12732
12733     PERL_ARGS_ASSERT_SWALLOW_BOM;
12734
12735     switch (s[0]) {
12736     case 0xFF:
12737         if (s[1] == 0xFE) {
12738             /* UTF-16 little-endian? (or UTF32-LE?) */
12739             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12740                 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
12741 #ifndef PERL_NO_UTF16_FILTER
12742             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
12743             s += 2;
12744         utf16le:
12745             if (PL_bufend > (char*)s) {
12746                 U8 *news;
12747                 I32 newlen;
12748
12749                 filter_add(utf16rev_textfilter, NULL);
12750                 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12751                 utf16_to_utf8_reversed(s, news,
12752                                        PL_bufend - (char*)s - 1,
12753                                        &newlen);
12754                 sv_setpvn(PL_linestr, (const char*)news, newlen);
12755 #ifdef PERL_MAD
12756                 s = (U8*)SvPVX(PL_linestr);
12757                 Copy(news, s, newlen, U8);
12758                 s[newlen] = '\0';
12759 #endif
12760                 Safefree(news);
12761                 SvUTF8_on(PL_linestr);
12762                 s = (U8*)SvPVX(PL_linestr);
12763 #ifdef PERL_MAD
12764                 /* FIXME - is this a general bug fix?  */
12765                 s[newlen] = '\0';
12766 #endif
12767                 PL_bufend = SvPVX(PL_linestr) + newlen;
12768             }
12769 #else
12770             Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
12771 #endif
12772         }
12773         break;
12774     case 0xFE:
12775         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12776 #ifndef PERL_NO_UTF16_FILTER
12777             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12778             s += 2;
12779         utf16be:
12780             if (PL_bufend > (char *)s) {
12781                 U8 *news;
12782                 I32 newlen;
12783
12784                 filter_add(utf16_textfilter, NULL);
12785                 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
12786                 utf16_to_utf8(s, news,
12787                               PL_bufend - (char*)s,
12788                               &newlen);
12789                 sv_setpvn(PL_linestr, (const char*)news, newlen);
12790                 Safefree(news);
12791                 SvUTF8_on(PL_linestr);
12792                 s = (U8*)SvPVX(PL_linestr);
12793                 PL_bufend = SvPVX(PL_linestr) + newlen;
12794             }
12795 #else
12796             Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
12797 #endif
12798         }
12799         break;
12800     case 0xEF:
12801         if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
12802             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12803             s += 3;                      /* UTF-8 */
12804         }
12805         break;
12806     case 0:
12807         if (slen > 3) {
12808              if (s[1] == 0) {
12809                   if (s[2] == 0xFE && s[3] == 0xFF) {
12810                        /* UTF-32 big-endian */
12811                        Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
12812                   }
12813              }
12814              else if (s[2] == 0 && s[3] != 0) {
12815                   /* Leading bytes
12816                    * 00 xx 00 xx
12817                    * are a good indicator of UTF-16BE. */
12818                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12819                   goto utf16be;
12820              }
12821         }
12822 #ifdef EBCDIC
12823     case 0xDD:
12824         if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
12825             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12826             s += 4;                      /* UTF-8 */
12827         }
12828         break;
12829 #endif
12830
12831     default:
12832          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12833                   /* Leading bytes
12834                    * xx 00 xx 00
12835                    * are a good indicator of UTF-16LE. */
12836               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12837               goto utf16le;
12838          }
12839     }
12840     return (char*)s;
12841 }
12842
12843
12844 #ifndef PERL_NO_UTF16_FILTER
12845 static I32
12846 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12847 {
12848     dVAR;
12849     const STRLEN old = SvCUR(sv);
12850     const I32 count = FILTER_READ(idx+1, sv, maxlen);
12851     DEBUG_P(PerlIO_printf(Perl_debug_log,
12852                           "utf16_textfilter(%p): %d %d (%d)\n",
12853                           FPTR2DPTR(void *, utf16_textfilter),
12854                           idx, maxlen, (int) count));
12855     if (count) {
12856         U8* tmps;
12857         I32 newlen;
12858         Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12859         Copy(SvPVX_const(sv), tmps, old, char);
12860         utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12861                       SvCUR(sv) - old, &newlen);
12862         sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12863     }
12864     DEBUG_P({sv_dump(sv);});
12865     return SvCUR(sv);
12866 }
12867
12868 static I32
12869 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12870 {
12871     dVAR;
12872     const STRLEN old = SvCUR(sv);
12873     const I32 count = FILTER_READ(idx+1, sv, maxlen);
12874     DEBUG_P(PerlIO_printf(Perl_debug_log,
12875                           "utf16rev_textfilter(%p): %d %d (%d)\n",
12876                           FPTR2DPTR(void *, utf16rev_textfilter),
12877                           idx, maxlen, (int) count));
12878     if (count) {
12879         U8* tmps;
12880         I32 newlen;
12881         Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
12882         Copy(SvPVX_const(sv), tmps, old, char);
12883         utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
12884                       SvCUR(sv) - old, &newlen);
12885         sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
12886     }
12887     DEBUG_P({ sv_dump(sv); });
12888     return count;
12889 }
12890 #endif
12891
12892 /*
12893 Returns a pointer to the next character after the parsed
12894 vstring, as well as updating the passed in sv.
12895
12896 Function must be called like
12897
12898         sv = newSV(5);
12899         s = scan_vstring(s,e,sv);
12900
12901 where s and e are the start and end of the string.
12902 The sv should already be large enough to store the vstring
12903 passed in, for performance reasons.
12904
12905 */
12906
12907 char *
12908 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12909 {
12910     dVAR;
12911     const char *pos = s;
12912     const char *start = s;
12913
12914     PERL_ARGS_ASSERT_SCAN_VSTRING;
12915
12916     if (*pos == 'v') pos++;  /* get past 'v' */
12917     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12918         pos++;
12919     if ( *pos != '.') {
12920         /* this may not be a v-string if followed by => */
12921         const char *next = pos;
12922         while (next < e && isSPACE(*next))
12923             ++next;
12924         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12925             /* return string not v-string */
12926             sv_setpvn(sv,(char *)s,pos-s);
12927             return (char *)pos;
12928         }
12929     }
12930
12931     if (!isALPHA(*pos)) {
12932         U8 tmpbuf[UTF8_MAXBYTES+1];
12933
12934         if (*s == 'v')
12935             s++;  /* get past 'v' */
12936
12937         sv_setpvs(sv, "");
12938
12939         for (;;) {
12940             /* this is atoi() that tolerates underscores */
12941             U8 *tmpend;
12942             UV rev = 0;
12943             const char *end = pos;
12944             UV mult = 1;
12945             while (--end >= s) {
12946                 if (*end != '_') {
12947                     const UV orev = rev;
12948                     rev += (*end - '0') * mult;
12949                     mult *= 10;
12950                     if (orev > rev && ckWARN_d(WARN_OVERFLOW))
12951                         Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
12952                                     "Integer overflow in decimal number");
12953                 }
12954             }
12955 #ifdef EBCDIC
12956             if (rev > 0x7FFFFFFF)
12957                  Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
12958 #endif
12959             /* Append native character for the rev point */
12960             tmpend = uvchr_to_utf8(tmpbuf, rev);
12961             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12962             if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
12963                  SvUTF8_on(sv);
12964             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12965                  s = ++pos;
12966             else {
12967                  s = pos;
12968                  break;
12969             }
12970             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12971                  pos++;
12972         }
12973         SvPOK_on(sv);
12974         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12975         SvRMAGICAL_on(sv);
12976     }
12977     return (char *)s;
12978 }
12979
12980 /*
12981  * Local variables:
12982  * c-indentation-style: bsd
12983  * c-basic-offset: 4
12984  * indent-tabs-mode: t
12985  * End:
12986  *
12987  * ex: set ts=8 sts=4 sw=4 noet:
12988  */